[PATCH][BIONIC] UBUNTU: SAUCE: (noup) Update spl to 0.7.3-1ubuntu1, zfs to 0.7.3-1ubuntu1
Colin King
colin.king@canonical.com
Wed Nov 22 15:53:22 UTC 2017
From: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
spl/META | 2 +-
spl/Makefile.am | 1 +
spl/Makefile.in | 47 +-
spl/aclocal.m4 | 46 +-
spl/cmd/Makefile.am | 12 +-
spl/cmd/Makefile.in | 366 +-
spl/cmd/splat/Makefile.am | 11 +
spl/cmd/splat/Makefile.in | 703 ++
spl/cmd/{ => splat}/splat.c | 0
spl/cmd/{ => splat}/splat.h | 0
spl/cmd/splslab/Makefile.am | 2 +
spl/cmd/splslab/Makefile.in | 563 ++
spl/cmd/splslab/splslab.py | 202 +
spl/config/deb.am | 14 +-
spl/config/depcomp | 6 +-
spl/config/spl-build.m4 | 120 +-
spl/config/spl-meta.m4 | 8 +
spl/configure | 386 +-
spl/configure.ac | 2 +
spl/dkms.conf | 2 +-
spl/include/Makefile.in | 4 +-
spl/include/fs/Makefile.in | 4 +-
spl/include/linux/Makefile.in | 4 +-
spl/include/linux/file_compat.h | 41 +
spl/include/linux/rwsem_compat.h | 9 +-
spl/include/rpc/Makefile.in | 4 +-
spl/include/sharefs/Makefile.in | 4 +-
spl/include/sys/Makefile.am | 1 +
spl/include/sys/Makefile.in | 6 +-
spl/include/sys/byteorder.h | 23 +
spl/include/sys/condvar.h | 2 +
spl/include/sys/cred.h | 12 -
spl/include/sys/debug.h | 5 +
spl/include/sys/dkio.h | 18 +-
spl/include/sys/dkioc_free_util.h | 58 +
spl/include/sys/fm/Makefile.in | 4 +-
spl/include/sys/fs/Makefile.in | 4 +-
spl/include/sys/isa_defs.h | 30 +
spl/include/sys/kobj.h | 4 +-
spl/include/sys/mutex.h | 60 +-
spl/include/sys/param.h | 4 +-
spl/include/sys/random.h | 7 +-
spl/include/sys/rwlock.h | 205 +-
spl/include/sys/sunldi.h | 19 -
spl/include/sys/sysevent/Makefile.in | 4 +-
spl/include/sys/sysmacros.h | 8 +
spl/include/sys/taskq.h | 113 +-
spl/include/sys/time.h | 6 +
spl/include/sys/tsd.h | 1 +
spl/include/sys/user.h | 4 +-
spl/include/sys/vmsystm.h | 8 -
spl/include/util/Makefile.in | 4 +-
spl/include/vm/Makefile.in | 4 +-
spl/lib/Makefile.in | 4 +-
spl/man/Makefile.in | 4 +-
spl/man/man1/Makefile.in | 4 +-
spl/man/man5/Makefile.in | 4 +-
spl/man/man5/spl-module-parameters.5 | 43 +
spl/module/spl/spl-condvar.c | 65 +-
spl/module/spl/spl-err.c | 14 +
spl/module/spl/spl-generic.c | 312 +-
spl/module/spl/spl-kmem-cache.c | 58 +-
spl/module/spl/spl-kmem.c | 4 +-
spl/module/spl/spl-kobj.c | 11 +-
spl/module/spl/spl-kstat.c | 29 +
spl/module/spl/spl-proc.c | 257 +-
spl/module/spl/spl-rwlock.c | 36 +-
spl/module/spl/spl-taskq.c | 377 +-
spl/module/spl/spl-tsd.c | 27 +
spl/module/spl/spl-vmem.c | 34 +-
spl/module/spl/spl-vnode.c | 38 +-
spl/module/splat/splat-atomic.c | 4 +-
spl/module/splat/splat-condvar.c | 20 +-
spl/module/splat/splat-cred.c | 12 +-
spl/module/splat/splat-ctl.c | 88 +-
spl/module/splat/splat-generic.c | 24 +-
spl/module/splat/splat-internal.h | 78 +-
spl/module/splat/splat-kmem.c | 72 +-
spl/module/splat/splat-kobj.c | 8 +-
spl/module/splat/splat-linux.c | 4 +-
spl/module/splat/splat-list.c | 28 +-
spl/module/splat/splat-mutex.c | 46 +-
spl/module/splat/splat-random.c | 4 +-
spl/module/splat/splat-rwlock.c | 58 +-
spl/module/splat/splat-taskq.c | 75 +-
spl/module/splat/splat-thread.c | 12 +-
spl/module/splat/splat-time.c | 8 +-
spl/module/splat/splat-vnode.c | 24 +-
spl/module/splat/splat-zlib.c | 4 +-
spl/rpm/Makefile.in | 4 +-
spl/rpm/generic/Makefile.in | 4 +-
spl/rpm/generic/spl-dkms.spec.in | 17 +-
spl/rpm/generic/spl-kmod.spec.in | 86 +-
spl/rpm/generic/spl.spec.in | 83 +-
spl/rpm/redhat/Makefile.in | 4 +-
spl/rpm/redhat/spl-dkms.spec.in | 17 +-
spl/rpm/redhat/spl-kmod.spec.in | 4 +-
spl/rpm/redhat/spl.spec.in | 83 +-
spl/scripts/Makefile.in | 4 +-
spl/scripts/check.sh | 2 +-
spl/spl_config.h.in | 12 +-
zfs/META | 4 +-
zfs/Makefile.am | 42 +-
zfs/Makefile.in | 157 +-
zfs/README.markdown | 17 +-
zfs/aclocal.m4 | 67 +-
zfs/config/Rules.am | 1 +
zfs/config/always-arch.m4 | 22 +
zfs/config/deb.am | 32 +-
zfs/config/kernel-acl.m4 | 33 +-
zfs/config/kernel-bdi.m4 | 3 +-
zfs/config/kernel-bio_set_dev.m4 | 22 +
zfs/config/kernel-blk-queue-bdi.m4 | 20 +
zfs/config/kernel-check-disk-size-change.m4 | 18 -
zfs/config/kernel-file-dentry.m4 | 20 +
zfs/config/kernel-fpu.m4 | 18 +
zfs/config/kernel-generic_io_acct.m4 | 35 +-
zfs/config/kernel-inode-set-flags.m4 | 18 +
zfs/config/kernel-kobj-name-len.m4 | 21 -
zfs/config/kernel-kuid-helpers.m4 | 22 +
zfs/config/kernel-mkdir-umode-t.m4 | 2 +-
zfs/config/kernel-mod-param.m4 | 30 +
zfs/config/kernel-objtool.m4 | 19 +
zfs/config/kernel-super-userns.m4 | 21 +
zfs/config/kernel-tmpfile.m4 | 23 +
zfs/config/kernel-vm_node_stat.m4 | 22 +
zfs/config/kernel-xattr-handler.m4 | 4 +-
zfs/config/kernel.m4 | 139 +-
zfs/config/toolchain-simd.m4 | 361 +
zfs/config/user-arch.m4 | 19 -
zfs/config/user-libattr.m4 | 12 +
zfs/config/user-libblkid.m4 | 112 +-
zfs/config/user-libtirpc.m4 | 30 +
zfs/config/user-libudev.m4 | 19 +
zfs/config/user-libuuid.m4 | 4 +-
zfs/config/user-zlib.m4 | 6 +-
zfs/config/user.m4 | 21 +-
zfs/config/zfs-build.m4 | 54 +-
zfs/config/zfs-meta.m4 | 8 +
zfs/configure | 7535 +++++++++++++-------
zfs/configure.ac | 10 +-
zfs/dkms.conf | 6 +-
zfs/include/Makefile.in | 35 +-
zfs/include/libuutil.h | 6 -
zfs/include/libzfs.h | 108 +-
zfs/include/libzfs_core.h | 38 +-
zfs/include/libzfs_impl.h | 14 +-
zfs/include/linux/Makefile.am | 5 +-
zfs/include/linux/Makefile.in | 45 +-
zfs/include/linux/blkdev_compat.h | 49 +-
zfs/include/linux/mod_compat.h | 39 +
zfs/include/linux/simd_aarch64.h | 62 +
zfs/include/linux/simd_x86.h | 609 ++
zfs/include/linux/vfs_compat.h | 94 +-
zfs/include/sys/Makefile.am | 18 +-
zfs/include/sys/Makefile.in | 89 +-
zfs/include/sys/abd.h | 179 +
zfs/include/sys/arc.h | 121 +-
zfs/include/sys/arc_impl.h | 55 +-
zfs/include/sys/avl.h | 7 +
zfs/include/sys/blkptr.h | 1 +
zfs/include/sys/bqueue.h | 54 +
zfs/include/sys/crypto/Makefile.am | 20 +
zfs/include/sys/crypto/Makefile.in | 791 ++
zfs/include/sys/crypto/api.h | 425 ++
zfs/include/sys/crypto/common.h | 583 ++
zfs/include/sys/crypto/icp.h | 47 +
zfs/include/sys/dbuf.h | 26 +-
zfs/include/sys/ddt.h | 5 +-
zfs/include/sys/dmu.h | 105 +-
zfs/include/sys/dmu_impl.h | 9 +-
zfs/include/sys/dmu_objset.h | 44 +-
zfs/include/sys/dmu_send.h | 23 +-
zfs/include/sys/dmu_traverse.h | 2 +
zfs/include/sys/dmu_tx.h | 29 +-
zfs/include/sys/dmu_zfetch.h | 44 +-
zfs/include/sys/dnode.h | 232 +-
zfs/include/sys/dsl_dataset.h | 78 +-
zfs/include/sys/dsl_deleg.h | 4 +
zfs/include/sys/dsl_dir.h | 14 +-
zfs/include/sys/dsl_pool.h | 6 +-
zfs/include/sys/dsl_prop.h | 14 +-
zfs/include/sys/dsl_scan.h | 13 +-
zfs/include/sys/edonr.h | 98 +
zfs/include/sys/efi_partition.h | 237 +-
zfs/include/sys/fm/Makefile.in | 35 +-
zfs/include/sys/fm/fs/Makefile.in | 35 +-
zfs/include/sys/fm/fs/zfs.h | 24 +-
zfs/include/sys/fm/protocol.h | 2 +
zfs/include/sys/fm/util.h | 1 +
zfs/include/sys/fs/Makefile.in | 35 +-
zfs/include/sys/fs/zfs.h | 230 +-
zfs/include/sys/metaslab.h | 32 +-
zfs/include/sys/metaslab_impl.h | 249 +-
zfs/include/sys/mmp.h | 64 +
zfs/include/sys/multilist.h | 6 +-
zfs/include/sys/pathname.h | 70 +
zfs/include/sys/policy.h | 60 +
zfs/include/sys/refcount.h | 17 +-
zfs/include/sys/sa.h | 2 -
zfs/include/sys/sa_impl.h | 2 +-
zfs/include/sys/sdt.h | 20 +-
zfs/include/sys/sha2.h | 155 +
zfs/include/sys/skein.h | 183 +
zfs/include/sys/spa.h | 97 +-
zfs/include/sys/spa_checksum.h | 72 +
zfs/include/sys/spa_impl.h | 29 +-
zfs/include/sys/sysevent.h | 36 +
zfs/include/sys/sysevent/Makefile.am | 19 +
zfs/include/sys/sysevent/Makefile.in | 790 ++
zfs/include/sys/sysevent/dev.h | 261 +
zfs/include/sys/sysevent/eventdefs.h | 127 +
zfs/include/sys/trace_acl.h | 44 +-
zfs/include/sys/trace_arc.h | 138 +-
zfs/include/sys/trace_common.h | 112 +
zfs/include/sys/trace_dbgmsg.h | 68 +-
zfs/include/sys/trace_dbuf.h | 77 +-
zfs/include/sys/trace_dmu.h | 60 +-
zfs/include/sys/trace_dnode.h | 5 +-
zfs/include/sys/trace_multilist.h | 5 +-
zfs/include/sys/trace_txg.h | 5 +-
zfs/include/sys/trace_zil.h | 13 +-
zfs/include/sys/trace_zio.h | 89 +
zfs/include/sys/trace_zrlock.h | 9 +-
zfs/include/sys/txg.h | 7 +-
zfs/include/sys/txg_impl.h | 2 +-
zfs/include/sys/uberblock.h | 3 +-
zfs/include/sys/uberblock_impl.h | 8 +
zfs/include/sys/vdev.h | 20 +-
zfs/include/sys/vdev_file.h | 3 +
zfs/include/sys/vdev_impl.h | 44 +-
zfs/include/sys/vdev_raidz.h | 64 +
zfs/include/sys/vdev_raidz_impl.h | 370 +
zfs/include/sys/xvattr.h | 4 +-
zfs/include/sys/zap.h | 68 +-
zfs/include/sys/zap_impl.h | 19 +-
zfs/include/sys/zfs_acl.h | 6 +-
zfs/include/sys/zfs_context.h | 89 +-
zfs/include/sys/zfs_ctldir.h | 11 +-
zfs/include/sys/zfs_debug.h | 19 +-
zfs/include/sys/zfs_dir.h | 3 +-
zfs/include/sys/zfs_fuid.h | 19 +-
zfs/include/sys/zfs_ioctl.h | 89 +-
zfs/include/sys/zfs_ratelimit.h | 38 +
zfs/include/sys/zfs_rlock.h | 10 +-
zfs/include/sys/zfs_vfsops.h | 108 +-
zfs/include/sys/zfs_vnops.h | 7 +-
zfs/include/sys/zfs_znode.h | 47 +-
zfs/include/sys/zil.h | 27 +-
zfs/include/sys/zil_impl.h | 20 +-
zfs/include/sys/zio.h | 171 +-
zfs/include/sys/zio_checksum.h | 87 +-
zfs/include/sys/zio_compress.h | 47 +-
zfs/include/sys/zio_impl.h | 54 +-
zfs/include/sys/zio_priority.h | 39 +
zfs/include/sys/zpl.h | 7 +-
zfs/include/sys/zrlock.h | 9 +-
zfs/include/sys/zvol.h | 9 +
zfs/include/zfeature_common.h | 25 +-
zfs/include/zfs_deleg.h | 4 +
zfs/include/zfs_fletcher.h | 119 +-
zfs/include/zfs_namecheck.h | 3 +-
zfs/include/zpios-ctl.h | 7 +-
zfs/include/zpios-internal.h | 5 +-
zfs/module/Makefile.in | 16 +-
zfs/module/icp/Makefile.in | 93 +
zfs/module/icp/algs/aes/aes_impl.c | 1618 +++++
zfs/module/icp/algs/aes/aes_modes.c | 135 +
zfs/module/icp/algs/edonr/edonr.c | 751 ++
zfs/module/icp/algs/edonr/edonr_byteorder.h | 216 +
zfs/module/icp/algs/modes/cbc.c | 305 +
zfs/module/icp/algs/modes/ccm.c | 920 +++
zfs/module/icp/algs/modes/ctr.c | 238 +
zfs/module/icp/algs/modes/ecb.c | 143 +
zfs/module/icp/algs/modes/gcm.c | 748 ++
zfs/module/icp/algs/modes/modes.c | 159 +
zfs/module/icp/algs/sha1/sha1.c | 838 +++
zfs/module/icp/algs/sha2/sha2.c | 960 +++
zfs/module/icp/algs/skein/skein.c | 921 +++
zfs/module/icp/algs/skein/skein_block.c | 790 ++
zfs/module/icp/algs/skein/skein_impl.h | 289 +
zfs/module/icp/algs/skein/skein_iv.c | 185 +
zfs/module/icp/algs/skein/skein_port.h | 128 +
zfs/module/icp/api/kcf_cipher.c | 935 +++
zfs/module/icp/api/kcf_ctxops.c | 151 +
zfs/module/icp/api/kcf_digest.c | 494 ++
zfs/module/icp/api/kcf_mac.c | 648 ++
zfs/module/icp/api/kcf_miscapi.c | 127 +
zfs/module/icp/asm-x86_64/aes/aes_amd64.S | 906 +++
zfs/module/icp/asm-x86_64/aes/aes_intel.S | 749 ++
zfs/module/icp/asm-x86_64/aes/aeskey.c | 580 ++
zfs/module/icp/asm-x86_64/aes/aesopt.h | 770 ++
zfs/module/icp/asm-x86_64/aes/aestab.h | 165 +
zfs/module/icp/asm-x86_64/aes/aestab2.h | 594 ++
zfs/module/icp/asm-x86_64/modes/gcm_intel.S | 254 +
zfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S | 1353 ++++
zfs/module/icp/asm-x86_64/sha2/sha256_impl.S | 2063 ++++++
zfs/module/icp/asm-x86_64/sha2/sha512_impl.S | 2088 ++++++
zfs/module/icp/core/kcf_callprov.c | 1567 ++++
zfs/module/icp/core/kcf_mech_tabs.c | 791 ++
zfs/module/icp/core/kcf_prov_lib.c | 229 +
zfs/module/icp/core/kcf_prov_tabs.c | 645 ++
zfs/module/icp/core/kcf_sched.c | 1782 +++++
zfs/module/icp/illumos-crypto.c | 156 +
zfs/module/icp/include/aes/aes_impl.h | 170 +
zfs/module/icp/include/modes/modes.h | 385 +
zfs/module/icp/include/sha1/sha1.h | 61 +
zfs/module/icp/include/sha1/sha1_consts.h | 65 +
zfs/module/icp/include/sha1/sha1_impl.h | 73 +
zfs/module/icp/include/sha2/sha2_consts.h | 219 +
zfs/module/icp/include/sha2/sha2_impl.h | 64 +
zfs/module/icp/include/sys/asm_linkage.h | 46 +
zfs/module/icp/include/sys/bitmap.h | 183 +
zfs/module/icp/include/sys/crypto/elfsign.h | 137 +
zfs/module/icp/include/sys/crypto/impl.h | 1363 ++++
zfs/module/icp/include/sys/crypto/ioctl.h | 1483 ++++
zfs/module/icp/include/sys/crypto/ioctladmin.h | 136 +
zfs/module/icp/include/sys/crypto/ops_impl.h | 630 ++
zfs/module/icp/include/sys/crypto/sched_impl.h | 531 ++
zfs/module/icp/include/sys/crypto/spi.h | 726 ++
zfs/module/icp/include/sys/ia32/asm_linkage.h | 307 +
zfs/module/icp/include/sys/ia32/stack.h | 160 +
zfs/module/icp/include/sys/ia32/trap.h | 107 +
zfs/module/icp/include/sys/modctl.h | 477 ++
zfs/module/icp/include/sys/modhash.h | 147 +
zfs/module/icp/include/sys/modhash_impl.h | 108 +
zfs/module/icp/include/sys/stack.h | 36 +
zfs/module/icp/include/sys/trap.h | 36 +
zfs/module/icp/io/aes.c | 1439 ++++
zfs/module/icp/io/edonr_mod.c | 63 +
zfs/module/icp/io/sha1_mod.c | 1239 ++++
zfs/module/icp/io/sha2_mod.c | 1409 ++++
zfs/module/icp/io/skein_mod.c | 735 ++
zfs/module/icp/os/modconf.c | 171 +
zfs/module/icp/os/modhash.c | 925 +++
zfs/module/icp/spi/kcf_spi.c | 924 +++
zfs/module/nvpair/nvpair.c | 11 +-
zfs/module/nvpair/nvpair_alloc_fixed.c | 2 +-
zfs/module/unicode/u8_textprep.c | 31 +-
zfs/module/zcommon/Makefile.in | 7 +
zfs/module/zcommon/zfs_comutil.c | 76 +
zfs/module/zcommon/zfs_deleg.c | 10 +-
zfs/module/zcommon/zfs_fletcher.c | 742 +-
zfs/module/zcommon/zfs_fletcher_aarch64_neon.c | 215 +
zfs/module/zcommon/zfs_fletcher_avx512.c | 171 +
zfs/module/zcommon/zfs_fletcher_intel.c | 173 +
zfs/module/zcommon/zfs_fletcher_sse.c | 231 +
zfs/module/zcommon/zfs_fletcher_superscalar.c | 162 +
zfs/module/zcommon/zfs_fletcher_superscalar4.c | 228 +
zfs/module/zcommon/zfs_namecheck.c | 146 +-
zfs/module/zcommon/zfs_prop.c | 85 +-
zfs/module/zcommon/zfs_uio.c | 15 +-
zfs/module/zcommon/zpool_prop.c | 11 +-
zfs/module/zfs/Makefile.in | 19 +
zfs/module/zfs/abd.c | 1543 ++++
zfs/module/zfs/arc.c | 4484 +++++++-----
zfs/module/zfs/blkptr.c | 35 +-
zfs/module/zfs/bpobj.c | 8 +-
zfs/module/zfs/bptree.c | 7 +-
zfs/module/zfs/bqueue.c | 112 +
zfs/module/zfs/dbuf.c | 1684 +++--
zfs/module/zfs/dbuf_stats.c | 5 +-
zfs/module/zfs/ddt.c | 53 +-
zfs/module/zfs/dmu.c | 693 +-
zfs/module/zfs/dmu_diff.c | 6 +-
zfs/module/zfs/dmu_object.c | 292 +-
zfs/module/zfs/dmu_objset.c | 742 +-
zfs/module/zfs/dmu_send.c | 2274 ++++--
zfs/module/zfs/dmu_traverse.c | 139 +-
zfs/module/zfs/dmu_tx.c | 971 +--
zfs/module/zfs/dmu_zfetch.c | 772 +-
zfs/module/zfs/dnode.c | 617 +-
zfs/module/zfs/dnode_sync.c | 80 +-
zfs/module/zfs/dsl_bookmark.c | 26 +-
zfs/module/zfs/dsl_dataset.c | 695 +-
zfs/module/zfs/dsl_deadlist.c | 61 +-
zfs/module/zfs/dsl_deleg.c | 15 +-
zfs/module/zfs/dsl_destroy.c | 44 +-
zfs/module/zfs/dsl_dir.c | 115 +-
zfs/module/zfs/dsl_pool.c | 83 +-
zfs/module/zfs/dsl_prop.c | 260 +-
zfs/module/zfs/dsl_scan.c | 411 +-
zfs/module/zfs/dsl_userhold.c | 8 +-
zfs/module/zfs/edonr_zfs.c | 115 +
zfs/module/zfs/fm.c | 152 +-
zfs/module/zfs/gzip.c | 19 +-
zfs/module/zfs/lz4.c | 35 +-
zfs/module/zfs/metaslab.c | 1598 ++++-
zfs/module/zfs/mmp.c | 524 ++
zfs/module/zfs/multilist.c | 62 +-
zfs/module/zfs/pathname.c | 89 +
zfs/module/zfs/policy.c | 303 +
zfs/module/zfs/qat_compress.c | 585 ++
zfs/module/zfs/qat_compress.h | 48 +
zfs/module/zfs/range_tree.c | 19 +-
zfs/module/zfs/refcount.c | 92 +-
zfs/module/zfs/rrwlock.c | 4 +-
zfs/module/zfs/sa.c | 145 +-
zfs/module/zfs/sha256.c | 146 +-
zfs/module/zfs/skein_zfs.c | 101 +
zfs/module/zfs/spa.c | 945 ++-
zfs/module/zfs/spa_config.c | 205 +-
zfs/module/zfs/spa_errlog.c | 8 +-
zfs/module/zfs/spa_history.c | 93 +-
zfs/module/zfs/spa_misc.c | 178 +-
zfs/module/zfs/spa_stats.c | 271 +-
zfs/module/zfs/space_map.c | 23 +-
zfs/module/zfs/space_reftree.c | 22 +-
zfs/module/zfs/trace.c | 1 +
zfs/module/zfs/txg.c | 79 +-
zfs/module/zfs/uberblock.c | 5 +-
zfs/module/zfs/unique.c | 12 +-
zfs/module/zfs/vdev.c | 537 +-
zfs/module/zfs/vdev_cache.c | 69 +-
zfs/module/zfs/vdev_disk.c | 79 +-
zfs/module/zfs/vdev_file.c | 53 +-
zfs/module/zfs/vdev_label.c | 404 +-
zfs/module/zfs/vdev_mirror.c | 349 +-
zfs/module/zfs/vdev_missing.c | 2 +
zfs/module/zfs/vdev_queue.c | 149 +-
zfs/module/zfs/vdev_raidz.c | 900 ++-
zfs/module/zfs/vdev_raidz_math.c | 652 ++
zfs/module/zfs/vdev_raidz_math_aarch64_neon.c | 2279 ++++++
.../zfs/vdev_raidz_math_aarch64_neon_common.h | 684 ++
zfs/module/zfs/vdev_raidz_math_aarch64_neonx2.c | 232 +
zfs/module/zfs/vdev_raidz_math_avx2.c | 411 ++
zfs/module/zfs/vdev_raidz_math_avx512bw.c | 410 ++
zfs/module/zfs/vdev_raidz_math_avx512f.c | 487 ++
zfs/module/zfs/vdev_raidz_math_impl.h | 1477 ++++
zfs/module/zfs/vdev_raidz_math_scalar.c | 336 +
zfs/module/zfs/vdev_raidz_math_sse2.c | 622 ++
zfs/module/zfs/vdev_raidz_math_ssse3.c | 2475 +++++++
zfs/module/zfs/vdev_root.c | 1 +
zfs/module/zfs/zap.c | 183 +-
zfs/module/zfs/zap_leaf.c | 24 +-
zfs/module/zfs/zap_micro.c | 621 +-
zfs/module/zfs/zfeature.c | 22 +-
zfs/module/zfs/zfeature_common.c | 132 +-
zfs/module/zfs/zfs_acl.c | 179 +-
zfs/module/zfs/zfs_ctldir.c | 370 +-
zfs/module/zfs/zfs_debug.c | 45 +-
zfs/module/zfs/zfs_dir.c | 301 +-
zfs/module/zfs/zfs_fm.c | 206 +-
zfs/module/zfs/zfs_fuid.c | 190 +-
zfs/module/zfs/zfs_ioctl.c | 1250 +++-
zfs/module/zfs/zfs_log.c | 102 +-
zfs/module/zfs/zfs_replay.c | 161 +-
zfs/module/zfs/zfs_rlock.c | 16 +-
zfs/module/zfs/zfs_sa.c | 117 +-
zfs/module/zfs/zfs_vfsops.c | 1438 ++--
zfs/module/zfs/zfs_vnops.c | 1200 ++--
zfs/module/zfs/zfs_znode.c | 608 +-
zfs/module/zfs/zil.c | 235 +-
zfs/module/zfs/zio.c | 1295 +++-
zfs/module/zfs/zio_checksum.c | 389 +-
zfs/module/zfs/zio_compress.c | 80 +-
zfs/module/zfs/zio_inject.c | 301 +-
zfs/module/zfs/zpl_ctldir.c | 51 +-
zfs/module/zfs/zpl_export.c | 1 +
zfs/module/zfs/zpl_file.c | 25 +-
zfs/module/zfs/zpl_inode.c | 91 +-
zfs/module/zfs/zpl_super.c | 235 +-
zfs/module/zfs/zpl_xattr.c | 50 +-
zfs/module/zfs/zrlock.c | 62 +-
zfs/module/zfs/zvol.c | 1543 +++-
zfs/module/zpios/pios.c | 97 +-
zfs/zfs_config.h.in | 116 +-
467 files changed, 97172 insertions(+), 16538 deletions(-)
create mode 100644 spl/cmd/splat/Makefile.am
create mode 100644 spl/cmd/splat/Makefile.in
rename spl/cmd/{ => splat}/splat.c (100%)
rename spl/cmd/{ => splat}/splat.h (100%)
create mode 100644 spl/cmd/splslab/Makefile.am
create mode 100644 spl/cmd/splslab/Makefile.in
create mode 100755 spl/cmd/splslab/splslab.py
create mode 100644 spl/include/sys/dkioc_free_util.h
create mode 100644 zfs/config/always-arch.m4
create mode 100644 zfs/config/kernel-bio_set_dev.m4
create mode 100644 zfs/config/kernel-blk-queue-bdi.m4
delete mode 100644 zfs/config/kernel-check-disk-size-change.m4
create mode 100644 zfs/config/kernel-file-dentry.m4
create mode 100644 zfs/config/kernel-fpu.m4
create mode 100644 zfs/config/kernel-inode-set-flags.m4
delete mode 100644 zfs/config/kernel-kobj-name-len.m4
create mode 100644 zfs/config/kernel-kuid-helpers.m4
create mode 100644 zfs/config/kernel-mod-param.m4
create mode 100644 zfs/config/kernel-objtool.m4
create mode 100644 zfs/config/kernel-super-userns.m4
create mode 100644 zfs/config/kernel-tmpfile.m4
create mode 100644 zfs/config/kernel-vm_node_stat.m4
create mode 100644 zfs/config/toolchain-simd.m4
delete mode 100644 zfs/config/user-arch.m4
create mode 100644 zfs/config/user-libattr.m4
create mode 100644 zfs/config/user-libtirpc.m4
create mode 100644 zfs/config/user-libudev.m4
create mode 100644 zfs/include/linux/mod_compat.h
create mode 100644 zfs/include/linux/simd_aarch64.h
create mode 100644 zfs/include/linux/simd_x86.h
create mode 100644 zfs/include/sys/abd.h
create mode 100644 zfs/include/sys/bqueue.h
create mode 100644 zfs/include/sys/crypto/Makefile.am
create mode 100644 zfs/include/sys/crypto/Makefile.in
create mode 100644 zfs/include/sys/crypto/api.h
create mode 100644 zfs/include/sys/crypto/common.h
create mode 100644 zfs/include/sys/crypto/icp.h
create mode 100644 zfs/include/sys/edonr.h
create mode 100644 zfs/include/sys/mmp.h
create mode 100644 zfs/include/sys/pathname.h
create mode 100644 zfs/include/sys/policy.h
create mode 100644 zfs/include/sys/sha2.h
create mode 100644 zfs/include/sys/skein.h
create mode 100644 zfs/include/sys/spa_checksum.h
create mode 100644 zfs/include/sys/sysevent.h
create mode 100644 zfs/include/sys/sysevent/Makefile.am
create mode 100644 zfs/include/sys/sysevent/Makefile.in
create mode 100644 zfs/include/sys/sysevent/dev.h
create mode 100644 zfs/include/sys/sysevent/eventdefs.h
create mode 100644 zfs/include/sys/trace_common.h
create mode 100644 zfs/include/sys/trace_zio.h
create mode 100644 zfs/include/sys/vdev_raidz.h
create mode 100644 zfs/include/sys/vdev_raidz_impl.h
create mode 100644 zfs/include/sys/zfs_ratelimit.h
create mode 100644 zfs/include/sys/zio_priority.h
create mode 100644 zfs/module/icp/Makefile.in
create mode 100644 zfs/module/icp/algs/aes/aes_impl.c
create mode 100644 zfs/module/icp/algs/aes/aes_modes.c
create mode 100644 zfs/module/icp/algs/edonr/edonr.c
create mode 100644 zfs/module/icp/algs/edonr/edonr_byteorder.h
create mode 100644 zfs/module/icp/algs/modes/cbc.c
create mode 100644 zfs/module/icp/algs/modes/ccm.c
create mode 100644 zfs/module/icp/algs/modes/ctr.c
create mode 100644 zfs/module/icp/algs/modes/ecb.c
create mode 100644 zfs/module/icp/algs/modes/gcm.c
create mode 100644 zfs/module/icp/algs/modes/modes.c
create mode 100644 zfs/module/icp/algs/sha1/sha1.c
create mode 100644 zfs/module/icp/algs/sha2/sha2.c
create mode 100644 zfs/module/icp/algs/skein/skein.c
create mode 100644 zfs/module/icp/algs/skein/skein_block.c
create mode 100644 zfs/module/icp/algs/skein/skein_impl.h
create mode 100644 zfs/module/icp/algs/skein/skein_iv.c
create mode 100644 zfs/module/icp/algs/skein/skein_port.h
create mode 100644 zfs/module/icp/api/kcf_cipher.c
create mode 100644 zfs/module/icp/api/kcf_ctxops.c
create mode 100644 zfs/module/icp/api/kcf_digest.c
create mode 100644 zfs/module/icp/api/kcf_mac.c
create mode 100644 zfs/module/icp/api/kcf_miscapi.c
create mode 100644 zfs/module/icp/asm-x86_64/aes/aes_amd64.S
create mode 100644 zfs/module/icp/asm-x86_64/aes/aes_intel.S
create mode 100644 zfs/module/icp/asm-x86_64/aes/aeskey.c
create mode 100644 zfs/module/icp/asm-x86_64/aes/aesopt.h
create mode 100644 zfs/module/icp/asm-x86_64/aes/aestab.h
create mode 100644 zfs/module/icp/asm-x86_64/aes/aestab2.h
create mode 100644 zfs/module/icp/asm-x86_64/modes/gcm_intel.S
create mode 100644 zfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S
create mode 100644 zfs/module/icp/asm-x86_64/sha2/sha256_impl.S
create mode 100644 zfs/module/icp/asm-x86_64/sha2/sha512_impl.S
create mode 100644 zfs/module/icp/core/kcf_callprov.c
create mode 100644 zfs/module/icp/core/kcf_mech_tabs.c
create mode 100644 zfs/module/icp/core/kcf_prov_lib.c
create mode 100644 zfs/module/icp/core/kcf_prov_tabs.c
create mode 100644 zfs/module/icp/core/kcf_sched.c
create mode 100644 zfs/module/icp/illumos-crypto.c
create mode 100644 zfs/module/icp/include/aes/aes_impl.h
create mode 100644 zfs/module/icp/include/modes/modes.h
create mode 100644 zfs/module/icp/include/sha1/sha1.h
create mode 100644 zfs/module/icp/include/sha1/sha1_consts.h
create mode 100644 zfs/module/icp/include/sha1/sha1_impl.h
create mode 100644 zfs/module/icp/include/sha2/sha2_consts.h
create mode 100644 zfs/module/icp/include/sha2/sha2_impl.h
create mode 100644 zfs/module/icp/include/sys/asm_linkage.h
create mode 100644 zfs/module/icp/include/sys/bitmap.h
create mode 100644 zfs/module/icp/include/sys/crypto/elfsign.h
create mode 100644 zfs/module/icp/include/sys/crypto/impl.h
create mode 100644 zfs/module/icp/include/sys/crypto/ioctl.h
create mode 100644 zfs/module/icp/include/sys/crypto/ioctladmin.h
create mode 100644 zfs/module/icp/include/sys/crypto/ops_impl.h
create mode 100644 zfs/module/icp/include/sys/crypto/sched_impl.h
create mode 100644 zfs/module/icp/include/sys/crypto/spi.h
create mode 100644 zfs/module/icp/include/sys/ia32/asm_linkage.h
create mode 100644 zfs/module/icp/include/sys/ia32/stack.h
create mode 100644 zfs/module/icp/include/sys/ia32/trap.h
create mode 100644 zfs/module/icp/include/sys/modctl.h
create mode 100644 zfs/module/icp/include/sys/modhash.h
create mode 100644 zfs/module/icp/include/sys/modhash_impl.h
create mode 100644 zfs/module/icp/include/sys/stack.h
create mode 100644 zfs/module/icp/include/sys/trap.h
create mode 100644 zfs/module/icp/io/aes.c
create mode 100644 zfs/module/icp/io/edonr_mod.c
create mode 100644 zfs/module/icp/io/sha1_mod.c
create mode 100644 zfs/module/icp/io/sha2_mod.c
create mode 100644 zfs/module/icp/io/skein_mod.c
create mode 100644 zfs/module/icp/os/modconf.c
create mode 100644 zfs/module/icp/os/modhash.c
create mode 100644 zfs/module/icp/spi/kcf_spi.c
create mode 100644 zfs/module/zcommon/zfs_fletcher_aarch64_neon.c
create mode 100644 zfs/module/zcommon/zfs_fletcher_avx512.c
create mode 100644 zfs/module/zcommon/zfs_fletcher_intel.c
create mode 100644 zfs/module/zcommon/zfs_fletcher_sse.c
create mode 100644 zfs/module/zcommon/zfs_fletcher_superscalar.c
create mode 100644 zfs/module/zcommon/zfs_fletcher_superscalar4.c
create mode 100644 zfs/module/zfs/abd.c
create mode 100644 zfs/module/zfs/bqueue.c
mode change 100755 => 100644 zfs/module/zfs/dsl_pool.c
create mode 100644 zfs/module/zfs/edonr_zfs.c
create mode 100644 zfs/module/zfs/mmp.c
create mode 100644 zfs/module/zfs/pathname.c
create mode 100644 zfs/module/zfs/policy.c
create mode 100644 zfs/module/zfs/qat_compress.c
create mode 100644 zfs/module/zfs/qat_compress.h
create mode 100644 zfs/module/zfs/skein_zfs.c
create mode 100644 zfs/module/zfs/vdev_raidz_math.c
create mode 100644 zfs/module/zfs/vdev_raidz_math_aarch64_neon.c
create mode 100644 zfs/module/zfs/vdev_raidz_math_aarch64_neon_common.h
create mode 100644 zfs/module/zfs/vdev_raidz_math_aarch64_neonx2.c
create mode 100644 zfs/module/zfs/vdev_raidz_math_avx2.c
create mode 100644 zfs/module/zfs/vdev_raidz_math_avx512bw.c
create mode 100644 zfs/module/zfs/vdev_raidz_math_avx512f.c
create mode 100644 zfs/module/zfs/vdev_raidz_math_impl.h
create mode 100644 zfs/module/zfs/vdev_raidz_math_scalar.c
create mode 100644 zfs/module/zfs/vdev_raidz_math_sse2.c
create mode 100644 zfs/module/zfs/vdev_raidz_math_ssse3.c
diff --git a/spl/META b/spl/META
index 621807a782e5..e80542ace741 100644
--- a/spl/META
+++ b/spl/META
@@ -1,7 +1,7 @@
Meta: 1
Name: spl
Branch: 1.0
-Version: 0.6.5.11
+Version: 0.7.3
Release: 1ubuntu1
Release-Tags: relext
License: GPL
diff --git a/spl/Makefile.am b/spl/Makefile.am
index 4977448fda95..05107cb19e1c 100644
--- a/spl/Makefile.am
+++ b/spl/Makefile.am
@@ -50,5 +50,6 @@ etags:
tags: ctags etags
pkg: @DEFAULT_PACKAGE@
+pkg-dkms: @DEFAULT_PACKAGE@-dkms
pkg-kmod: @DEFAULT_PACKAGE@-kmod
pkg-utils: @DEFAULT_PACKAGE@-utils
diff --git a/spl/Makefile.in b/spl/Makefile.in
index c451112217b4..260d4e7b75f1 100644
--- a/spl/Makefile.in
+++ b/spl/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -751,7 +751,7 @@ distdir: $(DISTFILES)
! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
|| chmod -R a+r "$(distdir)"
dist-gzip: distdir
- tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
+ tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz
$(am__post_remove_distdir)
dist-bzip2: distdir
@@ -777,7 +777,7 @@ dist-shar: distdir
@echo WARNING: "Support for shar distribution archives is" \
"deprecated." >&2
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
- shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
+ shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz
$(am__post_remove_distdir)
dist-zip: distdir
@@ -795,7 +795,7 @@ dist dist-all:
distcheck: dist
case '$(DIST_ARCHIVES)' in \
*.tar.gz*) \
- GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\
+ eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\
*.tar.bz2*) \
bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
*.tar.lz*) \
@@ -805,7 +805,7 @@ distcheck: dist
*.tar.Z*) \
uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
*.shar.gz*) \
- GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\
+ eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\
*.zip*) \
unzip $(distdir).zip ;;\
esac
@@ -1103,22 +1103,30 @@ deb-local:
fi)
deb-kmod: deb-local rpm-kmod
-@CONFIG_KERNEL_TRUE@ name=${PACKAGE}; \
-@CONFIG_KERNEL_TRUE@ version=${VERSION}-${RELEASE}; \
-@CONFIG_KERNEL_TRUE@ arch=`$(RPM) -qp $${name}-kmod-$${version}.src.rpm --qf %{arch} | tail -1`; \
-@CONFIG_KERNEL_TRUE@ pkg1=kmod-$${name}*$${version}.$${arch}.rpm; \
-@CONFIG_KERNEL_TRUE@ fakeroot $(ALIEN) --bump=0 --scripts --to-deb $$pkg1; \
-@CONFIG_KERNEL_TRUE@ $(RM) $$pkg1
+ name=${PACKAGE}; \
+ version=${VERSION}-${RELEASE}; \
+ arch=`$(RPM) -qp $${name}-kmod-$${version}.src.rpm --qf %{arch} | tail -1`; \
+ pkg1=kmod-$${name}*$${version}.$${arch}.rpm; \
+ fakeroot $(ALIEN) --bump=0 --scripts --to-deb $$pkg1; \
+ $(RM) $$pkg1
+
+deb-dkms: deb-local rpm-dkms
+ name=${PACKAGE}; \
+ version=${VERSION}-${RELEASE}; \
+ arch=`$(RPM) -qp $${name}-dkms-$${version}.src.rpm --qf %{arch} | tail -1`; \
+ pkg1=$${name}-dkms-$${version}.$${arch}.rpm; \
+ fakeroot $(ALIEN) --bump=0 --scripts --to-deb $$pkg1; \
+ $(RM) $$pkg1
deb-utils: deb-local rpm-utils
-@CONFIG_USER_TRUE@ name=${PACKAGE}; \
-@CONFIG_USER_TRUE@ version=${VERSION}-${RELEASE}; \
-@CONFIG_USER_TRUE@ arch=`$(RPM) -qp $${name}-$${version}.src.rpm --qf %{arch} | tail -1`; \
-@CONFIG_USER_TRUE@ pkg1=$${name}-$${version}.$${arch}.rpm; \
-@CONFIG_USER_TRUE@ fakeroot $(ALIEN) --bump=0 --scripts --to-deb $$pkg1; \
-@CONFIG_USER_TRUE@ $(RM) $$pkg1
+ name=${PACKAGE}; \
+ version=${VERSION}-${RELEASE}; \
+ arch=`$(RPM) -qp $${name}-$${version}.src.rpm --qf %{arch} | tail -1`; \
+ pkg1=$${name}-$${version}.$${arch}.rpm; \
+ fakeroot $(ALIEN) --bump=0 --scripts --to-deb $$pkg1; \
+ $(RM) $$pkg1
-deb: deb-kmod deb-utils
+deb: deb-kmod deb-dkms deb-utils
tgz-local:
@(if test "${HAVE_ALIEN}" = "no"; then \
@@ -1174,6 +1182,7 @@ etags:
tags: ctags etags
pkg: @DEFAULT_PACKAGE@
+pkg-dkms: @DEFAULT_PACKAGE@-dkms
pkg-kmod: @DEFAULT_PACKAGE@-kmod
pkg-utils: @DEFAULT_PACKAGE@-utils
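
The dkms targets added above are wired the same way as the existing kmod/utils ones. A minimal sketch of exercising them from a configured spl tree — assuming rpm, fakeroot and alien are installed (deb-local checks for alien) — is:

    $ ./configure
    $ make deb-dkms    # rpm-dkms builds the spl-dkms src.rpm, then fakeroot
                       #   alien --bump=0 --scripts --to-deb converts it and
                       #   the intermediate .rpm is removed
    $ make pkg-dkms    # indirects through @DEFAULT_PACKAGE@-dkms, i.e.
                       #   deb-dkms when the default package type is deb
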
diff --git a/spl/aclocal.m4 b/spl/aclocal.m4
index 77267d92f94d..be61f1fb448a 100644
--- a/spl/aclocal.m4
+++ b/spl/aclocal.m4
@@ -1,6 +1,6 @@
-# generated automatically by aclocal 1.15 -*- Autoconf -*-
+# generated automatically by aclocal 1.15.1 -*- Autoconf -*-
-# Copyright (C) 1996-2014 Free Software Foundation, Inc.
+# Copyright (C) 1996-2017 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -20,7 +20,7 @@ You have another version of autoconf. It may work, but is not guaranteed to.
If you have problems, you may need to regenerate the build system entirely.
To do so, use the procedure documented by the package, typically 'autoreconf'.])])
-# Copyright (C) 2002-2014 Free Software Foundation, Inc.
+# Copyright (C) 2002-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -35,7 +35,7 @@ AC_DEFUN([AM_AUTOMAKE_VERSION],
[am__api_version='1.15'
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
dnl require some minimum version. Point them to the right macro.
-m4_if([$1], [1.15], [],
+m4_if([$1], [1.15.1], [],
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
])
@@ -51,14 +51,14 @@ m4_define([_AM_AUTOCONF_VERSION], [])
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
-[AM_AUTOMAKE_VERSION([1.15])dnl
+[AM_AUTOMAKE_VERSION([1.15.1])dnl
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
-# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -110,7 +110,7 @@ am_aux_dir=`cd "$ac_aux_dir" && pwd`
# AM_CONDITIONAL -*- Autoconf -*-
-# Copyright (C) 1997-2014 Free Software Foundation, Inc.
+# Copyright (C) 1997-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -141,7 +141,7 @@ AC_CONFIG_COMMANDS_PRE(
Usually this means the macro was only invoked conditionally.]])
fi])])
-# Copyright (C) 1999-2014 Free Software Foundation, Inc.
+# Copyright (C) 1999-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -332,7 +332,7 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl
# Generate code to set up dependency tracking. -*- Autoconf -*-
-# Copyright (C) 1999-2014 Free Software Foundation, Inc.
+# Copyright (C) 1999-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -408,7 +408,7 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
# Do all the work for Automake. -*- Autoconf -*-
-# Copyright (C) 1996-2014 Free Software Foundation, Inc.
+# Copyright (C) 1996-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -605,7 +605,7 @@ for _am_header in $config_headers :; do
done
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
-# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -626,7 +626,7 @@ if test x"${install_sh+set}" != xset; then
fi
AC_SUBST([install_sh])])
-# Copyright (C) 2003-2014 Free Software Foundation, Inc.
+# Copyright (C) 2003-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -648,7 +648,7 @@ AC_SUBST([am__leading_dot])])
# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
# From Jim Meyering
-# Copyright (C) 1996-2014 Free Software Foundation, Inc.
+# Copyright (C) 1996-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -683,7 +683,7 @@ AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
# Check to see how 'make' treats includes. -*- Autoconf -*-
-# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -733,7 +733,7 @@ rm -f confinc confmf
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
-# Copyright (C) 1997-2014 Free Software Foundation, Inc.
+# Copyright (C) 1997-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -772,7 +772,7 @@ fi
# Helper functions for option handling. -*- Autoconf -*-
-# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -801,7 +801,7 @@ AC_DEFUN([_AM_SET_OPTIONS],
AC_DEFUN([_AM_IF_OPTION],
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
-# Copyright (C) 1999-2014 Free Software Foundation, Inc.
+# Copyright (C) 1999-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -848,7 +848,7 @@ AC_LANG_POP([C])])
# For backward compatibility.
AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
-# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -867,7 +867,7 @@ AC_DEFUN([AM_RUN_LOG],
# Check to make sure that the build environment is sane. -*- Autoconf -*-
-# Copyright (C) 1996-2014 Free Software Foundation, Inc.
+# Copyright (C) 1996-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -948,7 +948,7 @@ AC_CONFIG_COMMANDS_PRE(
rm -f conftest.file
])
-# Copyright (C) 2009-2014 Free Software Foundation, Inc.
+# Copyright (C) 2009-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1008,7 +1008,7 @@ AC_SUBST([AM_BACKSLASH])dnl
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
])
-# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1036,7 +1036,7 @@ fi
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
AC_SUBST([INSTALL_STRIP_PROGRAM])])
-# Copyright (C) 2006-2014 Free Software Foundation, Inc.
+# Copyright (C) 2006-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1055,7 +1055,7 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
# Check how to create a tarball. -*- Autoconf -*-
-# Copyright (C) 2004-2014 Free Software Foundation, Inc.
+# Copyright (C) 2004-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/cmd/Makefile.am b/spl/cmd/Makefile.am
index 01afdcf2566d..63a3c76f9754 100644
--- a/spl/cmd/Makefile.am
+++ b/spl/cmd/Makefile.am
@@ -1,11 +1 @@
-include $(top_srcdir)/config/Rules.am
-
-DEFAULT_INCLUDES += \
- -I$(top_srcdir)/lib
-
-sbin_PROGRAMS = splat
-
-splat_SOURCES = splat.c
-splat_LDFLAGS = $(top_builddir)/lib/libcommon.la
-
-EXTRA_DIST = splat.h
+SUBDIRS = splat splslab
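
With splat.c/splat.h moved down a level (the 100% renames in the summary above) and the new splslab.py script added, cmd/ becomes a pure dispatch directory. The resulting layout, for orientation:

    spl/cmd/
    |-- Makefile.am   # now only: SUBDIRS = splat splslab
    |-- splat/        # splat.c and splat.h, moved here unchanged
    `-- splslab/      # new splslab.py slab-statistics script
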
diff --git a/spl/cmd/Makefile.in b/spl/cmd/Makefile.in
index 681f095600f4..f332824322fc 100644
--- a/spl/cmd/Makefile.in
+++ b/spl/cmd/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -13,15 +13,6 @@
# PARTICULAR PURPOSE.
@SET_MAKE@
-
-###############################################################################
-# Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
-# Copyright (C) 2007 The Regents of the University of California.
-# Written by Brian Behlendorf <behlendorf1@llnl.gov>.
-###############################################################################
-# Common rules for user space components.
-###############################################################################
-
VPATH = @srcdir@
am__is_gnu_make = { \
if test -z '$(MAKELEVEL)'; then \
@@ -97,7 +88,6 @@ POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
target_triplet = @target@
-sbin_PROGRAMS = splat$(EXEEXT)
subdir = cmd
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \
@@ -114,18 +104,6 @@ mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/spl_config.h
CONFIG_CLEAN_FILES =
CONFIG_CLEAN_VPATH_FILES =
-am__installdirs = "$(DESTDIR)$(sbindir)"
-PROGRAMS = $(sbin_PROGRAMS)
-am_splat_OBJECTS = splat.$(OBJEXT)
-splat_OBJECTS = $(am_splat_OBJECTS)
-splat_LDADD = $(LDADD)
-AM_V_lt = $(am__v_lt_@AM_V@)
-am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
-am__v_lt_0 = --silent
-am__v_lt_1 =
-splat_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
- $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
- $(splat_LDFLAGS) $(LDFLAGS) -o $@
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
am__v_P_0 = false
AM_V_at = $(am__v_at_@AM_V@)
am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
am__v_at_0 = @
am__v_at_1 =
-depcomp = $(SHELL) $(top_srcdir)/config/depcomp
-am__depfiles_maybe = depfiles
-am__mv = mv -f
-COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
- $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
-LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
- $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
- $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
- $(AM_CFLAGS) $(CFLAGS)
-AM_V_CC = $(am__v_CC_@AM_V@)
-am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
-am__v_CC_0 = @echo " CC " $@;
-am__v_CC_1 =
-CCLD = $(CC)
-LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
- $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
- $(AM_LDFLAGS) $(LDFLAGS) -o $@
-AM_V_CCLD = $(am__v_CCLD_@AM_V@)
-am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
-am__v_CCLD_0 = @echo " CCLD " $@;
-am__v_CCLD_1 =
-SOURCES = $(splat_SOURCES)
-DIST_SOURCES = $(splat_SOURCES)
+SOURCES =
+DIST_SOURCES =
+RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
+ ctags-recursive dvi-recursive html-recursive info-recursive \
+ install-data-recursive install-dvi-recursive \
+ install-exec-recursive install-html-recursive \
+ install-info-recursive install-pdf-recursive \
+ install-ps-recursive install-recursive installcheck-recursive \
+ installdirs-recursive pdf-recursive ps-recursive \
+ tags-recursive uninstall-recursive
am__can_run_installinfo = \
case $$AM_UPDATE_INFO_DIR in \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
+ distclean-recursive maintainer-clean-recursive
+am__recursive_targets = \
+ $(RECURSIVE_TARGETS) \
+ $(RECURSIVE_CLEAN_TARGETS) \
+ $(am__extra_recursive_targets)
+AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
+ distdir
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
# Read a list of newline-separated strings from the standard input,
# and print each of them once, without duplicates. Input order is
@@ -185,9 +158,34 @@ am__define_uniq_tagged_files = \
done | $(am__uniquify_input)`
ETAGS = etags
CTAGS = ctags
-am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/Rules.am \
- $(top_srcdir)/config/depcomp
+DIST_SUBDIRS = $(SUBDIRS)
+am__DIST_COMMON = $(srcdir)/Makefile.in
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+am__relativize = \
+ dir0=`pwd`; \
+ sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+ sed_rest='s,^[^/]*/*,,'; \
+ sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+ sed_butlast='s,/*[^/]*$$,,'; \
+ while test -n "$$dir1"; do \
+ first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+ if test "$$first" != "."; then \
+ if test "$$first" = ".."; then \
+ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+ else \
+ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+ if test "$$first2" = "$$first"; then \
+ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+ else \
+ dir2="../$$dir2"; \
+ fi; \
+ dir0="$$dir0"/"$$first"; \
+ fi; \
+ fi; \
+ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+ done; \
+ reldir="$$dir2"
ACLOCAL = @ACLOCAL@
ALIEN = @ALIEN@
ALIEN_VERSION = @ALIEN_VERSION@
@@ -357,20 +355,11 @@ target_vendor = @target_vendor@
top_build_prefix = @top_build_prefix@
top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
-DEFAULT_INCLUDES = -include ${top_builddir}/spl_config.h \
- -I$(top_srcdir)/lib
-AM_LIBTOOLFLAGS = --silent
-AM_CPPFLAGS = -D__USE_LARGEFILE64
-AM_CFLAGS = -Wall -Wshadow -Wstrict-prototypes -fno-strict-aliasing \
- ${DEBUG_CFLAGS}
-splat_SOURCES = splat.c
-splat_LDFLAGS = $(top_builddir)/lib/libcommon.la
-EXTRA_DIST = splat.h
-all: all-am
+SUBDIRS = splat splslab
+all: all-recursive
.SUFFIXES:
-.SUFFIXES: .c .lo .o .obj
-$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/config/Rules.am $(am__configure_deps)
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
@for dep in $?; do \
case '$(am__configure_deps)' in \
*$$dep*) \
@@ -390,7 +379,6 @@ Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
esac;
-$(top_srcdir)/config/Rules.am $(am__empty):
$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
@@ -400,88 +388,6 @@ $(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
$(am__aclocal_m4_deps):
-install-sbinPROGRAMS: $(sbin_PROGRAMS)
- @$(NORMAL_INSTALL)
- @list='$(sbin_PROGRAMS)'; test -n "$(sbindir)" || list=; \
- if test -n "$$list"; then \
- echo " $(MKDIR_P) '$(DESTDIR)$(sbindir)'"; \
- $(MKDIR_P) "$(DESTDIR)$(sbindir)" || exit 1; \
- fi; \
- for p in $$list; do echo "$$p $$p"; done | \
- sed 's/$(EXEEXT)$$//' | \
- while read p p1; do if test -f $$p \
- || test -f $$p1 \
- ; then echo "$$p"; echo "$$p"; else :; fi; \
- done | \
- sed -e 'p;s,.*/,,;n;h' \
- -e 's|.*|.|' \
- -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
- sed 'N;N;N;s,\n, ,g' | \
- $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
- { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
- if ($$2 == $$4) files[d] = files[d] " " $$1; \
- else { print "f", $$3 "/" $$4, $$1; } } \
- END { for (d in files) print "f", d, files[d] }' | \
- while read type dir files; do \
- if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
- test -z "$$files" || { \
- echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(sbindir)$$dir'"; \
- $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(sbindir)$$dir" || exit $$?; \
- } \
- ; done
-
-uninstall-sbinPROGRAMS:
- @$(NORMAL_UNINSTALL)
- @list='$(sbin_PROGRAMS)'; test -n "$(sbindir)" || list=; \
- files=`for p in $$list; do echo "$$p"; done | \
- sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
- -e 's/$$/$(EXEEXT)/' \
- `; \
- test -n "$$list" || exit 0; \
- echo " ( cd '$(DESTDIR)$(sbindir)' && rm -f" $$files ")"; \
- cd "$(DESTDIR)$(sbindir)" && rm -f $$files
-
-clean-sbinPROGRAMS:
- @list='$(sbin_PROGRAMS)'; test -n "$$list" || exit 0; \
- echo " rm -f" $$list; \
- rm -f $$list || exit $$?; \
- test -n "$(EXEEXT)" || exit 0; \
- list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
- echo " rm -f" $$list; \
- rm -f $$list
-
-splat$(EXEEXT): $(splat_OBJECTS) $(splat_DEPENDENCIES) $(EXTRA_splat_DEPENDENCIES)
- @rm -f splat$(EXEEXT)
- $(AM_V_CCLD)$(splat_LINK) $(splat_OBJECTS) $(splat_LDADD) $(LIBS)
-
-mostlyclean-compile:
- -rm -f *.$(OBJEXT)
-
-distclean-compile:
- -rm -f *.tab.c
-
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splat.Po@am__quote@
-
-.c.o:
-@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
-@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
-
-.c.obj:
-@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
-@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
-
-.c.lo:
-@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
-@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
mostlyclean-libtool:
-rm -f *.lo
@@ -489,14 +395,61 @@ mostlyclean-libtool:
clean-libtool:
-rm -rf .libs _libs
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run 'make' without going through this Makefile.
+# To change the values of 'make' variables: instead of editing Makefiles,
+# (1) if the variable is set in 'config.status', edit 'config.status'
+# (which will cause the Makefiles to be regenerated when you run 'make');
+# (2) otherwise, pass the desired values on the 'make' command line.
+$(am__recursive_targets):
+ @fail=; \
+ if $(am__make_keepgoing); then \
+ failcom='fail=yes'; \
+ else \
+ failcom='exit 1'; \
+ fi; \
+ dot_seen=no; \
+ target=`echo $@ | sed s/-recursive//`; \
+ case "$@" in \
+ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+ *) list='$(SUBDIRS)' ;; \
+ esac; \
+ for subdir in $$list; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ dot_seen=yes; \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || eval $$failcom; \
+ done; \
+ if test "$$dot_seen" = "no"; then \
+ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+ fi; test -z "$$fail"
+
ID: $(am__tagged_files)
$(am__define_uniq_tagged_files); mkid -fID $$unique
-tags: tags-am
+tags: tags-recursive
TAGS: tags
tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
set x; \
here=`pwd`; \
+ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+ include_option=--etags-include; \
+ empty_fix=.; \
+ else \
+ include_option=--include; \
+ empty_fix=; \
+ fi; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test ! -f $$subdir/TAGS || \
+ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
+ fi; \
+ done; \
$(am__define_uniq_tagged_files); \
shift; \
if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
@@ -509,7 +462,7 @@ tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
$$unique; \
fi; \
fi
-ctags: ctags-am
+ctags: ctags-recursive
CTAGS: ctags
ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
@@ -522,7 +475,7 @@ GTAGS:
here=`$(am__cd) $(top_builddir) && pwd` \
&& $(am__cd) $(top_srcdir) \
&& gtags -i $(GTAGS_ARGS) "$$here"
-cscopelist: cscopelist-am
+cscopelist: cscopelist-recursive
cscopelist-am: $(am__tagged_files)
list='$(am__tagged_files)'; \
@@ -571,22 +524,45 @@ distdir: $(DISTFILES)
|| exit 1; \
fi; \
done
-check-am: all-am
-check: check-am
-all-am: Makefile $(PROGRAMS)
-installdirs:
- for dir in "$(DESTDIR)$(sbindir)"; do \
- test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ $(am__make_dryrun) \
+ || test -d "$(distdir)/$$subdir" \
+ || $(MKDIR_P) "$(distdir)/$$subdir" \
+ || exit 1; \
+ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
+ $(am__relativize); \
+ new_distdir=$$reldir; \
+ dir1=$$subdir; dir2="$(top_distdir)"; \
+ $(am__relativize); \
+ new_top_distdir=$$reldir; \
+ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
+ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
+ ($(am__cd) $$subdir && \
+ $(MAKE) $(AM_MAKEFLAGS) \
+ top_distdir="$$new_top_distdir" \
+ distdir="$$new_distdir" \
+ am__remove_distdir=: \
+ am__skip_length_check=: \
+ am__skip_mode_fix=: \
+ distdir) \
+ || exit 1; \
+ fi; \
done
-install: install-am
-install-exec: install-exec-am
-install-data: install-data-am
-uninstall: uninstall-am
+check-am: all-am
+check: check-recursive
+all-am: Makefile
+installdirs: installdirs-recursive
+installdirs-am:
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
install-am: all-am
@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
-installcheck: installcheck-am
+installcheck: installcheck-recursive
install-strip:
if test -z '$(STRIP)'; then \
$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
@@ -608,92 +584,86 @@ distclean-generic:
maintainer-clean-generic:
@echo "This command is intended for maintainers to use"
@echo "it deletes files that may require special tools to rebuild."
-clean: clean-am
+clean: clean-recursive
-clean-am: clean-generic clean-libtool clean-sbinPROGRAMS \
- mostlyclean-am
+clean-am: clean-generic clean-libtool mostlyclean-am
-distclean: distclean-am
- -rm -rf ./$(DEPDIR)
+distclean: distclean-recursive
-rm -f Makefile
-distclean-am: clean-am distclean-compile distclean-generic \
- distclean-tags
+distclean-am: clean-am distclean-generic distclean-tags
-dvi: dvi-am
+dvi: dvi-recursive
dvi-am:
-html: html-am
+html: html-recursive
html-am:
-info: info-am
+info: info-recursive
info-am:
install-data-am:
-install-dvi: install-dvi-am
+install-dvi: install-dvi-recursive
install-dvi-am:
-install-exec-am: install-sbinPROGRAMS
+install-exec-am:
-install-html: install-html-am
+install-html: install-html-recursive
install-html-am:
-install-info: install-info-am
+install-info: install-info-recursive
install-info-am:
install-man:
-install-pdf: install-pdf-am
+install-pdf: install-pdf-recursive
install-pdf-am:
-install-ps: install-ps-am
+install-ps: install-ps-recursive
install-ps-am:
installcheck-am:
-maintainer-clean: maintainer-clean-am
- -rm -rf ./$(DEPDIR)
+maintainer-clean: maintainer-clean-recursive
-rm -f Makefile
maintainer-clean-am: distclean-am maintainer-clean-generic
-mostlyclean: mostlyclean-am
+mostlyclean: mostlyclean-recursive
-mostlyclean-am: mostlyclean-compile mostlyclean-generic \
- mostlyclean-libtool
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
-pdf: pdf-am
+pdf: pdf-recursive
pdf-am:
-ps: ps-am
+ps: ps-recursive
ps-am:
-uninstall-am: uninstall-sbinPROGRAMS
-
-.MAKE: install-am install-strip
-
-.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
- clean-libtool clean-sbinPROGRAMS cscopelist-am ctags ctags-am \
- distclean distclean-compile distclean-generic \
- distclean-libtool distclean-tags distdir dvi dvi-am html \
- html-am info info-am install install-am install-data \
- install-data-am install-dvi install-dvi-am install-exec \
- install-exec-am install-html install-html-am install-info \
- install-info-am install-man install-pdf install-pdf-am \
- install-ps install-ps-am install-sbinPROGRAMS install-strip \
- installcheck installcheck-am installdirs maintainer-clean \
- maintainer-clean-generic mostlyclean mostlyclean-compile \
- mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
- tags tags-am uninstall uninstall-am uninstall-sbinPROGRAMS
+uninstall-am:
+
+.MAKE: $(am__recursive_targets) install-am install-strip
+
+.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \
+ check-am clean clean-generic clean-libtool cscopelist-am ctags \
+ ctags-am distclean distclean-generic distclean-libtool \
+ distclean-tags distdir dvi dvi-am html html-am info info-am \
+ install install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ installdirs-am maintainer-clean maintainer-clean-generic \
+ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
+ ps ps-am tags tags-am uninstall uninstall-am
.PRECIOUS: Makefile
diff --git a/spl/cmd/splat/Makefile.am b/spl/cmd/splat/Makefile.am
new file mode 100644
index 000000000000..01afdcf2566d
--- /dev/null
+++ b/spl/cmd/splat/Makefile.am
@@ -0,0 +1,11 @@
+include $(top_srcdir)/config/Rules.am
+
+DEFAULT_INCLUDES += \
+ -I$(top_srcdir)/lib
+
+sbin_PROGRAMS = splat
+
+splat_SOURCES = splat.c
+splat_LDFLAGS = $(top_builddir)/lib/libcommon.la
+
+EXTRA_DIST = splat.h
diff --git a/spl/cmd/splat/Makefile.in b/spl/cmd/splat/Makefile.in
new file mode 100644
index 000000000000..149ef8dff190
--- /dev/null
+++ b/spl/cmd/splat/Makefile.in
@@ -0,0 +1,703 @@
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+###############################################################################
+# Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+# Copyright (C) 2007 The Regents of the University of California.
+# Written by Brian Behlendorf <behlendorf1 at llnl.gov>.
+###############################################################################
+# Common rules for user space components.
+###############################################################################
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+sbin_PROGRAMS = splat$(EXEEXT)
+subdir = cmd/splat
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \
+ $(top_srcdir)/config/ltoptions.m4 \
+ $(top_srcdir)/config/ltsugar.m4 \
+ $(top_srcdir)/config/ltversion.m4 \
+ $(top_srcdir)/config/lt~obsolete.m4 \
+ $(top_srcdir)/config/spl-build.m4 \
+ $(top_srcdir)/config/spl-meta.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/spl_config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__installdirs = "$(DESTDIR)$(sbindir)"
+PROGRAMS = $(sbin_PROGRAMS)
+am_splat_OBJECTS = splat.$(OBJEXT)
+splat_OBJECTS = $(am_splat_OBJECTS)
+splat_LDADD = $(LDADD)
+AM_V_lt = $(am__v_lt_@AM_V@)
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@)
+am__v_lt_0 = --silent
+am__v_lt_1 =
+splat_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(splat_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+depcomp = $(SHELL) $(top_srcdir)/config/depcomp
+am__depfiles_maybe = depfiles
+am__mv = mv -f
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
+ $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
+LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \
+ $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \
+ $(AM_CFLAGS) $(CFLAGS)
+AM_V_CC = $(am__v_CC_@AM_V@)
+am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@)
+am__v_CC_0 = @echo " CC " $@;
+am__v_CC_1 =
+CCLD = $(CC)
+LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \
+ $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
+ $(AM_LDFLAGS) $(LDFLAGS) -o $@
+AM_V_CCLD = $(am__v_CCLD_@AM_V@)
+am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@)
+am__v_CCLD_0 = @echo " CCLD " $@;
+am__v_CCLD_1 =
+SOURCES = $(splat_SOURCES)
+DIST_SOURCES = $(splat_SOURCES)
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/Rules.am \
+ $(top_srcdir)/config/depcomp
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+ALIEN = @ALIEN@
+ALIEN_VERSION = @ALIEN_VERSION@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEBUG_CFLAGS = @DEBUG_CFLAGS@
+DEBUG_KMEM = @DEBUG_KMEM@
+DEBUG_KMEM_TRACKING = @DEBUG_KMEM_TRACKING@
+DEBUG_SPL = @DEBUG_SPL@
+DEFAULT_PACKAGE = @DEFAULT_PACKAGE@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DPKG = @DPKG@
+DPKGBUILD = @DPKGBUILD@
+DPKGBUILD_VERSION = @DPKGBUILD_VERSION@
+DPKG_VERSION = @DPKG_VERSION@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+HAVE_ALIEN = @HAVE_ALIEN@
+HAVE_DPKG = @HAVE_DPKG@
+HAVE_DPKGBUILD = @HAVE_DPKGBUILD@
+HAVE_RPM = @HAVE_RPM@
+HAVE_RPMBUILD = @HAVE_RPMBUILD@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+KERNELCPPFLAGS = @KERNELCPPFLAGS@
+KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LINUX = @LINUX@
+LINUX_OBJ = @LINUX_OBJ@
+LINUX_SYMBOLS = @LINUX_SYMBOLS@
+LINUX_VERSION = @LINUX_VERSION@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+RELEASE = @RELEASE@
+RPM = @RPM@
+RPMBUILD = @RPMBUILD@
+RPMBUILD_VERSION = @RPMBUILD_VERSION@
+RPM_DEFINE_COMMON = @RPM_DEFINE_COMMON@
+RPM_DEFINE_DKMS = @RPM_DEFINE_DKMS@
+RPM_DEFINE_KMOD = @RPM_DEFINE_KMOD@
+RPM_DEFINE_UTIL = @RPM_DEFINE_UTIL@
+RPM_SPEC_DIR = @RPM_SPEC_DIR@
+RPM_VERSION = @RPM_VERSION@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPL_CONFIG = @SPL_CONFIG@
+SPL_META_ALIAS = @SPL_META_ALIAS@
+SPL_META_AUTHOR = @SPL_META_AUTHOR@
+SPL_META_DATA = @SPL_META_DATA@
+SPL_META_LICENSE = @SPL_META_LICENSE@
+SPL_META_LT_AGE = @SPL_META_LT_AGE@
+SPL_META_LT_CURRENT = @SPL_META_LT_CURRENT@
+SPL_META_LT_REVISION = @SPL_META_LT_REVISION@
+SPL_META_NAME = @SPL_META_NAME@
+SPL_META_RELEASE = @SPL_META_RELEASE@
+SPL_META_VERSION = @SPL_META_VERSION@
+SRPM_DEFINE_COMMON = @SRPM_DEFINE_COMMON@
+SRPM_DEFINE_DKMS = @SRPM_DEFINE_DKMS@
+SRPM_DEFINE_KMOD = @SRPM_DEFINE_KMOD@
+SRPM_DEFINE_UTIL = @SRPM_DEFINE_UTIL@
+STRIP = @STRIP@
+VENDOR = @VENDOR@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+DEFAULT_INCLUDES = -include ${top_builddir}/spl_config.h \
+ -I$(top_srcdir)/lib
+AM_LIBTOOLFLAGS = --silent
+AM_CPPFLAGS = -D__USE_LARGEFILE64
+AM_CFLAGS = -Wall -Wshadow -Wstrict-prototypes -fno-strict-aliasing \
+ ${DEBUG_CFLAGS}
+splat_SOURCES = splat.c
+splat_LDFLAGS = $(top_builddir)/lib/libcommon.la
+EXTRA_DIST = splat.h
+all: all-am
+
+.SUFFIXES:
+.SUFFIXES: .c .lo .o .obj
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(top_srcdir)/config/Rules.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu cmd/splat/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu cmd/splat/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+$(top_srcdir)/config/Rules.am $(am__empty):
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-sbinPROGRAMS: $(sbin_PROGRAMS)
+ @$(NORMAL_INSTALL)
+ @list='$(sbin_PROGRAMS)'; test -n "$(sbindir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(sbindir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(sbindir)" || exit 1; \
+ fi; \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed 's/$(EXEEXT)$$//' | \
+ while read p p1; do if test -f $$p \
+ || test -f $$p1 \
+ ; then echo "$$p"; echo "$$p"; else :; fi; \
+ done | \
+ sed -e 'p;s,.*/,,;n;h' \
+ -e 's|.*|.|' \
+ -e 'p;x;s,.*/,,;s/$(EXEEXT)$$//;$(transform);s/$$/$(EXEEXT)/' | \
+ sed 'N;N;N;s,\n, ,g' | \
+ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1 } \
+ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+ if ($$2 == $$4) files[d] = files[d] " " $$1; \
+ else { print "f", $$3 "/" $$4, $$1; } } \
+ END { for (d in files) print "f", d, files[d] }' | \
+ while read type dir files; do \
+ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+ test -z "$$files" || { \
+ echo " $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files '$(DESTDIR)$(sbindir)$$dir'"; \
+ $(INSTALL_PROGRAM_ENV) $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_PROGRAM) $$files "$(DESTDIR)$(sbindir)$$dir" || exit $$?; \
+ } \
+ ; done
+
+uninstall-sbinPROGRAMS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(sbin_PROGRAMS)'; test -n "$(sbindir)" || list=; \
+ files=`for p in $$list; do echo "$$p"; done | \
+ sed -e 'h;s,^.*/,,;s/$(EXEEXT)$$//;$(transform)' \
+ -e 's/$$/$(EXEEXT)/' \
+ `; \
+ test -n "$$list" || exit 0; \
+ echo " ( cd '$(DESTDIR)$(sbindir)' && rm -f" $$files ")"; \
+ cd "$(DESTDIR)$(sbindir)" && rm -f $$files
+
+clean-sbinPROGRAMS:
+ @list='$(sbin_PROGRAMS)'; test -n "$$list" || exit 0; \
+ echo " rm -f" $$list; \
+ rm -f $$list || exit $$?; \
+ test -n "$(EXEEXT)" || exit 0; \
+ list=`for p in $$list; do echo "$$p"; done | sed 's/$(EXEEXT)$$//'`; \
+ echo " rm -f" $$list; \
+ rm -f $$list
+
+splat$(EXEEXT): $(splat_OBJECTS) $(splat_DEPENDENCIES) $(EXTRA_splat_DEPENDENCIES)
+ @rm -f splat$(EXEEXT)
+ $(AM_V_CCLD)$(splat_LINK) $(splat_OBJECTS) $(splat_LDADD) $(LIBS)
+
+mostlyclean-compile:
+ -rm -f *.$(OBJEXT)
+
+distclean-compile:
+ -rm -f *.tab.c
+
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/splat.Po@am__quote@
+
+.c.o:
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $<
+
+.c.obj:
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'`
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'`
+
+.c.lo:
+@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
+@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $<
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(PROGRAMS)
+installdirs:
+ for dir in "$(DESTDIR)$(sbindir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool clean-sbinPROGRAMS \
+ mostlyclean-am
+
+distclean: distclean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+distclean-am: clean-am distclean-compile distclean-generic \
+ distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-sbinPROGRAMS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -rf ./$(DEPDIR)
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-compile mostlyclean-generic \
+ mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-sbinPROGRAMS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+ clean-libtool clean-sbinPROGRAMS cscopelist-am ctags ctags-am \
+ distclean distclean-compile distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-sbinPROGRAMS install-strip \
+ installcheck installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-compile \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags tags-am uninstall uninstall-am uninstall-sbinPROGRAMS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/spl/cmd/splat.c b/spl/cmd/splat/splat.c
similarity index 100%
rename from spl/cmd/splat.c
rename to spl/cmd/splat/splat.c
diff --git a/spl/cmd/splat.h b/spl/cmd/splat/splat.h
similarity index 100%
rename from spl/cmd/splat.h
rename to spl/cmd/splat/splat.h
diff --git a/spl/cmd/splslab/Makefile.am b/spl/cmd/splslab/Makefile.am
new file mode 100644
index 000000000000..b18d52d7ecef
--- /dev/null
+++ b/spl/cmd/splslab/Makefile.am
@@ -0,0 +1,2 @@
+bin_SCRIPTS = splslab.py
+EXTRA_DIST = $(bin_SCRIPTS)
diff --git a/spl/cmd/splslab/Makefile.in b/spl/cmd/splslab/Makefile.in
new file mode 100644
index 000000000000..f3839a8dcd0f
--- /dev/null
+++ b/spl/cmd/splslab/Makefile.in
@@ -0,0 +1,563 @@
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+subdir = cmd/splslab
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/config/libtool.m4 \
+ $(top_srcdir)/config/ltoptions.m4 \
+ $(top_srcdir)/config/ltsugar.m4 \
+ $(top_srcdir)/config/ltversion.m4 \
+ $(top_srcdir)/config/lt~obsolete.m4 \
+ $(top_srcdir)/config/spl-build.m4 \
+ $(top_srcdir)/config/spl-meta.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/spl_config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+am__installdirs = "$(DESTDIR)$(bindir)"
+SCRIPTS = $(bin_SCRIPTS)
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+SOURCES =
+DIST_SOURCES =
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+ALIEN = @ALIEN@
+ALIEN_VERSION = @ALIEN_VERSION@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEBUG_CFLAGS = @DEBUG_CFLAGS@
+DEBUG_KMEM = @DEBUG_KMEM@
+DEBUG_KMEM_TRACKING = @DEBUG_KMEM_TRACKING@
+DEBUG_SPL = @DEBUG_SPL@
+DEFAULT_PACKAGE = @DEFAULT_PACKAGE@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DPKG = @DPKG@
+DPKGBUILD = @DPKGBUILD@
+DPKGBUILD_VERSION = @DPKGBUILD_VERSION@
+DPKG_VERSION = @DPKG_VERSION@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+HAVE_ALIEN = @HAVE_ALIEN@
+HAVE_DPKG = @HAVE_DPKG@
+HAVE_DPKGBUILD = @HAVE_DPKGBUILD@
+HAVE_RPM = @HAVE_RPM@
+HAVE_RPMBUILD = @HAVE_RPMBUILD@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+KERNELCPPFLAGS = @KERNELCPPFLAGS@
+KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LINUX = @LINUX@
+LINUX_OBJ = @LINUX_OBJ@
+LINUX_SYMBOLS = @LINUX_SYMBOLS@
+LINUX_VERSION = @LINUX_VERSION@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+RANLIB = @RANLIB@
+RELEASE = @RELEASE@
+RPM = @RPM@
+RPMBUILD = @RPMBUILD@
+RPMBUILD_VERSION = @RPMBUILD_VERSION@
+RPM_DEFINE_COMMON = @RPM_DEFINE_COMMON@
+RPM_DEFINE_DKMS = @RPM_DEFINE_DKMS@
+RPM_DEFINE_KMOD = @RPM_DEFINE_KMOD@
+RPM_DEFINE_UTIL = @RPM_DEFINE_UTIL@
+RPM_SPEC_DIR = @RPM_SPEC_DIR@
+RPM_VERSION = @RPM_VERSION@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPL_CONFIG = @SPL_CONFIG@
+SPL_META_ALIAS = @SPL_META_ALIAS@
+SPL_META_AUTHOR = @SPL_META_AUTHOR@
+SPL_META_DATA = @SPL_META_DATA@
+SPL_META_LICENSE = @SPL_META_LICENSE@
+SPL_META_LT_AGE = @SPL_META_LT_AGE@
+SPL_META_LT_CURRENT = @SPL_META_LT_CURRENT@
+SPL_META_LT_REVISION = @SPL_META_LT_REVISION@
+SPL_META_NAME = @SPL_META_NAME@
+SPL_META_RELEASE = @SPL_META_RELEASE@
+SPL_META_VERSION = @SPL_META_VERSION@
+SRPM_DEFINE_COMMON = @SRPM_DEFINE_COMMON@
+SRPM_DEFINE_DKMS = @SRPM_DEFINE_DKMS@
+SRPM_DEFINE_KMOD = @SRPM_DEFINE_KMOD@
+SRPM_DEFINE_UTIL = @SRPM_DEFINE_UTIL@
+STRIP = @STRIP@
+VENDOR = @VENDOR@
+VERSION = @VERSION@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+bin_SCRIPTS = splslab.py
+EXTRA_DIST = $(bin_SCRIPTS)
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu cmd/splslab/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu cmd/splslab/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+install-binSCRIPTS: $(bin_SCRIPTS)
+ @$(NORMAL_INSTALL)
+ @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(bindir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(bindir)" || exit 1; \
+ fi; \
+ for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ if test -f "$$d$$p"; then echo "$$d$$p"; echo "$$p"; else :; fi; \
+ done | \
+ sed -e 'p;s,.*/,,;n' \
+ -e 'h;s|.*|.|' \
+ -e 'p;x;s,.*/,,;$(transform)' | sed 'N;N;N;s,\n, ,g' | \
+ $(AWK) 'BEGIN { files["."] = ""; dirs["."] = 1; } \
+ { d=$$3; if (dirs[d] != 1) { print "d", d; dirs[d] = 1 } \
+ if ($$2 == $$4) { files[d] = files[d] " " $$1; \
+ if (++n[d] == $(am__install_max)) { \
+ print "f", d, files[d]; n[d] = 0; files[d] = "" } } \
+ else { print "f", d "/" $$4, $$1 } } \
+ END { for (d in files) print "f", d, files[d] }' | \
+ while read type dir files; do \
+ if test "$$dir" = .; then dir=; else dir=/$$dir; fi; \
+ test -z "$$files" || { \
+ echo " $(INSTALL_SCRIPT) $$files '$(DESTDIR)$(bindir)$$dir'"; \
+ $(INSTALL_SCRIPT) $$files "$(DESTDIR)$(bindir)$$dir" || exit $$?; \
+ } \
+ ; done
+
+uninstall-binSCRIPTS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(bin_SCRIPTS)'; test -n "$(bindir)" || exit 0; \
+ files=`for p in $$list; do echo "$$p"; done | \
+ sed -e 's,.*/,,;$(transform)'`; \
+ dir='$(DESTDIR)$(bindir)'; $(am__uninstall_files_from_dir)
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+tags TAGS:
+
+ctags CTAGS:
+
+cscope cscopelist:
+
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(SCRIPTS)
+installdirs:
+ for dir in "$(DESTDIR)$(bindir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am: install-binSCRIPTS
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-binSCRIPTS
+
+.MAKE: install-am install-strip
+
+.PHONY: all all-am check check-am clean clean-generic clean-libtool \
+ cscopelist-am ctags-am distclean distclean-generic \
+ distclean-libtool distdir dvi dvi-am html html-am info info-am \
+ install install-am install-binSCRIPTS install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-generic \
+ mostlyclean-libtool pdf pdf-am ps ps-am tags-am uninstall \
+ uninstall-am uninstall-binSCRIPTS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/spl/cmd/splslab/splslab.py b/spl/cmd/splslab/splslab.py
new file mode 100755
index 000000000000..160fb2776e6f
--- /dev/null
+++ b/spl/cmd/splslab/splslab.py
@@ -0,0 +1,202 @@
+#!/usr/bin/python
+
+import sys
+import time
+import getopt
+import re
+import signal
+from collections import defaultdict
+
+class Stat:
+ # flag definitions based on kmem.h
+ NOTOUCH = 1
+ NODEBUG = 2
+ KMEM = 32
+ VMEM = 64
+ SLAB = 128
+ OFFSLAB = 256
+ NOEMERGENCY = 512
+ DEADLOCKED = 16384
+ GROWING = 32768
+ REAPING = 65536
+ DESTROY = 131072
+
+ fdefs = {
+ NOTOUCH : "NTCH",
+ NODEBUG : "NDBG",
+ KMEM : "KMEM",
+ VMEM : "VMEM",
+ SLAB : "SLAB",
+ OFFSLAB : "OFSL",
+ NOEMERGENCY : "NEMG",
+ DEADLOCKED : "DDLK",
+ GROWING : "GROW",
+ REAPING : "REAP",
+ DESTROY : "DSTR"
+ }
+
+ def __init__(self, name, flags, size, alloc, slabsize, objsize):
+ self._name = name
+ self._flags = self.f2str(flags)
+ self._size = size
+ self._alloc = alloc
+ self._slabsize = slabsize
+ self._objsize = objsize
+
+ def f2str(self, flags):
+ fstring = ''
+ for k in Stat.fdefs.keys():
+ if flags & k:
+ fstring = fstring + Stat.fdefs[k] + '|'
+
+ fstring = fstring[:-1]
+ return fstring
+
+class CumulativeStat:
+ def __init__(self, skey="a"):
+ self._size = 0
+ self._alloc = 0
+ self._pct = 0
+ self._skey = skey
+ self._regexp = \
+ re.compile(r'(\w+)\s+(\w+)\s+(\w+)\s+(\w+)\s+(\w+)\s+(\w+)\s+')
+ self._stats = defaultdict(list)
+
+ # Add another stat to the dictionary and re-calculate the totals
+ def add(self, s):
+ key = 0
+ if self._skey == "a":
+ key = s._alloc
+ else:
+ key = s._size
+ self._stats[key].append(s)
+ self._size = self._size + s._size
+ self._alloc = self._alloc + s._alloc
+ if self._size:
+ self._pct = self._alloc * 100 / self._size
+ else:
+ self._pct = 0
+
+ # Parse the slab info in the procfs
+ # Calculate cumulative stats
+ def slab_update(self):
+ k = [line.strip() for line in open('/proc/spl/kmem/slab')]
+
+ if not k:
+ sys.stderr.write("No SPL slab stats found\n")
+ sys.exit(1)
+
+ # drop the two header lines that precede the per-cache rows
+ del k[0:2]
+
+ for s in k:
+ if not s:
+ continue
+ m = self._regexp.match(s)
+ if m:
+ self.add(Stat(m.group(1), int(m.group(2),16), int(m.group(3)),
+ int(m.group(4)), int(m.group(5)), int(m.group(6))))
+ else:
+ sys.stderr.write("Error: unexpected input format\n" % s)
+ exit(-1)
+
+ def show_header(self):
+ sys.stdout.write("\n%25s %20s %15s %15s %15s %15s\n\n" % \
+ ("cache name", "flags", "size", "alloc", "slabsize", "objsize"))
+
+ # Show up to the number of 'rows' of output sorted in descending order
+ # by the key specified earlier; if rows == 0, all rows are shown
+ def show(self, rows):
+ self.show_header()
+ i = 1
+ done = False
+ for k in reversed(sorted(self._stats.keys())):
+ for s in self._stats[k]:
+ sys.stdout.write("%25s %20s %15d %15d %15d %15d\n" % \
+ (s._name, s._flags, s._size, s._alloc, \
+ s._slabsize, s._objsize))
+ i = i + 1
+ if rows != 0 and i > rows:
+ done = True
+ break
+ if done:
+ break
+ sys.stdout.write("%25s %36d %15d (%d%%)\n\n" % \
+ ("Totals:", self._size, self._alloc, self._pct))
+
+def usage():
+ cmd = "Usage: splslab.py [-n|--num-rows] number [-s|--sort-by] " + \
+ "[interval] [count]";
+ sys.stderr.write("%s\n" % cmd)
+ sys.stderr.write("\t-h : print help\n")
+ sys.stderr.write("\t-n : --num-rows N : limit output to N top " +
+ "largest slabs (default: all)\n")
+ sys.stderr.write("\t-s : --sort-by key : sort output in descending " +
+ "order by total size (s)\n\t\tor allocated size (a) " +
+ "(default: a)\n")
+ sys.stderr.write("\tinterval : repeat every interval seconds\n")
+ sys.stderr.write("\tcount : output statistics count times and exit\n")
+
+
+def main():
+
+ rows = 0
+ count = 0
+ skey = "a"
+ interval = 1
+
+ signal.signal(signal.SIGINT, signal.SIG_DFL)
+
+ try:
+ opts, args = getopt.getopt(
+ sys.argv[1:],
+ "n:s:h",
+ [
+ "num-rows",
+ "sort-by",
+ "help"
+ ]
+ )
+ except getopt.error as e:
+ sys.stderr.write("Error: %s\n" % e.msg)
+ usage()
+ exit(-1)
+
+ i = 1
+ for opt, arg in opts:
+ if opt in ('-n', '--num-rows'):
+ rows = int(arg)
+ i = i + 2
+ elif opt in ('-s', '--sort-by'):
+ if arg != "s" and arg != "a":
+ sys.stderr.write("Error: invalid sorting key \"%s\"\n" % arg)
+ usage()
+ exit(-1)
+ skey = arg
+ i = i + 2
+ elif opt in ('-h', '--help'):
+ usage()
+ exit(0)
+ else:
+ break
+
+ args = sys.argv[i:]
+
+ interval = int(args[0]) if len(args) else interval
+ count = int(args[1]) if len(args) > 1 else count
+
+ i = 0
+ while True:
+ cs = CumulativeStat(skey)
+ cs.slab_update()
+ cs.show(rows)
+
+ i = i + 1
+ if count and i >= count:
+ break
+
+ time.sleep(interval)
+
+ return 0
+
+if __name__ == '__main__':
+ main()
diff --git a/spl/config/deb.am b/spl/config/deb.am
index a2bad0260d32..e05a175a4dce 100644
--- a/spl/config/deb.am
+++ b/spl/config/deb.am
@@ -29,23 +29,27 @@ deb-local:
fi)
deb-kmod: deb-local rpm-kmod
-if CONFIG_KERNEL
name=${PACKAGE}; \
version=${VERSION}-${RELEASE}; \
arch=`$(RPM) -qp $${name}-kmod-$${version}.src.rpm --qf %{arch} | tail -1`; \
pkg1=kmod-$${name}*$${version}.$${arch}.rpm; \
fakeroot $(ALIEN) --bump=0 --scripts --to-deb $$pkg1; \
$(RM) $$pkg1
-endif
+
+deb-dkms: deb-local rpm-dkms
+ name=${PACKAGE}; \
+ version=${VERSION}-${RELEASE}; \
+ arch=`$(RPM) -qp $${name}-dkms-$${version}.src.rpm --qf %{arch} | tail -1`; \
+ pkg1=$${name}-dkms-$${version}.$${arch}.rpm; \
+ fakeroot $(ALIEN) --bump=0 --scripts --to-deb $$pkg1; \
+ $(RM) $$pkg1
deb-utils: deb-local rpm-utils
-if CONFIG_USER
name=${PACKAGE}; \
version=${VERSION}-${RELEASE}; \
arch=`$(RPM) -qp $${name}-$${version}.src.rpm --qf %{arch} | tail -1`; \
pkg1=$${name}-$${version}.$${arch}.rpm; \
fakeroot $(ALIEN) --bump=0 --scripts --to-deb $$pkg1; \
$(RM) $$pkg1
-endif
-deb: deb-kmod deb-utils
+deb: deb-kmod deb-dkms deb-utils
diff --git a/spl/config/depcomp b/spl/config/depcomp
index fc98710e2a1d..b39f98f9ae9f 100755
--- a/spl/config/depcomp
+++ b/spl/config/depcomp
@@ -1,9 +1,9 @@
#! /bin/sh
# depcomp - compile a program generating dependencies as side-effects
-scriptversion=2013-05-30.07; # UTC
+scriptversion=2016-01-11.22; # UTC
-# Copyright (C) 1999-2014 Free Software Foundation, Inc.
+# Copyright (C) 1999-2017 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -786,6 +786,6 @@ exit 0
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
-# time-stamp-time-zone: "UTC"
+# time-stamp-time-zone: "UTC0"
# time-stamp-end: "; # UTC"
# End:
diff --git a/spl/config/spl-build.m4 b/spl/config/spl-build.m4
index 603fc65ec356..accf6759bfbe 100644
--- a/spl/config/spl-build.m4
+++ b/spl/config/spl-build.m4
@@ -50,10 +50,12 @@ AC_DEFUN([SPL_AC_CONFIG_KERNEL], [
SPL_AC_KMEM_CACHE_ALLOCFLAGS
SPL_AC_WAIT_ON_BIT
SPL_AC_INODE_LOCK
- SPL_AC_MUTEX_OWNER
SPL_AC_GROUP_INFO_GID
+ SPL_AC_KMEM_CACHE_CREATE_USERCOPY
SPL_AC_WAIT_QUEUE_ENTRY_T
SPL_AC_WAIT_QUEUE_HEAD_ENTRY
+ SPL_AC_KERNEL_WRITE
+ SPL_AC_KERNEL_READ
])
AC_DEFUN([SPL_AC_MODULE_SYMVERS], [
@@ -114,6 +116,7 @@ AC_DEFUN([SPL_AC_KERNEL], [
if test "$kernelsrc" = "NONE"; then
kernsrcver=NONE
fi
+ withlinux=yes
fi
AC_MSG_RESULT([$kernelsrc])
@@ -126,7 +129,7 @@ AC_DEFUN([SPL_AC_KERNEL], [
AC_MSG_CHECKING([kernel build directory])
if test -z "$kernelbuild"; then
- if test -e "/lib/modules/$(uname -r)/build"; then
+ if test x$withlinux != xyes -a -e "/lib/modules/$(uname -r)/build"; then
kernelbuild=`readlink -f /lib/modules/$(uname -r)/build`
elif test -d ${kernelsrc}-obj/${target_cpu}/${target_cpu}; then
kernelbuild=${kernelsrc}-obj/${target_cpu}/${target_cpu}
@@ -1585,28 +1588,21 @@ AC_DEFUN([SPL_AC_INODE_LOCK], [
])
dnl #
-dnl # Check whether mutex has owner with task_struct type.
-dnl #
-dnl # Note that before Linux 3.0, mutex owner is of type thread_info.
-dnl #
-dnl # Note that in Linux 3.18, the condition for owner is changed from
-dnl # defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP) to
-dnl # defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER)
+dnl # 4.9 API change
+dnl # group_info changed from 2d array via ->blocks to 1d array via ->gid
dnl #
-AC_DEFUN([SPL_AC_MUTEX_OWNER], [
- AC_MSG_CHECKING([whether mutex has owner])
+AC_DEFUN([SPL_AC_GROUP_INFO_GID], [
+ AC_MSG_CHECKING([whether group_info->gid exists])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
SPL_LINUX_TRY_COMPILE([
- #include <linux/mutex.h>
- #include <linux/spinlock.h>
+ #include <linux/cred.h>
],[
- DEFINE_MUTEX(m);
- struct task_struct *t __attribute__ ((unused));
- t = m.owner;
+ struct group_info *gi = groups_alloc(1);
+ gi->gid[0] = KGIDT_INIT(0);
],[
AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_MUTEX_OWNER, 1, [yes])
+ AC_DEFINE(HAVE_GROUP_INFO_GID, 1, [group_info->gid exists])
],[
AC_MSG_RESULT(no)
])
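
For reference, the HAVE_GROUP_INFO_GID result produced by this probe is the
kind of symbol a small compat helper consumes; a minimal sketch, with an
illustrative spl_kgid_at() name that is not part of this patch:

#include <linux/cred.h>

/*
 * Illustrative helper: hide the 4.9 group_info layout change behind one
 * accessor. Pre-4.9 kernels expose the 2d ->blocks layout through the
 * GROUP_AT() macro; 4.9+ kernels use the flat ->gid[] array directly.
 */
static inline kgid_t
spl_kgid_at(struct group_info *gi, int i)
{
#ifdef HAVE_GROUP_INFO_GID
	return (gi->gid[i]);
#else
	return (GROUP_AT(gi, i));
#endif
}
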
@@ -1614,21 +1610,35 @@ AC_DEFUN([SPL_AC_MUTEX_OWNER], [
])
dnl #
-dnl # 4.9 API change
-dnl # group_info changed from 2d array via >blocks to 1d array via ->gid
+dnl # grsecurity API change,
+dnl # kmem_cache_create() with SLAB_USERCOPY flag replaced by
+dnl # kmem_cache_create_usercopy().
dnl #
-AC_DEFUN([SPL_AC_GROUP_INFO_GID], [
- AC_MSG_CHECKING([whether group_info->gid exists])
+AC_DEFUN([SPL_AC_KMEM_CACHE_CREATE_USERCOPY], [
+ AC_MSG_CHECKING([whether kmem_cache_create_usercopy() exists])
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
SPL_LINUX_TRY_COMPILE([
- #include <linux/cred.h>
- ],[
- struct group_info *gi = groups_alloc(1);
- gi->gid[0] = KGIDT_INIT(0);
+ #include <linux/slab.h>
+ static void ctor(void *foo)
+ {
+ // fake ctor
+ }
+ ],[
+ struct kmem_cache *skc_linux_cache;
+ const char *name = "test";
+ size_t size = 4096;
+ size_t align = 8;
+ unsigned long flags = 0;
+ size_t useroffset = 0;
+ size_t usersize = size - useroffset;
+
+ skc_linux_cache = kmem_cache_create_usercopy(
+ name, size, align, flags, useroffset, usersize, ctor);
],[
AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_GROUP_INFO_GID, 1, [group_info->gid exists])
+ AC_DEFINE(HAVE_KMEM_CACHE_CREATE_USERCOPY, 1,
+ [kmem_cache_create_usercopy() exists])
],[
AC_MSG_RESULT(no)
])
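
The HAVE_KMEM_CACHE_CREATE_USERCOPY define lends itself to a wrapper along
these lines; a minimal sketch with an illustrative name, assuming the whole
object should be whitelisted for user copies:

#include <linux/slab.h>

/*
 * Illustrative wrapper: on hardened kernels the user-accessible region
 * must be declared at cache-creation time, so whitelist the entire
 * object (useroffset 0, usersize == size); everywhere else fall back
 * to plain kmem_cache_create().
 */
static inline struct kmem_cache *
spl_cache_create(const char *name, size_t size, size_t align,
    unsigned long flags, void (*ctor)(void *))
{
#ifdef HAVE_KMEM_CACHE_CREATE_USERCOPY
	return (kmem_cache_create_usercopy(name, size, align, flags,
	    0, size, ctor));
#else
	return (kmem_cache_create(name, size, align, flags, ctor));
#endif
}
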
@@ -1687,3 +1697,61 @@ AC_DEFUN([SPL_AC_WAIT_QUEUE_HEAD_ENTRY], [
AC_MSG_RESULT(no)
])
])
+
+dnl #
+dnl # 4.14 API change
+dnl # kernel_write(), which was introduced in 3.9, was updated to take
+dnl # the offset as a pointer, which is needed by vn_rdwr().
+dnl #
+AC_DEFUN([SPL_AC_KERNEL_WRITE], [
+ AC_MSG_CHECKING([whether kernel_write() takes loff_t pointer])
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="-Werror"
+ SPL_LINUX_TRY_COMPILE([
+ #include <linux/fs.h>
+ ],[
+ struct file *file = NULL;
+ const void *buf = NULL;
+ size_t count = 0;
+ loff_t *pos = NULL;
+ ssize_t ret;
+
+ ret = kernel_write(file, buf, count, pos);
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_KERNEL_WRITE_PPOS, 1,
+ [kernel_write() takes loff_t pointer])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+ EXTRA_KCFLAGS="$tmp_flags"
+])
+
+dnl #
+dnl # 4.14 API change
+dnl # kernel_read(), which has existed forever, was updated to take
+dnl # the offset as a pointer, which is needed by vn_rdwr().
+dnl #
+AC_DEFUN([SPL_AC_KERNEL_READ], [
+ AC_MSG_CHECKING([whether kernel_read() takes loff_t pointer])
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="-Werror"
+ SPL_LINUX_TRY_COMPILE([
+ #include <linux/fs.h>
+ ],[
+ struct file *file = NULL;
+ void *buf = NULL;
+ size_t count = 0;
+ loff_t *pos = NULL;
+ ssize_t ret;
+
+ ret = kernel_read(file, buf, count, pos);
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_KERNEL_READ_PPOS, 1,
+ [kernel_read() takes loff_t pointer])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+ EXTRA_KCFLAGS="$tmp_flags"
+])
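
Both probes feed a compat shim of roughly this shape; a sketch under the
assumption that the pre-4.14 fallback should advance the offset itself,
with an illustrative name (the kernel_read() case is symmetric):

#include <linux/fs.h>

/*
 * Illustrative shim: with HAVE_KERNEL_WRITE_PPOS the kernel takes a
 * loff_t pointer and updates it; the fallback passes the offset by
 * value and emulates the pointer update for callers like vn_rdwr().
 */
static inline ssize_t
spl_kernel_write(struct file *file, const void *buf, size_t count,
    loff_t *pos)
{
#ifdef HAVE_KERNEL_WRITE_PPOS
	return (kernel_write(file, buf, count, pos));
#else
	ssize_t ret = kernel_write(file, buf, count, *pos);
	if (ret > 0)
		*pos += ret;
	return (ret);
#endif
}
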
diff --git a/spl/config/spl-meta.m4 b/spl/config/spl-meta.m4
index 0561fbbc580e..fbfaec4ab519 100644
--- a/spl/config/spl-meta.m4
+++ b/spl/config/spl-meta.m4
@@ -63,6 +63,14 @@ AC_DEFUN([SPL_AC_META], [
if test -n "${_release}"; then
SPL_META_RELEASE=${_release}
_spl_ac_meta_type="git describe"
+ else
+ _match="${SPL_META_NAME}-${SPL_META_VERSION}-${SPL_META_RELEASE}"
+ _alias=$(git describe --match=${_match} 2>/dev/null)
+ _release=$(echo ${_alias}|cut -f3- -d'-'|sed 's/-/_/g')
+ if test -n "${_release}"; then
+ SPL_META_RELEASE=${_release}
+ _spl_ac_meta_type="git describe"
+ fi
fi
fi
diff --git a/spl/configure b/spl/configure
index 608eff9beffa..c7a7c43caa53 100755
--- a/spl/configure
+++ b/spl/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for spl 0.6.5.11.
+# Generated by GNU Autoconf 2.69 for spl 0.7.3.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -587,8 +587,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='spl'
PACKAGE_TARNAME='spl'
-PACKAGE_VERSION='0.6.5.11'
-PACKAGE_STRING='spl 0.6.5.11'
+PACKAGE_VERSION='0.7.3'
+PACKAGE_STRING='spl 0.7.3'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -1389,7 +1389,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures spl 0.6.5.11 to adapt to many kinds of systems.
+\`configure' configures spl 0.7.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1461,7 +1461,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of spl 0.6.5.11:";;
+ short | recursive ) echo "Configuration of spl 0.7.3:";;
esac
cat <<\_ACEOF
@@ -1586,7 +1586,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-spl configure 0.6.5.11
+spl configure 0.7.3
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -1864,7 +1864,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by spl $as_me 0.6.5.11, which was
+It was created by spl $as_me 0.7.3, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2297,6 +2297,14 @@ _ACEOF
if test -n "${_release}"; then
SPL_META_RELEASE=${_release}
_spl_ac_meta_type="git describe"
+ else
+ _match="${SPL_META_NAME}-${SPL_META_VERSION}-${SPL_META_RELEASE}"
+ _alias=$(git describe --match=${_match} 2>/dev/null)
+ _release=$(echo ${_alias}|cut -f3- -d'-'|sed 's/-/_/g')
+ if test -n "${_release}"; then
+ SPL_META_RELEASE=${_release}
+ _spl_ac_meta_type="git describe"
+ fi
fi
fi
@@ -2997,7 +3005,7 @@ fi
# Define the identity of the package.
PACKAGE='spl'
- VERSION='0.6.5.11'
+ VERSION='0.7.3'
cat >>confdefs.h <<_ACEOF
@@ -12424,6 +12432,7 @@ $as_echo_n "checking kernel source directory... " >&6; }
if test "$kernelsrc" = "NONE"; then
kernsrcver=NONE
fi
+ withlinux=yes
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $kernelsrc" >&5
@@ -12438,7 +12447,7 @@ $as_echo "$kernelsrc" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking kernel build directory" >&5
$as_echo_n "checking kernel build directory... " >&6; }
if test -z "$kernelbuild"; then
- if test -e "/lib/modules/$(uname -r)/build"; then
+ if test x$withlinux != xyes -a -e "/lib/modules/$(uname -r)/build"; then
kernelbuild=`readlink -f /lib/modules/$(uname -r)/build`
elif test -d ${kernelsrc}-obj/${target_cpu}/${target_cpu}; then
kernelbuild=${kernelsrc}-obj/${target_cpu}/${target_cpu}
@@ -15240,8 +15249,8 @@ fi
EXTRA_KCFLAGS="$tmp_flags"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether mutex has owner" >&5
-$as_echo_n "checking whether mutex has owner... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether group_info->gid exists" >&5
+$as_echo_n "checking whether group_info->gid exists... " >&6; }
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
@@ -15249,16 +15258,14 @@ $as_echo_n "checking whether mutex has owner... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/mutex.h>
- #include <linux/spinlock.h>
+ #include <linux/cred.h>
int
main (void)
{
- DEFINE_MUTEX(m);
- struct task_struct *t __attribute__ ((unused));
- t = m.owner;
+ struct group_info *gi = groups_alloc(1);
+ gi->gid[0] = KGIDT_INIT(0);
;
return 0;
@@ -15286,7 +15293,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_MUTEX_OWNER 1" >>confdefs.h
+$as_echo "#define HAVE_GROUP_INFO_GID 1" >>confdefs.h
else
@@ -15305,8 +15312,8 @@ fi
EXTRA_KCFLAGS="$tmp_flags"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether group_info->gid exists" >&5
-$as_echo_n "checking whether group_info->gid exists... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether kmem_cache_create_usercopy() exists" >&5
+$as_echo_n "checking whether kmem_cache_create_usercopy() exists... " >&6; }
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
@@ -15314,14 +15321,26 @@ $as_echo_n "checking whether group_info->gid exists... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/cred.h>
+ #include <linux/slab.h>
+ static void ctor(void *foo)
+ {
+ // fake ctor
+ }
int
main (void)
{
- struct group_info *gi = groups_alloc(1);
- gi->gid[0] = KGIDT_INIT(0);
+ struct kmem_cache *skc_linux_cache;
+ const char *name = "test";
+ size_t size = 4096;
+ size_t align = 8;
+ unsigned long flags = 0;
+ size_t useroffset = 0;
+ size_t usersize = size - useroffset;
+
+ skc_linux_cache = kmem_cache_create_usercopy(
+ name, size, align, flags, useroffset, usersize, ctor);
;
return 0;
@@ -15349,7 +15368,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_GROUP_INFO_GID 1" >>confdefs.h
+$as_echo "#define HAVE_KMEM_CACHE_CREATE_USERCOPY 1" >>confdefs.h
else
@@ -15499,6 +15518,142 @@ fi
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether kernel_write() takes loff_t pointer" >&5
+$as_echo_n "checking whether kernel_write() takes loff_t pointer... " >&6; }
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="-Werror"
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/fs.h>
+
+int
+main (void)
+{
+
+ struct file *file = NULL;
+ const void *buf = NULL;
+ size_t count = 0;
+ loff_t *pos = NULL;
+ ssize_t ret;
+
+ ret = kernel_write(file, buf, count, pos);
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_KERNEL_WRITE_PPOS 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+ EXTRA_KCFLAGS="$tmp_flags"
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether kernel_read() takes loff_t pointer" >&5
+$as_echo_n "checking whether kernel_read() takes loff_t pointer... " >&6; }
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="-Werror"
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/fs.h>
+
+int
+main (void)
+{
+
+ struct file *file = NULL;
+ void *buf = NULL;
+ size_t count = 0;
+ loff_t *pos = NULL;
+ ssize_t ret;
+
+ ret = kernel_read(file, buf, count, pos);
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_KERNEL_READ_PPOS 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+ EXTRA_KCFLAGS="$tmp_flags"
+
;;
user) ;;
all)
@@ -15541,6 +15696,7 @@ $as_echo_n "checking kernel source directory... " >&6; }
if test "$kernelsrc" = "NONE"; then
kernsrcver=NONE
fi
+ withlinux=yes
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $kernelsrc" >&5
@@ -15555,7 +15711,7 @@ $as_echo "$kernelsrc" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking kernel build directory" >&5
$as_echo_n "checking kernel build directory... " >&6; }
if test -z "$kernelbuild"; then
- if test -e "/lib/modules/$(uname -r)/build"; then
+ if test x$withlinux != xyes -a -e "/lib/modules/$(uname -r)/build"; then
kernelbuild=`readlink -f /lib/modules/$(uname -r)/build`
elif test -d ${kernelsrc}-obj/${target_cpu}/${target_cpu}; then
kernelbuild=${kernelsrc}-obj/${target_cpu}/${target_cpu}
@@ -18357,8 +18513,8 @@ fi
EXTRA_KCFLAGS="$tmp_flags"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether mutex has owner" >&5
-$as_echo_n "checking whether mutex has owner... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether group_info->gid exists" >&5
+$as_echo_n "checking whether group_info->gid exists... " >&6; }
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
@@ -18366,16 +18522,14 @@ $as_echo_n "checking whether mutex has owner... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/mutex.h>
- #include <linux/spinlock.h>
+ #include <linux/cred.h>
int
main (void)
{
- DEFINE_MUTEX(m);
- struct task_struct *t __attribute__ ((unused));
- t = m.owner;
+ struct group_info *gi = groups_alloc(1);
+ gi->gid[0] = KGIDT_INIT(0);
;
return 0;
@@ -18403,7 +18557,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_MUTEX_OWNER 1" >>confdefs.h
+$as_echo "#define HAVE_GROUP_INFO_GID 1" >>confdefs.h
else
@@ -18422,8 +18576,8 @@ fi
EXTRA_KCFLAGS="$tmp_flags"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether group_info->gid exists" >&5
-$as_echo_n "checking whether group_info->gid exists... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether kmem_cache_create_usercopy() exists" >&5
+$as_echo_n "checking whether kmem_cache_create_usercopy() exists... " >&6; }
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="-Werror"
@@ -18431,14 +18585,26 @@ $as_echo_n "checking whether group_info->gid exists... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/cred.h>
+ #include <linux/slab.h>
+ static void ctor(void *foo)
+ {
+ // fake ctor
+ }
int
main (void)
{
- struct group_info *gi = groups_alloc(1);
- gi->gid[0] = KGIDT_INIT(0);
+ struct kmem_cache *skc_linux_cache;
+ const char *name = "test";
+ size_t size = 4096;
+ size_t align = 8;
+ unsigned long flags = 0;
+ size_t useroffset = 0;
+ size_t usersize = size - useroffset;
+
+ skc_linux_cache = kmem_cache_create_usercopy(
+ name, size, align, flags, useroffset, usersize, ctor);
;
return 0;
@@ -18466,7 +18632,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_GROUP_INFO_GID 1" >>confdefs.h
+$as_echo "#define HAVE_KMEM_CACHE_CREATE_USERCOPY 1" >>confdefs.h
else
@@ -18617,6 +18783,142 @@ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether kernel_write() takes loff_t pointer" >&5
+$as_echo_n "checking whether kernel_write() takes loff_t pointer... " >&6; }
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="-Werror"
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/fs.h>
+
+int
+main (void)
+{
+
+ struct file *file = NULL;
+ const void *buf = NULL;
+ size_t count = 0;
+ loff_t *pos = NULL;
+ ssize_t ret;
+
+ ret = kernel_write(file, buf, count, pos);
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_KERNEL_WRITE_PPOS 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+ EXTRA_KCFLAGS="$tmp_flags"
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether kernel_read() takes loff_t pointer" >&5
+$as_echo_n "checking whether kernel_read() takes loff_t pointer... " >&6; }
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="-Werror"
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/fs.h>
+
+int
+main (void)
+{
+
+ struct file *file = NULL;
+ void *buf = NULL;
+ size_t count = 0;
+ loff_t *pos = NULL;
+ ssize_t ret;
+
+ ret = kernel_read(file, buf, count, pos);
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror-implicit-function-declaration $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_KERNEL_READ_PPOS 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+ EXTRA_KCFLAGS="$tmp_flags"
+
+
;;
srpm) ;;
*)
@@ -18645,7 +18947,7 @@ fi
-ac_config_files="$ac_config_files Makefile man/Makefile man/man1/Makefile man/man5/Makefile lib/Makefile cmd/Makefile module/Makefile module/spl/Makefile module/splat/Makefile include/Makefile include/fs/Makefile include/linux/Makefile include/rpc/Makefile include/sharefs/Makefile include/sys/Makefile include/sys/fm/Makefile include/sys/fs/Makefile include/sys/sysevent/Makefile include/util/Makefile include/vm/Makefile scripts/Makefile rpm/Makefile rpm/redhat/Makefile rpm/redhat/spl.spec rpm/redhat/spl-kmod.spec rpm/redhat/spl-dkms.spec rpm/generic/Makefile rpm/generic/spl.spec rpm/generic/spl-kmod.spec rpm/generic/spl-dkms.spec spl.release"
+ac_config_files="$ac_config_files Makefile man/Makefile man/man1/Makefile man/man5/Makefile lib/Makefile cmd/Makefile cmd/splat/Makefile cmd/splslab/Makefile module/Makefile module/spl/Makefile module/splat/Makefile include/Makefile include/fs/Makefile include/linux/Makefile include/rpc/Makefile include/sharefs/Makefile include/sys/Makefile include/sys/fm/Makefile include/sys/fs/Makefile include/sys/sysevent/Makefile include/util/Makefile include/vm/Makefile scripts/Makefile rpm/Makefile rpm/redhat/Makefile rpm/redhat/spl.spec rpm/redhat/spl-kmod.spec rpm/redhat/spl-dkms.spec rpm/generic/Makefile rpm/generic/spl.spec rpm/generic/spl-kmod.spec rpm/generic/spl-dkms.spec spl.release"
cat >confcache <<\_ACEOF
@@ -19190,7 +19492,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by spl $as_me 0.6.5.11, which was
+This file was extended by spl $as_me 0.7.3, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -19256,7 +19558,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-spl config.status 0.6.5.11
+spl config.status 0.7.3
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
@@ -19677,6 +19979,8 @@ do
"man/man5/Makefile") CONFIG_FILES="$CONFIG_FILES man/man5/Makefile" ;;
"lib/Makefile") CONFIG_FILES="$CONFIG_FILES lib/Makefile" ;;
"cmd/Makefile") CONFIG_FILES="$CONFIG_FILES cmd/Makefile" ;;
+ "cmd/splat/Makefile") CONFIG_FILES="$CONFIG_FILES cmd/splat/Makefile" ;;
+ "cmd/splslab/Makefile") CONFIG_FILES="$CONFIG_FILES cmd/splslab/Makefile" ;;
"module/Makefile") CONFIG_FILES="$CONFIG_FILES module/Makefile" ;;
"module/spl/Makefile") CONFIG_FILES="$CONFIG_FILES module/spl/Makefile" ;;
"module/splat/Makefile") CONFIG_FILES="$CONFIG_FILES module/splat/Makefile" ;;
diff --git a/spl/configure.ac b/spl/configure.ac
index efeb243cba69..70735ce2cf42 100644
--- a/spl/configure.ac
+++ b/spl/configure.ac
@@ -54,6 +54,8 @@ AC_CONFIG_FILES([
man/man5/Makefile
lib/Makefile
cmd/Makefile
+ cmd/splat/Makefile
+ cmd/splslab/Makefile
module/Makefile
module/spl/Makefile
module/splat/Makefile
diff --git a/spl/dkms.conf b/spl/dkms.conf
index 0d8ab942d12a..fc43607928f5 100644
--- a/spl/dkms.conf
+++ b/spl/dkms.conf
@@ -1,6 +1,6 @@
AUTOINSTALL="yes"
PACKAGE_NAME="spl"
-PACKAGE_VERSION="0.6.5.11"
+PACKAGE_VERSION="0.7.3"
PRE_BUILD="configure
--prefix=/usr
--with-config=kernel
diff --git a/spl/include/Makefile.in b/spl/include/Makefile.in
index 9cabebdabc37..c9319cb27d77 100644
--- a/spl/include/Makefile.in
+++ b/spl/include/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/include/fs/Makefile.in b/spl/include/fs/Makefile.in
index 12c9050cf860..cdfd165b87bf 100644
--- a/spl/include/fs/Makefile.in
+++ b/spl/include/fs/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/include/linux/Makefile.in b/spl/include/linux/Makefile.in
index 603dda889c90..e3bad5e58c04 100644
--- a/spl/include/linux/Makefile.in
+++ b/spl/include/linux/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/include/linux/file_compat.h b/spl/include/linux/file_compat.h
index 916514566a23..2fd6e5fc8779 100644
--- a/spl/include/linux/file_compat.h
+++ b/spl/include/linux/file_compat.h
@@ -26,6 +26,7 @@
#define _SPL_FILE_COMPAT_H
#include <linux/fs.h>
+#include <linux/uaccess.h>
#ifdef HAVE_FDTABLE_HEADER
#include <linux/fdtable.h>
#endif
@@ -70,6 +71,46 @@ spl_filp_fallocate(struct file *fp, int mode, loff_t offset, loff_t len)
return (error);
}
+static inline ssize_t
+spl_kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
+{
+#if defined(HAVE_KERNEL_WRITE_PPOS)
+ return (kernel_write(file, buf, count, pos));
+#else
+ mm_segment_t saved_fs;
+ ssize_t ret;
+
+ saved_fs = get_fs();
+ set_fs(get_ds());
+
+ ret = vfs_write(file, (__force const char __user *)buf, count, pos);
+
+ set_fs(saved_fs);
+
+ return (ret);
+#endif
+}
+
+static inline ssize_t
+spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
+{
+#if defined(HAVE_KERNEL_READ_PPOS)
+ return (kernel_read(file, buf, count, pos));
+#else
+ mm_segment_t saved_fs;
+ ssize_t ret;
+
+ saved_fs = get_fs();
+ set_fs(get_ds());
+
+ ret = vfs_read(file, (void __user *)buf, count, pos);
+
+ set_fs(saved_fs);
+
+ return (ret);
+#endif
+}
+
#ifdef HAVE_2ARGS_VFS_FSYNC
#define spl_filp_fsync(fp, sync) vfs_fsync(fp, sync)
#else
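
For context, a minimal sketch of how a caller might use the new spl_kernel_write()/spl_kernel_read() wrappers for positional kernel-space file I/O; spl_file_roundtrip() and its error handling are hypothetical, not part of this patch:

    /* Hypothetical consumer: write a record at *pos, then read it back. */
    static int
    spl_file_roundtrip(const char *path, loff_t *pos)
    {
            struct file *fp;
            char out[] = "spl";
            char in[sizeof (out)];
            ssize_t rc;

            fp = filp_open(path, O_RDWR | O_CREAT, 0644);
            if (IS_ERR(fp))
                    return (PTR_ERR(fp));

            /* Advances *pos on success; on kernels without the ppos
             * variants the set_fs()/vfs_write() fallback above is taken. */
            rc = spl_kernel_write(fp, out, sizeof (out), pos);
            if (rc == sizeof (out)) {
                    loff_t rpos = *pos - sizeof (out);
                    rc = spl_kernel_read(fp, in, sizeof (in), &rpos);
            }

            filp_close(fp, NULL);
            return (rc < 0 ? (int)rc : 0);
    }
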
diff --git a/spl/include/linux/rwsem_compat.h b/spl/include/linux/rwsem_compat.h
index c874885b0c9d..de513debeafe 100644
--- a/spl/include/linux/rwsem_compat.h
+++ b/spl/include/linux/rwsem_compat.h
@@ -27,7 +27,10 @@
#include <linux/rwsem.h>
-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
+#if defined(CONFIG_PREEMPT_RT_FULL)
+#define SPL_RWSEM_SINGLE_READER_VALUE (1)
+#define SPL_RWSEM_SINGLE_WRITER_VALUE (0)
+#elif defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
#define SPL_RWSEM_SINGLE_READER_VALUE (1)
#define SPL_RWSEM_SINGLE_WRITER_VALUE (-1)
#else
@@ -36,7 +39,9 @@
#endif
/* Linux 3.16 changed activity to count for rwsem-spinlock */
-#if defined(HAVE_RWSEM_ACTIVITY)
+#if defined(CONFIG_PREEMPT_RT_FULL)
+#define RWSEM_COUNT(sem) sem->read_depth
+#elif defined(HAVE_RWSEM_ACTIVITY)
#define RWSEM_COUNT(sem) sem->activity
/* Linux 4.8 changed count to an atomic_long_t for !rwsem-spinlock */
#elif defined(HAVE_RWSEM_ATOMIC_LONG_COUNT)
diff --git a/spl/include/rpc/Makefile.in b/spl/include/rpc/Makefile.in
index c1c162e68e59..3e958254e85d 100644
--- a/spl/include/rpc/Makefile.in
+++ b/spl/include/rpc/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/include/sharefs/Makefile.in b/spl/include/sharefs/Makefile.in
index 96273fcaa20b..40d1cb7c833f 100644
--- a/spl/include/sharefs/Makefile.in
+++ b/spl/include/sharefs/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/include/sys/Makefile.am b/spl/include/sys/Makefile.am
index 73c4a8421782..a16bd6ce6fed 100644
--- a/spl/include/sys/Makefile.am
+++ b/spl/include/sys/Makefile.am
@@ -29,6 +29,7 @@ KERNEL_H = \
$(top_srcdir)/include/sys/dirent.h \
$(top_srcdir)/include/sys/disp.h \
$(top_srcdir)/include/sys/dkio.h \
+ $(top_srcdir)/include/sys/dkioc_free_util.h \
$(top_srcdir)/include/sys/dklabel.h \
$(top_srcdir)/include/sys/dnlc.h \
$(top_srcdir)/include/sys/dumphdr.h \
diff --git a/spl/include/sys/Makefile.in b/spl/include/sys/Makefile.in
index 46e7c059b968..b908b221f9e6 100644
--- a/spl/include/sys/Makefile.in
+++ b/spl/include/sys/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -159,6 +159,7 @@ am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/acl.h \
$(top_srcdir)/include/sys/dirent.h \
$(top_srcdir)/include/sys/disp.h \
$(top_srcdir)/include/sys/dkio.h \
+ $(top_srcdir)/include/sys/dkioc_free_util.h \
$(top_srcdir)/include/sys/dklabel.h \
$(top_srcdir)/include/sys/dnlc.h \
$(top_srcdir)/include/sys/dumphdr.h \
@@ -513,6 +514,7 @@ KERNEL_H = \
$(top_srcdir)/include/sys/dirent.h \
$(top_srcdir)/include/sys/disp.h \
$(top_srcdir)/include/sys/dkio.h \
+ $(top_srcdir)/include/sys/dkioc_free_util.h \
$(top_srcdir)/include/sys/dklabel.h \
$(top_srcdir)/include/sys/dnlc.h \
$(top_srcdir)/include/sys/dumphdr.h \
diff --git a/spl/include/sys/byteorder.h b/spl/include/sys/byteorder.h
index 5350a0b33aa3..184b52d51ddd 100644
--- a/spl/include/sys/byteorder.h
+++ b/spl/include/sys/byteorder.h
@@ -26,6 +26,7 @@
#define _SPL_BYTEORDER_H
#include <asm/byteorder.h>
+#include <sys/isa_defs.h>
#define LE_16(x) cpu_to_le16(x)
#define LE_32(x) cpu_to_le32(x)
@@ -43,4 +44,26 @@
#define BE_IN32(xa) \
(((uint32_t)BE_IN16(xa) << 16) | BE_IN16((uint8_t *)(xa)+2))
+#ifdef _BIG_ENDIAN
+static __inline__ uint64_t
+htonll(uint64_t n) {
+ return (n);
+}
+
+static __inline__ uint64_t
+ntohll(uint64_t n) {
+ return (n);
+}
+#else
+static __inline__ uint64_t
+htonll(uint64_t n) {
+ return ((((uint64_t)htonl(n)) << 32) + htonl(n >> 32));
+}
+
+static __inline__ uint64_t
+ntohll(uint64_t n) {
+ return ((((uint64_t)ntohl(n)) << 32) + ntohl(n >> 32));
+}
+#endif
+
#endif /* SPL_BYTEORDER_H */
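
The new htonll()/ntohll() helpers round-trip 64-bit values to big-endian; a trivial sketch:

    uint64_t host = 0x0102030405060708ULL;
    uint64_t wire = htonll(host);   /* big-endian on-disk/on-wire form */

    /* Identity on big-endian systems, byte swap on little-endian;
     * the round trip always restores the host value. */
    ASSERT3U(ntohll(wire), ==, host);
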
diff --git a/spl/include/sys/condvar.h b/spl/include/sys/condvar.h
index 8a4aab44b5ab..5479e75ad652 100644
--- a/spl/include/sys/condvar.h
+++ b/spl/include/sys/condvar.h
@@ -59,6 +59,8 @@ extern clock_t __cv_timedwait(kcondvar_t *, kmutex_t *, clock_t);
extern clock_t __cv_timedwait_sig(kcondvar_t *, kmutex_t *, clock_t);
extern clock_t cv_timedwait_hires(kcondvar_t *, kmutex_t *, hrtime_t,
hrtime_t res, int flag);
+extern clock_t cv_timedwait_sig_hires(kcondvar_t *, kmutex_t *, hrtime_t,
+ hrtime_t res, int flag);
extern void __cv_signal(kcondvar_t *);
extern void __cv_broadcast(kcondvar_t *c);
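
A hedged sketch of calling the new signal-interruptible high-resolution wait, mirroring cv_timedwait_hires(); the condition and lock names are illustrative, assuming the usual Illumos convention that -1 means the deadline passed:

    clock_t rc;

    mutex_enter(&lock);
    while (!condition_met) {
            /* Absolute 500ms deadline, 1ms resolution, signals allowed. */
            rc = cv_timedwait_sig_hires(&cv, &lock,
                gethrtime() + MSEC2NSEC(500), MSEC2NSEC(1),
                CALLOUT_FLAG_ABSOLUTE);
            if (rc == -1)
                    break;          /* deadline passed */
    }
    mutex_exit(&lock);
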
diff --git a/spl/include/sys/cred.h b/spl/include/sys/cred.h
index 480e268f1dfa..2ad7115e0a3b 100644
--- a/spl/include/sys/cred.h
+++ b/spl/include/sys/cred.h
@@ -41,18 +41,6 @@ typedef struct cred cred_t;
#ifdef HAVE_KUIDGID_T
-/*
- * Linux 3.8+ uses typedefs to redefine uid_t and gid_t. We have to rename the
- * typedefs to recover the original types. We then can use them provided that
- * we are careful about translating from k{g,u}id_t to the original versions
- * and vice versa.
- */
-#define uid_t xuid_t
-#define gid_t xgid_t
-#include <linux/uidgid.h>
-#undef uid_t
-#undef gid_t
-
#define KUID_TO_SUID(x) (__kuid_val(x))
#define KGID_TO_SGID(x) (__kgid_val(x))
#define SUID_TO_KUID(x) (KUIDT_INIT(x))
diff --git a/spl/include/sys/debug.h b/spl/include/sys/debug.h
index a37740036446..98ccbaf05ad7 100644
--- a/spl/include/sys/debug.h
+++ b/spl/include/sys/debug.h
@@ -31,11 +31,13 @@
* PANIC() - Panic the node and print message.
* ASSERT() - Assert X is true, if not panic.
* ASSERTV() - Wraps a variable declaration which is only used by ASSERT().
+ * ASSERT3B() - Assert boolean X OP Y is true, if not panic.
* ASSERT3S() - Assert signed X OP Y is true, if not panic.
* ASSERT3U() - Assert unsigned X OP Y is true, if not panic.
* ASSERT3P() - Assert pointer X OP Y is true, if not panic.
* ASSERT0() - Assert value is zero, if not panic.
* VERIFY() - Verify X is true, if not panic.
+ * VERIFY3B() - Verify boolean X OP Y is true, if not panic.
* VERIFY3S() - Verify signed X OP Y is true, if not panic.
* VERIFY3U() - Verify unsigned X OP Y is true, if not panic.
* VERIFY3P() - Verify pointer X OP Y is true, if not panic.
@@ -67,6 +69,7 @@ void spl_dumpstack(void);
"failed (" FMT " " #OP " " FMT ")\n", \
CAST (LEFT), CAST (RIGHT)))
+#define VERIFY3B(x,y,z) VERIFY3_IMPL(x, y, z, boolean_t, "%d", (boolean_t))
#define VERIFY3S(x,y,z) VERIFY3_IMPL(x, y, z, int64_t, "%lld", (long long))
#define VERIFY3U(x,y,z) VERIFY3_IMPL(x, y, z, uint64_t, "%llu", \
(unsigned long long))
@@ -88,6 +91,7 @@ void spl_dumpstack(void);
#define SPL_DEBUG_STR ""
#define ASSERT(x) ((void)0)
#define ASSERTV(x)
+#define ASSERT3B(x,y,z) ((void)0)
#define ASSERT3S(x,y,z) ((void)0)
#define ASSERT3U(x,y,z) ((void)0)
#define ASSERT3P(x,y,z) ((void)0)
@@ -103,6 +107,7 @@ void spl_dumpstack(void);
#define SPL_DEBUG_STR " (DEBUG mode)"
#define ASSERT(cond) VERIFY(cond)
#define ASSERTV(x) x
+#define ASSERT3B(x,y,z) VERIFY3B(x, y, z)
#define ASSERT3S(x,y,z) VERIFY3S(x, y, z)
#define ASSERT3U(x,y,z) VERIFY3U(x, y, z)
#define ASSERT3P(x,y,z) VERIFY3P(x, y, z)
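
The new boolean variants follow the existing three-operand pattern; a trivial example:

    boolean_t ready = B_TRUE;

    ASSERT3B(ready, ==, B_TRUE);    /* no-op unless DEBUG is enabled */
    VERIFY3B(ready, ==, B_TRUE);    /* always evaluated, panics on failure */
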
diff --git a/spl/include/sys/dkio.h b/spl/include/sys/dkio.h
index d8c700718ff1..dd7a95f137b9 100644
--- a/spl/include/sys/dkio.h
+++ b/spl/include/sys/dkio.h
@@ -25,14 +25,16 @@
#ifndef _SPL_DKIO_H
#define _SPL_DKIO_H
-struct dk_callback {
- void (*dkc_callback)(void *dkc_cookie, int error);
- void *dkc_cookie;
- int dkc_flag;
-};
+#define DFL_SZ(num_exts) \
+ (sizeof (dkioc_free_list_t) + (num_exts - 1) * 16)
-#define DKIOC (0x04 << 8)
-#define DKIOCFLUSHWRITECACHE (DKIOC | 34)
-#define DKIOCTRIM (DKIOC | 35)
+#define DKIOC (0x04 << 8)
+#define DKIOCFLUSHWRITECACHE (DKIOC|34) /* flush cache to phys medium */
+
+/*
+ * ioctl to free space (e.g. SCSI UNMAP) off a disk.
+ * Pass a dkioc_free_list_t containing a list of extents to be freed.
+ */
+#define DKIOCFREE (DKIOC|50)
#endif /* _SPL_DKIO_H */
diff --git a/spl/include/sys/dkioc_free_util.h b/spl/include/sys/dkioc_free_util.h
new file mode 100644
index 000000000000..bea5a5bbc993
--- /dev/null
+++ b/spl/include/sys/dkioc_free_util.h
@@ -0,0 +1,58 @@
+/*****************************************************************************\
+ * Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
+ * Copyright (C) 2007 The Regents of the University of California.
+ * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ * Written by Brian Behlendorf <behlendorf1 at llnl.gov>.
+ * UCRL-CODE-235197
+ *
+ * This file is part of the SPL, Solaris Porting Layer.
+ * For details, see <http://zfsonlinux.org/>.
+ *
+ * The SPL is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * The SPL is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with the SPL. If not, see <http://www.gnu.org/licenses/>.
+\*****************************************************************************/
+
+#ifndef _SPL_DKIOC_UTIL_H
+#define _SPL_DKIOC_UTIL_H
+
+#include <sys/dkio.h>
+
+typedef struct dkioc_free_list_ext_s {
+ uint64_t dfle_start;
+ uint64_t dfle_length;
+} dkioc_free_list_ext_t;
+
+typedef struct dkioc_free_list_s {
+ uint64_t dfl_flags;
+ uint64_t dfl_num_exts;
+ int64_t dfl_offset;
+
+ /*
+ * N.B. this is only an internal debugging API! This is only called
+ * from debug builds of sd for pre-release checking. Remove before GA!
+ */
+ void (*dfl_ck_func)(uint64_t, uint64_t, void *);
+ void *dfl_ck_arg;
+
+ dkioc_free_list_ext_t dfl_exts[1];
+} dkioc_free_list_t;
+
+static inline void dfl_free(dkioc_free_list_t *dfl) {
+ vmem_free(dfl, DFL_SZ(dfl->dfl_num_exts));
+}
+
+static inline dkioc_free_list_t *dfl_alloc(uint64_t dfl_num_exts, int flags) {
+ return vmem_zalloc(DFL_SZ(dfl_num_exts), flags);
+}
+
+#endif /* _SPL_DKIOC_UTIL_H */
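
A minimal sketch of building a free list for the new DKIOCFREE path with dfl_alloc()/dfl_free(); the extent offsets and lengths are placeholders:

    dkioc_free_list_t *dfl = dfl_alloc(2, KM_SLEEP);

    dfl->dfl_num_exts = 2;          /* dfl_free() sizes by this field */
    dfl->dfl_exts[0].dfle_start = 0;
    dfl->dfl_exts[0].dfle_length = 1 << 20;
    dfl->dfl_exts[1].dfle_start = 8 << 20;
    dfl->dfl_exts[1].dfle_length = 1 << 20;

    /* ... submit via the DKIOCFREE ioctl path ... */

    dfl_free(dfl);
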
diff --git a/spl/include/sys/fm/Makefile.in b/spl/include/sys/fm/Makefile.in
index cd8e7d8c1822..a245c8b6fe89 100644
--- a/spl/include/sys/fm/Makefile.in
+++ b/spl/include/sys/fm/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/include/sys/fs/Makefile.in b/spl/include/sys/fs/Makefile.in
index d7b244bcc7ed..0495087c8c54 100644
--- a/spl/include/sys/fs/Makefile.in
+++ b/spl/include/sys/fs/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/include/sys/isa_defs.h b/spl/include/sys/isa_defs.h
index 53dead38c19b..738795c708c4 100644
--- a/spl/include/sys/isa_defs.h
+++ b/spl/include/sys/isa_defs.h
@@ -44,6 +44,9 @@
#define _LP64
#endif
+#define _ALIGNMENT_REQUIRED 1
+
+
/* i386 arch specific defines */
#elif defined(__i386) || defined(__i386__)
@@ -59,6 +62,8 @@
#define _ILP32
#endif
+#define _ALIGNMENT_REQUIRED 0
+
/* powerpc (ppc64) arch specific defines */
#elif defined(__powerpc) || defined(__powerpc__) || defined(__powerpc64__)
@@ -80,6 +85,12 @@
#endif
#endif
+/*
+ * Illumos doesn't define _ALIGNMENT_REQUIRED for PPC, so default to 1
+ * out of paranoia.
+ */
+#define _ALIGNMENT_REQUIRED 1
+
/* arm arch specific defines */
#elif defined(__arm) || defined(__arm__) || defined(__aarch64__)
@@ -107,6 +118,12 @@
#define _BIG_ENDIAN
#endif
+/*
+ * Illumos doesn't define _ALIGNMENT_REQUIRED for ARM, so default to 1
+ * out of paranoia.
+ */
+#define _ALIGNMENT_REQUIRED 1
+
/* sparc arch specific defines */
#elif defined(__sparc) || defined(__sparc__)
@@ -130,6 +147,7 @@
#define _BIG_ENDIAN
#define _SUNOS_VTOC_16
+#define _ALIGNMENT_REQUIRED 1
/* s390 arch specific defines */
#elif defined(__s390__)
@@ -145,6 +163,12 @@
#define _BIG_ENDIAN
+/*
+ * Illumos doesn't define _ALIGNMENT_REQUIRED for s390, so default to 1
+ * out of paranoia.
+ */
+#define _ALIGNMENT_REQUIRED 1
+
/* MIPS arch specific defines */
#elif defined(__mips__)
@@ -162,6 +186,12 @@
#define _SUNOS_VTOC_16
+/*
+ * Illumos doesn't define _ALIGNMENT_REQUIRED for MIPS, so default to 1
+ * out of paranoia.
+ */
+#define _ALIGNMENT_REQUIRED 1
+
#else
/*
* Currently supported:
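
The new _ALIGNMENT_REQUIRED define lets shared code choose between direct and byte-wise access; a hedged sketch (read_u64() is hypothetical):

    static uint64_t
    read_u64(const void *p)
    {
    #if _ALIGNMENT_REQUIRED
            uint64_t v;             /* strict-alignment arch: copy bytes */
            memcpy(&v, p, sizeof (v));
            return (v);
    #else
            return (*(const uint64_t *)p);  /* unaligned load is safe */
    #endif
    }
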
diff --git a/spl/include/sys/kobj.h b/spl/include/sys/kobj.h
index f95fa8039762..334449a8e234 100644
--- a/spl/include/sys/kobj.h
+++ b/spl/include/sys/kobj.h
@@ -35,8 +35,8 @@ typedef struct _buf buf_t;
extern struct _buf *kobj_open_file(const char *name);
extern void kobj_close_file(struct _buf *file);
-extern int kobj_read_file(struct _buf *file, char *buf,
- ssize_t size, offset_t off);
+extern int kobj_read_file(struct _buf *file, char *buf, unsigned size,
+ unsigned off);
extern int kobj_get_filesize(struct _buf *file, uint64_t *size);
#endif /* SPL_KOBJ_H */
diff --git a/spl/include/sys/mutex.h b/spl/include/sys/mutex.h
index be69deaab5d5..8cbab7c477eb 100644
--- a/spl/include/sys/mutex.h
+++ b/spl/include/sys/mutex.h
@@ -28,20 +28,22 @@
#include <sys/types.h>
#include <linux/mutex.h>
#include <linux/compiler_compat.h>
+#include <linux/lockdep.h>
typedef enum {
MUTEX_DEFAULT = 0,
MUTEX_SPIN = 1,
- MUTEX_ADAPTIVE = 2
+ MUTEX_ADAPTIVE = 2,
+ MUTEX_NOLOCKDEP = 3
} kmutex_type_t;
typedef struct {
struct mutex m_mutex;
spinlock_t m_lock; /* used for serializing mutex_exit */
-#ifndef HAVE_MUTEX_OWNER
- /* only when kernel doesn't have owner */
kthread_t *m_owner;
-#endif
+#ifdef CONFIG_LOCKDEP
+ kmutex_type_t m_type;
+#endif /* CONFIG_LOCKDEP */
} kmutex_t;
#define MUTEX(mp) (&((mp)->m_mutex))
@@ -49,32 +51,44 @@ typedef struct {
static inline void
spl_mutex_set_owner(kmutex_t *mp)
{
- /*
- * kernel will handle its owner, so we don't need to do anything if it
- * is defined.
- */
-#ifndef HAVE_MUTEX_OWNER
mp->m_owner = current;
-#endif
}
static inline void
spl_mutex_clear_owner(kmutex_t *mp)
{
-#ifndef HAVE_MUTEX_OWNER
mp->m_owner = NULL;
-#endif
}
-#ifdef HAVE_MUTEX_OWNER
-#define mutex_owner(mp) (ACCESS_ONCE(MUTEX(mp)->owner))
-#else
#define mutex_owner(mp) (ACCESS_ONCE((mp)->m_owner))
-#endif
#define mutex_owned(mp) (mutex_owner(mp) == current)
#define MUTEX_HELD(mp) mutex_owned(mp)
#define MUTEX_NOT_HELD(mp) (!MUTEX_HELD(mp))
+#ifdef CONFIG_LOCKDEP
+static inline void
+spl_mutex_set_type(kmutex_t *mp, kmutex_type_t type)
+{
+ mp->m_type = type;
+}
+static inline void
+spl_mutex_lockdep_off_maybe(kmutex_t *mp)
+{
+	if (mp && mp->m_type == MUTEX_NOLOCKDEP)
+		lockdep_off();
+}
+static inline void
+spl_mutex_lockdep_on_maybe(kmutex_t *mp)
+{
+	if (mp && mp->m_type == MUTEX_NOLOCKDEP)
+		lockdep_on();
+}
+#else /* CONFIG_LOCKDEP */
+#define spl_mutex_set_type(mp, type)
+#define spl_mutex_lockdep_off_maybe(mp)
+#define spl_mutex_lockdep_on_maybe(mp)
+#endif /* CONFIG_LOCKDEP */
+
/*
* The following functions must be a #define and not static inline.
* This ensures that the native linux mutex functions (lock/unlock)
@@ -85,11 +99,12 @@ spl_mutex_clear_owner(kmutex_t *mp)
#define mutex_init(mp, name, type, ibc) \
{ \
static struct lock_class_key __key; \
- ASSERT(type == MUTEX_DEFAULT); \
+ ASSERT(type == MUTEX_DEFAULT || type == MUTEX_NOLOCKDEP); \
\
__mutex_init(MUTEX(mp), (name) ? (#name) : (#mp), &__key); \
spin_lock_init(&(mp)->m_lock); \
spl_mutex_clear_owner(mp); \
+ spl_mutex_set_type(mp, type); \
}
#undef mutex_destroy
@@ -102,8 +117,10 @@ spl_mutex_clear_owner(kmutex_t *mp)
({ \
int _rc_; \
\
+ spl_mutex_lockdep_off_maybe(mp); \
if ((_rc_ = mutex_trylock(MUTEX(mp))) == 1) \
spl_mutex_set_owner(mp); \
+ spl_mutex_lockdep_on_maybe(mp); \
\
_rc_; \
})
@@ -112,14 +129,18 @@ spl_mutex_clear_owner(kmutex_t *mp)
#define mutex_enter_nested(mp, subclass) \
{ \
ASSERT3P(mutex_owner(mp), !=, current); \
+ spl_mutex_lockdep_off_maybe(mp); \
mutex_lock_nested(MUTEX(mp), (subclass)); \
+ spl_mutex_lockdep_on_maybe(mp); \
spl_mutex_set_owner(mp); \
}
#else /* CONFIG_DEBUG_LOCK_ALLOC */
#define mutex_enter_nested(mp, subclass) \
{ \
ASSERT3P(mutex_owner(mp), !=, current); \
+ spl_mutex_lockdep_off_maybe(mp); \
mutex_lock(MUTEX(mp)); \
+ spl_mutex_lockdep_on_maybe(mp); \
spl_mutex_set_owner(mp); \
}
#endif /* CONFIG_DEBUG_LOCK_ALLOC */
@@ -147,10 +168,13 @@ spl_mutex_clear_owner(kmutex_t *mp)
*/
#define mutex_exit(mp) \
{ \
- spin_lock(&(mp)->m_lock); \
spl_mutex_clear_owner(mp); \
+ spin_lock(&(mp)->m_lock); \
+ spl_mutex_lockdep_off_maybe(mp); \
mutex_unlock(MUTEX(mp)); \
+ spl_mutex_lockdep_on_maybe(mp); \
spin_unlock(&(mp)->m_lock); \
+ /* NOTE: do not dereference mp after this point */ \
}
int spl_mutex_init(void);
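
A usage sketch of the new MUTEX_NOLOCKDEP type, which suppresses lockdep checking for locks whose ordering is validated by other means (the lock name is illustrative):

    kmutex_t m;

    mutex_init(&m, NULL, MUTEX_NOLOCKDEP, NULL);

    mutex_enter(&m);        /* wrapped in lockdep_off()/lockdep_on() */
    /* ... critical section ... */
    mutex_exit(&m);

    mutex_destroy(&m);
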
diff --git a/spl/include/sys/param.h b/spl/include/sys/param.h
index 5b5b5f550bab..665f641b5e89 100644
--- a/spl/include/sys/param.h
+++ b/spl/include/sys/param.h
@@ -28,8 +28,8 @@
#include <asm/page.h>
/* Pages to bytes and back */
-#define ptob(pages) (pages << PAGE_SHIFT)
-#define btop(bytes) (bytes >> PAGE_SHIFT)
+#define ptob(pages) ((pages) << PAGE_SHIFT)
+#define btop(bytes) ((bytes) >> PAGE_SHIFT)
#define MAXUID UINT32_MAX
diff --git a/spl/include/sys/random.h b/spl/include/sys/random.h
index 2bf581f2651f..64f70ee52f4a 100644
--- a/spl/include/sys/random.h
+++ b/spl/include/sys/random.h
@@ -35,11 +35,6 @@ random_get_bytes(uint8_t *ptr, size_t len)
return 0;
}
-static __inline__ int
-random_get_pseudo_bytes(uint8_t *ptr, size_t len)
-{
- get_random_bytes((void *)ptr,(int)len);
- return 0;
-}
+extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len);
#endif /* _SPL_RANDOM_H */
diff --git a/spl/include/sys/rwlock.h b/spl/include/sys/rwlock.h
index c8ed223d4461..ffb7b90b6f28 100644
--- a/spl/include/sys/rwlock.h
+++ b/spl/include/sys/rwlock.h
@@ -30,55 +30,86 @@
#include <linux/rwsem_compat.h>
typedef enum {
- RW_DRIVER = 2,
- RW_DEFAULT = 4
+ RW_DRIVER = 2,
+ RW_DEFAULT = 4,
+ RW_NOLOCKDEP = 5
} krw_type_t;
typedef enum {
- RW_NONE = 0,
- RW_WRITER = 1,
- RW_READER = 2
+ RW_NONE = 0,
+ RW_WRITER = 1,
+ RW_READER = 2
} krw_t;
+/*
+ * If CONFIG_RWSEM_SPIN_ON_OWNER is defined, rw_semaphore will have an owner
+ * field, so we don't need our own.
+ */
typedef struct {
- struct rw_semaphore rw_rwlock;
- kthread_t *rw_owner;
+ struct rw_semaphore rw_rwlock;
+#ifndef CONFIG_RWSEM_SPIN_ON_OWNER
+ kthread_t *rw_owner;
+#endif
+#ifdef CONFIG_LOCKDEP
+ krw_type_t rw_type;
+#endif /* CONFIG_LOCKDEP */
} krwlock_t;
-#define SEM(rwp) ((struct rw_semaphore *)(rwp))
+#define SEM(rwp) (&(rwp)->rw_rwlock)
static inline void
spl_rw_set_owner(krwlock_t *rwp)
{
- unsigned long flags;
-
- spl_rwsem_lock_irqsave(&SEM(rwp)->wait_lock, flags);
- rwp->rw_owner = current;
- spl_rwsem_unlock_irqrestore(&SEM(rwp)->wait_lock, flags);
+/*
+ * If CONFIG_RWSEM_SPIN_ON_OWNER is defined, down_write, up_write,
+ * downgrade_write and __init_rwsem will set/clear owner for us.
+ */
+#ifndef CONFIG_RWSEM_SPIN_ON_OWNER
+ rwp->rw_owner = current;
+#endif
}
static inline void
spl_rw_clear_owner(krwlock_t *rwp)
{
- unsigned long flags;
-
- spl_rwsem_lock_irqsave(&SEM(rwp)->wait_lock, flags);
- rwp->rw_owner = NULL;
- spl_rwsem_unlock_irqrestore(&SEM(rwp)->wait_lock, flags);
+#ifndef CONFIG_RWSEM_SPIN_ON_OWNER
+ rwp->rw_owner = NULL;
+#endif
}
static inline kthread_t *
rw_owner(krwlock_t *rwp)
{
- unsigned long flags;
- kthread_t *owner;
-
- spl_rwsem_lock_irqsave(&SEM(rwp)->wait_lock, flags);
- owner = rwp->rw_owner;
- spl_rwsem_unlock_irqrestore(&SEM(rwp)->wait_lock, flags);
+#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
+ return SEM(rwp)->owner;
+#else
+ return rwp->rw_owner;
+#endif
+}
- return owner;
+#ifdef CONFIG_LOCKDEP
+static inline void
+spl_rw_set_type(krwlock_t *rwp, krw_type_t type)
+{
+ rwp->rw_type = type;
+}
+static inline void
+spl_rw_lockdep_off_maybe(krwlock_t *rwp)
+{
+	if (rwp && rwp->rw_type == RW_NOLOCKDEP)
+		lockdep_off();
+}
+static inline void
+spl_rw_lockdep_on_maybe(krwlock_t *rwp)
+{
+	if (rwp && rwp->rw_type == RW_NOLOCKDEP)
+		lockdep_on();
+}
+#else /* CONFIG_LOCKDEP */
+#define spl_rw_set_type(rwp, type)
+#define spl_rw_lockdep_off_maybe(rwp)
+#define spl_rw_lockdep_on_maybe(rwp)
+#endif /* CONFIG_LOCKDEP */
static inline int
RW_READ_HELD(krwlock_t *rwp)
@@ -94,7 +125,7 @@ RW_READ_HELD(krwlock_t *rwp)
static inline int
RW_WRITE_HELD(krwlock_t *rwp)
{
- return (spl_rwsem_is_locked(SEM(rwp)) && rw_owner(rwp) == current);
+ return (rw_owner(rwp) == current);
}
static inline int
@@ -109,77 +140,79 @@ RW_LOCK_HELD(krwlock_t *rwp)
* will be correctly located in the users code which is important
* for the built in kernel lock analysis tools
*/
-#define rw_init(rwp, name, type, arg) \
-({ \
- static struct lock_class_key __key; \
- \
- __init_rwsem(SEM(rwp), #rwp, &__key); \
- spl_rw_clear_owner(rwp); \
+#define rw_init(rwp, name, type, arg) \
+({ \
+ static struct lock_class_key __key; \
+ ASSERT(type == RW_DEFAULT || type == RW_NOLOCKDEP); \
+ \
+ __init_rwsem(SEM(rwp), #rwp, &__key); \
+ spl_rw_clear_owner(rwp); \
+ spl_rw_set_type(rwp, type); \
})
-#define rw_destroy(rwp) \
-({ \
- VERIFY(!RW_LOCK_HELD(rwp)); \
+#define rw_destroy(rwp) \
+({ \
+ VERIFY(!RW_LOCK_HELD(rwp)); \
})
-#define rw_tryenter(rwp, rw) \
-({ \
- int _rc_ = 0; \
- \
- switch (rw) { \
- case RW_READER: \
- _rc_ = down_read_trylock(SEM(rwp)); \
- break; \
- case RW_WRITER: \
- if ((_rc_ = down_write_trylock(SEM(rwp)))) \
- spl_rw_set_owner(rwp); \
- break; \
- default: \
- VERIFY(0); \
- } \
- _rc_; \
+#define rw_tryenter(rwp, rw) \
+({ \
+ int _rc_ = 0; \
+ \
+ spl_rw_lockdep_off_maybe(rwp); \
+ switch (rw) { \
+ case RW_READER: \
+ _rc_ = down_read_trylock(SEM(rwp)); \
+ break; \
+ case RW_WRITER: \
+ if ((_rc_ = down_write_trylock(SEM(rwp)))) \
+ spl_rw_set_owner(rwp); \
+ break; \
+ default: \
+ VERIFY(0); \
+ } \
+ spl_rw_lockdep_on_maybe(rwp); \
+ _rc_; \
})
-#define rw_enter(rwp, rw) \
-({ \
- switch (rw) { \
- case RW_READER: \
- down_read(SEM(rwp)); \
- break; \
- case RW_WRITER: \
- down_write(SEM(rwp)); \
- spl_rw_set_owner(rwp); \
- break; \
- default: \
- VERIFY(0); \
- } \
+#define rw_enter(rwp, rw) \
+({ \
+ spl_rw_lockdep_off_maybe(rwp); \
+ switch (rw) { \
+ case RW_READER: \
+ down_read(SEM(rwp)); \
+ break; \
+ case RW_WRITER: \
+ down_write(SEM(rwp)); \
+ spl_rw_set_owner(rwp); \
+ break; \
+ default: \
+ VERIFY(0); \
+ } \
+ spl_rw_lockdep_on_maybe(rwp); \
})
-#define rw_exit(rwp) \
-({ \
- if (RW_WRITE_HELD(rwp)) { \
- spl_rw_clear_owner(rwp); \
- up_write(SEM(rwp)); \
- } else { \
- ASSERT(RW_READ_HELD(rwp)); \
- up_read(SEM(rwp)); \
- } \
+#define rw_exit(rwp) \
+({ \
+ spl_rw_lockdep_off_maybe(rwp); \
+ if (RW_WRITE_HELD(rwp)) { \
+ spl_rw_clear_owner(rwp); \
+ up_write(SEM(rwp)); \
+ } else { \
+ ASSERT(RW_READ_HELD(rwp)); \
+ up_read(SEM(rwp)); \
+ } \
+ spl_rw_lockdep_on_maybe(rwp); \
})
-#define rw_downgrade(rwp) \
-({ \
- spl_rw_clear_owner(rwp); \
- downgrade_write(SEM(rwp)); \
+#define rw_downgrade(rwp) \
+({ \
+ spl_rw_lockdep_off_maybe(rwp); \
+ spl_rw_clear_owner(rwp); \
+ downgrade_write(SEM(rwp)); \
+ spl_rw_lockdep_on_maybe(rwp); \
})
-/*
- * This implementation of rw_tryupgrade() behaves slightly differently
- * from its counterparts on other platforms. It drops the RW_READER lock
- * and then acquires the RW_WRITER lock leaving a small window where no
- * lock is held. On other platforms the lock is never released during
- * the upgrade process. This is necessary under Linux because the kernel
- * does not provide an upgrade function.
- */
#define rw_tryupgrade(rwp) \
({ \
int _rc_ = 0; \
@@ -187,8 +220,10 @@ RW_LOCK_HELD(krwlock_t *rwp)
if (RW_WRITE_HELD(rwp)) { \
_rc_ = 1; \
} else { \
+ spl_rw_lockdep_off_maybe(rwp); \
if ((_rc_ = rwsem_tryupgrade(SEM(rwp)))) \
spl_rw_set_owner(rwp); \
+ spl_rw_lockdep_on_maybe(rwp); \
} \
_rc_; \
})
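
Likewise for RW_NOLOCKDEP, combined with the upgrade path that now uses rwsem_tryupgrade(); a sketch with illustrative names:

    krwlock_t rwl;

    rw_init(&rwl, NULL, RW_NOLOCKDEP, NULL);

    rw_enter(&rwl, RW_READER);
    if (!rw_tryupgrade(&rwl)) {
            /* Upgrade contended; drop and retake as writer. */
            rw_exit(&rwl);
            rw_enter(&rwl, RW_WRITER);
    }
    /* ... modify shared state ... */
    rw_exit(&rwl);
    rw_destroy(&rwl);
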
diff --git a/spl/include/sys/sunldi.h b/spl/include/sys/sunldi.h
index b4ff7391a4a7..ec8420231e99 100644
--- a/spl/include/sys/sunldi.h
+++ b/spl/include/sys/sunldi.h
@@ -34,23 +34,4 @@
#define SECTOR_SIZE 512
-typedef struct modlinkage {
- int ml_rev;
- struct modlfs *ml_modlfs;
- struct modldrv *ml_modldrv;
- major_t ml_major;
- unsigned ml_minors;
- void *pad1;
-} modlinkage_t;
-
-typedef struct ldi_ident {
- char li_modname[MAXNAMELEN];
- dev_t li_dev;
-} *ldi_ident_t;
-
-typedef struct block_device *ldi_handle_t;
-
-extern int ldi_ident_from_mod(struct modlinkage *modlp, ldi_ident_t *lip);
-extern void ldi_ident_release(ldi_ident_t li);
-
#endif /* SPL_SUNLDI_H */
diff --git a/spl/include/sys/sysevent/Makefile.in b/spl/include/sys/sysevent/Makefile.in
index 74d8881fafbc..32056b64e2bd 100644
--- a/spl/include/sys/sysevent/Makefile.in
+++ b/spl/include/sys/sysevent/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/include/sys/sysmacros.h b/spl/include/sys/sysmacros.h
index 4a96e075ff3f..a4a9f3e98b23 100644
--- a/spl/include/sys/sysmacros.h
+++ b/spl/include/sys/sysmacros.h
@@ -32,6 +32,7 @@
#include <sys/varargs.h>
#include <sys/zone.h>
#include <sys/signal.h>
+#include <asm/page.h>
#ifdef HAVE_SCHED_RT_HEADER
#include <linux/sched/rt.h>
@@ -111,6 +112,10 @@
#define PAGESIZE PAGE_SIZE
#endif
+#ifndef PAGESHIFT
+#define PAGESHIFT PAGE_SHIFT
+#endif
+
/* from Solaris sys/byteorder.h */
#define BSWAP_8(x) ((x) & 0xff)
#define BSWAP_16(x) ((BSWAP_8(x) << 8) | BSWAP_8((x) >> 8))
@@ -158,6 +163,9 @@ extern uint32_t zone_get_hostid(void *zone);
extern void spl_setup(void);
extern void spl_cleanup(void);
+#define highbit(x) __fls(x)
+#define lowbit(x) __ffs(x)
+
#define highbit64(x) fls64(x)
#define makedevice(maj,min) makedev(maj,min)
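
Note the new highbit()/lowbit() map to the kernel's 0-based __fls()/__ffs(), while the existing highbit64() maps to the 1-based fls64(); all are undefined for a zero argument. For example:

    unsigned long x = 0x50;                 /* bits 4 and 6 set */

    ASSERT3U(lowbit(x), ==, 4);             /* __ffs(): 0-based LSB index */
    ASSERT3U(highbit(x), ==, 6);            /* __fls(): 0-based MSB index */
    ASSERT3U(highbit64(0x50ULL), ==, 7);    /* fls64(): 1-based */
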
diff --git a/spl/include/sys/taskq.h b/spl/include/sys/taskq.h
index fa4b2703e462..c5ccec715be6 100644
--- a/spl/include/sys/taskq.h
+++ b/spl/include/sys/taskq.h
@@ -1,4 +1,4 @@
-/*****************************************************************************\
+/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -20,10 +20,10 @@
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
-\*****************************************************************************/
+ */
#ifndef _SPL_TASKQ_H
-#define _SPL_TASKQ_H
+#define _SPL_TASKQ_H
#include <linux/module.h>
#include <linux/gfp.h>
@@ -33,55 +33,73 @@
#include <linux/wait_compat.h>
#include <sys/types.h>
#include <sys/thread.h>
+#include <sys/rwlock.h>
-#define TASKQ_NAMELEN 31
+#define TASKQ_NAMELEN 31
-#define TASKQ_PREPOPULATE 0x00000001
-#define TASKQ_CPR_SAFE 0x00000002
-#define TASKQ_DYNAMIC 0x00000004
-#define TASKQ_THREADS_CPU_PCT 0x00000008
-#define TASKQ_DC_BATCH 0x00000010
-#define TASKQ_ACTIVE 0x80000000
+#define TASKQ_PREPOPULATE 0x00000001
+#define TASKQ_CPR_SAFE 0x00000002
+#define TASKQ_DYNAMIC 0x00000004
+#define TASKQ_THREADS_CPU_PCT 0x00000008
+#define TASKQ_DC_BATCH 0x00000010
+#define TASKQ_ACTIVE 0x80000000
/*
* Flags for taskq_dispatch. TQ_SLEEP/TQ_NOSLEEP should be same as
* KM_SLEEP/KM_NOSLEEP. TQ_NOQUEUE/TQ_NOALLOC are set particularly
* large so as not to conflict with already used GFP_* defines.
*/
-#define TQ_SLEEP 0x00000000
-#define TQ_NOSLEEP 0x00000001
-#define TQ_PUSHPAGE 0x00000002
-#define TQ_NOQUEUE 0x01000000
-#define TQ_NOALLOC 0x02000000
-#define TQ_NEW 0x04000000
-#define TQ_FRONT 0x08000000
+#define TQ_SLEEP 0x00000000
+#define TQ_NOSLEEP 0x00000001
+#define TQ_PUSHPAGE 0x00000002
+#define TQ_NOQUEUE 0x01000000
+#define TQ_NOALLOC 0x02000000
+#define TQ_NEW 0x04000000
+#define TQ_FRONT 0x08000000
+
+/*
+ * Reserved taskqid values.
+ */
+#define TASKQID_INVALID ((taskqid_t)0)
+#define TASKQID_INITIAL ((taskqid_t)1)
+
+/*
+ * spin_lock(lock) and spin_lock_nested(lock,0) are equivalent,
+ * so TQ_LOCK_DYNAMIC must not evaluate to 0
+ */
+typedef enum tq_lock_role {
+ TQ_LOCK_GENERAL = 0,
+ TQ_LOCK_DYNAMIC = 1,
+} tq_lock_role_t;
typedef unsigned long taskqid_t;
typedef void (task_func_t)(void *);
typedef struct taskq {
- spinlock_t tq_lock; /* protects taskq_t */
- unsigned long tq_lock_flags; /* interrupt state */
- char *tq_name; /* taskq name */
- struct list_head tq_thread_list;/* list of all threads */
- struct list_head tq_active_list;/* list of active threads */
- int tq_nactive; /* # of active threads */
- int tq_nthreads; /* # of existing threads */
- int tq_nspawn; /* # of threads being spawned */
- int tq_maxthreads; /* # of threads maximum */
- int tq_pri; /* priority */
- int tq_minalloc; /* min task_t pool size */
- int tq_maxalloc; /* max task_t pool size */
- int tq_nalloc; /* cur task_t pool size */
- uint_t tq_flags; /* flags */
- taskqid_t tq_next_id; /* next pend/work id */
- taskqid_t tq_lowest_id; /* lowest pend/work id */
- struct list_head tq_free_list; /* free task_t's */
- struct list_head tq_pend_list; /* pending task_t's */
- struct list_head tq_prio_list; /* priority pending task_t's */
- struct list_head tq_delay_list; /* delayed task_t's */
- spl_wait_queue_head_t tq_work_waitq; /* new work waitq */
- spl_wait_queue_head_t tq_wait_waitq; /* wait waitq */
+ spinlock_t tq_lock; /* protects taskq_t */
+ char *tq_name; /* taskq name */
+ int tq_instance; /* instance of tq_name */
+ struct list_head tq_thread_list; /* list of all threads */
+ struct list_head tq_active_list; /* list of active threads */
+ int tq_nactive; /* # of active threads */
+ int tq_nthreads; /* # of existing threads */
+ int tq_nspawn; /* # of threads being spawned */
+ int tq_maxthreads; /* # of threads maximum */
+ int tq_pri; /* priority */
+ int tq_minalloc; /* min taskq_ent_t pool size */
+ int tq_maxalloc; /* max taskq_ent_t pool size */
+ int tq_nalloc; /* cur taskq_ent_t pool size */
+ uint_t tq_flags; /* flags */
+ taskqid_t tq_next_id; /* next pend/work id */
+ taskqid_t tq_lowest_id; /* lowest pend/work id */
+ struct list_head tq_free_list; /* free taskq_ent_t's */
+ struct list_head tq_pend_list; /* pending taskq_ent_t's */
+ struct list_head tq_prio_list; /* priority pending taskq_ent_t's */
+ struct list_head tq_delay_list; /* delayed taskq_ent_t's */
+ struct list_head tq_taskqs; /* all taskq_t's */
+ spl_wait_queue_head_t tq_work_waitq; /* new work waitq */
+ spl_wait_queue_head_t tq_wait_waitq; /* wait waitq */
+ tq_lock_role_t tq_lock_class; /* class when taking tq_lock */
} taskq_t;
typedef struct taskq_ent {
@@ -94,10 +112,11 @@ typedef struct taskq_ent {
void *tqent_arg;
taskq_t *tqent_taskq;
uintptr_t tqent_flags;
+ unsigned long tqent_birth;
} taskq_ent_t;
-#define TQENT_FLAG_PREALLOC 0x1
-#define TQENT_FLAG_CANCEL 0x2
+#define TQENT_FLAG_PREALLOC 0x1
+#define TQENT_FLAG_CANCEL 0x2
typedef struct taskq_thread {
struct list_head tqt_thread_list;
@@ -111,6 +130,12 @@ typedef struct taskq_thread {
/* Global system-wide dynamic task queue available for all consumers */
extern taskq_t *system_taskq;
+/* Global dynamic task queue for long delay */
+extern taskq_t *system_delay_taskq;
+
+/* List of all taskqs */
+extern struct list_head tq_list;
+extern struct rw_semaphore tq_list_sem;
extern taskqid_t taskq_dispatch(taskq_t *, task_func_t, void *, uint_t);
extern taskqid_t taskq_dispatch_delay(taskq_t *, task_func_t, void *,
@@ -125,11 +150,11 @@ extern void taskq_wait_id(taskq_t *, taskqid_t);
extern void taskq_wait_outstanding(taskq_t *, taskqid_t);
extern void taskq_wait(taskq_t *);
extern int taskq_cancel_id(taskq_t *, taskqid_t);
-extern int taskq_member(taskq_t *, void *);
+extern int taskq_member(taskq_t *, kthread_t *);
-#define taskq_create_proc(name, nthreads, pri, min, max, proc, flags) \
+#define taskq_create_proc(name, nthreads, pri, min, max, proc, flags) \
taskq_create(name, nthreads, pri, min, max, flags)
-#define taskq_create_sysdc(name, nthreads, min, max, proc, dc, flags) \
+#define taskq_create_sysdc(name, nthreads, min, max, proc, dc, flags) \
taskq_create(name, nthreads, maxclsyspri, min, max, flags)
int spl_taskq_init(void);
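
A sketch of the basic taskq lifecycle with the new TASKQID_INVALID sentinel (the names and sizing are illustrative):

    static void
    my_task(void *arg)
    {
            /* runs asynchronously in a taskq thread */
    }

    taskq_t *tq = taskq_create("my_tq", 4, maxclsyspri,
        4, INT_MAX, TASKQ_PREPOPULATE);

    if (taskq_dispatch(tq, my_task, NULL, TQ_SLEEP) == TASKQID_INVALID) {
            /* dispatch failed; handle the error */
    }

    taskq_wait(tq);         /* drain all outstanding tasks */
    taskq_destroy(tq);
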
diff --git a/spl/include/sys/time.h b/spl/include/sys/time.h
index 650166145b1a..ddda6deadaea 100644
--- a/spl/include/sys/time.h
+++ b/spl/include/sys/time.h
@@ -46,6 +46,12 @@
#define MSEC2NSEC(m) ((hrtime_t)(m) * (NANOSEC / MILLISEC))
#define NSEC2MSEC(n) ((n) / (NANOSEC / MILLISEC))
+#define USEC2NSEC(m) ((hrtime_t)(m) * (NANOSEC / MICROSEC))
+#define NSEC2USEC(n) ((n) / (NANOSEC / MICROSEC))
+
+#define NSEC2SEC(n) ((n) / (NANOSEC / SEC))
+#define SEC2NSEC(m) ((hrtime_t)(m) * (NANOSEC / SEC))
+
static const int hz = HZ;
#define TIMESPEC_OVERFLOW(ts) \
diff --git a/spl/include/sys/tsd.h b/spl/include/sys/tsd.h
index ebc55b09b92e..1894a8232316 100644
--- a/spl/include/sys/tsd.h
+++ b/spl/include/sys/tsd.h
@@ -35,6 +35,7 @@ typedef void (*dtor_func_t)(void *);
extern int tsd_set(uint_t, void *);
extern void *tsd_get(uint_t);
+extern void *tsd_get_by_thread(uint_t, kthread_t *);
extern void tsd_create(uint_t *, dtor_func_t);
extern void tsd_destroy(uint_t *);
extern void tsd_exit(void);
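
A sketch of the TSD API including the new tsd_get_by_thread(); my_key, my_data, and other_thread are hypothetical:

    static uint_t my_key;
    static int my_data;

    tsd_create(&my_key, NULL);              /* no destructor */
    (void) tsd_set(my_key, &my_data);       /* current thread's slot */

    void *mine = tsd_get(my_key);           /* == &my_data here */
    void *theirs = tsd_get_by_thread(my_key, other_thread);

    tsd_destroy(&my_key);
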
diff --git a/spl/include/sys/user.h b/spl/include/sys/user.h
index ebbe8f68eb83..2b25dd33c01a 100644
--- a/spl/include/sys/user.h
+++ b/spl/include/sys/user.h
@@ -30,8 +30,8 @@
* about the Linux task_struct. Since this is internal to our compatibility
* layer, we make it an opaque type.
*
- * XXX: If the descriptor changes under us, we would get an incorrect
- * reference.
+ * XXX: If the descriptor changes under us and we do not do a getf() between
+ * the change and using it, we would get an incorrect reference.
*/
struct uf_info;
diff --git a/spl/include/sys/vmsystm.h b/spl/include/sys/vmsystm.h
index edc0b38249b2..9d334fe0a16e 100644
--- a/spl/include/sys/vmsystm.h
+++ b/spl/include/sys/vmsystm.h
@@ -30,23 +30,15 @@
#include <linux/swap.h>
#include <linux/highmem.h>
#include <linux/vmalloc.h>
-#include <linux/version.h>
#include <sys/types.h>
#include <asm/uaccess.h>
#define membar_producer() smp_wmb()
#define physmem totalram_pages
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,14,0)
#define freemem (nr_free_pages() + \
global_page_state(NR_INACTIVE_FILE) + \
global_page_state(NR_INACTIVE_ANON) + \
global_page_state(NR_SLAB_RECLAIMABLE))
-#else
-#define freemem (nr_free_pages() + \
- global_zone_page_state(NR_INACTIVE_FILE) + \
- global_zone_page_state(NR_INACTIVE_ANON) + \
- global_zone_page_state(NR_SLAB_RECLAIMABLE))
-#endif
#define xcopyin(from, to, size) copy_from_user(to, from, size)
#define xcopyout(from, to, size) copy_to_user(to, from, size)
diff --git a/spl/include/util/Makefile.in b/spl/include/util/Makefile.in
index b01e799b33ae..86b788436808 100644
--- a/spl/include/util/Makefile.in
+++ b/spl/include/util/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/include/vm/Makefile.in b/spl/include/vm/Makefile.in
index b0529392b460..f3c78c1843a6 100644
--- a/spl/include/vm/Makefile.in
+++ b/spl/include/vm/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/lib/Makefile.in b/spl/lib/Makefile.in
index 4bd76fcbe3c8..80ccfb408718 100644
--- a/spl/lib/Makefile.in
+++ b/spl/lib/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/man/Makefile.in b/spl/man/Makefile.in
index 1d074a32a634..42679a02d0b3 100644
--- a/spl/man/Makefile.in
+++ b/spl/man/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/man/man1/Makefile.in b/spl/man/man1/Makefile.in
index 67b56c82841b..c1a591394cd4 100644
--- a/spl/man/man1/Makefile.in
+++ b/spl/man/man1/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/man/man5/Makefile.in b/spl/man/man5/Makefile.in
index f56476a323f8..d856b96513b5 100644
--- a/spl/man/man5/Makefile.in
+++ b/spl/man/man5/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/man/man5/spl-module-parameters.5 b/spl/man/man5/spl-module-parameters.5
index acdd5b658ff8..97241fd53780 100644
--- a/spl/man/man5/spl-module-parameters.5
+++ b/spl/man/man5/spl-module-parameters.5
@@ -41,6 +41,20 @@ concurrently.
Default value: \fB0x02\fR
.RE
+.sp
+.ne 2
+.na
+\fBspl_kmem_cache_kmem_threads\fR (uint)
+.ad
+.RS 12n
+The number of threads created for the spl_kmem_cache task queue. This task
+queue is responsible for allocating new slabs for use by the kmem caches.
+For the majority of systems and workloads only a small number of threads are
+required.
+.sp
+Default value: \fB4\fR
+.RE
+
.sp
.ne 2
.na
@@ -236,6 +250,20 @@ may be overridden for non-standard configurations.
Default value: \fB/etc/hostid\fR
.RE
+.sp
+.ne 2
+.na
+\fBspl_taskq_kick\fR (uint)
+.ad
+.RS 12n
+Kick stuck taskqs to spawn threads. Writing a non-zero value scans all taskqs;
+any taskq with a pending task more than 5 seconds old is kicked to spawn more
+threads. This can be used if you find a rare deadlock occurs because one or
+more taskqs didn't spawn a thread when they should have.
+.sp
+Default value: \fB0\fR
+.RE
+
.sp
.ne 2
.na
@@ -298,3 +326,18 @@ configurations.
.sp
Default value: \fB4\fR
.RE
+
+.sp
+.ne 2
+.na
+\fBspl_max_show_tasks\fR (uint)
+.ad
+.RS 12n
+The maximum number of tasks per pending list in each taskq shown in
+/proc/spl/{taskq,taskq-all}. Write 0 to turn off the limit. The proc file
+walks the lists with the lock held, so without a limit reading it could cause
+a lockup if the lists grow too large. "(truncated)" will be shown if a list
+is larger than the limit.
+.sp
+Default value: \fB512\fR
+.RE
diff --git a/spl/module/spl/spl-condvar.c b/spl/module/spl/spl-condvar.c
index c420d18cadfe..80c2ef09051f 100644
--- a/spl/module/spl/spl-condvar.c
+++ b/spl/module/spl/spl-condvar.c
@@ -26,6 +26,7 @@
#include <sys/condvar.h>
#include <sys/time.h>
+#include <linux/hrtimer.h>
void
__cv_init(kcondvar_t *cvp, char *name, kcv_type_t type, void *arg)
@@ -165,22 +166,19 @@ __cv_timedwait_common(kcondvar_t *cvp, kmutex_t *mp, clock_t expire_time,
ASSERT(mp);
ASSERT(cvp->cv_magic == CV_MAGIC);
ASSERT(mutex_owned(mp));
- atomic_inc(&cvp->cv_refs);
+ /* XXX - Does not handle jiffie wrap properly */
+ time_left = expire_time - jiffies;
+ if (time_left <= 0)
+ return (-1);
+
+ atomic_inc(&cvp->cv_refs);
m = ACCESS_ONCE(cvp->cv_mutex);
if (!m)
m = xchg(&cvp->cv_mutex, mp);
/* Ensure the same mutex is used by all callers */
ASSERT(m == NULL || m == mp);
- /* XXX - Does not handle jiffie wrap properly */
- time_left = expire_time - jiffies;
- if (time_left <= 0) {
- /* XXX - doesn't reset cv_mutex */
- atomic_dec(&cvp->cv_refs);
- return (-1);
- }
-
prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
atomic_inc(&cvp->cv_waiters);
@@ -237,29 +235,25 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
{
DEFINE_WAIT(wait);
kmutex_t *m;
- hrtime_t time_left, now;
- unsigned long time_left_us;
+ hrtime_t time_left;
+ ktime_t ktime_left;
ASSERT(cvp);
ASSERT(mp);
ASSERT(cvp->cv_magic == CV_MAGIC);
ASSERT(mutex_owned(mp));
- atomic_inc(&cvp->cv_refs);
+ time_left = expire_time - gethrtime();
+ if (time_left <= 0)
+ return (-1);
+
+ atomic_inc(&cvp->cv_refs);
m = ACCESS_ONCE(cvp->cv_mutex);
if (!m)
m = xchg(&cvp->cv_mutex, mp);
/* Ensure the same mutex is used by all callers */
ASSERT(m == NULL || m == mp);
- now = gethrtime();
- time_left = expire_time - now;
- if (time_left <= 0) {
- atomic_dec(&cvp->cv_refs);
- return (-1);
- }
- time_left_us = time_left / NSEC_PER_USEC;
-
prepare_to_wait_exclusive(&cvp->cv_event, &wait, state);
atomic_inc(&cvp->cv_waiters);
@@ -273,7 +267,9 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
* Allow a 100 us range to give kernel an opportunity to coalesce
* interrupts
*/
- usleep_range(time_left_us, time_left_us + 100);
+ ktime_left = ktime_set(0, time_left);
+ schedule_hrtimeout_range(&ktime_left, 100 * NSEC_PER_USEC,
+ HRTIMER_MODE_REL);
/* No more waiters a different mutex could be used */
if (atomic_dec_and_test(&cvp->cv_waiters)) {
@@ -290,15 +286,15 @@ __cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t expire_time,
mutex_enter(mp);
time_left = expire_time - gethrtime();
- return (time_left > 0 ? time_left : -1);
+ return (time_left > 0 ? NSEC_TO_TICK(time_left) : -1);
}
/*
* Compatibility wrapper for the cv_timedwait_hires() Illumos interface.
*/
-clock_t
-cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
- int flag)
+static clock_t
+cv_timedwait_hires_common(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
+ int flag, int state)
{
if (res > 1) {
/*
@@ -312,10 +308,27 @@ cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
if (!(flag & CALLOUT_FLAG_ABSOLUTE))
tim += gethrtime();
- return (__cv_timedwait_hires(cvp, mp, tim, TASK_UNINTERRUPTIBLE));
+ return (__cv_timedwait_hires(cvp, mp, tim, state));
+}
+
+clock_t
+cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
+ int flag)
+{
+ return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
+ TASK_UNINTERRUPTIBLE));
}
EXPORT_SYMBOL(cv_timedwait_hires);
+clock_t
+cv_timedwait_sig_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim, hrtime_t res,
+ int flag)
+{
+ return (cv_timedwait_hires_common(cvp, mp, tim, res, flag,
+ TASK_INTERRUPTIBLE));
+}
+EXPORT_SYMBOL(cv_timedwait_sig_hires);
+
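
As an illustration of the new interruptible interface, a minimal caller
sketch (cv, mp, and done are hypothetical names, and this is not part of the
patch) waits up to 1 ms for a condition while allowing signals to wake the
thread early:

	mutex_enter(&mp);
	while (!done) {
		/* relative timeout: flag 0, res 0, 1 ms in nanoseconds */
		if (cv_timedwait_sig_hires(&cv, &mp, MSEC2NSEC(1), 0, 0) == -1)
			break;	/* timed out before the condition was met */
	}
	mutex_exit(&mp);
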
void
__cv_signal(kcondvar_t *cvp)
{
diff --git a/spl/module/spl/spl-err.c b/spl/module/spl/spl-err.c
index cf9f096b11cb..2f66b6e23cb6 100644
--- a/spl/module/spl/spl-err.c
+++ b/spl/module/spl/spl-err.c
@@ -28,6 +28,17 @@
#include <sys/cmn_err.h>
#include <linux/ratelimit.h>
+/*
+ * It is often useful to have an assertion failure actually panic the node,
+ * so you can be notified of the event and capture a crash dump for later
+ * analysis.
+ * By default we preserve the existing behavior and do not panic.
+ */
+unsigned int spl_panic_halt;
+module_param(spl_panic_halt, uint, 0644);
+MODULE_PARM_DESC(spl_panic_halt,
+ "Cause kernel panic on assertion failures");
+
/*
* Limit the number of stack traces dumped to not more than 5 every
* 60 seconds to prevent denial-of-service attacks from debug code.
@@ -62,6 +73,9 @@ spl_panic(const char *file, const char *func, int line, const char *fmt, ...) {
printk(KERN_EMERG "%s", msg);
printk(KERN_EMERG "PANIC at %s:%d:%s()\n", newfile, line, func);
+ if (spl_panic_halt)
+ panic("%s", msg);
+
spl_dumpstack();
/* Halt the thread to facilitate further debugging */
diff --git a/spl/module/spl/spl-generic.c b/spl/module/spl/spl-generic.c
index b2bd1b837a9f..f6782dae73af 100644
--- a/spl/module/spl/spl-generic.c
+++ b/spl/module/spl/spl-generic.c
@@ -41,6 +41,8 @@
#include <sys/kstat.h>
#include <sys/file.h>
#include <linux/ctype.h>
+#include <sys/disp.h>
+#include <sys/random.h>
#include <linux/kmod.h>
#include <linux/math64_compat.h>
#include <linux/proc_compat.h>
@@ -56,6 +58,112 @@ MODULE_PARM_DESC(spl_hostid, "The system hostid.");
proc_t p0;
EXPORT_SYMBOL(p0);
+/*
+ * Xorshift Pseudo Random Number Generator based on work by Sebastiano Vigna
+ *
+ * "Further scramblings of Marsaglia's xorshift generators"
+ * http://vigna.di.unimi.it/ftp/papers/xorshiftplus.pdf
+ *
+ * random_get_pseudo_bytes() is an API function on Illumos whose sole purpose
+ * is to provide bytes containing random numbers. It is mapped to /dev/urandom
+ * on Illumos, which uses a "FIPS 186-2 algorithm". No user of the SPL's
+ * random_get_pseudo_bytes() needs bytes that are of cryptographic quality, so
+ * we can implement it using a fast PRNG that we seed using Linux' actual
+ * equivalent to random_get_pseudo_bytes(). We do this by providing each CPU
+ * with an independent seed so that all calls to random_get_pseudo_bytes() are
+ * free of atomic instructions.
+ *
+ * A consequence of using a fast PRNG is that using random_get_pseudo_bytes()
+ * to generate words larger than 128 bits will paradoxically be limited to
+ * `2^128 - 1` possibilities. This is because we have a sequence of `2^128 - 1`
+ * 128-bit words and selecting the first will implicitly select the second. If
+ * a caller finds this behavior undesirable, random_get_bytes() should be used
+ * instead.
+ *
+ * XXX: Linux interrupt handlers that trigger within the critical section
+ * formed by `s[1] = xp[1];` and `xp[0] = s[0];` and call this function will
+ * see the same numbers. Nothing in the code currently calls this in an
+ * interrupt handler, so this is considered to be okay. If that becomes a
+ * problem, we could create a set of per-cpu variables for interrupt handlers
+ * and use them when in_interrupt() from linux/preempt_mask.h evaluates to
+ * true.
+ */
+static DEFINE_PER_CPU(uint64_t[2], spl_pseudo_entropy);
+
+/*
+ * spl_rand_next()/spl_rand_jump() are copied from the following CC-0 licensed
+ * file:
+ *
+ * http://xorshift.di.unimi.it/xorshift128plus.c
+ */
+
+static inline uint64_t
+spl_rand_next(uint64_t *s) {
+ uint64_t s1 = s[0];
+ const uint64_t s0 = s[1];
+ s[0] = s0;
+ s1 ^= s1 << 23; // a
+ s[1] = s1 ^ s0 ^ (s1 >> 18) ^ (s0 >> 5); // b, c
+ return (s[1] + s0);
+}
+
+static inline void
+spl_rand_jump(uint64_t *s) {
+ static const uint64_t JUMP[] = { 0x8a5cd789635d2dff, 0x121fd2155c472f96 };
+
+ uint64_t s0 = 0;
+ uint64_t s1 = 0;
+ int i, b;
+ for(i = 0; i < sizeof JUMP / sizeof *JUMP; i++)
+ for(b = 0; b < 64; b++) {
+ if (JUMP[i] & 1ULL << b) {
+ s0 ^= s[0];
+ s1 ^= s[1];
+ }
+ (void) spl_rand_next(s);
+ }
+
+ s[0] = s0;
+ s[1] = s1;
+}
+
+int
+random_get_pseudo_bytes(uint8_t *ptr, size_t len)
+{
+ uint64_t *xp, s[2];
+
+ ASSERT(ptr);
+
+ xp = get_cpu_var(spl_pseudo_entropy);
+
+ s[0] = xp[0];
+ s[1] = xp[1];
+
+ while (len) {
+ union {
+ uint64_t ui64;
+ uint8_t byte[sizeof (uint64_t)];
+ } entropy;
+ int i = MIN(len, sizeof (uint64_t));
+
+ len -= i;
+ entropy.ui64 = spl_rand_next(s);
+
+ while (i--)
+ *ptr++ = entropy.byte[i];
+ }
+
+ xp[0] = s[0];
+ xp[1] = s[1];
+
+ put_cpu_var(spl_pseudo_entropy);
+
+ return (0);
+}
+
+EXPORT_SYMBOL(random_get_pseudo_bytes);
+
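
For reference, the xorshift128+ recurrence above can be exercised in a
stand-alone userspace program. This sketch is illustrative only; the fixed
seed is made up, whereas the module seeds each CPU from get_random_bytes():

#include <stdint.h>
#include <stdio.h>

/* same recurrence as spl_rand_next() above */
static uint64_t
xs128p_next(uint64_t *s)
{
	uint64_t s1 = s[0];
	const uint64_t s0 = s[1];

	s[0] = s0;
	s1 ^= s1 << 23;
	s[1] = s1 ^ s0 ^ (s1 >> 18) ^ (s0 >> 5);
	return (s[1] + s0);
}

int
main(void)
{
	/* hypothetical fixed seed, for illustration only */
	uint64_t s[2] = { 0x0123456789abcdefULL, 0xfedcba9876543210ULL };
	int i;

	for (i = 0; i < 4; i++)
		printf("%016llx\n", (unsigned long long)xs128p_next(s));
	return (0);
}
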
#if BITS_PER_LONG == 32
/*
* Support 64/64 => 64 division on a 32-bit platform. While the kernel
@@ -169,6 +277,49 @@ __umoddi3(uint64_t dividend, uint64_t divisor)
}
EXPORT_SYMBOL(__umoddi3);
+/*
+ * Implementation of 64-bit unsigned division/modulo for 32-bit machines.
+ */
+uint64_t
+__udivmoddi4(uint64_t n, uint64_t d, uint64_t *r)
+{
+ uint64_t q = __udivdi3(n, d);
+ if (r)
+ *r = n - d * q;
+ return (q);
+}
+EXPORT_SYMBOL(__udivmoddi4);
+
+/*
+ * Implementation of 64-bit signed division/modulo for 32-bit machines.
+ */
+int64_t
+__divmoddi4(int64_t n, int64_t d, int64_t *r)
+{
+ int64_t q, rr;
+ boolean_t nn = B_FALSE;
+ boolean_t nd = B_FALSE;
+ if (n < 0) {
+ nn = B_TRUE;
+ n = -n;
+ }
+ if (d < 0) {
+ nd = B_TRUE;
+ d = -d;
+ }
+
+ q = __udivmoddi4(n, d, (uint64_t *)&rr);
+
+ if (nn != nd)
+ q = -q;
+ if (nn)
+ rr = -rr;
+ if (r)
+ *r = rr;
+ return (q);
+}
+EXPORT_SYMBOL(__divmoddi4);
+
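
The sign handling above implements C99-style truncated division: the quotient
is negative when exactly one operand is, and the remainder takes the sign of
the dividend. A userspace check of those conventions (a sketch using native
64-bit division in place of the __udivmoddi4() helper):

#include <stdint.h>
#include <assert.h>

/* mirror of __divmoddi4() semantics using native C99 division */
static int64_t
divmod64(int64_t n, int64_t d, int64_t *r)
{
	int64_t q = n / d;	/* truncates toward zero */

	if (r)
		*r = n - d * q;	/* remainder has the sign of n */
	return (q);
}

int
main(void)
{
	int64_t r;

	assert(divmod64(7, -2, &r) == -3 && r == 1);
	assert(divmod64(-7, 2, &r) == -3 && r == -1);
	return (0);
}
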
#if defined(__arm) || defined(__arm__)
/*
* Implementation of 64-bit (un)signed division for 32-bit arm machines.
@@ -391,80 +542,63 @@ module_param(spl_hostid_path, charp, 0444);
MODULE_PARM_DESC(spl_hostid_path, "The system hostid file (/etc/hostid)");
static int
-hostid_read(void)
+hostid_read(uint32_t *hostid)
{
- int result;
uint64_t size;
struct _buf *file;
- uint32_t hostid = 0;
+ uint32_t value = 0;
+ int error;
file = kobj_open_file(spl_hostid_path);
-
if (file == (struct _buf *)-1)
- return -1;
-
- result = kobj_get_filesize(file, &size);
+ return (ENOENT);
- if (result != 0) {
- printk(KERN_WARNING
- "SPL: kobj_get_filesize returned %i on %s\n",
- result, spl_hostid_path);
+ error = kobj_get_filesize(file, &size);
+ if (error) {
kobj_close_file(file);
- return -2;
+ return (error);
}
if (size < sizeof(HW_HOSTID_MASK)) {
- printk(KERN_WARNING
- "SPL: Ignoring the %s file because it is %llu bytes; "
- "expecting %lu bytes instead.\n", spl_hostid_path,
- size, (unsigned long)sizeof(HW_HOSTID_MASK));
kobj_close_file(file);
- return -3;
+ return (EINVAL);
}
- /* Read directly into the variable like eglibc does. */
- /* Short reads are okay; native behavior is preserved. */
- result = kobj_read_file(file, (char *)&hostid, sizeof(hostid), 0);
-
- if (result < 0) {
- printk(KERN_WARNING
- "SPL: kobj_read_file returned %i on %s\n",
- result, spl_hostid_path);
+ /*
+ * Read directly into the variable like eglibc does.
+ * Short reads are okay; native behavior is preserved.
+ */
+ error = kobj_read_file(file, (char *)&value, sizeof(value), 0);
+ if (error < 0) {
kobj_close_file(file);
- return -4;
+ return (EIO);
}
/* Mask down to 32 bits like coreutils does. */
- spl_hostid = hostid & HW_HOSTID_MASK;
+ *hostid = (value & HW_HOSTID_MASK);
kobj_close_file(file);
+
return 0;
}
+/*
+ * Return the system hostid. Preferentially use the spl_hostid module option
+ * when set, otherwise use the value in the /etc/hostid file.
+ */
uint32_t
zone_get_hostid(void *zone)
{
- static int first = 1;
-
- /* Only the global zone is supported */
- ASSERT(zone == NULL);
+ uint32_t hostid;
- if (first) {
- first = 0;
+ ASSERT3P(zone, ==, NULL);
- spl_hostid &= HW_HOSTID_MASK;
- /*
- * Get the hostid if it was not passed as a module parameter.
- * Try reading the /etc/hostid file directly.
- */
- if (spl_hostid == 0 && hostid_read())
- spl_hostid = 0;
+ if (spl_hostid != 0)
+ return ((uint32_t)(spl_hostid & HW_HOSTID_MASK));
+ if (hostid_read(&hostid) == 0)
+ return (hostid);
- printk(KERN_NOTICE "SPL: using hostid 0x%08x\n",
- (unsigned int) spl_hostid);
- }
-
- return spl_hostid;
+ return (0);
}
EXPORT_SYMBOL(zone_get_hostid);
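
The file format expected here matches what userspace tools write: at least
four bytes, read in native byte order and masked to 32 bits. A rough
userspace equivalent of the read path (a sketch, not part of the patch):

#include <stdio.h>
#include <stdint.h>

#define HW_HOSTID_MASK	0xFFFFFFFF

/* mimic hostid_read(): returns 0 on success and fills *hostid */
static int
read_hostid(const char *path, uint32_t *hostid)
{
	uint32_t value = 0;
	FILE *fp = fopen(path, "rb");

	if (fp == NULL)
		return (-1);

	/* short reads are okay; native behavior is preserved */
	(void) fread(&value, 1, sizeof (value), fp);
	fclose(fp);

	*hostid = value & HW_HOSTID_MASK;
	return (0);
}

int
main(void)
{
	uint32_t hostid;

	if (read_hostid("/etc/hostid", &hostid) == 0)
		printf("hostid: 0x%08x\n", hostid);
	return (0);
}
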
@@ -475,29 +609,58 @@ spl_kvmem_init(void)
rc = spl_kmem_init();
if (rc)
- goto out1;
+ return (rc);
rc = spl_vmem_init();
- if (rc)
- goto out2;
-
- rc = spl_kmem_cache_init();
- if (rc)
- goto out3;
+ if (rc) {
+ spl_kmem_fini();
+ return (rc);
+ }
return (rc);
-out3:
- spl_vmem_fini();
-out2:
- spl_kmem_fini();
-out1:
- return (rc);
+}
+
+/*
+ * We initialize the random number generator with 128 bits of entropy from the
+ * system random number generator. In the improbable case that we have a zero
+ * seed, we fall back to the system jiffies, unless it is also zero, in which
+ * case we use a preprogrammed seed. We step forward by 2^64 iterations to
+ * initialize each of the per-cpu seeds so that the sequences generated on each
+ * CPU are guaranteed to never overlap in practice.
+ */
+static void __init
+spl_random_init(void)
+{
+ uint64_t s[2];
+ int i;
+
+ get_random_bytes(s, sizeof (s));
+
+ if (s[0] == 0 && s[1] == 0) {
+ if (jiffies != 0) {
+ s[0] = jiffies;
+ s[1] = ~0 - jiffies;
+ } else {
+ (void) memcpy(s, "improbable seed", sizeof (s));
+ }
+ printk("SPL: get_random_bytes() returned 0 "
+ "when generating random seed. Setting initial seed to "
+ "0x%016llx%016llx.", cpu_to_be64(s[0]), cpu_to_be64(s[1]));
+ }
+
+ for_each_possible_cpu(i) {
+ uint64_t *wordp = per_cpu(spl_pseudo_entropy, i);
+
+ spl_rand_jump(s);
+
+ wordp[0] = s[0];
+ wordp[1] = s[1];
+ }
}
static void
spl_kvmem_fini(void)
{
- spl_kmem_cache_fini();
spl_vmem_fini();
spl_kmem_fini();
}
@@ -508,6 +671,7 @@ spl_init(void)
int rc = 0;
bzero(&p0, sizeof (proc_t));
+ spl_random_init();
if ((rc = spl_kvmem_init()))
goto out1;
@@ -518,38 +682,43 @@ spl_init(void)
if ((rc = spl_rw_init()))
goto out3;
- if ((rc = spl_taskq_init()))
+ if ((rc = spl_tsd_init()))
goto out4;
- if ((rc = spl_vn_init()))
+ if ((rc = spl_taskq_init()))
goto out5;
- if ((rc = spl_proc_init()))
+ if ((rc = spl_kmem_cache_init()))
goto out6;
- if ((rc = spl_kstat_init()))
+ if ((rc = spl_vn_init()))
goto out7;
- if ((rc = spl_tsd_init()))
+ if ((rc = spl_proc_init()))
goto out8;
- if ((rc = spl_zlib_init()))
+ if ((rc = spl_kstat_init()))
goto out9;
+ if ((rc = spl_zlib_init()))
+ goto out10;
+
printk(KERN_NOTICE "SPL: Loaded module v%s-%s%s\n", SPL_META_VERSION,
SPL_META_RELEASE, SPL_DEBUG_STR);
return (rc);
+out10:
+ spl_kstat_fini();
out9:
- spl_tsd_fini();
+ spl_proc_fini();
out8:
- spl_kstat_fini();
+ spl_vn_fini();
out7:
- spl_proc_fini();
+ spl_kmem_cache_fini();
out6:
- spl_vn_fini();
-out5:
spl_taskq_fini();
+out5:
+ spl_tsd_fini();
out4:
spl_rw_fini();
out3:
@@ -570,11 +739,12 @@ spl_fini(void)
printk(KERN_NOTICE "SPL: Unloaded module v%s-%s%s\n",
SPL_META_VERSION, SPL_META_RELEASE, SPL_DEBUG_STR);
spl_zlib_fini();
- spl_tsd_fini();
spl_kstat_fini();
spl_proc_fini();
spl_vn_fini();
+ spl_kmem_cache_fini();
spl_taskq_fini();
+ spl_tsd_fini();
spl_rw_fini();
spl_mutex_fini();
spl_kvmem_fini();
diff --git a/spl/module/spl/spl-kmem-cache.c b/spl/module/spl/spl-kmem-cache.c
index b58f128cbde5..45576b9761e7 100644
--- a/spl/module/spl/spl-kmem-cache.c
+++ b/spl/module/spl/spl-kmem-cache.c
@@ -88,7 +88,7 @@ MODULE_PARM_DESC(spl_kmem_cache_expire, "By age (0x1) or low memory (0x2)");
unsigned int spl_kmem_cache_magazine_size = 0;
module_param(spl_kmem_cache_magazine_size, uint, 0444);
MODULE_PARM_DESC(spl_kmem_cache_magazine_size,
- "Default magazine size (2-256), set automatically (0)\n");
+ "Default magazine size (2-256), set automatically (0)");
/*
* The default behavior is to report the number of objects remaining in the
@@ -1001,8 +1001,17 @@ spl_kmem_cache_create(char *name, size_t size, size_t align,
slabflags |= SLAB_USERCOPY;
#endif
- skc->skc_linux_cache = kmem_cache_create(
- skc->skc_name, size, align, slabflags, NULL);
+#if defined(HAVE_KMEM_CACHE_CREATE_USERCOPY)
+ /*
+ * Newer grsec patchset uses kmem_cache_create_usercopy()
+ * instead of SLAB_USERCOPY flag
+ */
+ skc->skc_linux_cache = kmem_cache_create_usercopy(
+ skc->skc_name, size, align, slabflags, 0, size, NULL);
+#else
+ skc->skc_linux_cache = kmem_cache_create(
+ skc->skc_name, size, align, slabflags, NULL);
+#endif
if (skc->skc_linux_cache == NULL) {
rc = ENOMEM;
goto out;
@@ -1149,15 +1158,13 @@ spl_cache_obj(spl_kmem_cache_t *skc, spl_kmem_slab_t *sks)
* It is responsible for allocating a new slab, linking it in to the list
* of partial slabs, and then waking any waiters.
*/
-static void
-spl_cache_grow_work(void *data)
+static int
+__spl_cache_grow(spl_kmem_cache_t *skc, int flags)
{
- spl_kmem_alloc_t *ska = (spl_kmem_alloc_t *)data;
- spl_kmem_cache_t *skc = ska->ska_cache;
spl_kmem_slab_t *sks;
fstrans_cookie_t cookie = spl_fstrans_mark();
- sks = spl_slab_alloc(skc, ska->ska_flags);
+ sks = spl_slab_alloc(skc, flags);
spl_fstrans_unmark(cookie);
spin_lock(&skc->skc_lock);
@@ -1165,15 +1172,29 @@ spl_cache_grow_work(void *data)
skc->skc_slab_total++;
skc->skc_obj_total += sks->sks_objs;
list_add_tail(&sks->sks_list, &skc->skc_partial_list);
+
+ smp_mb__before_atomic();
+ clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
+ smp_mb__after_atomic();
+ wake_up_all(&skc->skc_waitq);
}
+ spin_unlock(&skc->skc_lock);
+
+ return (sks == NULL ? -ENOMEM : 0);
+}
+
+static void
+spl_cache_grow_work(void *data)
+{
+ spl_kmem_alloc_t *ska = (spl_kmem_alloc_t *)data;
+ spl_kmem_cache_t *skc = ska->ska_cache;
+
+ (void)__spl_cache_grow(skc, ska->ska_flags);
atomic_dec(&skc->skc_ref);
smp_mb__before_atomic();
clear_bit(KMC_BIT_GROWING, &skc->skc_flags);
- clear_bit(KMC_BIT_DEADLOCKED, &skc->skc_flags);
smp_mb__after_atomic();
- wake_up_all(&skc->skc_waitq);
- spin_unlock(&skc->skc_lock);
kfree(ska);
}
@@ -1213,6 +1234,21 @@ spl_cache_grow(spl_kmem_cache_t *skc, int flags, void **obj)
return (rc ? rc : -EAGAIN);
}
+ /*
+ * To reduce the overhead of a context switch and improve NUMA locality,
+ * first try to allocate a new slab in the current process context with
+ * the KM_NOSLEEP flag. If that fails, dispatch the allocation to a taskq
+ * instead.
+ *
+ * However, this can't be applied to KMC_VMEM caches due to a bug where
+ * __vmalloc() doesn't honor gfp flags in page table allocation.
+ */
+ if (!(skc->skc_flags & KMC_VMEM)) {
+ rc = __spl_cache_grow(skc, flags | KM_NOSLEEP);
+ if (rc == 0)
+ return (0);
+ }
+
/*
* This is handled by dispatching a work request to the global work
* queue. This allows us to asynchronously allocate a new slab while
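
The fast-path-then-taskq structure introduced above generalizes: attempt a
non-sleeping allocation inline for latency and NUMA locality, and pay for the
asynchronous dispatch only when that fails. A condensed sketch of the control
flow (spl_cache_grow_async() is a hypothetical stand-in for the work-request
dispatch, not the literal patch code):

static int
cache_grow_sketch(spl_kmem_cache_t *skc, int flags)
{
	/* KMC_VMEM caches must not take the inline path (see above) */
	if (!(skc->skc_flags & KMC_VMEM)) {
		if (__spl_cache_grow(skc, flags | KM_NOSLEEP) == 0)
			return (0);	/* grew inline, no context switch */
	}

	/* fall back to asynchronous growth via the taskq */
	return (spl_cache_grow_async(skc, flags));	/* hypothetical */
}
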
diff --git a/spl/module/spl/spl-kmem.c b/spl/module/spl/spl-kmem.c
index 2b68c297a44d..41bec75d2695 100755
--- a/spl/module/spl/spl-kmem.c
+++ b/spl/module/spl/spl-kmem.c
@@ -35,7 +35,7 @@
* rate limited warning will be printed to the console for any kmem_alloc()
* which exceeds a reasonable threshold.
*
- * The default warning threshold is set to eight pages but capped at 32K to
+ * The default warning threshold is set to sixteen pages but capped at 64K to
* accommodate systems using large pages. This value was selected to be small
* enough to ensure the largest allocations are quickly noticed and fixed.
* But large enough to avoid logging any warnings when an allocation size is
@@ -44,7 +44,7 @@
* allocations are quickly caught. These warnings may be disabled by setting
* the threshold to zero.
*/
-unsigned int spl_kmem_alloc_warn = MAX(8 * PAGE_SIZE, 32 * 1024);
+unsigned int spl_kmem_alloc_warn = MIN(16 * PAGE_SIZE, 64 * 1024);
module_param(spl_kmem_alloc_warn, uint, 0644);
MODULE_PARM_DESC(spl_kmem_alloc_warn,
"Warning threshold in bytes for a kmem_alloc()");
diff --git a/spl/module/spl/spl-kobj.c b/spl/module/spl/spl-kobj.c
index 4dd14ba41760..b79fcb82836c 100644
--- a/spl/module/spl/spl-kobj.c
+++ b/spl/module/spl/spl-kobj.c
@@ -57,10 +57,15 @@ kobj_close_file(struct _buf *file)
EXPORT_SYMBOL(kobj_close_file);
int
-kobj_read_file(struct _buf *file, char *buf, ssize_t size, offset_t off)
+kobj_read_file(struct _buf *file, char *buf, unsigned size, unsigned off)
{
- return (vn_rdwr(UIO_READ, file->vp, buf, size, off,
- UIO_SYSSPACE, 0, RLIM64_INFINITY, 0, NULL));
+ ssize_t resid;
+
+ if (vn_rdwr(UIO_READ, file->vp, buf, size, (offset_t)off,
+ UIO_SYSSPACE, 0, 0, 0, &resid) != 0)
+ return (-1);
+
+ return (size - resid);
} /* kobj_read_file() */
EXPORT_SYMBOL(kobj_read_file);
diff --git a/spl/module/spl/spl-kstat.c b/spl/module/spl/spl-kstat.c
index e8917a3ea80c..1b6a7df9b348 100644
--- a/spl/module/spl/spl-kstat.c
+++ b/spl/module/spl/spl-kstat.c
@@ -27,6 +27,7 @@
#include <linux/seq_file.h>
#include <sys/kstat.h>
#include <sys/vmem.h>
+#include <sys/cmn_err.h>
#ifndef HAVE_PDE_DATA
#define PDE_DATA(x) (PDE(x)->data)
@@ -608,6 +609,29 @@ __kstat_create(const char *ks_module, int ks_instance, const char *ks_name,
}
EXPORT_SYMBOL(__kstat_create);
+static int
+kstat_detect_collision(kstat_t *ksp)
+{
+ kstat_module_t *module;
+ kstat_t *tmp;
+ char parent[KSTAT_STRLEN+1];
+ char *cp;
+
+ (void) strlcpy(parent, ksp->ks_module, sizeof(parent));
+
+ if ((cp = strrchr(parent, '/')) == NULL)
+ return (0);
+
+ cp[0] = '\0';
+ if ((module = kstat_find_module(parent)) != NULL) {
+ list_for_each_entry(tmp, &module->ksm_kstat_list, ks_list)
+ if (strncmp(tmp->ks_name, cp+1, KSTAT_STRLEN) == 0)
+ return (EEXIST);
+ }
+
+ return (0);
+}
+
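
The collision being detected is between a slash-separated module path and an
existing kstat: for example, a ks_module of "zfs/tank" collides with a kstat
named "tank" already installed under the "zfs" module. The split itself is
just strrchr() on the last '/', as this userspace sketch shows (the
"zfs/tank" value is hypothetical):

#include <stdio.h>
#include <string.h>

int
main(void)
{
	char parent[] = "zfs/tank";	/* hypothetical ks_module value */
	char *cp = strrchr(parent, '/');

	if (cp != NULL) {
		*cp = '\0';
		/* parent module "zfs", would-be kstat name "tank" */
		printf("module=%s name=%s\n", parent, cp + 1);
	}
	return (0);
}
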
void
__kstat_install(kstat_t *ksp)
{
@@ -620,6 +644,11 @@ __kstat_install(kstat_t *ksp)
module = kstat_find_module(ksp->ks_module);
if (module == NULL) {
+ if (kstat_detect_collision(ksp) != 0) {
+ cmn_err(CE_WARN, "kstat_create('%s', '%s'): namespace" \
+ " collision", ksp->ks_module, ksp->ks_name);
+ goto out;
+ }
module = kstat_create_module(ksp->ks_module);
if (module == NULL)
goto out;
diff --git a/spl/module/spl/spl-proc.c b/spl/module/spl/spl-proc.c
index 08ca974145f3..05c1a5dbc220 100644
--- a/spl/module/spl/spl-proc.c
+++ b/spl/module/spl/spl-proc.c
@@ -29,6 +29,7 @@
#include <sys/kmem.h>
#include <sys/kmem_cache.h>
#include <sys/vmem.h>
+#include <sys/taskq.h>
#include <linux/ctype.h>
#include <linux/kmod.h>
#include <linux/seq_file.h>
@@ -49,6 +50,8 @@ static struct ctl_table_header *spl_header = NULL;
static struct proc_dir_entry *proc_spl = NULL;
static struct proc_dir_entry *proc_spl_kmem = NULL;
static struct proc_dir_entry *proc_spl_kmem_slab = NULL;
+static struct proc_dir_entry *proc_spl_taskq_all = NULL;
+static struct proc_dir_entry *proc_spl_taskq = NULL;
struct proc_dir_entry *proc_spl_kstat = NULL;
static int
@@ -200,7 +203,8 @@ proc_dohostid(struct ctl_table *table, int write,
return (-EINVAL);
} else {
- len = snprintf(str, sizeof(str), "%lx", spl_hostid);
+ len = snprintf(str, sizeof(str), "%lx",
+ (unsigned long) zone_get_hostid(NULL));
if (*ppos >= len)
rc = 0;
else
@@ -215,6 +219,193 @@ proc_dohostid(struct ctl_table *table, int write,
return (rc);
}
+static void
+taskq_seq_show_headers(struct seq_file *f)
+{
+ seq_printf(f, "%-25s %5s %5s %5s %5s %5s %5s %12s %5s %10s\n",
+ "taskq", "act", "nthr", "spwn", "maxt", "pri",
+ "mina", "maxa", "cura", "flags");
+}
+
+/* indices into the lheads array below */
+#define LHEAD_PEND 0
+#define LHEAD_PRIO 1
+#define LHEAD_DELAY 2
+#define LHEAD_WAIT 3
+#define LHEAD_ACTIVE 4
+#define LHEAD_SIZE 5
+
+static unsigned int spl_max_show_tasks = 512;
+module_param(spl_max_show_tasks, uint, 0644);
+MODULE_PARM_DESC(spl_max_show_tasks, "Max number of tasks shown in taskq proc");
+
+static int
+taskq_seq_show_impl(struct seq_file *f, void *p, boolean_t allflag)
+{
+ taskq_t *tq = p;
+ taskq_thread_t *tqt;
+ spl_wait_queue_entry_t *wq;
+ struct task_struct *tsk;
+ taskq_ent_t *tqe;
+ char name[100];
+ struct list_head *lheads[LHEAD_SIZE], *lh;
+ static char *list_names[LHEAD_SIZE] =
+ {"pend", "prio", "delay", "wait", "active" };
+ int i, j, have_lheads = 0;
+ unsigned long wflags, flags;
+
+ spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
+ spin_lock_irqsave(&tq->tq_wait_waitq.lock, wflags);
+
+ /* get the various lists and check whether they're empty */
+ lheads[LHEAD_PEND] = &tq->tq_pend_list;
+ lheads[LHEAD_PRIO] = &tq->tq_prio_list;
+ lheads[LHEAD_DELAY] = &tq->tq_delay_list;
+#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
+ lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.head;
+#else
+ lheads[LHEAD_WAIT] = &tq->tq_wait_waitq.task_list;
+#endif
+ lheads[LHEAD_ACTIVE] = &tq->tq_active_list;
+
+ for (i = 0; i < LHEAD_SIZE; ++i) {
+ if (list_empty(lheads[i]))
+ lheads[i] = NULL;
+ else
+ ++have_lheads;
+ }
+
+ /* early return in non-"all" mode if lists are all empty */
+ if (!allflag && !have_lheads) {
+ spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
+ return (0);
+ }
+
+ /* unlock the waitq quickly */
+ if (!lheads[LHEAD_WAIT])
+ spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
+
+ /* show the base taskq contents */
+ snprintf(name, sizeof(name), "%s/%d", tq->tq_name, tq->tq_instance);
+ seq_printf(f, "%-25s ", name);
+ seq_printf(f, "%5d %5d %5d %5d %5d %5d %12d %5d %10x\n",
+ tq->tq_nactive, tq->tq_nthreads, tq->tq_nspawn,
+ tq->tq_maxthreads, tq->tq_pri, tq->tq_minalloc, tq->tq_maxalloc,
+ tq->tq_nalloc, tq->tq_flags);
+
+ /* show the active list */
+ if (lheads[LHEAD_ACTIVE]) {
+ j = 0;
+ list_for_each_entry(tqt, &tq->tq_active_list, tqt_active_list) {
+ if (j == 0)
+ seq_printf(f, "\t%s:", list_names[LHEAD_ACTIVE]);
+ else if (j == 2) {
+ seq_printf(f, "\n\t ");
+ j = 0;
+ }
+ seq_printf(f, " [%d]%pf(%ps)",
+ tqt->tqt_thread->pid,
+ tqt->tqt_task->tqent_func,
+ tqt->tqt_task->tqent_arg);
+ ++j;
+ }
+ seq_printf(f, "\n");
+ }
+
+ for (i = LHEAD_PEND; i <= LHEAD_WAIT; ++i)
+ if (lheads[i]) {
+ j = 0;
+ list_for_each(lh, lheads[i]) {
+ if (spl_max_show_tasks != 0 &&
+ j >= spl_max_show_tasks) {
+ seq_printf(f, "\n\t(truncated)");
+ break;
+ }
+ /* show the wait waitq list */
+ if (i == LHEAD_WAIT) {
+#ifdef HAVE_WAIT_QUEUE_HEAD_ENTRY
+ wq = list_entry(lh,
+ spl_wait_queue_entry_t, entry);
+#else
+ wq = list_entry(lh,
+ spl_wait_queue_entry_t, task_list);
+#endif
+ if (j == 0)
+ seq_printf(f, "\t%s:",
+ list_names[i]);
+ else if (j % 8 == 0)
+ seq_printf(f, "\n\t ");
+
+ tsk = wq->private;
+ seq_printf(f, " %d", tsk->pid);
+ /* pend, prio and delay lists */
+ } else {
+ tqe = list_entry(lh, taskq_ent_t,
+ tqent_list);
+ if (j == 0)
+ seq_printf(f, "\t%s:",
+ list_names[i]);
+ else if (j % 2 == 0)
+ seq_printf(f, "\n\t ");
+
+ seq_printf(f, " %pf(%ps)",
+ tqe->tqent_func,
+ tqe->tqent_arg);
+ }
+ ++j;
+ }
+ seq_printf(f, "\n");
+ }
+ if (lheads[LHEAD_WAIT])
+ spin_unlock_irqrestore(&tq->tq_wait_waitq.lock, wflags);
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
+
+ return (0);
+}
+
+static int
+taskq_all_seq_show(struct seq_file *f, void *p)
+{
+ return (taskq_seq_show_impl(f, p, B_TRUE));
+}
+
+static int
+taskq_seq_show(struct seq_file *f, void *p)
+{
+ return (taskq_seq_show_impl(f, p, B_FALSE));
+}
+
+static void *
+taskq_seq_start(struct seq_file *f, loff_t *pos)
+{
+ struct list_head *p;
+ loff_t n = *pos;
+
+ down_read(&tq_list_sem);
+ if (!n)
+ taskq_seq_show_headers(f);
+
+ p = tq_list.next;
+ while (n--) {
+ p = p->next;
+ if (p == &tq_list)
+ return (NULL);
+ }
+
+ return (list_entry(p, taskq_t, tq_taskqs));
+}
+
+static void *
+taskq_seq_next(struct seq_file *f, void *p, loff_t *pos)
+{
+ taskq_t *tq = p;
+
+ ++*pos;
+ return ((tq->tq_taskqs.next == &tq_list) ?
+ NULL : list_entry(tq->tq_taskqs.next, taskq_t, tq_taskqs));
+}
+
static void
slab_seq_show_headers(struct seq_file *f)
{
@@ -325,6 +516,52 @@ static struct file_operations proc_slab_operations = {
.release = seq_release,
};
+static void
+taskq_seq_stop(struct seq_file *f, void *v)
+{
+ up_read(&tq_list_sem);
+}
+
+static struct seq_operations taskq_all_seq_ops = {
+ .show = taskq_all_seq_show,
+ .start = taskq_seq_start,
+ .next = taskq_seq_next,
+ .stop = taskq_seq_stop,
+};
+
+static struct seq_operations taskq_seq_ops = {
+ .show = taskq_seq_show,
+ .start = taskq_seq_start,
+ .next = taskq_seq_next,
+ .stop = taskq_seq_stop,
+};
+
+static int
+proc_taskq_all_open(struct inode *inode, struct file *filp)
+{
+ return seq_open(filp, &taskq_all_seq_ops);
+}
+
+static int
+proc_taskq_open(struct inode *inode, struct file *filp)
+{
+ return seq_open(filp, &taskq_seq_ops);
+}
+
+static struct file_operations proc_taskq_all_operations = {
+ .open = proc_taskq_all_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
+static struct file_operations proc_taskq_operations = {
+ .open = proc_taskq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release,
+};
+
static struct ctl_table spl_kmem_table[] = {
#ifdef DEBUG_KMEM
{
@@ -476,6 +713,20 @@ spl_proc_init(void)
goto out;
}
+ proc_spl_taskq_all = proc_create_data("taskq-all", 0444,
+ proc_spl, &proc_taskq_all_operations, NULL);
+ if (proc_spl_taskq_all == NULL) {
+ rc = -EUNATCH;
+ goto out;
+ }
+
+ proc_spl_taskq = proc_create_data("taskq", 0444,
+ proc_spl, &proc_taskq_operations, NULL);
+ if (proc_spl_taskq == NULL) {
+ rc = -EUNATCH;
+ goto out;
+ }
+
proc_spl_kmem = proc_mkdir("kmem", proc_spl);
if (proc_spl_kmem == NULL) {
rc = -EUNATCH;
@@ -499,6 +750,8 @@ spl_proc_init(void)
remove_proc_entry("kstat", proc_spl);
remove_proc_entry("slab", proc_spl_kmem);
remove_proc_entry("kmem", proc_spl);
+ remove_proc_entry("taskq-all", proc_spl);
+ remove_proc_entry("taskq", proc_spl);
remove_proc_entry("spl", NULL);
unregister_sysctl_table(spl_header);
}
@@ -512,6 +765,8 @@ spl_proc_fini(void)
remove_proc_entry("kstat", proc_spl);
remove_proc_entry("slab", proc_spl_kmem);
remove_proc_entry("kmem", proc_spl);
+ remove_proc_entry("taskq-all", proc_spl);
+ remove_proc_entry("taskq", proc_spl);
remove_proc_entry("spl", NULL);
ASSERT(spl_header != NULL);
diff --git a/spl/module/spl/spl-rwlock.c b/spl/module/spl/spl-rwlock.c
index 77f46f2d6fdf..d99ef4f922d5 100644
--- a/spl/module/spl/spl-rwlock.c
+++ b/spl/module/spl/spl-rwlock.c
@@ -32,7 +32,41 @@
#define DEBUG_SUBSYSTEM S_RWLOCK
-#if defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
+#if defined(CONFIG_PREEMPT_RT_FULL)
+
+#include <linux/rtmutex.h>
+#define RT_MUTEX_OWNER_MASKALL 1UL
+
+static int
+__rwsem_tryupgrade(struct rw_semaphore *rwsem)
+{
+
+ ASSERT((struct task_struct *)
+ ((unsigned long)rwsem->lock.owner & ~RT_MUTEX_OWNER_MASKALL) ==
+ current);
+
+ /*
+ * Under the realtime patch series, rwsem is implemented as a
+ * single mutex held by readers and writers alike. However,
+ * this implementation would prevent a thread from taking a
+ * read lock twice, as the mutex would already be locked on
+ * the second attempt. Therefore the implementation allows a
+ * single thread to take a rwsem as read lock multiple times
+ * tracking that nesting as read_depth counter.
+ */
+ if (rwsem->read_depth <= 1) {
+ /*
+ * In case, the current thread has not taken the lock
+ * more than once as read lock, we can allow an
+ * upgrade to a write lock. rwsem_rt.h implements
+ * write locks as read_depth == 0.
+ */
+ rwsem->read_depth = 0;
+ return (1);
+ }
+ return (0);
+}
+#elif defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
static int
__rwsem_tryupgrade(struct rw_semaphore *rwsem)
{
diff --git a/spl/module/spl/spl-taskq.c b/spl/module/spl/spl-taskq.c
index 017b410ac03a..86915e62f225 100644
--- a/spl/module/spl/spl-taskq.c
+++ b/spl/module/spl/spl-taskq.c
@@ -1,4 +1,4 @@
-/*****************************************************************************\
+/*
* Copyright (C) 2007-2010 Lawrence Livermore National Security, LLC.
* Copyright (C) 2007 The Regents of the University of California.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -20,50 +20,76 @@
*
* You should have received a copy of the GNU General Public License along
* with the SPL. If not, see <http://www.gnu.org/licenses/>.
- *****************************************************************************
+ *
* Solaris Porting Layer (SPL) Task Queue Implementation.
-\*****************************************************************************/
+ */
#include <sys/taskq.h>
#include <sys/kmem.h>
+#include <sys/tsd.h>
int spl_taskq_thread_bind = 0;
module_param(spl_taskq_thread_bind, int, 0644);
MODULE_PARM_DESC(spl_taskq_thread_bind, "Bind taskq thread to CPU by default");
-int spl_taskq_thread_dynamic = 0;
+int spl_taskq_thread_dynamic = 1;
module_param(spl_taskq_thread_dynamic, int, 0644);
MODULE_PARM_DESC(spl_taskq_thread_dynamic, "Allow dynamic taskq threads");
int spl_taskq_thread_priority = 1;
module_param(spl_taskq_thread_priority, int, 0644);
MODULE_PARM_DESC(spl_taskq_thread_priority,
- "Allow non-default priority for taskq threads");
+ "Allow non-default priority for taskq threads");
int spl_taskq_thread_sequential = 4;
module_param(spl_taskq_thread_sequential, int, 0644);
MODULE_PARM_DESC(spl_taskq_thread_sequential,
- "Create new taskq threads after N sequential tasks");
+ "Create new taskq threads after N sequential tasks");
/* Global system-wide dynamic task queue available for all consumers */
taskq_t *system_taskq;
EXPORT_SYMBOL(system_taskq);
+/* Global dynamic task queue for long delay */
+taskq_t *system_delay_taskq;
+EXPORT_SYMBOL(system_delay_taskq);
/* Private dedicated taskq for creating new taskq threads on demand. */
static taskq_t *dynamic_taskq;
static taskq_thread_t *taskq_thread_create(taskq_t *);
+/* List of all taskqs */
+LIST_HEAD(tq_list);
+DECLARE_RWSEM(tq_list_sem);
+static uint_t taskq_tsd;
+
static int
task_km_flags(uint_t flags)
{
if (flags & TQ_NOSLEEP)
- return KM_NOSLEEP;
+ return (KM_NOSLEEP);
if (flags & TQ_PUSHPAGE)
- return KM_PUSHPAGE;
+ return (KM_PUSHPAGE);
- return KM_SLEEP;
+ return (KM_SLEEP);
+}
+
+/*
+ * taskq_find_by_name - Find the largest instance number of a named taskq.
+ */
+static int
+taskq_find_by_name(const char *name)
+{
+ struct list_head *tql;
+ taskq_t *tq;
+
+ list_for_each_prev(tql, &tq_list) {
+ tq = list_entry(tql, taskq_t, tq_taskqs);
+ if (strcmp(name, tq->tq_name) == 0)
+ return (tq->tq_instance);
+ }
+ return (-1);
}
/*
@@ -71,7 +97,7 @@ task_km_flags(uint_t flags)
* is not attached to the free, work, or pending taskq lists.
*/
static taskq_ent_t *
-task_alloc(taskq_t *tq, uint_t flags)
+task_alloc(taskq_t *tq, uint_t flags, unsigned long *irqflags)
{
taskq_ent_t *t;
int count = 0;
@@ -111,18 +137,19 @@ task_alloc(taskq_t *tq, uint_t flags)
* end up delaying the task allocation by one second, thereby
* throttling the task dispatch rate.
*/
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, *irqflags);
schedule_timeout(HZ / 100);
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_lock_irqsave_nested(&tq->tq_lock, *irqflags,
+ tq->tq_lock_class);
if (count < 100) {
count++;
goto retry;
}
}
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
- t = kmem_alloc(sizeof(taskq_ent_t), task_km_flags(flags));
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, *irqflags);
+ t = kmem_alloc(sizeof (taskq_ent_t), task_km_flags(flags));
+ spin_lock_irqsave_nested(&tq->tq_lock, *irqflags, tq->tq_lock_class);
if (t) {
taskq_init_ent(t);
@@ -145,7 +172,7 @@ task_free(taskq_t *tq, taskq_ent_t *t)
ASSERT(list_empty(&t->tqent_list));
ASSERT(!timer_pending(&t->tqent_timer));
- kmem_free(t, sizeof(taskq_ent_t));
+ kmem_free(t, sizeof (taskq_ent_t));
tq->tq_nalloc--;
}
@@ -166,7 +193,7 @@ task_done(taskq_t *tq, taskq_ent_t *t)
list_del_init(&t->tqent_list);
if (tq->tq_nalloc <= tq->tq_minalloc) {
- t->tqent_id = 0;
+ t->tqent_id = TASKQID_INVALID;
t->tqent_func = NULL;
t->tqent_arg = NULL;
t->tqent_flags = 0;
@@ -187,15 +214,17 @@ task_expire(unsigned long data)
taskq_ent_t *w, *t = (taskq_ent_t *)data;
taskq_t *tq = t->tqent_taskq;
struct list_head *l;
+ unsigned long flags;
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
if (t->tqent_flags & TQENT_FLAG_CANCEL) {
ASSERT(list_empty(&t->tqent_list));
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
return;
}
+ t->tqent_birth = jiffies;
/*
* The priority list must be maintained in strict task id order
* from lowest to highest for lowest_id to be easily calculable.
@@ -211,7 +240,7 @@ task_expire(unsigned long data)
if (l == &tq->tq_prio_list)
list_add(&t->tqent_list, &tq->tq_prio_list);
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
wake_up(&tq->tq_work_waitq);
}
@@ -250,7 +279,7 @@ taskq_lowest_id(taskq_t *tq)
if (!list_empty(&tq->tq_active_list)) {
tqt = list_entry(tq->tq_active_list.next, taskq_thread_t,
tqt_active_list);
- ASSERT(tqt->tqt_id != 0);
+ ASSERT(tqt->tqt_id != TASKQID_INVALID);
lowest_id = MIN(lowest_id, tqt->tqt_id);
}
@@ -378,10 +407,11 @@ taskq_wait_id_check(taskq_t *tq, taskqid_t id)
{
int active = 0;
int rc;
+ unsigned long flags;
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
rc = (taskq_find(tq, id, &active) == NULL);
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
return (rc);
}
@@ -401,10 +431,11 @@ static int
taskq_wait_outstanding_check(taskq_t *tq, taskqid_t id)
{
int rc;
+ unsigned long flags;
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
rc = (id < tq->tq_lowest_id);
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
return (rc);
}
@@ -428,10 +459,11 @@ static int
taskq_wait_check(taskq_t *tq)
{
int rc;
+ unsigned long flags;
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
rc = (tq->tq_lowest_id == tq->tq_next_id);
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
return (rc);
}
@@ -448,37 +480,10 @@ taskq_wait(taskq_t *tq)
}
EXPORT_SYMBOL(taskq_wait);
-static int
-taskq_member_impl(taskq_t *tq, void *t)
-{
- struct list_head *l;
- taskq_thread_t *tqt;
- int found = 0;
-
- ASSERT(tq);
- ASSERT(t);
- ASSERT(spin_is_locked(&tq->tq_lock));
-
- list_for_each(l, &tq->tq_thread_list) {
- tqt = list_entry(l, taskq_thread_t, tqt_thread_list);
- if (tqt->tqt_thread == (struct task_struct *)t) {
- found = 1;
- break;
- }
- }
- return (found);
-}
-
int
-taskq_member(taskq_t *tq, void *t)
+taskq_member(taskq_t *tq, kthread_t *t)
{
- int found;
-
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
- found = taskq_member_impl(tq, t);
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
-
- return (found);
+ return (tq == (taskq_t *)tsd_get_by_thread(taskq_tsd, t));
}
EXPORT_SYMBOL(taskq_member);
@@ -494,10 +499,11 @@ taskq_cancel_id(taskq_t *tq, taskqid_t id)
taskq_ent_t *t;
int active = 0;
int rc = ENOENT;
+ unsigned long flags;
ASSERT(tq);
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
t = taskq_find(tq, id, &active);
if (t && !active) {
list_del_init(&t->tqent_list);
@@ -517,9 +523,10 @@ taskq_cancel_id(taskq_t *tq, taskqid_t id)
* drop the lock before synchronously cancelling the timer.
*/
if (timer_pending(&t->tqent_timer)) {
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
del_timer_sync(&t->tqent_timer);
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_lock_irqsave_nested(&tq->tq_lock, flags,
+ tq->tq_lock_class);
}
if (!(t->tqent_flags & TQENT_FLAG_PREALLOC))
@@ -527,7 +534,7 @@ taskq_cancel_id(taskq_t *tq, taskqid_t id)
rc = 0;
}
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
if (active) {
taskq_wait_id(tq, id);
@@ -544,12 +551,13 @@ taskqid_t
taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
{
taskq_ent_t *t;
- taskqid_t rc = 0;
+ taskqid_t rc = TASKQID_INVALID;
+ unsigned long irqflags;
ASSERT(tq);
ASSERT(func);
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
/* Taskq being destroyed and all tasks drained */
if (!(tq->tq_flags & TASKQ_ACTIVE))
@@ -557,16 +565,22 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
/* Do not queue the task unless there is idle thread for it */
ASSERT(tq->tq_nactive <= tq->tq_nthreads);
- if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads))
- goto out;
+ if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) {
+ /* Dynamic taskq may be able to spawn another thread */
+ if (!(tq->tq_flags & TASKQ_DYNAMIC) || taskq_thread_spawn(tq) == 0)
+ goto out;
+ }
- if ((t = task_alloc(tq, flags)) == NULL)
+ if ((t = task_alloc(tq, flags, &irqflags)) == NULL)
goto out;
spin_lock(&t->tqent_lock);
+ /* Queue to the front of the list to enforce TQ_NOQUEUE semantics */
+ if (flags & TQ_NOQUEUE)
+ list_add(&t->tqent_list, &tq->tq_prio_list);
/* Queue to the priority list instead of the pending list */
- if (flags & TQ_FRONT)
+ else if (flags & TQ_FRONT)
list_add_tail(&t->tqent_list, &tq->tq_prio_list);
else
list_add_tail(&t->tqent_list, &tq->tq_pend_list);
@@ -579,6 +593,7 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
t->tqent_timer.data = 0;
t->tqent_timer.function = NULL;
t->tqent_timer.expires = 0;
+ t->tqent_birth = jiffies;
ASSERT(!(t->tqent_flags & TQENT_FLAG_PREALLOC));
@@ -587,10 +602,10 @@ taskq_dispatch(taskq_t *tq, task_func_t func, void *arg, uint_t flags)
wake_up(&tq->tq_work_waitq);
out:
/* Spawn additional taskq threads if required. */
- if (tq->tq_nactive == tq->tq_nthreads)
+ if (!(flags & TQ_NOQUEUE) && tq->tq_nactive == tq->tq_nthreads)
(void) taskq_thread_spawn(tq);
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, irqflags);
return (rc);
}
EXPORT_SYMBOL(taskq_dispatch);
@@ -599,19 +614,20 @@ taskqid_t
taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
uint_t flags, clock_t expire_time)
{
- taskqid_t rc = 0;
+ taskqid_t rc = TASKQID_INVALID;
taskq_ent_t *t;
+ unsigned long irqflags;
ASSERT(tq);
ASSERT(func);
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_lock_irqsave_nested(&tq->tq_lock, irqflags, tq->tq_lock_class);
/* Taskq being destroyed and all tasks drained */
if (!(tq->tq_flags & TASKQ_ACTIVE))
goto out;
- if ((t = task_alloc(tq, flags)) == NULL)
+ if ((t = task_alloc(tq, flags, &irqflags)) == NULL)
goto out;
spin_lock(&t->tqent_lock);
@@ -636,28 +652,43 @@ taskq_dispatch_delay(taskq_t *tq, task_func_t func, void *arg,
/* Spawn additional taskq threads if required. */
if (tq->tq_nactive == tq->tq_nthreads)
(void) taskq_thread_spawn(tq);
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, irqflags);
return (rc);
}
EXPORT_SYMBOL(taskq_dispatch_delay);
void
taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
- taskq_ent_t *t)
+ taskq_ent_t *t)
{
+ unsigned long irqflags;
ASSERT(tq);
ASSERT(func);
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_lock_irqsave_nested(&tq->tq_lock, irqflags,
+ tq->tq_lock_class);
/* Taskq being destroyed and all tasks drained */
if (!(tq->tq_flags & TASKQ_ACTIVE)) {
- t->tqent_id = 0;
+ t->tqent_id = TASKQID_INVALID;
goto out;
}
+ if ((flags & TQ_NOQUEUE) && (tq->tq_nactive == tq->tq_nthreads)) {
+ /* Dynamic taskq may be able to spawn another thread */
+ if (!(tq->tq_flags & TASKQ_DYNAMIC) || taskq_thread_spawn(tq) == 0)
+ goto out2;
+ flags |= TQ_FRONT;
+ }
+
spin_lock(&t->tqent_lock);
+ /*
+ * Make sure the entry is not on some other taskq; it is important to
+ * ASSERT() under lock
+ */
+ ASSERT(taskq_empty_ent(t));
+
/*
* Mark it as a prealloc'd task. This is important
* to ensure that we don't free it later.
@@ -675,6 +706,7 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
t->tqent_func = func;
t->tqent_arg = arg;
t->tqent_taskq = tq;
+ t->tqent_birth = jiffies;
spin_unlock(&t->tqent_lock);
@@ -683,14 +715,15 @@ taskq_dispatch_ent(taskq_t *tq, task_func_t func, void *arg, uint_t flags,
/* Spawn additional taskq threads if required. */
if (tq->tq_nactive == tq->tq_nthreads)
(void) taskq_thread_spawn(tq);
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+out2:
+ spin_unlock_irqrestore(&tq->tq_lock, irqflags);
}
EXPORT_SYMBOL(taskq_dispatch_ent);
int
taskq_empty_ent(taskq_ent_t *t)
{
- return list_empty(&t->tqent_list);
+ return (list_empty(&t->tqent_list));
}
EXPORT_SYMBOL(taskq_empty_ent);
@@ -737,17 +770,18 @@ static void
taskq_thread_spawn_task(void *arg)
{
taskq_t *tq = (taskq_t *)arg;
+ unsigned long flags;
if (taskq_thread_create(tq) == NULL) {
/* restore spawning count if failed */
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
tq->tq_nspawn--;
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
}
}
/*
- * Spawn addition threads for dynamic taskqs (TASKQ_DYNMAIC) the current
+ * Spawn additional threads for dynamic taskqs (TASKQ_DYNAMIC) when the current
* number of threads is insufficient to handle the pending tasks. These
* new threads must be created by the dedicated dynamic_taskq to avoid
* deadlocks between thread creation and memory reclaim. The system_taskq
@@ -797,7 +831,7 @@ taskq_thread_should_stop(taskq_t *tq, taskq_thread_t *tqt)
(tq->tq_nactive == 0) && /* No threads are handling tasks */
(tq->tq_nthreads > 1) && /* More than 1 thread is running */
(!taskq_next_ent(tq)) && /* There are no pending tasks */
- (spl_taskq_thread_dynamic));/* Dynamic taskqs are allowed */
+ (spl_taskq_thread_dynamic)); /* Dynamic taskqs are allowed */
}
static int
@@ -809,8 +843,10 @@ taskq_thread(void *args)
taskq_t *tq;
taskq_ent_t *t;
int seq_tasks = 0;
+ unsigned long flags;
ASSERT(tqt);
+ ASSERT(tqt->tqt_tq);
tq = tqt->tqt_tq;
current->flags |= PF_NOFREEZE;
@@ -820,7 +856,8 @@ taskq_thread(void *args)
sigprocmask(SIG_BLOCK, &blocked, NULL);
flush_signals(current);
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ tsd_set(taskq_tsd, tq);
+ spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
/*
* If we are dynamically spawned, decrease spawning count. Note that
* we could be created during taskq_create, in which case we shouldn't
@@ -850,12 +887,13 @@ taskq_thread(void *args)
}
add_wait_queue_exclusive(&tq->tq_work_waitq, &wait);
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
schedule();
seq_tasks = 0;
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_lock_irqsave_nested(&tq->tq_lock, flags,
+ tq->tq_lock_class);
remove_wait_queue(&tq->tq_work_waitq, &wait);
} else {
__set_current_state(TASK_RUNNING);
@@ -864,27 +902,32 @@ taskq_thread(void *args)
if ((t = taskq_next_ent(tq)) != NULL) {
list_del_init(&t->tqent_list);
- /* In order to support recursively dispatching a
+ /*
+ * In order to support recursively dispatching a
* preallocated taskq_ent_t, tqent_id must be
- * stored prior to executing tqent_func. */
+ * stored prior to executing tqent_func.
+ */
tqt->tqt_id = t->tqent_id;
tqt->tqt_task = t;
- /* We must store a copy of the flags prior to
+ /*
+ * We must store a copy of the flags prior to
* servicing the task (servicing a prealloc'd task
* returns the ownership of the tqent back to
* the caller of taskq_dispatch). Thus,
- * tqent_flags _may_ change within the call. */
+ * tqent_flags _may_ change within the call.
+ */
tqt->tqt_flags = t->tqent_flags;
taskq_insert_in_order(tq, tqt);
tq->tq_nactive++;
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
/* Perform the requested task */
t->tqent_func(t->tqent_arg);
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_lock_irqsave_nested(&tq->tq_lock, flags,
+ tq->tq_lock_class);
tq->tq_nactive--;
list_del_init(&tqt->tqt_active_list);
tqt->tqt_task = NULL;
@@ -893,8 +936,10 @@ taskq_thread(void *args)
if (!(tqt->tqt_flags & TQENT_FLAG_PREALLOC))
task_done(tq, t);
- /* When the current lowest outstanding taskqid is
- * done calculate the new lowest outstanding id */
+ /*
+ * When the current lowest outstanding taskqid is
+ * done calculate the new lowest outstanding id
+ */
if (tq->tq_lowest_id == tqt->tqt_id) {
tq->tq_lowest_id = taskq_lowest_id(tq);
ASSERT3S(tq->tq_lowest_id, >, tqt->tqt_id);
@@ -905,7 +950,7 @@ taskq_thread(void *args)
taskq_thread_spawn(tq))
seq_tasks = 0;
- tqt->tqt_id = 0;
+ tqt->tqt_id = TASKQID_INVALID;
tqt->tqt_flags = 0;
wake_up_all(&tq->tq_wait_waitq);
} else {
@@ -922,7 +967,9 @@ taskq_thread(void *args)
list_del_init(&tqt->tqt_thread_list);
error:
kmem_free(tqt, sizeof (taskq_thread_t));
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
+
+ tsd_set(taskq_tsd, NULL);
return (0);
}
@@ -937,7 +984,7 @@ taskq_thread_create(taskq_t *tq)
INIT_LIST_HEAD(&tqt->tqt_thread_list);
INIT_LIST_HEAD(&tqt->tqt_active_list);
tqt->tqt_tq = tq;
- tqt->tqt_id = 0;
+ tqt->tqt_id = TASKQID_INVALID;
tqt->tqt_thread = spl_kthread_create(taskq_thread, tqt,
"%s", tq->tq_name);
@@ -966,6 +1013,7 @@ taskq_create(const char *name, int nthreads, pri_t pri,
taskq_t *tq;
taskq_thread_t *tqt;
int count = 0, rc = 0, i;
+ unsigned long irqflags;
ASSERT(name != NULL);
ASSERT(minalloc >= 0);
@@ -988,32 +1036,36 @@ taskq_create(const char *name, int nthreads, pri_t pri,
spin_lock_init(&tq->tq_lock);
INIT_LIST_HEAD(&tq->tq_thread_list);
INIT_LIST_HEAD(&tq->tq_active_list);
- tq->tq_name = strdup(name);
- tq->tq_nactive = 0;
- tq->tq_nthreads = 0;
- tq->tq_nspawn = 0;
+ tq->tq_name = strdup(name);
+ tq->tq_nactive = 0;
+ tq->tq_nthreads = 0;
+ tq->tq_nspawn = 0;
tq->tq_maxthreads = nthreads;
- tq->tq_pri = pri;
- tq->tq_minalloc = minalloc;
- tq->tq_maxalloc = maxalloc;
- tq->tq_nalloc = 0;
- tq->tq_flags = (flags | TASKQ_ACTIVE);
- tq->tq_next_id = 1;
- tq->tq_lowest_id = 1;
+ tq->tq_pri = pri;
+ tq->tq_minalloc = minalloc;
+ tq->tq_maxalloc = maxalloc;
+ tq->tq_nalloc = 0;
+ tq->tq_flags = (flags | TASKQ_ACTIVE);
+ tq->tq_next_id = TASKQID_INITIAL;
+ tq->tq_lowest_id = TASKQID_INITIAL;
INIT_LIST_HEAD(&tq->tq_free_list);
INIT_LIST_HEAD(&tq->tq_pend_list);
INIT_LIST_HEAD(&tq->tq_prio_list);
INIT_LIST_HEAD(&tq->tq_delay_list);
init_waitqueue_head(&tq->tq_work_waitq);
init_waitqueue_head(&tq->tq_wait_waitq);
+ tq->tq_lock_class = TQ_LOCK_GENERAL;
+ INIT_LIST_HEAD(&tq->tq_taskqs);
if (flags & TASKQ_PREPOPULATE) {
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_lock_irqsave_nested(&tq->tq_lock, irqflags,
+ tq->tq_lock_class);
for (i = 0; i < minalloc; i++)
- task_done(tq, task_alloc(tq, TQ_PUSHPAGE | TQ_NEW));
+ task_done(tq, task_alloc(tq, TQ_PUSHPAGE | TQ_NEW,
+ &irqflags));
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, irqflags);
}
if ((flags & TASKQ_DYNAMIC) && spl_taskq_thread_dynamic)
@@ -1038,6 +1090,11 @@ taskq_create(const char *name, int nthreads, pri_t pri,
if (rc) {
taskq_destroy(tq);
tq = NULL;
+ } else {
+ down_write(&tq_list_sem);
+ tq->tq_instance = taskq_find_by_name(name) + 1;
+ list_add_tail(&tq->tq_taskqs, &tq_list);
+ up_write(&tq_list_sem);
}
return (tq);
@@ -1050,11 +1107,12 @@ taskq_destroy(taskq_t *tq)
struct task_struct *thread;
taskq_thread_t *tqt;
taskq_ent_t *t;
+ unsigned long flags;
ASSERT(tq);
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
tq->tq_flags &= ~TASKQ_ACTIVE;
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
/*
* When TASKQ_ACTIVE is clear new tasks may not be added nor may
@@ -1065,12 +1123,17 @@ taskq_destroy(taskq_t *tq)
taskq_wait(tq);
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ /* remove taskq from global list used by the kstats */
+ down_write(&tq_list_sem);
+ list_del(&tq->tq_taskqs);
+ up_write(&tq_list_sem);
+
+ spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
/* wait for spawning threads to insert themselves to the list */
while (tq->tq_nspawn) {
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
schedule_timeout_interruptible(1);
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_lock_irqsave_nested(&tq->tq_lock, flags, tq->tq_lock_class);
}
/*
@@ -1083,11 +1146,12 @@ taskq_destroy(taskq_t *tq)
tqt = list_entry(tq->tq_thread_list.next,
taskq_thread_t, tqt_thread_list);
thread = tqt->tqt_thread;
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
kthread_stop(thread);
- spin_lock_irqsave(&tq->tq_lock, tq->tq_lock_flags);
+ spin_lock_irqsave_nested(&tq->tq_lock, flags,
+ tq->tq_lock_class);
}
while (!list_empty(&tq->tq_free_list)) {
@@ -1109,28 +1173,102 @@ taskq_destroy(taskq_t *tq)
ASSERT(list_empty(&tq->tq_prio_list));
ASSERT(list_empty(&tq->tq_delay_list));
- spin_unlock_irqrestore(&tq->tq_lock, tq->tq_lock_flags);
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
strfree(tq->tq_name);
kmem_free(tq, sizeof (taskq_t));
}
EXPORT_SYMBOL(taskq_destroy);
+
+static unsigned int spl_taskq_kick = 0;
+
+/*
+ * 2.6.36 API Change
+ * module_param_cb is introduced to take kernel_param_ops and
+ * module_param_call is marked as obsolete. Also set and get operations
+ * were changed to take a 'const struct kernel_param *'.
+ */
+static int
+#ifdef module_param_cb
+param_set_taskq_kick(const char *val, const struct kernel_param *kp)
+#else
+param_set_taskq_kick(const char *val, struct kernel_param *kp)
+#endif
+{
+ int ret;
+ taskq_t *tq;
+ taskq_ent_t *t;
+ unsigned long flags;
+
+ ret = param_set_uint(val, kp);
+ if (ret < 0 || !spl_taskq_kick)
+ return (ret);
+ /* reset value */
+ spl_taskq_kick = 0;
+
+ down_read(&tq_list_sem);
+ list_for_each_entry(tq, &tq_list, tq_taskqs) {
+ spin_lock_irqsave_nested(&tq->tq_lock, flags,
+ tq->tq_lock_class);
+ /* Check if the first pending is older than 5 seconds */
+ t = taskq_next_ent(tq);
+ if (t && time_after(jiffies, t->tqent_birth + 5*HZ)) {
+ (void) taskq_thread_spawn(tq);
+ printk(KERN_INFO "spl: Kicked taskq %s/%d\n",
+ tq->tq_name, tq->tq_instance);
+ }
+ spin_unlock_irqrestore(&tq->tq_lock, flags);
+ }
+ up_read(&tq_list_sem);
+ return (ret);
+}
+
+#ifdef module_param_cb
+static const struct kernel_param_ops param_ops_taskq_kick = {
+ .set = param_set_taskq_kick,
+ .get = param_get_uint,
+};
+module_param_cb(spl_taskq_kick, &param_ops_taskq_kick, &spl_taskq_kick, 0644);
+#else
+module_param_call(spl_taskq_kick, param_set_taskq_kick, param_get_uint,
+ &spl_taskq_kick, 0644);
+#endif
+MODULE_PARM_DESC(spl_taskq_kick,
+ "Write nonzero to kick stuck taskqs to spawn more threads");
+
int
spl_taskq_init(void)
{
+ tsd_create(&taskq_tsd, NULL);
+
system_taskq = taskq_create("spl_system_taskq", MAX(boot_ncpus, 64),
maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
if (system_taskq == NULL)
return (1);
+ system_delay_taskq = taskq_create("spl_delay_taskq", MAX(boot_ncpus, 4),
+ maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE|TASKQ_DYNAMIC);
+ if (system_delay_taskq == NULL) {
+ taskq_destroy(system_taskq);
+ return (1);
+ }
+
dynamic_taskq = taskq_create("spl_dynamic_taskq", 1,
maxclsyspri, boot_ncpus, INT_MAX, TASKQ_PREPOPULATE);
if (dynamic_taskq == NULL) {
taskq_destroy(system_taskq);
+ taskq_destroy(system_delay_taskq);
return (1);
}
+ /*
+ * This is used to annotate tq_lock, so
+ * taskq_dispatch -> taskq_thread_spawn -> taskq_dispatch
+ * does not trigger a lockdep warning re: possible recursive locking
+ */
+ dynamic_taskq->tq_lock_class = TQ_LOCK_DYNAMIC;
+
return (0);
}
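
A minimal sketch of the lockdep annotation at work (illustrative names, not part of this patch): lockdep treats two locks of the same class taken in a chain as possible recursion unless they are acquired with distinct subclasses, which is exactly what tq_lock_class provides above.

	#include <linux/spinlock.h>

	enum demo_subclass {
		DEMO_LOCK_GENERAL = 0,
		DEMO_LOCK_NESTED  = 1,	/* e.g. the dynamic taskq's lock */
	};

	static void
	demo_nested_chain(spinlock_t *outer, spinlock_t *inner)
	{
		unsigned long flags;

		spin_lock_irqsave_nested(outer, flags, DEMO_LOCK_GENERAL);
		/* Without _nested() here lockdep would flag A -> A recursion. */
		spin_lock_nested(inner, DEMO_LOCK_NESTED);
		spin_unlock(inner);
		spin_unlock_irqrestore(outer, flags);
	}
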
@@ -1140,6 +1278,11 @@ spl_taskq_fini(void)
taskq_destroy(dynamic_taskq);
dynamic_taskq = NULL;
+ taskq_destroy(system_delay_taskq);
+ system_delay_taskq = NULL;
+
taskq_destroy(system_taskq);
system_taskq = NULL;
+
+ tsd_destroy(&taskq_tsd);
}
diff --git a/spl/module/spl/spl-tsd.c b/spl/module/spl/spl-tsd.c
index 4d0800e5a11f..bf8235063be3 100644
--- a/spl/module/spl/spl-tsd.c
+++ b/spl/module/spl/spl-tsd.c
@@ -527,6 +527,33 @@ tsd_get(uint_t key)
}
EXPORT_SYMBOL(tsd_get);
+/*
+ * tsd_get_by_thread - get thread specific data for specified thread
+ * @key: lookup key
+ * @thread: thread to lookup
+ *
+ * Caller must prevent racing tsd_create() or tsd_destroy(). This
+ * implementation is designed to be fast and scalable; it does not
+ * lock the entire table, only a single hash bin.
+ */
+void *
+tsd_get_by_thread(uint_t key, kthread_t *thread)
+{
+ tsd_hash_entry_t *entry;
+
+ ASSERT3P(tsd_hash_table, !=, NULL);
+
+ if ((key == 0) || (key > TSD_KEYS_MAX))
+ return (NULL);
+
+ entry = tsd_hash_search(tsd_hash_table, key, thread->pid);
+ if (entry == NULL)
+ return (NULL);
+
+ return (entry->he_value);
+}
+EXPORT_SYMBOL(tsd_get_by_thread);
+
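
The per-bin locking that comment refers to looks roughly like the following sketch (illustrative names; the real lookup is tsd_hash_search() earlier in this file):

	#include <linux/hash.h>
	#include <linux/list.h>
	#include <linux/spinlock.h>

	struct demo_entry {
		struct hlist_node	de_node;
		ulong			de_key;
		pid_t			de_pid;
		void			*de_value;
	};

	struct demo_bin {
		spinlock_t		db_lock;
		struct hlist_head	db_head;
	};

	static void *
	demo_hash_search(struct demo_bin *bins, int bits, ulong key, pid_t pid)
	{
		struct demo_bin *bin = &bins[hash_long(key * pid, bits)];
		struct demo_entry *e;
		void *value = NULL;

		spin_lock(&bin->db_lock);	/* contends on one bin only */
		hlist_for_each_entry(e, &bin->db_head, de_node) {
			if (e->de_key == key && e->de_pid == pid) {
				value = e->de_value;
				break;
			}
		}
		spin_unlock(&bin->db_lock);

		return (value);
	}
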
/*
* tsd_create - create thread specific data key
* @keyp: lookup key address
diff --git a/spl/module/spl/spl-vmem.c b/spl/module/spl/spl-vmem.c
index e177988a7e2d..dd10607dcdfb 100644
--- a/spl/module/spl/spl-vmem.c
+++ b/spl/module/spl/spl-vmem.c
@@ -24,6 +24,7 @@
#include <sys/debug.h>
#include <sys/vmem.h>
+#include <sys/kmem_cache.h>
#include <linux/mm_compat.h>
#include <linux/module.h>
@@ -36,14 +37,39 @@ EXPORT_SYMBOL(zio_alloc_arena);
vmem_t *zio_arena = NULL;
EXPORT_SYMBOL(zio_arena);
+#define VMEM_FLOOR_SIZE (4 * 1024 * 1024) /* 4MB floor */
+
+/*
+ * Return approximate virtual memory usage based on these assumptions:
+ *
+ * 1) The major SPL consumer of virtual memory is the kmem cache.
+ * 2) Memory allocated with vmem_alloc() is short lived and can be ignored.
+ * 3) Allow a 4MB floor as a generous pad given normal consumption.
+ * 4) The spl_kmem_cache_sem only contends with cache create/destroy.
+ */
size_t
vmem_size(vmem_t *vmp, int typemask)
{
- ASSERT3P(vmp, ==, NULL);
- ASSERT3S(typemask & VMEM_ALLOC, ==, VMEM_ALLOC);
- ASSERT3S(typemask & VMEM_FREE, ==, VMEM_FREE);
+ spl_kmem_cache_t *skc;
+ size_t alloc = VMEM_FLOOR_SIZE;
+
+ if ((typemask & VMEM_ALLOC) && (typemask & VMEM_FREE))
+ return (VMALLOC_TOTAL);
+
+ down_read(&spl_kmem_cache_sem);
+ list_for_each_entry(skc, &spl_kmem_cache_list, skc_list) {
+ if (skc->skc_flags & KMC_VMEM)
+ alloc += skc->skc_slab_size * skc->skc_slab_total;
+ }
+ up_read(&spl_kmem_cache_sem);
- return (VMALLOC_TOTAL);
+ if (typemask & VMEM_ALLOC)
+ return (MIN(alloc, VMALLOC_TOTAL));
+ else if (typemask & VMEM_FREE)
+ return (MAX(VMALLOC_TOTAL - alloc, 0));
+ else
+ return (0);
}
EXPORT_SYMBOL(vmem_size);
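
A worked example of the new accounting (illustrative numbers): with VMALLOC_TOTAL = 100 MiB and 10 MiB of KMC_VMEM slab space, vmem_size(NULL, VMEM_ALLOC) returns MIN(4 + 10, 100) = 14 MiB, vmem_size(NULL, VMEM_FREE) returns MAX(100 - 14, 0) = 86 MiB, and the combined VMEM_ALLOC | VMEM_FREE mask still reports the full 100 MiB, so the three views stay mutually consistent.
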
diff --git a/spl/module/spl/spl-vnode.c b/spl/module/spl/spl-vnode.c
index e3188a1af8b0..ca19d7343ca0 100644
--- a/spl/module/spl/spl-vnode.c
+++ b/spl/module/spl/spl-vnode.c
@@ -63,9 +63,6 @@ vn_mode_to_vtype(mode_t mode)
if (S_ISSOCK(mode))
return VSOCK;
- if (S_ISCHR(mode))
- return VCHR;
-
return VNON;
} /* vn_mode_to_vtype() */
EXPORT_SYMBOL(vn_mode_to_vtype);
@@ -214,36 +211,22 @@ int
vn_rdwr(uio_rw_t uio, vnode_t *vp, void *addr, ssize_t len, offset_t off,
uio_seg_t seg, int ioflag, rlim64_t x2, void *x3, ssize_t *residp)
{
- loff_t offset;
- mm_segment_t saved_fs;
- struct file *fp;
+ struct file *fp = vp->v_file;
+ loff_t offset = off;
int rc;
ASSERT(uio == UIO_WRITE || uio == UIO_READ);
- ASSERT(vp);
- ASSERT(vp->v_file);
ASSERT(seg == UIO_SYSSPACE);
ASSERT((ioflag & ~FAPPEND) == 0);
- ASSERT(x2 == RLIM64_INFINITY);
-
- fp = vp->v_file;
- offset = off;
if (ioflag & FAPPEND)
offset = fp->f_pos;
- /* Writable user data segment must be briefly increased for this
- * process so we can use the user space read call paths to write
- * in to memory allocated by the kernel. */
- saved_fs = get_fs();
- set_fs(get_ds());
-
if (uio & UIO_WRITE)
- rc = vfs_write(fp, addr, len, &offset);
+ rc = spl_kernel_write(fp, addr, len, &offset);
else
- rc = vfs_read(fp, addr, len, &offset);
+ rc = spl_kernel_read(fp, addr, len, &offset);
- set_fs(saved_fs);
fp->f_pos = offset;
if (rc < 0)
@@ -677,6 +660,19 @@ vn_getf(int fd)
fp = file_find(fd, current);
if (fp) {
+ lfp = fget(fd);
+ fput(fp->f_file);
+ /*
+ * areleasef() can cause us to see a stale reference when
+ * userspace has reused a file descriptor before areleasef()
+ * has run. fput() the stale reference and replace it. We
+ * retain the original reference count such that the concurrent
+ * areleasef() will decrement its reference and terminate.
+ */
+ if (lfp != fp->f_file) {
+ fp->f_file = lfp;
+ fp->f_vnode->v_file = lfp;
+ }
atomic_inc(&fp->f_ref);
spin_unlock(&vn_file_lock);
return (fp);
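
Concretely (a hypothetical interleaving): task A releases fd 5 but its areleasef() has not yet run; userspace close()s and reopens fd 5, so the number now names a different struct file; task B's vn_getf() then finds the cached fp whose f_file still points at the old file. The fget()/fput() pair above detects the mismatch and repoints fp->f_file (and the vnode) at the live struct file before taking the reference.
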
diff --git a/spl/module/splat/splat-atomic.c b/spl/module/splat/splat-atomic.c
index b0a8c70615b7..b8759e01f29b 100644
--- a/spl/module/splat/splat-atomic.c
+++ b/spl/module/splat/splat-atomic.c
@@ -212,7 +212,7 @@ splat_atomic_init(void)
spin_lock_init(&sub->test_lock);
sub->desc.id = SPLAT_SUBSYSTEM_ATOMIC;
- SPLAT_TEST_INIT(sub, SPLAT_ATOMIC_TEST1_NAME, SPLAT_ATOMIC_TEST1_DESC,
+ splat_test_init(sub, SPLAT_ATOMIC_TEST1_NAME, SPLAT_ATOMIC_TEST1_DESC,
SPLAT_ATOMIC_TEST1_ID, splat_atomic_test1);
return sub;
@@ -222,7 +222,7 @@ void
splat_atomic_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
- SPLAT_TEST_FINI(sub, SPLAT_ATOMIC_TEST1_ID);
+ splat_test_fini(sub, SPLAT_ATOMIC_TEST1_ID);
kfree(sub);
}
diff --git a/spl/module/splat/splat-condvar.c b/spl/module/splat/splat-condvar.c
index bdbaf79c8fd7..5a9b40f92dcb 100644
--- a/spl/module/splat/splat-condvar.c
+++ b/spl/module/splat/splat-condvar.c
@@ -478,15 +478,15 @@ splat_condvar_init(void)
spin_lock_init(&sub->test_lock);
sub->desc.id = SPLAT_SUBSYSTEM_CONDVAR;
- SPLAT_TEST_INIT(sub, SPLAT_CONDVAR_TEST1_NAME, SPLAT_CONDVAR_TEST1_DESC,
+ splat_test_init(sub, SPLAT_CONDVAR_TEST1_NAME, SPLAT_CONDVAR_TEST1_DESC,
SPLAT_CONDVAR_TEST1_ID, splat_condvar_test1);
- SPLAT_TEST_INIT(sub, SPLAT_CONDVAR_TEST2_NAME, SPLAT_CONDVAR_TEST2_DESC,
+ splat_test_init(sub, SPLAT_CONDVAR_TEST2_NAME, SPLAT_CONDVAR_TEST2_DESC,
SPLAT_CONDVAR_TEST2_ID, splat_condvar_test2);
- SPLAT_TEST_INIT(sub, SPLAT_CONDVAR_TEST3_NAME, SPLAT_CONDVAR_TEST3_DESC,
+ splat_test_init(sub, SPLAT_CONDVAR_TEST3_NAME, SPLAT_CONDVAR_TEST3_DESC,
SPLAT_CONDVAR_TEST3_ID, splat_condvar_test3);
- SPLAT_TEST_INIT(sub, SPLAT_CONDVAR_TEST4_NAME, SPLAT_CONDVAR_TEST4_DESC,
+ splat_test_init(sub, SPLAT_CONDVAR_TEST4_NAME, SPLAT_CONDVAR_TEST4_DESC,
SPLAT_CONDVAR_TEST4_ID, splat_condvar_test4);
- SPLAT_TEST_INIT(sub, SPLAT_CONDVAR_TEST5_NAME, SPLAT_CONDVAR_TEST5_DESC,
+ splat_test_init(sub, SPLAT_CONDVAR_TEST5_NAME, SPLAT_CONDVAR_TEST5_DESC,
SPLAT_CONDVAR_TEST5_ID, splat_condvar_test5);
return sub;
@@ -496,11 +496,11 @@ void
splat_condvar_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
- SPLAT_TEST_FINI(sub, SPLAT_CONDVAR_TEST5_ID);
- SPLAT_TEST_FINI(sub, SPLAT_CONDVAR_TEST4_ID);
- SPLAT_TEST_FINI(sub, SPLAT_CONDVAR_TEST3_ID);
- SPLAT_TEST_FINI(sub, SPLAT_CONDVAR_TEST2_ID);
- SPLAT_TEST_FINI(sub, SPLAT_CONDVAR_TEST1_ID);
+ splat_test_fini(sub, SPLAT_CONDVAR_TEST5_ID);
+ splat_test_fini(sub, SPLAT_CONDVAR_TEST4_ID);
+ splat_test_fini(sub, SPLAT_CONDVAR_TEST3_ID);
+ splat_test_fini(sub, SPLAT_CONDVAR_TEST2_ID);
+ splat_test_fini(sub, SPLAT_CONDVAR_TEST1_ID);
kfree(sub);
}
diff --git a/spl/module/splat/splat-cred.c b/spl/module/splat/splat-cred.c
index f6b70ce34ebe..5b07a1fe7140 100644
--- a/spl/module/splat/splat-cred.c
+++ b/spl/module/splat/splat-cred.c
@@ -270,11 +270,11 @@ splat_cred_init(void)
spin_lock_init(&sub->test_lock);
sub->desc.id = SPLAT_SUBSYSTEM_CRED;
- SPLAT_TEST_INIT(sub, SPLAT_CRED_TEST1_NAME, SPLAT_CRED_TEST1_DESC,
+ splat_test_init(sub, SPLAT_CRED_TEST1_NAME, SPLAT_CRED_TEST1_DESC,
SPLAT_CRED_TEST1_ID, splat_cred_test1);
- SPLAT_TEST_INIT(sub, SPLAT_CRED_TEST2_NAME, SPLAT_CRED_TEST2_DESC,
+ splat_test_init(sub, SPLAT_CRED_TEST2_NAME, SPLAT_CRED_TEST2_DESC,
SPLAT_CRED_TEST2_ID, splat_cred_test2);
- SPLAT_TEST_INIT(sub, SPLAT_CRED_TEST3_NAME, SPLAT_CRED_TEST3_DESC,
+ splat_test_init(sub, SPLAT_CRED_TEST3_NAME, SPLAT_CRED_TEST3_DESC,
SPLAT_CRED_TEST3_ID, splat_cred_test3);
return sub;
@@ -285,9 +285,9 @@ splat_cred_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
- SPLAT_TEST_FINI(sub, SPLAT_CRED_TEST3_ID);
- SPLAT_TEST_FINI(sub, SPLAT_CRED_TEST2_ID);
- SPLAT_TEST_FINI(sub, SPLAT_CRED_TEST1_ID);
+ splat_test_fini(sub, SPLAT_CRED_TEST3_ID);
+ splat_test_fini(sub, SPLAT_CRED_TEST2_ID);
+ splat_test_fini(sub, SPLAT_CRED_TEST1_ID);
kfree(sub);
} /* splat_cred_fini() */
diff --git a/spl/module/splat/splat-ctl.c b/spl/module/splat/splat-ctl.c
index 4d4148d427f2..8452f1363904 100644
--- a/spl/module/splat/splat-ctl.c
+++ b/spl/module/splat/splat-ctl.c
@@ -33,10 +33,10 @@
* the kmem interfaces have been implemented correctly. When the splat
* module is loaded splat_*_init() will be called for each subsystems
* tests. It is the responsibility of splat_*_init() to register all
- * the tests for this subsystem using the SPLAT_TEST_INIT() macro.
+ * the tests for this subsystem using splat_test_init().
* Similarly splat_*_fini() is called when the splat module is removed
- * and is responsible for unregistering its tests via the SPLAT_TEST_FINI
- * macro. Once a test is registered it can then be run with an ioctl()
+ * and is responsible for unregistering its tests via splat_test_fini().
+ * Once a test is registered it can then be run with an ioctl()
* call which specifies the subsystem and test to be run. The provided
* splat command line tool can be used to display all available
* subsystems and tests. It can also be used to run the full suite
@@ -599,6 +599,88 @@ static struct miscdevice splat_misc = {
.fops = &splat_fops,
};
+static void splat_subsystem_init(const char *name,
+ splat_subsystem_t *(*init)(void))
+{
+ splat_subsystem_t *sub;
+ sub = init();
+ if (sub == NULL) {
+ printk(KERN_ERR "splat: Error initializing: %s\n", name);
+ return;
+ }
+ spin_lock(&splat_module_lock);
+ list_add_tail(&sub->subsystem_list, &splat_module_list);
+ spin_unlock(&splat_module_lock);
+}
+
+static void splat_subsystem_fini(const char *name,
+ int (*id_func)(void), void (*fini)(splat_subsystem_t *))
+{
+ splat_subsystem_t *sub, *tmp;
+ int id, flag = 0;
+
+ id = id_func();
+ spin_lock(&splat_module_lock);
+ list_for_each_entry_safe(sub, tmp, &splat_module_list, subsystem_list) {
+ if (sub->desc.id == id) {
+ list_del_init(&sub->subsystem_list);
+ flag = 1;
+ break;
+ }
+ }
+ spin_unlock(&splat_module_lock);
+ if (flag == 0)
+ printk(KERN_ERR "splat: Error finalizing: %s\n", name);
+ else
+ fini(sub);
+}
+
+#define SPLAT_SUBSYSTEM_INIT(type) \
+ splat_subsystem_init(#type, splat_##type##_init)
+#define SPLAT_SUBSYSTEM_FINI(type) \
+ splat_subsystem_fini(#type, splat_##type##_id, splat_##type##_fini)
+
+void splat_test_init(splat_subsystem_t *sub, const char *name,
+ const char *desc, unsigned int tid, splat_test_func_t func)
+{
+ splat_test_t *test;
+ test = kmalloc(sizeof (splat_test_t), GFP_KERNEL);
+ if (test == NULL) {
+ printk(KERN_ERR "splat: Error initializing: %s/%u\n",
+ name, tid);
+ return;
+ }
+ memset(test, 0, sizeof (splat_test_t));
+ strncpy(test->desc.name, name, SPLAT_NAME_SIZE-1);
+ strncpy(test->desc.desc, desc, SPLAT_DESC_SIZE-1);
+ test->desc.id = tid;
+ test->test = func;
+ INIT_LIST_HEAD(&test->test_list);
+ spin_lock(&sub->test_lock);
+ list_add_tail(&test->test_list, &sub->test_list);
+ spin_unlock(&sub->test_lock);
+}
+
+void splat_test_fini(splat_subsystem_t *sub, unsigned int tid)
+{
+ splat_test_t *test, *tmp;
+ int flag = 0;
+
+ spin_lock(&sub->test_lock);
+ list_for_each_entry_safe(test, tmp, &sub->test_list, test_list) {
+ if (test->desc.id == tid) {
+ list_del_init(&test->test_list);
+ kfree(test);
+ flag = 1;
+ break;
+ }
+ }
+ spin_unlock(&sub->test_lock);
+
+ if (flag == 0)
+ printk(KERN_ERR "splat: Error finalizing: %u\n", tid);
+}
+
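
The SPLAT_SUBSYSTEM_* wrappers above keep the existing call sites unchanged; for example, SPLAT_SUBSYSTEM_INIT(kmem) now expands to splat_subsystem_init("kmem", splat_kmem_init) rather than to an inline statement-expression body.
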
static int __init
splat_init(void)
{
diff --git a/spl/module/splat/splat-generic.c b/spl/module/splat/splat-generic.c
index 3f8119b1ee04..b8f6edc65f07 100644
--- a/spl/module/splat/splat-generic.c
+++ b/spl/module/splat/splat-generic.c
@@ -329,17 +329,17 @@ splat_generic_init(void)
spin_lock_init(&sub->test_lock);
sub->desc.id = SPLAT_SUBSYSTEM_GENERIC;
- SPLAT_TEST_INIT(sub, SPLAT_GENERIC_TEST1_NAME, SPLAT_GENERIC_TEST1_DESC,
+ splat_test_init(sub, SPLAT_GENERIC_TEST1_NAME, SPLAT_GENERIC_TEST1_DESC,
SPLAT_GENERIC_TEST1_ID, splat_generic_test_strtoul);
- SPLAT_TEST_INIT(sub, SPLAT_GENERIC_TEST2_NAME, SPLAT_GENERIC_TEST2_DESC,
+ splat_test_init(sub, SPLAT_GENERIC_TEST2_NAME, SPLAT_GENERIC_TEST2_DESC,
SPLAT_GENERIC_TEST2_ID, splat_generic_test_strtol);
- SPLAT_TEST_INIT(sub, SPLAT_GENERIC_TEST3_NAME, SPLAT_GENERIC_TEST3_DESC,
+ splat_test_init(sub, SPLAT_GENERIC_TEST3_NAME, SPLAT_GENERIC_TEST3_DESC,
SPLAT_GENERIC_TEST3_ID, splat_generic_test_strtoull);
- SPLAT_TEST_INIT(sub, SPLAT_GENERIC_TEST4_NAME, SPLAT_GENERIC_TEST4_DESC,
+ splat_test_init(sub, SPLAT_GENERIC_TEST4_NAME, SPLAT_GENERIC_TEST4_DESC,
SPLAT_GENERIC_TEST4_ID, splat_generic_test_strtoll);
- SPLAT_TEST_INIT(sub, SPLAT_GENERIC_TEST5_NAME, SPLAT_GENERIC_TEST5_DESC,
+ splat_test_init(sub, SPLAT_GENERIC_TEST5_NAME, SPLAT_GENERIC_TEST5_DESC,
SPLAT_GENERIC_TEST5_ID, splat_generic_test_udivdi3);
- SPLAT_TEST_INIT(sub, SPLAT_GENERIC_TEST6_NAME, SPLAT_GENERIC_TEST6_DESC,
+ splat_test_init(sub, SPLAT_GENERIC_TEST6_NAME, SPLAT_GENERIC_TEST6_DESC,
SPLAT_GENERIC_TEST6_ID, splat_generic_test_divdi3);
return sub;
@@ -350,12 +350,12 @@ splat_generic_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
- SPLAT_TEST_FINI(sub, SPLAT_GENERIC_TEST6_ID);
- SPLAT_TEST_FINI(sub, SPLAT_GENERIC_TEST5_ID);
- SPLAT_TEST_FINI(sub, SPLAT_GENERIC_TEST4_ID);
- SPLAT_TEST_FINI(sub, SPLAT_GENERIC_TEST3_ID);
- SPLAT_TEST_FINI(sub, SPLAT_GENERIC_TEST2_ID);
- SPLAT_TEST_FINI(sub, SPLAT_GENERIC_TEST1_ID);
+ splat_test_fini(sub, SPLAT_GENERIC_TEST6_ID);
+ splat_test_fini(sub, SPLAT_GENERIC_TEST5_ID);
+ splat_test_fini(sub, SPLAT_GENERIC_TEST4_ID);
+ splat_test_fini(sub, SPLAT_GENERIC_TEST3_ID);
+ splat_test_fini(sub, SPLAT_GENERIC_TEST2_ID);
+ splat_test_fini(sub, SPLAT_GENERIC_TEST1_ID);
kfree(sub);
}
diff --git a/spl/module/splat/splat-internal.h b/spl/module/splat/splat-internal.h
index 36cf04da1ec0..9ae6c1d0c25c 100644
--- a/spl/module/splat/splat-internal.h
+++ b/spl/module/splat/splat-internal.h
@@ -30,80 +30,6 @@
#include <linux/file_compat.h>
#include <linux/version.h>
-#define SPLAT_SUBSYSTEM_INIT(type) \
-({ splat_subsystem_t *_sub_; \
- \
- _sub_ = (splat_subsystem_t *)splat_##type##_init(); \
- if (_sub_ == NULL) { \
- printk(KERN_ERR "splat: Error initializing: " #type "\n"); \
- } else { \
- spin_lock(&splat_module_lock); \
- list_add_tail(&(_sub_->subsystem_list), \
- &splat_module_list); \
- spin_unlock(&splat_module_lock); \
- } \
-})
-
-#define SPLAT_SUBSYSTEM_FINI(type) \
-({ splat_subsystem_t *_sub_, *_tmp_; \
- int _id_, _flag_ = 0; \
- \
- _id_ = splat_##type##_id(); \
- spin_lock(&splat_module_lock); \
- list_for_each_entry_safe(_sub_, _tmp_, &splat_module_list, \
- subsystem_list) { \
- if (_sub_->desc.id == _id_) { \
- list_del_init(&(_sub_->subsystem_list)); \
- spin_unlock(&splat_module_lock); \
- splat_##type##_fini(_sub_); \
- spin_lock(&splat_module_lock); \
- _flag_ = 1; \
- } \
- } \
- spin_unlock(&splat_module_lock); \
- \
- if (!_flag_) \
- printk(KERN_ERR "splat: Error finalizing: " #type "\n"); \
-})
-
-#define SPLAT_TEST_INIT(sub, n, d, tid, func) \
-({ splat_test_t *_test_; \
- \
- _test_ = (splat_test_t *)kmalloc(sizeof(*_test_), GFP_KERNEL); \
- if (_test_ == NULL) { \
- printk(KERN_ERR "splat: Error initializing: " n "/" #tid" \n");\
- } else { \
- memset(_test_, 0, sizeof(*_test_)); \
- strncpy(_test_->desc.name, n, SPLAT_NAME_SIZE-1); \
- strncpy(_test_->desc.desc, d, SPLAT_DESC_SIZE-1); \
- _test_->desc.id = tid; \
- _test_->test = func; \
- INIT_LIST_HEAD(&(_test_->test_list)); \
- spin_lock(&((sub)->test_lock)); \
- list_add_tail(&(_test_->test_list),&((sub)->test_list));\
- spin_unlock(&((sub)->test_lock)); \
- } \
-})
-
-#define SPLAT_TEST_FINI(sub, tid) \
-({ splat_test_t *_test_, *_tmp_; \
- int _flag_ = 0; \
- \
- spin_lock(&((sub)->test_lock)); \
- list_for_each_entry_safe(_test_, _tmp_, \
- &((sub)->test_list), test_list) { \
- if (_test_->desc.id == tid) { \
- list_del_init(&(_test_->test_list)); \
- kfree(_test_); \
- _flag_ = 1; \
- } \
- } \
- spin_unlock(&((sub)->test_lock)); \
- \
- if (!_flag_) \
- printk(KERN_ERR "splat: Error finalizing: " #tid "\n"); \
-})
-
typedef int (*splat_test_func_t)(struct file *, void *);
typedef struct splat_test {
@@ -119,6 +45,10 @@ typedef struct splat_subsystem {
struct list_head test_list;
} splat_subsystem_t;
+void splat_test_init(splat_subsystem_t *sub, const char *name,
+ const char *desc, unsigned int tid, splat_test_func_t func);
+void splat_test_fini(splat_subsystem_t *sub, unsigned int tid);
+
#define SPLAT_INFO_BUFFER_SIZE 65536
#define SPLAT_INFO_BUFFER_REDZONE 256
diff --git a/spl/module/splat/splat-kmem.c b/spl/module/splat/splat-kmem.c
index b9b566f3e3da..d0649ad9a058 100644
--- a/spl/module/splat/splat-kmem.c
+++ b/spl/module/splat/splat-kmem.c
@@ -590,6 +590,9 @@ splat_kmem_cache_test(struct file *file, void *arg, char *name,
kmem_cache_data_t **kcd = NULL;
int i, rc = 0, objs = 0;
+ /* Limit size for low memory machines (1/128 of memory) */
+ size = MIN(size, (physmem * PAGE_SIZE) >> 7);
+
splat_vprint(file, name,
"Testing size=%d, align=%d, flags=0x%04x\n",
size, align, flags);
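
For example (illustrative numbers): on a 4 GiB machine physmem * PAGE_SIZE is 4 GiB, so the cap is 4 GiB >> 7 = 32 MiB, and a requested 64 MiB object size would be clamped to 32 MiB before the cache is created.
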
@@ -619,7 +622,7 @@ splat_kmem_cache_test(struct file *file, void *arg, char *name,
* it to a single slab for the purposes of this test.
*/
#ifdef _LP64
- objs = SPL_KMEM_CACHE_OBJ_PER_SLAB * 4;
+ objs = kcp->kcp_cache->skc_slab_objs * 4;
#else
objs = 1;
#endif
@@ -1128,9 +1131,15 @@ splat_kmem_test9(struct file *file, void *arg)
static int
splat_kmem_test10(struct file *file, void *arg)
{
- uint64_t size, alloc, rc = 0;
+ uint64_t size, alloc, maxsize, limit, rc = 0;
+
+#if defined(CONFIG_64BIT)
+ maxsize = (1024 * 1024);
+#else
+ maxsize = (128 * 1024);
+#endif
- for (size = 32; size <= 1024*1024; size *= 2) {
+ for (size = 32; size <= maxsize; size *= 2) {
splat_vprint(file, SPLAT_KMEM_TEST10_NAME, "%-22s %s", "name",
"time (sec)\tslabs \tobjs \thash\n");
@@ -1139,8 +1148,10 @@ splat_kmem_test10(struct file *file, void *arg)
for (alloc = 1; alloc <= 1024; alloc *= 2) {
- /* Skip tests which exceed 1/2 of physical memory. */
- if (size * alloc * SPLAT_KMEM_THREADS > physmem / 2)
+ /* Skip tests which exceed 1/2 of memory. */
+ limit = MIN(physmem * PAGE_SIZE,
+ vmem_size(NULL, VMEM_ALLOC | VMEM_FREE)) / 2;
+ if (size * alloc * SPLAT_KMEM_THREADS > limit)
continue;
rc = splat_kmem_cache_thread_test(file, arg,
@@ -1220,7 +1231,8 @@ splat_kmem_test13(struct file *file, void *arg)
int i, rc = 0, max_time = 10;
size = 128 * 1024;
- count = ((physmem * PAGE_SIZE) / 4 / size);
+ count = MIN(physmem * PAGE_SIZE, vmem_size(NULL,
+ VMEM_ALLOC | VMEM_FREE)) / 4 / size;
kcp = splat_kmem_cache_test_kcp_alloc(file, SPLAT_KMEM_TEST13_NAME,
size, 0, 0);
@@ -1340,31 +1352,31 @@ splat_kmem_init(void)
spin_lock_init(&sub->test_lock);
sub->desc.id = SPLAT_SUBSYSTEM_KMEM;
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST1_NAME, SPLAT_KMEM_TEST1_DESC,
+ splat_test_init(sub, SPLAT_KMEM_TEST1_NAME, SPLAT_KMEM_TEST1_DESC,
SPLAT_KMEM_TEST1_ID, splat_kmem_test1);
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST2_NAME, SPLAT_KMEM_TEST2_DESC,
+ splat_test_init(sub, SPLAT_KMEM_TEST2_NAME, SPLAT_KMEM_TEST2_DESC,
SPLAT_KMEM_TEST2_ID, splat_kmem_test2);
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST3_NAME, SPLAT_KMEM_TEST3_DESC,
+ splat_test_init(sub, SPLAT_KMEM_TEST3_NAME, SPLAT_KMEM_TEST3_DESC,
SPLAT_KMEM_TEST3_ID, splat_kmem_test3);
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST4_NAME, SPLAT_KMEM_TEST4_DESC,
+ splat_test_init(sub, SPLAT_KMEM_TEST4_NAME, SPLAT_KMEM_TEST4_DESC,
SPLAT_KMEM_TEST4_ID, splat_kmem_test4);
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST5_NAME, SPLAT_KMEM_TEST5_DESC,
+ splat_test_init(sub, SPLAT_KMEM_TEST5_NAME, SPLAT_KMEM_TEST5_DESC,
SPLAT_KMEM_TEST5_ID, splat_kmem_test5);
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST6_NAME, SPLAT_KMEM_TEST6_DESC,
+ splat_test_init(sub, SPLAT_KMEM_TEST6_NAME, SPLAT_KMEM_TEST6_DESC,
SPLAT_KMEM_TEST6_ID, splat_kmem_test6);
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST7_NAME, SPLAT_KMEM_TEST7_DESC,
+ splat_test_init(sub, SPLAT_KMEM_TEST7_NAME, SPLAT_KMEM_TEST7_DESC,
SPLAT_KMEM_TEST7_ID, splat_kmem_test7);
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST8_NAME, SPLAT_KMEM_TEST8_DESC,
+ splat_test_init(sub, SPLAT_KMEM_TEST8_NAME, SPLAT_KMEM_TEST8_DESC,
SPLAT_KMEM_TEST8_ID, splat_kmem_test8);
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST9_NAME, SPLAT_KMEM_TEST9_DESC,
+ splat_test_init(sub, SPLAT_KMEM_TEST9_NAME, SPLAT_KMEM_TEST9_DESC,
SPLAT_KMEM_TEST9_ID, splat_kmem_test9);
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST10_NAME, SPLAT_KMEM_TEST10_DESC,
+ splat_test_init(sub, SPLAT_KMEM_TEST10_NAME, SPLAT_KMEM_TEST10_DESC,
SPLAT_KMEM_TEST10_ID, splat_kmem_test10);
#if 0
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST11_NAME, SPLAT_KMEM_TEST11_DESC,
+ splat_test_init(sub, SPLAT_KMEM_TEST11_NAME, SPLAT_KMEM_TEST11_DESC,
SPLAT_KMEM_TEST11_ID, splat_kmem_test11);
#endif
- SPLAT_TEST_INIT(sub, SPLAT_KMEM_TEST13_NAME, SPLAT_KMEM_TEST13_DESC,
+ splat_test_init(sub, SPLAT_KMEM_TEST13_NAME, SPLAT_KMEM_TEST13_DESC,
SPLAT_KMEM_TEST13_ID, splat_kmem_test13);
return sub;
@@ -1374,20 +1386,20 @@ void
splat_kmem_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST13_ID);
+ splat_test_fini(sub, SPLAT_KMEM_TEST13_ID);
#if 0
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST11_ID);
+ splat_test_fini(sub, SPLAT_KMEM_TEST11_ID);
#endif
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST10_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST9_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST8_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST7_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST6_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST5_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST4_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST3_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST2_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KMEM_TEST1_ID);
+ splat_test_fini(sub, SPLAT_KMEM_TEST10_ID);
+ splat_test_fini(sub, SPLAT_KMEM_TEST9_ID);
+ splat_test_fini(sub, SPLAT_KMEM_TEST8_ID);
+ splat_test_fini(sub, SPLAT_KMEM_TEST7_ID);
+ splat_test_fini(sub, SPLAT_KMEM_TEST6_ID);
+ splat_test_fini(sub, SPLAT_KMEM_TEST5_ID);
+ splat_test_fini(sub, SPLAT_KMEM_TEST4_ID);
+ splat_test_fini(sub, SPLAT_KMEM_TEST3_ID);
+ splat_test_fini(sub, SPLAT_KMEM_TEST2_ID);
+ splat_test_fini(sub, SPLAT_KMEM_TEST1_ID);
kfree(sub);
}
diff --git a/spl/module/splat/splat-kobj.c b/spl/module/splat/splat-kobj.c
index a0d4097d5943..6d78cb3e0b36 100644
--- a/spl/module/splat/splat-kobj.c
+++ b/spl/module/splat/splat-kobj.c
@@ -140,9 +140,9 @@ splat_kobj_init(void)
spin_lock_init(&sub->test_lock);
sub->desc.id = SPLAT_SUBSYSTEM_KOBJ;
- SPLAT_TEST_INIT(sub, SPLAT_KOBJ_TEST1_NAME, SPLAT_KOBJ_TEST1_DESC,
+ splat_test_init(sub, SPLAT_KOBJ_TEST1_NAME, SPLAT_KOBJ_TEST1_DESC,
SPLAT_KOBJ_TEST1_ID, splat_kobj_test1);
- SPLAT_TEST_INIT(sub, SPLAT_KOBJ_TEST2_NAME, SPLAT_KOBJ_TEST2_DESC,
+ splat_test_init(sub, SPLAT_KOBJ_TEST2_NAME, SPLAT_KOBJ_TEST2_DESC,
SPLAT_KOBJ_TEST2_ID, splat_kobj_test2);
return sub;
@@ -153,8 +153,8 @@ splat_kobj_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
- SPLAT_TEST_FINI(sub, SPLAT_KOBJ_TEST2_ID);
- SPLAT_TEST_FINI(sub, SPLAT_KOBJ_TEST1_ID);
+ splat_test_fini(sub, SPLAT_KOBJ_TEST2_ID);
+ splat_test_fini(sub, SPLAT_KOBJ_TEST1_ID);
kfree(sub);
} /* splat_kobj_fini() */
diff --git a/spl/module/splat/splat-linux.c b/spl/module/splat/splat-linux.c
index 3652267f940c..b5a1f142e548 100644
--- a/spl/module/splat/splat-linux.c
+++ b/spl/module/splat/splat-linux.c
@@ -216,7 +216,7 @@ splat_linux_init(void)
spin_lock_init(&sub->test_lock);
sub->desc.id = SPLAT_SUBSYSTEM_LINUX;
- SPLAT_TEST_INIT(sub, SPLAT_LINUX_TEST1_NAME, SPLAT_LINUX_TEST1_DESC,
+ splat_test_init(sub, SPLAT_LINUX_TEST1_NAME, SPLAT_LINUX_TEST1_DESC,
SPLAT_LINUX_TEST1_ID, splat_linux_test1);
return sub;
@@ -226,7 +226,7 @@ void
splat_linux_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
- SPLAT_TEST_FINI(sub, SPLAT_LINUX_TEST1_ID);
+ splat_test_fini(sub, SPLAT_LINUX_TEST1_ID);
kfree(sub);
}
diff --git a/spl/module/splat/splat-list.c b/spl/module/splat/splat-list.c
index f59394c14253..bfbaf23c75c5 100644
--- a/spl/module/splat/splat-list.c
+++ b/spl/module/splat/splat-list.c
@@ -434,19 +434,19 @@ splat_list_init(void)
spin_lock_init(&sub->test_lock);
sub->desc.id = SPLAT_SUBSYSTEM_LIST;
- SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST1_NAME, SPLAT_LIST_TEST1_DESC,
+ splat_test_init(sub, SPLAT_LIST_TEST1_NAME, SPLAT_LIST_TEST1_DESC,
SPLAT_LIST_TEST1_ID, splat_list_test1);
- SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST2_NAME, SPLAT_LIST_TEST2_DESC,
+ splat_test_init(sub, SPLAT_LIST_TEST2_NAME, SPLAT_LIST_TEST2_DESC,
SPLAT_LIST_TEST2_ID, splat_list_test2);
- SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST3_NAME, SPLAT_LIST_TEST3_DESC,
+ splat_test_init(sub, SPLAT_LIST_TEST3_NAME, SPLAT_LIST_TEST3_DESC,
SPLAT_LIST_TEST3_ID, splat_list_test3);
- SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST4_NAME, SPLAT_LIST_TEST4_DESC,
+ splat_test_init(sub, SPLAT_LIST_TEST4_NAME, SPLAT_LIST_TEST4_DESC,
SPLAT_LIST_TEST4_ID, splat_list_test4);
- SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST5_NAME, SPLAT_LIST_TEST5_DESC,
+ splat_test_init(sub, SPLAT_LIST_TEST5_NAME, SPLAT_LIST_TEST5_DESC,
SPLAT_LIST_TEST5_ID, splat_list_test5);
- SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST6_NAME, SPLAT_LIST_TEST6_DESC,
+ splat_test_init(sub, SPLAT_LIST_TEST6_NAME, SPLAT_LIST_TEST6_DESC,
SPLAT_LIST_TEST6_ID, splat_list_test6);
- SPLAT_TEST_INIT(sub, SPLAT_LIST_TEST7_NAME, SPLAT_LIST_TEST7_DESC,
+ splat_test_init(sub, SPLAT_LIST_TEST7_NAME, SPLAT_LIST_TEST7_DESC,
SPLAT_LIST_TEST7_ID, splat_list_test7);
return sub;
@@ -457,13 +457,13 @@ splat_list_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
- SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST7_ID);
- SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST6_ID);
- SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST5_ID);
- SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST4_ID);
- SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST3_ID);
- SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST2_ID);
- SPLAT_TEST_FINI(sub, SPLAT_LIST_TEST1_ID);
+ splat_test_fini(sub, SPLAT_LIST_TEST7_ID);
+ splat_test_fini(sub, SPLAT_LIST_TEST6_ID);
+ splat_test_fini(sub, SPLAT_LIST_TEST5_ID);
+ splat_test_fini(sub, SPLAT_LIST_TEST4_ID);
+ splat_test_fini(sub, SPLAT_LIST_TEST3_ID);
+ splat_test_fini(sub, SPLAT_LIST_TEST2_ID);
+ splat_test_fini(sub, SPLAT_LIST_TEST1_ID);
kfree(sub);
}
diff --git a/spl/module/splat/splat-mutex.c b/spl/module/splat/splat-mutex.c
index 86bef8ee31be..71291bbd4d49 100644
--- a/spl/module/splat/splat-mutex.c
+++ b/spl/module/splat/splat-mutex.c
@@ -81,7 +81,8 @@ splat_mutex_test1(struct file *file, void *arg)
{
mutex_priv_t *mp;
taskq_t *tq;
- int id, rc = 0;
+ taskqid_t id;
+ int rc = 0;
mp = (mutex_priv_t *)kmalloc(sizeof(*mp), GFP_KERNEL);
if (mp == NULL)
@@ -105,8 +106,8 @@ splat_mutex_test1(struct file *file, void *arg)
* function will indicate this status in the passed private data.
*/
mp->mp_rc = -EINVAL;
- id = taskq_dispatch(tq, splat_mutex_test1_func, mp, TQ_SLEEP);
- if (id == 0) {
+ id = taskq_dispatch(tq, splat_mutex_test1_func, mp, TQ_SLEEP);
+ if (id == TASKQID_INVALID) {
mutex_exit(&mp->mp_mtx);
splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s",
"taskq_dispatch() failed\n");
@@ -120,8 +121,8 @@ splat_mutex_test1(struct file *file, void *arg)
/* Task function successfully acquired mutex, very bad! */
if (mp->mp_rc != -EBUSY) {
splat_vprint(file, SPLAT_MUTEX_TEST1_NAME,
- "mutex_trylock() incorrectly succeeded when "
- "the mutex was held, %d/%d\n", id, mp->mp_rc);
+ "mutex_trylock() incorrectly succeeded when "
+ "the mutex was held, %d/%d\n", (int)id, mp->mp_rc);
rc = -EINVAL;
goto out;
} else {
@@ -136,8 +137,8 @@ splat_mutex_test1(struct file *file, void *arg)
* can be verified by checking the private data.
*/
mp->mp_rc = -EINVAL;
- id = taskq_dispatch(tq, splat_mutex_test1_func, mp, TQ_SLEEP);
- if (id == 0) {
+ id = taskq_dispatch(tq, splat_mutex_test1_func, mp, TQ_SLEEP);
+ if (id == TASKQID_INVALID) {
splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s",
"taskq_dispatch() failed\n");
rc = -EINVAL;
@@ -149,8 +150,8 @@ splat_mutex_test1(struct file *file, void *arg)
/* Task function failed to acquire mutex, very bad! */
if (mp->mp_rc != 0) {
splat_vprint(file, SPLAT_MUTEX_TEST1_NAME,
- "mutex_trylock() incorrectly failed when "
- "the mutex was not held, %d/%d\n", id, mp->mp_rc);
+ "mutex_trylock() incorrectly failed when the mutex "
+ "was not held, %d/%d\n", (int)id, mp->mp_rc);
rc = -EINVAL;
} else {
splat_vprint(file, SPLAT_MUTEX_TEST1_NAME, "%s",
@@ -188,6 +189,7 @@ splat_mutex_test2(struct file *file, void *arg)
{
mutex_priv_t *mp;
taskq_t *tq;
+ taskqid_t id;
int i, rc = 0;
mp = (mutex_priv_t *)kmalloc(sizeof(*mp), GFP_KERNEL);
@@ -218,7 +220,8 @@ splat_mutex_test2(struct file *file, void *arg)
* mutex is implemented right this will never happen; that's a pass.
*/
for (i = 0; i < SPLAT_MUTEX_TEST_COUNT; i++) {
- if (!taskq_dispatch(tq, splat_mutex_test2_func, mp, TQ_SLEEP)) {
+ id = taskq_dispatch(tq, splat_mutex_test2_func, mp, TQ_SLEEP);
+ if (id == TASKQID_INVALID) {
splat_vprint(file, SPLAT_MUTEX_TEST2_NAME,
"Failed to queue task %d\n", i);
rc = -EINVAL;
@@ -260,6 +263,7 @@ splat_mutex_test3(struct file *file, void *arg)
{
mutex_priv_t mp;
taskq_t *tq;
+ taskqid_t id;
int rc = 0;
mp.mp_magic = SPLAT_MUTEX_TEST_MAGIC;
@@ -283,7 +287,8 @@ splat_mutex_test3(struct file *file, void *arg)
goto out_exit;
}
- if (taskq_dispatch(tq, splat_mutex_owned, &mp, TQ_SLEEP) == 0) {
+ id = taskq_dispatch(tq, splat_mutex_owned, &mp, TQ_SLEEP);
+ if (id == TASKQID_INVALID) {
splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Failed to "
"dispatch function '%s' to taskq\n",
sym2str(splat_mutex_owned));
@@ -310,7 +315,8 @@ splat_mutex_test3(struct file *file, void *arg)
goto out;
}
- if (taskq_dispatch(tq, splat_mutex_owned, &mp, TQ_SLEEP) == 0) {
+ id = taskq_dispatch(tq, splat_mutex_owned, &mp, TQ_SLEEP);
+ if (id == TASKQID_INVALID) {
splat_vprint(file, SPLAT_MUTEX_TEST3_NAME, "Failed to "
"dispatch function '%s' to taskq\n",
sym2str(splat_mutex_owned));
@@ -411,13 +417,13 @@ splat_mutex_init(void)
spin_lock_init(&sub->test_lock);
sub->desc.id = SPLAT_SUBSYSTEM_MUTEX;
- SPLAT_TEST_INIT(sub, SPLAT_MUTEX_TEST1_NAME, SPLAT_MUTEX_TEST1_DESC,
+ splat_test_init(sub, SPLAT_MUTEX_TEST1_NAME, SPLAT_MUTEX_TEST1_DESC,
SPLAT_MUTEX_TEST1_ID, splat_mutex_test1);
- SPLAT_TEST_INIT(sub, SPLAT_MUTEX_TEST2_NAME, SPLAT_MUTEX_TEST2_DESC,
+ splat_test_init(sub, SPLAT_MUTEX_TEST2_NAME, SPLAT_MUTEX_TEST2_DESC,
SPLAT_MUTEX_TEST2_ID, splat_mutex_test2);
- SPLAT_TEST_INIT(sub, SPLAT_MUTEX_TEST3_NAME, SPLAT_MUTEX_TEST3_DESC,
+ splat_test_init(sub, SPLAT_MUTEX_TEST3_NAME, SPLAT_MUTEX_TEST3_DESC,
SPLAT_MUTEX_TEST3_ID, splat_mutex_test3);
- SPLAT_TEST_INIT(sub, SPLAT_MUTEX_TEST4_NAME, SPLAT_MUTEX_TEST4_DESC,
+ splat_test_init(sub, SPLAT_MUTEX_TEST4_NAME, SPLAT_MUTEX_TEST4_DESC,
SPLAT_MUTEX_TEST4_ID, splat_mutex_test4);
return sub;
@@ -427,10 +433,10 @@ void
splat_mutex_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
- SPLAT_TEST_FINI(sub, SPLAT_MUTEX_TEST4_ID);
- SPLAT_TEST_FINI(sub, SPLAT_MUTEX_TEST3_ID);
- SPLAT_TEST_FINI(sub, SPLAT_MUTEX_TEST2_ID);
- SPLAT_TEST_FINI(sub, SPLAT_MUTEX_TEST1_ID);
+ splat_test_fini(sub, SPLAT_MUTEX_TEST4_ID);
+ splat_test_fini(sub, SPLAT_MUTEX_TEST3_ID);
+ splat_test_fini(sub, SPLAT_MUTEX_TEST2_ID);
+ splat_test_fini(sub, SPLAT_MUTEX_TEST1_ID);
kfree(sub);
}
diff --git a/spl/module/splat/splat-random.c b/spl/module/splat/splat-random.c
index 33b799bad2e2..670931a8cf04 100644
--- a/spl/module/splat/splat-random.c
+++ b/spl/module/splat/splat-random.c
@@ -108,7 +108,7 @@ splat_krng_init(void)
spin_lock_init(&sub->test_lock);
sub->desc.id = SPLAT_SUBSYSTEM_KRNG;
- SPLAT_TEST_INIT(sub, SPLAT_KRNG_TEST1_NAME, SPLAT_KRNG_TEST1_DESC,
+ splat_test_init(sub, SPLAT_KRNG_TEST1_NAME, SPLAT_KRNG_TEST1_DESC,
SPLAT_KRNG_TEST1_ID, splat_krng_test1);
return sub;
@@ -119,7 +119,7 @@ splat_krng_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
- SPLAT_TEST_FINI(sub, SPLAT_KRNG_TEST1_ID);
+ splat_test_fini(sub, SPLAT_KRNG_TEST1_ID);
kfree(sub);
}
diff --git a/spl/module/splat/splat-rwlock.c b/spl/module/splat/splat-rwlock.c
index 7abb19d1c87c..5d3ffb195115 100644
--- a/spl/module/splat/splat-rwlock.c
+++ b/spl/module/splat/splat-rwlock.c
@@ -106,6 +106,17 @@ void splat_init_rw_priv(rw_priv_t *rwp, struct file *file)
rwp->rw_type = 0;
}
+#if defined(CONFIG_PREEMPT_RT_FULL)
+static int
+splat_rwlock_test1(struct file *file, void *arg)
+{
+ /*
+ * This test will never succeed on PREEMPT_RT_FULL because these
+ * kernels only allow a single thread to hold the lock.
+ */
+ return 0;
+}
+#else
static int
splat_rwlock_wr_thr(void *arg)
{
@@ -297,6 +308,7 @@ splat_rwlock_test1(struct file *file, void *arg)
return rc;
}
+#endif
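
The stub is needed because PREEMPT_RT_FULL kernels implement these locks on top of an rt_mutex, so read locks cannot be held concurrently by different threads (only the same thread may re-acquire one). Test1, which drives multiple reader threads, can therefore never pass there, and the RW_READER/RW_READER case in test4 below expects -EBUSY instead of success for the same reason.
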
static void
splat_rwlock_test2_func(void *arg)
@@ -348,7 +360,8 @@ splat_rwlock_test2(struct file *file, void *arg)
* rwlock is implemented right this will never happen; that's a pass.
*/
for (i = 0; i < tq_count; i++) {
- if (!taskq_dispatch(tq,splat_rwlock_test2_func,rwp,TQ_SLEEP)) {
+ if (taskq_dispatch(tq, splat_rwlock_test2_func, rwp,
+ TQ_SLEEP) == TASKQID_INVALID) {
splat_vprint(file, SPLAT_RWLOCK_TEST2_NAME,
"Failed to queue task %d\n", i);
rc = -EINVAL;
@@ -469,7 +482,7 @@ splat_rwlock_test4_type(taskq_t *tq, rw_priv_t *rwp, int expected_rc,
rw_enter(&rwp->rw_rwlock, holder_type);
id = taskq_dispatch(tq, splat_rwlock_test4_func, rwp, TQ_SLEEP);
- if (id == 0) {
+ if (id == TASKQID_INVALID) {
splat_vprint(rwp->rw_file, SPLAT_RWLOCK_TEST4_NAME, "%s",
"taskq_dispatch() failed\n");
rc = -EINVAL;
@@ -513,11 +526,22 @@ splat_rwlock_test4(struct file *file, void *arg)
splat_init_rw_priv(rwp, file);
- /* Validate all combinations of rw_tryenter() contention */
+ /*
+ * Validate all combinations of rw_tryenter() contention.
+ *
+ * The concurrent reader test is modified for PREEMPT_RT_FULL
+ * kernels which do not permit concurrent read locks to be taken
+ * from different threads. The same thread is allowed to take
+ * the read lock multiple times.
+ */
rc1 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_WRITER, RW_WRITER);
rc2 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_WRITER, RW_READER);
rc3 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_READER, RW_WRITER);
+#if defined(CONFIG_PREEMPT_RT_FULL)
+ rc4 = splat_rwlock_test4_type(tq, rwp, -EBUSY, RW_READER, RW_READER);
+#else
rc4 = splat_rwlock_test4_type(tq, rwp, 0, RW_READER, RW_READER);
+#endif
rc5 = splat_rwlock_test4_type(tq, rwp, 0, RW_NONE, RW_WRITER);
rc6 = splat_rwlock_test4_type(tq, rwp, 0, RW_NONE, RW_READER);
@@ -685,19 +709,19 @@ splat_rwlock_init(void)
spin_lock_init(&sub->test_lock);
sub->desc.id = SPLAT_SUBSYSTEM_RWLOCK;
- SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST1_NAME, SPLAT_RWLOCK_TEST1_DESC,
+ splat_test_init(sub, SPLAT_RWLOCK_TEST1_NAME, SPLAT_RWLOCK_TEST1_DESC,
SPLAT_RWLOCK_TEST1_ID, splat_rwlock_test1);
- SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST2_NAME, SPLAT_RWLOCK_TEST2_DESC,
+ splat_test_init(sub, SPLAT_RWLOCK_TEST2_NAME, SPLAT_RWLOCK_TEST2_DESC,
SPLAT_RWLOCK_TEST2_ID, splat_rwlock_test2);
- SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST3_NAME, SPLAT_RWLOCK_TEST3_DESC,
+ splat_test_init(sub, SPLAT_RWLOCK_TEST3_NAME, SPLAT_RWLOCK_TEST3_DESC,
SPLAT_RWLOCK_TEST3_ID, splat_rwlock_test3);
- SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST4_NAME, SPLAT_RWLOCK_TEST4_DESC,
+ splat_test_init(sub, SPLAT_RWLOCK_TEST4_NAME, SPLAT_RWLOCK_TEST4_DESC,
SPLAT_RWLOCK_TEST4_ID, splat_rwlock_test4);
- SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST5_NAME, SPLAT_RWLOCK_TEST5_DESC,
+ splat_test_init(sub, SPLAT_RWLOCK_TEST5_NAME, SPLAT_RWLOCK_TEST5_DESC,
SPLAT_RWLOCK_TEST5_ID, splat_rwlock_test5);
- SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST6_NAME, SPLAT_RWLOCK_TEST6_DESC,
+ splat_test_init(sub, SPLAT_RWLOCK_TEST6_NAME, SPLAT_RWLOCK_TEST6_DESC,
SPLAT_RWLOCK_TEST6_ID, splat_rwlock_test6);
- SPLAT_TEST_INIT(sub, SPLAT_RWLOCK_TEST7_NAME, SPLAT_RWLOCK_TEST7_DESC,
+ splat_test_init(sub, SPLAT_RWLOCK_TEST7_NAME, SPLAT_RWLOCK_TEST7_DESC,
SPLAT_RWLOCK_TEST7_ID, splat_rwlock_test7);
return sub;
@@ -707,13 +731,13 @@ void
splat_rwlock_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
- SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST7_ID);
- SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST6_ID);
- SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST5_ID);
- SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST4_ID);
- SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST3_ID);
- SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST2_ID);
- SPLAT_TEST_FINI(sub, SPLAT_RWLOCK_TEST1_ID);
+ splat_test_fini(sub, SPLAT_RWLOCK_TEST7_ID);
+ splat_test_fini(sub, SPLAT_RWLOCK_TEST6_ID);
+ splat_test_fini(sub, SPLAT_RWLOCK_TEST5_ID);
+ splat_test_fini(sub, SPLAT_RWLOCK_TEST4_ID);
+ splat_test_fini(sub, SPLAT_RWLOCK_TEST3_ID);
+ splat_test_fini(sub, SPLAT_RWLOCK_TEST2_ID);
+ splat_test_fini(sub, SPLAT_RWLOCK_TEST1_ID);
kfree(sub);
}
diff --git a/spl/module/splat/splat-taskq.c b/spl/module/splat/splat-taskq.c
index 8f06f413d5bc..6d22018fc05d 100644
--- a/spl/module/splat/splat-taskq.c
+++ b/spl/module/splat/splat-taskq.c
@@ -160,7 +160,7 @@ splat_taskq_test1_impl(struct file *file, void *arg, boolean_t prealloc)
&tq_arg, TQ_SLEEP);
}
- if (id == 0) {
+ if (id == TASKQID_INVALID) {
splat_vprint(file, SPLAT_TASKQ_TEST1_NAME,
"Taskq '%s' function '%s' dispatch failed\n",
tq_arg.name, sym2str(splat_taskq_test13_func));
@@ -296,7 +296,7 @@ splat_taskq_test2_impl(struct file *file, void *arg, boolean_t prealloc) {
tq_args[i], TQ_SLEEP);
}
- if (id == 0) {
+ if (id == TASKQID_INVALID) {
splat_vprint(file, SPLAT_TASKQ_TEST2_NAME,
"Taskq '%s/%d' function '%s' dispatch "
"failed\n", tq_args[i]->name, tq_args[i]->id,
@@ -318,7 +318,7 @@ splat_taskq_test2_impl(struct file *file, void *arg, boolean_t prealloc) {
tq_args[i], TQ_SLEEP);
}
- if (id == 0) {
+ if (id == TASKQID_INVALID) {
splat_vprint(file, SPLAT_TASKQ_TEST2_NAME, "Taskq "
"'%s/%d' function '%s' dispatch failed\n",
tq_args[i]->name, tq_args[i]->id,
@@ -420,7 +420,7 @@ splat_taskq_test3_impl(struct file *file, void *arg, boolean_t prealloc)
tq_arg, TQ_SLEEP);
}
- if (id == 0) {
+ if (id == TASKQID_INVALID) {
splat_vprint(file, SPLAT_TASKQ_TEST3_NAME,
"Taskq '%s' function '%s' dispatch failed\n",
tq_arg->name, sym2str(splat_taskq_test13_func));
@@ -525,7 +525,7 @@ splat_taskq_test4_common(struct file *file, void *arg, int minalloc,
&tq_arg, TQ_SLEEP);
}
- if (id == 0) {
+ if (id == TASKQID_INVALID) {
splat_vprint(file, SPLAT_TASKQ_TEST4_NAME,
"Taskq '%s' function '%s' dispatch "
"%d failed\n", tq_arg.name,
@@ -741,7 +741,7 @@ splat_taskq_test5_impl(struct file *file, void *arg, boolean_t prealloc)
&tq_id[i], TQ_SLEEP);
}
- if (id == 0) {
+ if (id == TASKQID_INVALID) {
splat_vprint(file, SPLAT_TASKQ_TEST5_NAME,
"Taskq '%s' function '%s' dispatch failed\n",
tq_arg.name, sym2str(splat_taskq_test5_func));
@@ -905,7 +905,7 @@ splat_taskq_test6_impl(struct file *file, void *arg, boolean_t prealloc)
&tq_id[i], tflags);
}
- if (id == 0) {
+ if (id == TASKQID_INVALID) {
splat_vprint(file, SPLAT_TASKQ_TEST6_NAME,
"Taskq '%s' function '%s' dispatch failed\n",
tq_arg.name, sym2str(splat_taskq_test6_func));
@@ -983,7 +983,7 @@ splat_taskq_test7_func(void *arg)
tq_arg, TQ_SLEEP);
}
- if (id == 0) {
+ if (id == TASKQID_INVALID) {
splat_vprint(tq_arg->file, SPLAT_TASKQ_TEST7_NAME,
"Taskq '%s' function '%s' dispatch failed "
"(depth = %u)\n", tq_arg->name,
@@ -1040,11 +1040,12 @@ splat_taskq_test7_impl(struct file *file, void *arg, boolean_t prealloc)
error = (tq_arg->depth == SPLAT_TASKQ_DEPTH_MAX ? 0 : -EINVAL);
+ splat_vprint(file, SPLAT_TASKQ_TEST7_NAME,
+ "Taskq '%s' destroying\n", tq_arg->name);
+
kmem_free(tqe, sizeof (taskq_ent_t));
kmem_free(tq_arg, sizeof (splat_taskq_arg_t));
- splat_vprint(file, SPLAT_TASKQ_TEST7_NAME,
- "Taskq '%s' destroying\n", tq_arg->name);
taskq_destroy(tq);
return (error);
@@ -1120,7 +1121,7 @@ splat_taskq_throughput(struct file *file, void *arg, const char *name,
&tq_arg, TQ_SLEEP, tqes[i]);
id = tqes[i]->tqent_id;
- if (id == 0) {
+ if (id == TASKQID_INVALID) {
splat_vprint(file, name, "Taskq '%s' function '%s' "
"dispatch %d failed\n", tq_arg.name,
sym2str(splat_taskq_throughput_func), i);
@@ -1234,7 +1235,7 @@ splat_taskq_test9(struct file *file, void *arg)
id = taskq_dispatch_delay(tq, splat_taskq_test9_func,
tq_arg, TQ_SLEEP, ddi_get_lbolt() + rnd);
- if (id == 0) {
+ if (id == TASKQID_INVALID) {
splat_vprint(file, SPLAT_TASKQ_TEST9_NAME,
"Taskq '%s' delay dispatch failed\n",
SPLAT_TASKQ_TEST9_NAME);
@@ -1343,7 +1344,7 @@ splat_taskq_test10(struct file *file, void *arg)
tq_arg, TQ_SLEEP, ddi_get_lbolt() + rnd);
}
- if (tq_arg->id == 0) {
+ if (tq_arg->id == TASKQID_INVALID) {
splat_vprint(file, SPLAT_TASKQ_TEST10_NAME,
"Taskq '%s' dispatch failed\n",
SPLAT_TASKQ_TEST10_NAME);
@@ -1472,8 +1473,8 @@ splat_taskq_test11(struct file *file, void *arg)
dynamic.tv_sec, dynamic.tv_nsec);
/* A 10x increase in runtime is used to indicate a core problem. */
- if ((dynamic.tv_sec * NANOSEC + dynamic.tv_nsec) >
- ((normal.tv_sec * NANOSEC + normal.tv_nsec) * 10))
+ if (((int64_t)dynamic.tv_sec * NANOSEC + (int64_t)dynamic.tv_nsec) >
+ (((int64_t)normal.tv_sec * NANOSEC + (int64_t)normal.tv_nsec) * 10))
error = -ETIME;
return (error);
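
The int64_t casts matter on 32-bit kernels where tv_sec and tv_nsec are long: tv_sec * NANOSEC wraps a 32-bit long once tv_sec exceeds 2 (2^31 / 10^9 is about 2.15 s), so for example a 3 s runtime would go negative and defeat the 10x comparison; promoting to 64-bit first keeps the products exact.
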
@@ -1496,27 +1497,27 @@ splat_taskq_init(void)
spin_lock_init(&sub->test_lock);
sub->desc.id = SPLAT_SUBSYSTEM_TASKQ;
- SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST1_NAME, SPLAT_TASKQ_TEST1_DESC,
+ splat_test_init(sub, SPLAT_TASKQ_TEST1_NAME, SPLAT_TASKQ_TEST1_DESC,
SPLAT_TASKQ_TEST1_ID, splat_taskq_test1);
- SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST2_NAME, SPLAT_TASKQ_TEST2_DESC,
+ splat_test_init(sub, SPLAT_TASKQ_TEST2_NAME, SPLAT_TASKQ_TEST2_DESC,
SPLAT_TASKQ_TEST2_ID, splat_taskq_test2);
- SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST3_NAME, SPLAT_TASKQ_TEST3_DESC,
+ splat_test_init(sub, SPLAT_TASKQ_TEST3_NAME, SPLAT_TASKQ_TEST3_DESC,
SPLAT_TASKQ_TEST3_ID, splat_taskq_test3);
- SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST4_NAME, SPLAT_TASKQ_TEST4_DESC,
+ splat_test_init(sub, SPLAT_TASKQ_TEST4_NAME, SPLAT_TASKQ_TEST4_DESC,
SPLAT_TASKQ_TEST4_ID, splat_taskq_test4);
- SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST5_NAME, SPLAT_TASKQ_TEST5_DESC,
+ splat_test_init(sub, SPLAT_TASKQ_TEST5_NAME, SPLAT_TASKQ_TEST5_DESC,
SPLAT_TASKQ_TEST5_ID, splat_taskq_test5);
- SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST6_NAME, SPLAT_TASKQ_TEST6_DESC,
+ splat_test_init(sub, SPLAT_TASKQ_TEST6_NAME, SPLAT_TASKQ_TEST6_DESC,
SPLAT_TASKQ_TEST6_ID, splat_taskq_test6);
- SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST7_NAME, SPLAT_TASKQ_TEST7_DESC,
+ splat_test_init(sub, SPLAT_TASKQ_TEST7_NAME, SPLAT_TASKQ_TEST7_DESC,
SPLAT_TASKQ_TEST7_ID, splat_taskq_test7);
- SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST8_NAME, SPLAT_TASKQ_TEST8_DESC,
+ splat_test_init(sub, SPLAT_TASKQ_TEST8_NAME, SPLAT_TASKQ_TEST8_DESC,
SPLAT_TASKQ_TEST8_ID, splat_taskq_test8);
- SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST9_NAME, SPLAT_TASKQ_TEST9_DESC,
+ splat_test_init(sub, SPLAT_TASKQ_TEST9_NAME, SPLAT_TASKQ_TEST9_DESC,
SPLAT_TASKQ_TEST9_ID, splat_taskq_test9);
- SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST10_NAME, SPLAT_TASKQ_TEST10_DESC,
+ splat_test_init(sub, SPLAT_TASKQ_TEST10_NAME, SPLAT_TASKQ_TEST10_DESC,
SPLAT_TASKQ_TEST10_ID, splat_taskq_test10);
- SPLAT_TEST_INIT(sub, SPLAT_TASKQ_TEST11_NAME, SPLAT_TASKQ_TEST11_DESC,
+ splat_test_init(sub, SPLAT_TASKQ_TEST11_NAME, SPLAT_TASKQ_TEST11_DESC,
SPLAT_TASKQ_TEST11_ID, splat_taskq_test11);
return sub;
@@ -1526,17 +1527,17 @@ void
splat_taskq_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
- SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST11_ID);
- SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST10_ID);
- SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST9_ID);
- SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST8_ID);
- SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST7_ID);
- SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST6_ID);
- SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST5_ID);
- SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST4_ID);
- SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST3_ID);
- SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST2_ID);
- SPLAT_TEST_FINI(sub, SPLAT_TASKQ_TEST1_ID);
+ splat_test_fini(sub, SPLAT_TASKQ_TEST11_ID);
+ splat_test_fini(sub, SPLAT_TASKQ_TEST10_ID);
+ splat_test_fini(sub, SPLAT_TASKQ_TEST9_ID);
+ splat_test_fini(sub, SPLAT_TASKQ_TEST8_ID);
+ splat_test_fini(sub, SPLAT_TASKQ_TEST7_ID);
+ splat_test_fini(sub, SPLAT_TASKQ_TEST6_ID);
+ splat_test_fini(sub, SPLAT_TASKQ_TEST5_ID);
+ splat_test_fini(sub, SPLAT_TASKQ_TEST4_ID);
+ splat_test_fini(sub, SPLAT_TASKQ_TEST3_ID);
+ splat_test_fini(sub, SPLAT_TASKQ_TEST2_ID);
+ splat_test_fini(sub, SPLAT_TASKQ_TEST1_ID);
kfree(sub);
}
diff --git a/spl/module/splat/splat-thread.c b/spl/module/splat/splat-thread.c
index dcf7d4a98788..e99d691013c3 100644
--- a/spl/module/splat/splat-thread.c
+++ b/spl/module/splat/splat-thread.c
@@ -363,11 +363,11 @@ splat_thread_init(void)
spin_lock_init(&sub->test_lock);
sub->desc.id = SPLAT_SUBSYSTEM_THREAD;
- SPLAT_TEST_INIT(sub, SPLAT_THREAD_TEST1_NAME, SPLAT_THREAD_TEST1_DESC,
+ splat_test_init(sub, SPLAT_THREAD_TEST1_NAME, SPLAT_THREAD_TEST1_DESC,
SPLAT_THREAD_TEST1_ID, splat_thread_test1);
- SPLAT_TEST_INIT(sub, SPLAT_THREAD_TEST2_NAME, SPLAT_THREAD_TEST2_DESC,
+ splat_test_init(sub, SPLAT_THREAD_TEST2_NAME, SPLAT_THREAD_TEST2_DESC,
SPLAT_THREAD_TEST2_ID, splat_thread_test2);
- SPLAT_TEST_INIT(sub, SPLAT_THREAD_TEST3_NAME, SPLAT_THREAD_TEST3_DESC,
+ splat_test_init(sub, SPLAT_THREAD_TEST3_NAME, SPLAT_THREAD_TEST3_DESC,
SPLAT_THREAD_TEST3_ID, splat_thread_test3);
return sub;
@@ -377,9 +377,9 @@ void
splat_thread_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
- SPLAT_TEST_FINI(sub, SPLAT_THREAD_TEST3_ID);
- SPLAT_TEST_FINI(sub, SPLAT_THREAD_TEST2_ID);
- SPLAT_TEST_FINI(sub, SPLAT_THREAD_TEST1_ID);
+ splat_test_fini(sub, SPLAT_THREAD_TEST3_ID);
+ splat_test_fini(sub, SPLAT_THREAD_TEST2_ID);
+ splat_test_fini(sub, SPLAT_THREAD_TEST1_ID);
kfree(sub);
}
diff --git a/spl/module/splat/splat-time.c b/spl/module/splat/splat-time.c
index b4e94c866244..5b5ad62f68f7 100644
--- a/spl/module/splat/splat-time.c
+++ b/spl/module/splat/splat-time.c
@@ -93,9 +93,9 @@ splat_time_init(void)
spin_lock_init(&sub->test_lock);
sub->desc.id = SPLAT_SUBSYSTEM_TIME;
- SPLAT_TEST_INIT(sub, SPLAT_TIME_TEST1_NAME, SPLAT_TIME_TEST1_DESC,
+ splat_test_init(sub, SPLAT_TIME_TEST1_NAME, SPLAT_TIME_TEST1_DESC,
SPLAT_TIME_TEST1_ID, splat_time_test1);
- SPLAT_TEST_INIT(sub, SPLAT_TIME_TEST2_NAME, SPLAT_TIME_TEST2_DESC,
+ splat_test_init(sub, SPLAT_TIME_TEST2_NAME, SPLAT_TIME_TEST2_DESC,
SPLAT_TIME_TEST2_ID, splat_time_test2);
return sub;
@@ -106,8 +106,8 @@ splat_time_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
- SPLAT_TEST_FINI(sub, SPLAT_TIME_TEST2_ID);
- SPLAT_TEST_FINI(sub, SPLAT_TIME_TEST1_ID);
+ splat_test_fini(sub, SPLAT_TIME_TEST2_ID);
+ splat_test_fini(sub, SPLAT_TIME_TEST1_ID);
kfree(sub);
}
diff --git a/spl/module/splat/splat-vnode.c b/spl/module/splat/splat-vnode.c
index bffcf492ff5d..ad69cf642119 100644
--- a/spl/module/splat/splat-vnode.c
+++ b/spl/module/splat/splat-vnode.c
@@ -409,19 +409,19 @@ splat_vnode_init(void)
spin_lock_init(&sub->test_lock);
sub->desc.id = SPLAT_SUBSYSTEM_VNODE;
- SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST1_NAME, SPLAT_VNODE_TEST1_DESC,
+ splat_test_init(sub, SPLAT_VNODE_TEST1_NAME, SPLAT_VNODE_TEST1_DESC,
SPLAT_VNODE_TEST1_ID, splat_vnode_test1);
- SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST2_NAME, SPLAT_VNODE_TEST2_DESC,
+ splat_test_init(sub, SPLAT_VNODE_TEST2_NAME, SPLAT_VNODE_TEST2_DESC,
SPLAT_VNODE_TEST2_ID, splat_vnode_test2);
- SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST3_NAME, SPLAT_VNODE_TEST3_DESC,
+ splat_test_init(sub, SPLAT_VNODE_TEST3_NAME, SPLAT_VNODE_TEST3_DESC,
SPLAT_VNODE_TEST3_ID, splat_vnode_test3);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(4,1,0)
- SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST4_NAME, SPLAT_VNODE_TEST4_DESC,
+ splat_test_init(sub, SPLAT_VNODE_TEST4_NAME, SPLAT_VNODE_TEST4_DESC,
SPLAT_VNODE_TEST4_ID, splat_vnode_test4);
#endif
- SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST5_NAME, SPLAT_VNODE_TEST5_DESC,
+ splat_test_init(sub, SPLAT_VNODE_TEST5_NAME, SPLAT_VNODE_TEST5_DESC,
SPLAT_VNODE_TEST5_ID, splat_vnode_test5);
- SPLAT_TEST_INIT(sub, SPLAT_VNODE_TEST6_NAME, SPLAT_VNODE_TEST6_DESC,
+ splat_test_init(sub, SPLAT_VNODE_TEST6_NAME, SPLAT_VNODE_TEST6_DESC,
SPLAT_VNODE_TEST6_ID, splat_vnode_test6);
return sub;
@@ -432,14 +432,14 @@ splat_vnode_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
- SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST6_ID);
- SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST5_ID);
+ splat_test_fini(sub, SPLAT_VNODE_TEST6_ID);
+ splat_test_fini(sub, SPLAT_VNODE_TEST5_ID);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(4,1,0)
- SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST4_ID);
+ splat_test_fini(sub, SPLAT_VNODE_TEST4_ID);
#endif
- SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST3_ID);
- SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST2_ID);
- SPLAT_TEST_FINI(sub, SPLAT_VNODE_TEST1_ID);
+ splat_test_fini(sub, SPLAT_VNODE_TEST3_ID);
+ splat_test_fini(sub, SPLAT_VNODE_TEST2_ID);
+ splat_test_fini(sub, SPLAT_VNODE_TEST1_ID);
kfree(sub);
} /* splat_vnode_fini() */
diff --git a/spl/module/splat/splat-zlib.c b/spl/module/splat/splat-zlib.c
index eaa48369db90..dc9211838932 100644
--- a/spl/module/splat/splat-zlib.c
+++ b/spl/module/splat/splat-zlib.c
@@ -144,7 +144,7 @@ splat_zlib_init(void)
spin_lock_init(&sub->test_lock);
sub->desc.id = SPLAT_SUBSYSTEM_ZLIB;
- SPLAT_TEST_INIT(sub, SPLAT_ZLIB_TEST1_NAME, SPLAT_ZLIB_TEST1_DESC,
+ splat_test_init(sub, SPLAT_ZLIB_TEST1_NAME, SPLAT_ZLIB_TEST1_DESC,
SPLAT_ZLIB_TEST1_ID, splat_zlib_test1);
return sub;
@@ -155,7 +155,7 @@ splat_zlib_fini(splat_subsystem_t *sub)
{
ASSERT(sub);
- SPLAT_TEST_FINI(sub, SPLAT_ZLIB_TEST1_ID);
+ splat_test_fini(sub, SPLAT_ZLIB_TEST1_ID);
kfree(sub);
}
diff --git a/spl/rpm/Makefile.in b/spl/rpm/Makefile.in
index 28c3448c4f23..036f2166e322 100644
--- a/spl/rpm/Makefile.in
+++ b/spl/rpm/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/rpm/generic/Makefile.in b/spl/rpm/generic/Makefile.in
index 7de9fed86ae9..9915e0d70401 100644
--- a/spl/rpm/generic/Makefile.in
+++ b/spl/rpm/generic/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/rpm/generic/spl-dkms.spec.in b/spl/rpm/generic/spl-dkms.spec.in
index 949660ebc024..419624058463 100644
--- a/spl/rpm/generic/spl-dkms.spec.in
+++ b/spl/rpm/generic/spl-dkms.spec.in
@@ -1,5 +1,9 @@
%{?!packager: %define packager Brian Behlendorf <behlendorf1 at llnl.gov>}
+%if ! 0%{?rhel}%{?fedora}%{?mageia}%{?suse_version}
+%define not_rpm 1
+%endif
+
%define module @PACKAGE@
%define mkconf scripts/dkms.mkconf
@@ -18,7 +22,9 @@ BuildArch: noarch
Requires: dkms >= 2.2.0.2
Requires: gcc, make, perl
+%if 0%{?rhel}%{?fedora}%{?mageia}%{?suse_version}
Requires: kernel-devel
+%endif
Provides: %{module}-kmod = %{version}
%description
@@ -62,8 +68,15 @@ echo -e "support or upgrade DKMS to a more current version."
exit 1
%preun
-echo -e "Uninstall of %{module} module (version %{version}) beginning:"
-dkms remove -m %{module} -v %{version} --all --rpm_safe_upgrade
+CONFIG_H="/var/lib/dkms/%{module}/%{version}/*/*/%{module}_config.h"
+SPEC_META_ALIAS="@PACKAGE at -@VERSION at -@RELEASE@"
+DKMS_META_ALIAS=`cat $CONFIG_H 2>/dev/null |
+ awk -F'"' '/META_ALIAS/ { print $2; exit 0 }'`
+if [ "$SPEC_META_ALIAS" = "$DKMS_META_ALIAS" ]; then
+ echo -e
+ echo -e "Uninstall of %{module} module ($SPEC_META_ALIAS) beginning:"
+ dkms remove -m %{module} -v %{version} --all %{!?not_rpm:--rpm_safe_upgrade}
+fi
exit 0
%changelog
diff --git a/spl/rpm/generic/spl-kmod.spec.in b/spl/rpm/generic/spl-kmod.spec.in
index 5730ced619e1..03d131eac55f 100644
--- a/spl/rpm/generic/spl-kmod.spec.in
+++ b/spl/rpm/generic/spl-kmod.spec.in
@@ -53,8 +53,8 @@ BuildRequires: %{_bindir}/kmodtool
%endif
%endif
-# LDFLAGS are not sanitized by arch/powerpc/Makefile (unlike other arches)
-%ifarch ppc ppc64 ppc64le
+# LDFLAGS are not sanitized by arch/*/Makefile for these architectures.
+%ifarch ppc ppc64 ppc64le aarch64
%global __global_ldflags %{nil}
%endif
@@ -167,73 +167,15 @@ chmod u+x ${RPM_BUILD_ROOT}%{kmodinstdir_prefix}/*/extra/*/*/*
rm -rf $RPM_BUILD_ROOT
%changelog
-* Mon Jul 10 2017 Tony Hutter <hutter2 at llnl.gov> - 0.6.5.11-1
-- Fix RWSEM_SPINLOCK_IS_RAW check failed zfsonlinux/zfs#622
-* Mon Jun 12 2017 Tony Hutter <hutter2 at llnl.gov> - 0.6.5.10-1
-- Linux 4.12 compat: PF_FSTRANS was removed zfsonlinux/spl#614
-- Clear PF_FSTRANS over spl_filp_fallocate() zfsonlinux/splzfsonlinux/zfs#4529
-- glibc 2.25 compat: remove assert(X=Y) zfsonlinux/spl#610
-- Linux 4.11 compat: remove stub for __put_task_struct zfsonlinux/spl#608
-- Linux 4.11 compat: add linux/sched/signal.h zfsonlinux/spl#608
-- Linux 4.11 compat: vfs_getattr() takes 4 args zfsonlinux/spl#608
-- Fix powerpc build zfsonlinux/spl#607
-- Linux 4.11 compat: set_task_state() removed zfsonlinux/spl#603
-* Fri Feb 3 2017 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.5.9-1
-- Use kernel slab for vn_cache and vn_file_cache zfsonlinux/spl#599
-- Fix splat-cred.c cred usage zfsonlinux/spl#556
-- Fix splat memleak zfsonlinux/spl#590
-- Fix p0 initializer zfsonlinux/spl#576
-- Fix aarch64 type warning zfsonlinux/spl#574
-- Linux 4.8 compat: Fix RW_READ_HELD zfsonlinux/zfs#5233
-- Linux 4.9 compat: group_info changes zfsonlinux/spl#581
-- Fix crgetgroups out-of-bound and misc cred fix zfsonlinux/spl#556
-* Fri Sep 9 2016 Ned Bass <bass6 at llnl.gov> - 0.6.5.8-1
-- Fix HAVE_MUTEX_OWNER test for kernels prior to 4.6 zfsonlinux/spl#566
-- Add handling for kernel 4.7's CONFIG_TRIM_UNUSED_KSYMS zfsonlinux/spl#565
-- Linux 4.8 compat: rw_semaphore atomic_long_t count zfsonlinux/spl#563
-- Implement a proper rw_tryupgrade zfsonlinux/spl#554
-- Add rw_tryupgrade() zfsonlinux/spl#534 zfsonlinux/zfs#4388
-- Fix taskq_wait_outstanding re-evaluate tq_next_id zfsonlinux/spl#553
-- Fix race between taskq_destroy and dynamic spawning thread zfsonlinux/spl#553 zfsonlinux/spl#550
-- Use kernel provided mutex owner zfsonlinux/spl#553 zfsonlinux/spl#550
-- Add isa_defs for MIPS zfsonlinux/spl#558
-- Linux 4.7 compat: inode_lock() and friends zfsonlinux/spl#549
-- Fix: handle NULL case in spl_kmem_free_track() zfsonlinux/spl#567
-* Thu May 12 2016 Ned Bass <bass6 at llnl.gov> - 0.6.5.7-1
-- Fix PPC build failure zfsonlinux/spl#516
-* Tue Mar 22 2016 Ned Bass <bass6 at llnl.gov> - 0.6.5.6-1
-- Remove artificial architecture restrictions in packaging
-- Add support for s390[x] zfsonlinux/spl#537
-* Wed Mar 9 2016 Ned Bass <bass6 at llnl.gov> - 0.6.5.5-1
-- Linux 4.5 compatibility zfsonlinux/spl#524
-- Create working debuginfo packages on Red Hat zfsonlinux/zfs#4224
-- Allow copy-builtin to run multiple times zfsonlinux/spl#526
-- Use safer flags for in-kernel memory allocations zfsonlinux/spl#523
-- Fix potential deadlock in cv_wait() zfsonlinux/zfs#4106
-- Fix livelock in shrinker zfsonlinux/zfs#3936
-* Fri Jan 8 2016 Ned Bass <bass6 at llnl.gov> - 0.6.5.4-1
-- Build fixes on SPARC and some kernels
-- Fix taskq dynamic spawning deadlock
-- Fix builtin kernel builds
-- Fix crash due to overflow in P2ROUNDUP macro
-- Fix deadlock during direct memory reclaim
-* Tue Oct 13 2015 Ned Bass <bass6 at llnl.gov> - 0.6.5.3-1
-- Fix CPU hotplug zfsonlinux/spl#482
-- Disable dynamic taskqs by default to avoid deadlock zfsonlinux/spl#484
-* Tue Sep 29 2015 Ned Bass <bass6 at llnl.gov> - 0.6.5.2-1
-- Released 0.6.5.2-1
-- Fix PAX Patch/Grsec SLAB_USERCOPY panic zfsonlinux/zfs#3796
-- Always remove during dkms uninstall/update zfsonlinux/spl#476
-* Thu Sep 19 2015 Ned Bass <bass6 at llnl.gov> - 0.6.5.1-1
-- Released 0.6.5.1-1, no changes from spl-0.6.5
-* Thu Sep 10 2015 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.5-1
-- Released 0.6.5-1, detailed release notes are available at:
-- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.6.5
-* Wed Apr 8 2015 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.4-1
-- Released 0.6.4-1
-* Thu Jun 12 2014 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.3-1
-- Released 0.6.3-1
-* Wed Aug 21 2013 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.2-1
-- Released 0.6.2-1
-* Fri Mar 22 2013 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.1-1
-- First official stable release.
+* Wed Oct 18 2017 Tony Hutter <hutter2 at llnl.gov> - 0.7.3-1
+- Released 0.7.3-1, detailed release notes are available at:
+- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.3
+* Fri Sep 22 2017 Tony Hutter <hutter2 at llnl.gov> - 0.7.2-1
+- Released 0.7.2-1, detailed release notes are available at:
+- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.2
+* Tue Aug 8 2017 Tony Hutter <hutter2 at llnl.gov> - 0.7.1-1
+- Released 0.7.1-1, detailed release notes are available at:
+- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.1
+* Wed Jul 26 2017 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.7.0-1
+- Released 0.7.0-1, detailed release notes are available at:
+- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.0
diff --git a/spl/rpm/generic/spl.spec.in b/spl/rpm/generic/spl.spec.in
index 1968f7bbb9f6..55ec810a1c2d 100644
--- a/spl/rpm/generic/spl.spec.in
+++ b/spl/rpm/generic/spl.spec.in
@@ -28,78 +28,21 @@ make install DESTDIR=%{?buildroot}
%files
%doc AUTHORS COPYING DISCLAIMER
+%{_bindir}/*
%{_sbindir}/*
%{_mandir}/man1/*
%{_mandir}/man5/*
%changelog
-* Mon Jul 10 2017 Tony Hutter <hutter2 at llnl.gov> - 0.6.5.11-1
-- Fix RWSEM_SPINLOCK_IS_RAW check failed zfsonlinux/zfs#622
-* Mon Jun 12 2017 Tony Hutter <hutter2 at llnl.gov> - 0.6.5.10-1
-- Linux 4.12 compat: PF_FSTRANS was removed zfsonlinux/spl#614
-- Clear PF_FSTRANS over spl_filp_fallocate() zfsonlinux/splzfsonlinux/zfs#4529
-- glibc 2.25 compat: remove assert(X=Y) zfsonlinux/spl#610
-- Linux 4.11 compat: remove stub for __put_task_struct zfsonlinux/spl#608
-- Linux 4.11 compat: add linux/sched/signal.h zfsonlinux/spl#608
-- Linux 4.11 compat: vfs_getattr() takes 4 args zfsonlinux/spl#608
-- Fix powerpc build zfsonlinux/spl#607
-- Linux 4.11 compat: set_task_state() removed zfsonlinux/spl#603
-* Fri Feb 3 2017 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.5.9-1
-- Use kernel slab for vn_cache and vn_file_cache zfsonlinux/spl#599
-- Fix splat-cred.c cred usage zfsonlinux/spl#556
-- Fix splat memleak zfsonlinux/spl#590
-- Fix p0 initializer zfsonlinux/spl#576
-- Fix aarch64 type warning zfsonlinux/spl#574
-- Linux 4.8 compat: Fix RW_READ_HELD zfsonlinux/zfs#5233
-- Linux 4.9 compat: group_info changes zfsonlinux/spl#581
-- Fix crgetgroups out-of-bound and misc cred fix zfsonlinux/spl#556
-* Fri Sep 9 2016 Ned Bass <bass6 at llnl.gov> - 0.6.5.8-1
-- Fix HAVE_MUTEX_OWNER test for kernels prior to 4.6 zfsonlinux/spl#566
-- Add handling for kernel 4.7's CONFIG_TRIM_UNUSED_KSYMS zfsonlinux/spl#565
-- Linux 4.8 compat: rw_semaphore atomic_long_t count zfsonlinux/spl#563
-- Implement a proper rw_tryupgrade zfsonlinux/spl#554
-- Add rw_tryupgrade() zfsonlinux/spl#534 zfsonlinux/zfs#4388
-- Fix taskq_wait_outstanding re-evaluate tq_next_id zfsonlinux/spl#553
-- Fix race between taskq_destroy and dynamic spawning thread zfsonlinux/spl#553 zfsonlinux/spl#550
-- Use kernel provided mutex owner zfsonlinux/spl#553 zfsonlinux/spl#550
-- Add isa_defs for MIPS zfsonlinux/spl#558
-- Linux 4.7 compat: inode_lock() and friends zfsonlinux/spl#549
-- Fix: handle NULL case in spl_kmem_free_track() zfsonlinux/spl#567
-* Thu May 12 2016 Ned Bass <bass6 at llnl.gov> - 0.6.5.7-1
-- Fix PPC build failure zfsonlinux/spl#516
-* Tue Mar 22 2016 Ned Bass <bass6 at llnl.gov> - 0.6.5.6-1
-- Remove artificial architecture restrictions in packaging
-- Add support for s390[x] zfsonlinux/spl#537
-* Wed Mar 9 2016 Ned Bass <bass6 at llnl.gov> - 0.6.5.5-1
-- Linux 4.5 compatibility zfsonlinux/spl#524
-- Create working debuginfo packages on Red Hat zfsonlinux/zfs#4224
-- Allow copy-builtin to run multiple times zfsonlinux/spl#526
-- Use safer flags for in-kernel memory allocations zfsonlinux/spl#523
-- Fix potential deadlock in cv_wait() zfsonlinux/zfs#4106
-- Fix livelock in shrinker zfsonlinux/zfs#3936
-* Fri Jan 8 2016 Ned Bass <bass6 at llnl.gov> - 0.6.5.4-1
-- Build fixes on SPARC and some kernels
-- Fix taskq dynamic spawning deadlock
-- Fix builtin kernel builds
-- Fix crash due to overflow in P2ROUNDUP macro
-- Fix deadlock during direct memory reclaim
-* Tue Oct 13 2015 Ned Bass <bass6 at llnl.gov> - 0.6.5.3-1
-- Fix CPU hotplug zfsonlinux/spl#482
-- Disable dynamic taskqs by default to avoid deadlock zfsonlinux/spl#484
-* Tue Sep 29 2015 Ned Bass <bass6 at llnl.gov> - 0.6.5.2-1
-- Released 0.6.5.2-1
-- Fix PAX Patch/Grsec SLAB_USERCOPY panic zfsonlinux/zfs#3796
-- Always remove during dkms uninstall/update zfsonlinux/spl#476
-* Thu Sep 19 2015 Ned Bass <bass6 at llnl.gov> - 0.6.5.1-1
-- Released 0.6.5.1-1, no changes from spl-0.6.5
-* Thu Sep 10 2015 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.5-1
-- Released 0.6.5-1, detailed release notes are available at:
-- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.6.5
-* Wed Apr 8 2015 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.4-1
-- Released 0.6.4-1
-* Thu Jun 12 2014 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.3-1
-- Released 0.6.3-1
-* Wed Aug 21 2013 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.2-1
-- Released 0.6.2-1
-* Fri Mar 22 2013 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.1-1
-- First official stable release.
+* Wed Oct 18 2017 Tony Hutter <hutter2 at llnl.gov> - 0.7.3-1
+- Released 0.7.3-1, detailed release notes are available at:
+- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.3
+* Fri Sep 22 2017 Tony Hutter <hutter2 at llnl.gov> - 0.7.2-1
+- Released 0.7.2-1, detailed release notes are available at:
+- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.2
+* Tue Aug 8 2017 Tony Hutter <hutter2 at llnl.gov> - 0.7.1-1
+- Released 0.7.1-1, detailed release notes are available at:
+- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.1
+* Wed Jul 26 2017 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.7.0-1
+- Released 0.7.0-1, detailed release notes are available at:
+- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.0
diff --git a/spl/rpm/redhat/Makefile.in b/spl/rpm/redhat/Makefile.in
index 96fb468f875e..d9cc5a0d679e 100644
--- a/spl/rpm/redhat/Makefile.in
+++ b/spl/rpm/redhat/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/rpm/redhat/spl-dkms.spec.in b/spl/rpm/redhat/spl-dkms.spec.in
index 949660ebc024..419624058463 100644
--- a/spl/rpm/redhat/spl-dkms.spec.in
+++ b/spl/rpm/redhat/spl-dkms.spec.in
@@ -1,5 +1,9 @@
%{?!packager: %define packager Brian Behlendorf <behlendorf1 at llnl.gov>}
+%if ! 0%{?rhel}%{?fedora}%{?mageia}%{?suse_version}
+%define not_rpm 1
+%endif
+
%define module @PACKAGE@
%define mkconf scripts/dkms.mkconf
@@ -18,7 +22,9 @@ BuildArch: noarch
Requires: dkms >= 2.2.0.2
Requires: gcc, make, perl
+%if 0%{?rhel}%{?fedora}%{?mageia}%{?suse_version}
Requires: kernel-devel
+%endif
Provides: %{module}-kmod = %{version}
%description
@@ -62,8 +68,15 @@ echo -e "support or upgrade DKMS to a more current version."
exit 1
%preun
-echo -e "Uninstall of %{module} module (version %{version}) beginning:"
-dkms remove -m %{module} -v %{version} --all --rpm_safe_upgrade
+CONFIG_H="/var/lib/dkms/%{module}/%{version}/*/*/%{module}_config.h"
+SPEC_META_ALIAS="@PACKAGE@-@VERSION@-@RELEASE@"
+DKMS_META_ALIAS=`cat $CONFIG_H 2>/dev/null |
+ awk -F'"' '/META_ALIAS/ { print $2; exit 0 }'`
+if [ "$SPEC_META_ALIAS" = "$DKMS_META_ALIAS" ]; then
+ echo -e
+ echo -e "Uninstall of %{module} module ($SPEC_META_ALIAS) beginning:"
+ dkms remove -m %{module} -v %{version} --all %{!?not_rpm:--rpm_safe_upgrade}
+fi
exit 0
%changelog
diff --git a/spl/rpm/redhat/spl-kmod.spec.in b/spl/rpm/redhat/spl-kmod.spec.in
index cab72a7c78da..4e2a9f955de5 100644
--- a/spl/rpm/redhat/spl-kmod.spec.in
+++ b/spl/rpm/redhat/spl-kmod.spec.in
@@ -22,8 +22,8 @@ BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
Requires: @PACKAGE@ = %{version}\n\
Conflicts: @PACKAGE@-dkms\n\n" > %{_sourcedir}/kmod-preamble)
-# LDFLAGS are not sanitized by arch/powerpc/Makefile (unlike other arches)
-%ifarch ppc ppc64 ppc64le
+# LDFLAGS are not sanitized by arch/*/Makefile for these architectures.
+%ifarch ppc ppc64 ppc64le aarch64
%global __global_ldflags %{nil}
%endif
diff --git a/spl/rpm/redhat/spl.spec.in b/spl/rpm/redhat/spl.spec.in
index 1968f7bbb9f6..55ec810a1c2d 100644
--- a/spl/rpm/redhat/spl.spec.in
+++ b/spl/rpm/redhat/spl.spec.in
@@ -28,78 +28,21 @@ make install DESTDIR=%{?buildroot}
%files
%doc AUTHORS COPYING DISCLAIMER
+%{_bindir}/*
%{_sbindir}/*
%{_mandir}/man1/*
%{_mandir}/man5/*
%changelog
-* Mon Jul 10 2017 Tony Hutter <hutter2 at llnl.gov> - 0.6.5.11-1
-- Fix RWSEM_SPINLOCK_IS_RAW check failed zfsonlinux/zfs#622
-* Mon Jun 12 2017 Tony Hutter <hutter2 at llnl.gov> - 0.6.5.10-1
-- Linux 4.12 compat: PF_FSTRANS was removed zfsonlinux/spl#614
-- Clear PF_FSTRANS over spl_filp_fallocate() zfsonlinux/splzfsonlinux/zfs#4529
-- glibc 2.25 compat: remove assert(X=Y) zfsonlinux/spl#610
-- Linux 4.11 compat: remove stub for __put_task_struct zfsonlinux/spl#608
-- Linux 4.11 compat: add linux/sched/signal.h zfsonlinux/spl#608
-- Linux 4.11 compat: vfs_getattr() takes 4 args zfsonlinux/spl#608
-- Fix powerpc build zfsonlinux/spl#607
-- Linux 4.11 compat: set_task_state() removed zfsonlinux/spl#603
-* Fri Feb 3 2017 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.5.9-1
-- Use kernel slab for vn_cache and vn_file_cache zfsonlinux/spl#599
-- Fix splat-cred.c cred usage zfsonlinux/spl#556
-- Fix splat memleak zfsonlinux/spl#590
-- Fix p0 initializer zfsonlinux/spl#576
-- Fix aarch64 type warning zfsonlinux/spl#574
-- Linux 4.8 compat: Fix RW_READ_HELD zfsonlinux/zfs#5233
-- Linux 4.9 compat: group_info changes zfsonlinux/spl#581
-- Fix crgetgroups out-of-bound and misc cred fix zfsonlinux/spl#556
-* Fri Sep 9 2016 Ned Bass <bass6 at llnl.gov> - 0.6.5.8-1
-- Fix HAVE_MUTEX_OWNER test for kernels prior to 4.6 zfsonlinux/spl#566
-- Add handling for kernel 4.7's CONFIG_TRIM_UNUSED_KSYMS zfsonlinux/spl#565
-- Linux 4.8 compat: rw_semaphore atomic_long_t count zfsonlinux/spl#563
-- Implement a proper rw_tryupgrade zfsonlinux/spl#554
-- Add rw_tryupgrade() zfsonlinux/spl#534 zfsonlinux/zfs#4388
-- Fix taskq_wait_outstanding re-evaluate tq_next_id zfsonlinux/spl#553
-- Fix race between taskq_destroy and dynamic spawning thread zfsonlinux/spl#553 zfsonlinux/spl#550
-- Use kernel provided mutex owner zfsonlinux/spl#553 zfsonlinux/spl#550
-- Add isa_defs for MIPS zfsonlinux/spl#558
-- Linux 4.7 compat: inode_lock() and friends zfsonlinux/spl#549
-- Fix: handle NULL case in spl_kmem_free_track() zfsonlinux/spl#567
-* Thu May 12 2016 Ned Bass <bass6 at llnl.gov> - 0.6.5.7-1
-- Fix PPC build failure zfsonlinux/spl#516
-* Tue Mar 22 2016 Ned Bass <bass6 at llnl.gov> - 0.6.5.6-1
-- Remove artificial architecture restrictions in packaging
-- Add support for s390[x] zfsonlinux/spl#537
-* Wed Mar 9 2016 Ned Bass <bass6 at llnl.gov> - 0.6.5.5-1
-- Linux 4.5 compatibility zfsonlinux/spl#524
-- Create working debuginfo packages on Red Hat zfsonlinux/zfs#4224
-- Allow copy-builtin to run multiple times zfsonlinux/spl#526
-- Use safer flags for in-kernel memory allocations zfsonlinux/spl#523
-- Fix potential deadlock in cv_wait() zfsonlinux/zfs#4106
-- Fix livelock in shrinker zfsonlinux/zfs#3936
-* Fri Jan 8 2016 Ned Bass <bass6 at llnl.gov> - 0.6.5.4-1
-- Build fixes on SPARC and some kernels
-- Fix taskq dynamic spawning deadlock
-- Fix builtin kernel builds
-- Fix crash due to overflow in P2ROUNDUP macro
-- Fix deadlock during direct memory reclaim
-* Tue Oct 13 2015 Ned Bass <bass6 at llnl.gov> - 0.6.5.3-1
-- Fix CPU hotplug zfsonlinux/spl#482
-- Disable dynamic taskqs by default to avoid deadlock zfsonlinux/spl#484
-* Tue Sep 29 2015 Ned Bass <bass6 at llnl.gov> - 0.6.5.2-1
-- Released 0.6.5.2-1
-- Fix PAX Patch/Grsec SLAB_USERCOPY panic zfsonlinux/zfs#3796
-- Always remove during dkms uninstall/update zfsonlinux/spl#476
-* Thu Sep 19 2015 Ned Bass <bass6 at llnl.gov> - 0.6.5.1-1
-- Released 0.6.5.1-1, no changes from spl-0.6.5
-* Thu Sep 10 2015 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.5-1
-- Released 0.6.5-1, detailed release notes are available at:
-- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.6.5
-* Wed Apr 8 2015 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.4-1
-- Released 0.6.4-1
-* Thu Jun 12 2014 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.3-1
-- Released 0.6.3-1
-* Wed Aug 21 2013 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.2-1
-- Released 0.6.2-1
-* Fri Mar 22 2013 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.6.1-1
-- First official stable release.
+* Wed Oct 18 2017 Tony Hutter <hutter2 at llnl.gov> - 0.7.3-1
+- Released 0.7.3-1, detailed release notes are available at:
+- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.3
+* Fri Sep 22 2017 Tony Hutter <hutter2 at llnl.gov> - 0.7.2-1
+- Released 0.7.2-1, detailed release notes are available at:
+- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.2
+* Tue Aug 8 2017 Tony Hutter <hutter2 at llnl.gov> - 0.7.1-1
+- Released 0.7.1-1, detailed release notes are available at:
+- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.1
+* Wed Jul 26 2017 Brian Behlendorf <behlendorf1 at llnl.gov> - 0.7.0-1
+- Released 0.7.0-1, detailed release notes are available at:
+- https://github.com/zfsonlinux/zfs/releases/tag/zfs-0.7.0
diff --git a/spl/scripts/Makefile.in b/spl/scripts/Makefile.in
index 7f13dcce7252..ba3398407e49 100644
--- a/spl/scripts/Makefile.in
+++ b/spl/scripts/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
diff --git a/spl/scripts/check.sh b/spl/scripts/check.sh
index fc97cec2310c..5f5cb18e219b 100755
--- a/spl/scripts/check.sh
+++ b/spl/scripts/check.sh
@@ -28,7 +28,7 @@
prog=check.sh
spl_module=../module/spl/spl.ko
splat_module=../module/splat/splat.ko
-splat_cmd=../cmd/splat
+splat_cmd=../cmd/splat/splat
verbose=
die() {
diff --git a/spl/spl_config.h.in b/spl/spl_config.h.in
index 52b110fdba88..6f2249d41130 100644
--- a/spl/spl_config.h.in
+++ b/spl/spl_config.h.in
@@ -78,9 +78,18 @@
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
+/* kernel_read() take loff_t pointer */
+#undef HAVE_KERNEL_READ_PPOS
+
+/* kernel_write() take loff_t pointer */
+#undef HAVE_KERNEL_WRITE_PPOS
+
/* struct kmem_cache has allocflags */
#undef HAVE_KMEM_CACHE_ALLOCFLAGS
+/* kmem_cache_create_usercopy() exists */
+#undef HAVE_KMEM_CACHE_CREATE_USERCOPY
+
/* struct kmem_cache has gfpflags */
#undef HAVE_KMEM_CACHE_GFPFLAGS
@@ -90,9 +99,6 @@
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
-/* yes */
-#undef HAVE_MUTEX_OWNER
-
/* yes */
#undef HAVE_PDE_DATA
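For reference, the two kernel_read()/kernel_write() defines added above track the Linux 4.14 change that moved both helpers to a buffer-first signature with the position passed as a loff_t pointer. A minimal sketch of how such a define is consumed by a compat wrapper; the names are illustrative, and the legacy branch would also need to advance *pos, which the real SPL code handles:

    static inline ssize_t
    spl_kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
    {
    #ifdef HAVE_KERNEL_READ_PPOS
            /* 4.14+: buffer first, position passed by reference */
            return (kernel_read(file, buf, count, pos));
    #else
            /* older kernels: position by value, buffer second */
            return (kernel_read(file, *pos, buf, count));
    #endif
    }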
diff --git a/zfs/META b/zfs/META
index f0a20aeef7f0..4dcb0d6dcba1 100644
--- a/zfs/META
+++ b/zfs/META
@@ -1,8 +1,8 @@
Meta: 1
Name: zfs
Branch: 1.0
-Version: 0.6.5.11
-Release: 1ubuntu3
+Version: 0.7.3
+Release: 1ubuntu1
Release-Tags: relext
License: CDDL
Author: OpenZFS on Linux
diff --git a/zfs/Makefile.am b/zfs/Makefile.am
index f69feb773b5d..86d920e73bb6 100644
--- a/zfs/Makefile.am
+++ b/zfs/Makefile.am
@@ -39,27 +39,46 @@ dist-hook:
sed -i 's/Release:[[:print:]]*/Release: $(RELEASE)/' \
$(distdir)/META
-checkstyle: cstyle shellcheck
+checkstyle: cstyle shellcheck flake8 commitcheck
+
+commitcheck:
+ @if git rev-parse --git-dir > /dev/null 2>&1; then \
+ scripts/commitcheck.sh; \
+ fi
cstyle:
@find ${top_srcdir} -name '*.[hc]' ! -name 'zfs_config.*' \
- ! -name '*.mod.c' -type f -exec scripts/cstyle.pl {} \+
+ ! -name '*.mod.c' -type f -exec scripts/cstyle.pl -cpP {} \+
shellcheck:
@if type shellcheck > /dev/null 2>&1; then \
- (find ${top_srcdir} -type f -name '*.sh.in' -o -type f \
- -name '*.sh'; find etc/init.d/zfs*.in -type f) | \
- grep -v 'zfs-script-config' | \
- while read file; do \
- shellcheck --format gcc "$$file"; \
- done; \
- fi
+ shellcheck --exclude=SC1090 --format=gcc scripts/paxcheck.sh \
+ scripts/zloop.sh \
+ scripts/zfs-tests.sh \
+ scripts/zfs.sh \
+ scripts/commitcheck.sh \
+ $$(find cmd/zed/zed.d/*.sh -type f) \
+ $$(find cmd/zpool/zpool.d/* -executable); \
+ fi
-lint: cppcheck
+lint: cppcheck paxcheck
cppcheck:
@if type cppcheck > /dev/null 2>&1; then \
- cppcheck --quiet --force ${top_srcdir}; \
+ cppcheck --quiet --force --error-exitcode=2 \
+ --suppressions-list=.github/suppressions.txt \
+ -UHAVE_SSE2 -UHAVE_AVX512F \
+ ${top_srcdir}; \
+ fi
+
+paxcheck:
+ @if type scanelf > /dev/null 2>&1; then \
+ scripts/paxcheck.sh ${top_srcdir}; \
+ fi
+
+flake8:
+ @if type flake8 > /dev/null 2>&1; then \
+ flake8 ${top_srcdir}; \
fi
ctags:
@@ -73,5 +92,6 @@ etags:
tags: ctags etags
pkg: @DEFAULT_PACKAGE@
+pkg-dkms: @DEFAULT_PACKAGE@-dkms
pkg-kmod: @DEFAULT_PACKAGE@-kmod
pkg-utils: @DEFAULT_PACKAGE@-utils
diff --git a/zfs/Makefile.in b/zfs/Makefile.in
index 0793205bb13e..d3eb66562d3f 100644
--- a/zfs/Makefile.in
+++ b/zfs/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -99,7 +99,8 @@ host_triplet = @host@
target_triplet = @target@
subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/config/always-arch.m4 \
+ $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/always-no-unused-but-set-variable.m4 \
$(top_srcdir)/config/dkms.m4 \
$(top_srcdir)/config/kernel-acl.m4 \
@@ -115,6 +116,8 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-bio-op.m4 \
$(top_srcdir)/config/kernel-bio-rw-barrier.m4 \
$(top_srcdir)/config/kernel-bio-rw-discard.m4 \
+ $(top_srcdir)/config/kernel-bio_set_dev.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-bdi.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
@@ -122,7 +125,6 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \
$(top_srcdir)/config/kernel-blkdev-get.m4 \
$(top_srcdir)/config/kernel-block-device-operations-release-void.m4 \
- $(top_srcdir)/config/kernel-check-disk-size-change.m4 \
$(top_srcdir)/config/kernel-clear-inode.m4 \
$(top_srcdir)/config/kernel-commit-metadata.m4 \
$(top_srcdir)/config/kernel-create-nameidata.m4 \
@@ -139,9 +141,11 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-encode-fh-inode.m4 \
$(top_srcdir)/config/kernel-evict-inode.m4 \
$(top_srcdir)/config/kernel-fallocate.m4 \
+ $(top_srcdir)/config/kernel-file-dentry.m4 \
$(top_srcdir)/config/kernel-file-inode.m4 \
$(top_srcdir)/config/kernel-fmode-t.m4 \
$(top_srcdir)/config/kernel-follow-down-one.m4 \
+ $(top_srcdir)/config/kernel-fpu.m4 \
$(top_srcdir)/config/kernel-fsync.m4 \
$(top_srcdir)/config/kernel-generic_io_acct.m4 \
$(top_srcdir)/config/kernel-generic_readlink.m4 \
@@ -149,17 +153,20 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-get-gendisk.m4 \
$(top_srcdir)/config/kernel-get-link.m4 \
$(top_srcdir)/config/kernel-inode-getattr.m4 \
+ $(top_srcdir)/config/kernel-inode-set-flags.m4 \
$(top_srcdir)/config/kernel-insert-inode-locked.m4 \
$(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \
$(top_srcdir)/config/kernel-is_owner_or_cap.m4 \
$(top_srcdir)/config/kernel-kmap-atomic-args.m4 \
- $(top_srcdir)/config/kernel-kobj-name-len.m4 \
+ $(top_srcdir)/config/kernel-kuid-helpers.m4 \
$(top_srcdir)/config/kernel-lookup-bdev.m4 \
$(top_srcdir)/config/kernel-lookup-nameidata.m4 \
$(top_srcdir)/config/kernel-lseek-execute.m4 \
$(top_srcdir)/config/kernel-mk-request-fn.m4 \
$(top_srcdir)/config/kernel-mkdir-umode-t.m4 \
+ $(top_srcdir)/config/kernel-mod-param.m4 \
$(top_srcdir)/config/kernel-mount-nodev.m4 \
+ $(top_srcdir)/config/kernel-objtool.m4 \
$(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \
$(top_srcdir)/config/kernel-put-link.m4 \
$(top_srcdir)/config/kernel-rename.m4 \
@@ -170,10 +177,13 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-show-options.m4 \
$(top_srcdir)/config/kernel-shrink.m4 \
$(top_srcdir)/config/kernel-submit_bio.m4 \
+ $(top_srcdir)/config/kernel-super-userns.m4 \
+ $(top_srcdir)/config/kernel-tmpfile.m4 \
$(top_srcdir)/config/kernel-truncate-range.m4 \
$(top_srcdir)/config/kernel-truncate-setsize.m4 \
$(top_srcdir)/config/kernel-vfs-iterate.m4 \
$(top_srcdir)/config/kernel-vfs-rw-iterate.m4 \
+ $(top_srcdir)/config/kernel-vm_node_stat.m4 \
$(top_srcdir)/config/kernel-xattr-handler.m4 \
$(top_srcdir)/config/kernel.m4 $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/ltoptions.m4 \
@@ -181,10 +191,13 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/ltversion.m4 \
$(top_srcdir)/config/lt~obsolete.m4 \
$(top_srcdir)/config/mount-helper.m4 \
- $(top_srcdir)/config/user-arch.m4 \
+ $(top_srcdir)/config/toolchain-simd.m4 \
$(top_srcdir)/config/user-dracut.m4 \
$(top_srcdir)/config/user-frame-larger-than.m4 \
+ $(top_srcdir)/config/user-libattr.m4 \
$(top_srcdir)/config/user-libblkid.m4 \
+ $(top_srcdir)/config/user-libtirpc.m4 \
+ $(top_srcdir)/config/user-libudev.m4 \
$(top_srcdir)/config/user-libuuid.m4 \
$(top_srcdir)/config/user-makedev.m4 \
$(top_srcdir)/config/user-no-format-truncation.m4 \
@@ -207,7 +220,7 @@ CONFIG_HEADER = zfs_config.h
CONFIG_CLEAN_FILES = module/Makefile module/avl/Makefile \
module/nvpair/Makefile module/unicode/Makefile \
module/zcommon/Makefile module/zfs/Makefile \
- module/zpios/Makefile zfs.release
+ module/zpios/Makefile module/icp/Makefile zfs.release
CONFIG_CLEAN_VPATH_FILES =
AM_V_P = $(am__v_P_@AM_V@)
am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
@@ -304,6 +317,7 @@ am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/config/deb.am \
$(top_srcdir)/config/install-sh $(top_srcdir)/config/ltmain.sh \
$(top_srcdir)/config/missing $(top_srcdir)/module/Makefile.in \
$(top_srcdir)/module/avl/Makefile.in \
+ $(top_srcdir)/module/icp/Makefile.in \
$(top_srcdir)/module/nvpair/Makefile.in \
$(top_srcdir)/module/unicode/Makefile.in \
$(top_srcdir)/module/zcommon/Makefile.in \
@@ -373,7 +387,6 @@ CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEBUG_DMU_TX = @DEBUG_DMU_TX@
DEBUG_STACKFLAGS = @DEBUG_STACKFLAGS@
DEBUG_ZFS = @DEBUG_ZFS@
DEFAULT_INITCONF_DIR = @DEFAULT_INITCONF_DIR@
@@ -412,10 +425,14 @@ KERNELCPPFLAGS = @KERNELCPPFLAGS@
KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@
LD = @LD@
LDFLAGS = @LDFLAGS@
+LIBATTR = @LIBATTR@
LIBBLKID = @LIBBLKID@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
+LIBTIRPC = @LIBTIRPC@
+LIBTIRPC_CFLAGS = @LIBTIRPC_CFLAGS@
LIBTOOL = @LIBTOOL@
+LIBUDEV = @LIBUDEV@
LIBUUID = @LIBUUID@
LINUX = @LINUX@
LINUX_OBJ = @LINUX_OBJ@
@@ -446,8 +463,12 @@ PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
+QAT_OBJ = @QAT_OBJ@
+QAT_SRC = @QAT_SRC@
+QAT_SYMBOLS = @QAT_SYMBOLS@
RANLIB = @RANLIB@
RELEASE = @RELEASE@
+RM = @RM@
RPM = @RPM@
RPMBUILD = @RPMBUILD@
RPMBUILD_VERSION = @RPMBUILD_VERSION@
@@ -487,6 +508,7 @@ ZFS_META_RELEASE = @ZFS_META_RELEASE@
ZFS_META_VERSION = @ZFS_META_VERSION@
ZFS_MODULE_LOAD = @ZFS_MODULE_LOAD@
ZLIB = @ZLIB@
+ZONENAME = @ZONENAME@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
@@ -628,6 +650,8 @@ module/zfs/Makefile: $(top_builddir)/config.status $(top_srcdir)/module/zfs/Make
cd $(top_builddir) && $(SHELL) ./config.status $@
module/zpios/Makefile: $(top_builddir)/config.status $(top_srcdir)/module/zpios/Makefile.in
cd $(top_builddir) && $(SHELL) ./config.status $@
+module/icp/Makefile: $(top_builddir)/config.status $(top_srcdir)/module/icp/Makefile.in
+ cd $(top_builddir) && $(SHELL) ./config.status $@
zfs.release: $(top_builddir)/config.status $(srcdir)/zfs.release.in
cd $(top_builddir) && $(SHELL) ./config.status $@
@@ -854,7 +878,7 @@ distdir: $(DISTFILES)
! -type d ! -perm -444 -exec $(install_sh) -c -m a+r {} {} \; \
|| chmod -R a+r "$(distdir)"
dist-gzip: distdir
- tardir=$(distdir) && $(am__tar) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).tar.gz
+ tardir=$(distdir) && $(am__tar) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).tar.gz
$(am__post_remove_distdir)
dist-bzip2: distdir
@@ -880,7 +904,7 @@ dist-shar: distdir
@echo WARNING: "Support for shar distribution archives is" \
"deprecated." >&2
@echo WARNING: "It will be removed altogether in Automake 2.0" >&2
- shar $(distdir) | GZIP=$(GZIP_ENV) gzip -c >$(distdir).shar.gz
+ shar $(distdir) | eval GZIP= gzip $(GZIP_ENV) -c >$(distdir).shar.gz
$(am__post_remove_distdir)
dist-zip: distdir
@@ -898,7 +922,7 @@ dist dist-all:
distcheck: dist
case '$(DIST_ARCHIVES)' in \
*.tar.gz*) \
- GZIP=$(GZIP_ENV) gzip -dc $(distdir).tar.gz | $(am__untar) ;;\
+ eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).tar.gz | $(am__untar) ;;\
*.tar.bz2*) \
bzip2 -dc $(distdir).tar.bz2 | $(am__untar) ;;\
*.tar.lz*) \
@@ -908,7 +932,7 @@ distcheck: dist
*.tar.Z*) \
uncompress -c $(distdir).tar.Z | $(am__untar) ;;\
*.shar.gz*) \
- GZIP=$(GZIP_ENV) gzip -dc $(distdir).shar.gz | unshar ;;\
+ eval GZIP= gzip $(GZIP_ENV) -dc $(distdir).shar.gz | unshar ;;\
*.zip*) \
unzip $(distdir).zip ;;\
esac
@@ -1205,33 +1229,50 @@ deb-local:
fi)
deb-kmod: deb-local rpm-kmod
-@CONFIG_KERNEL_TRUE@ name=${PACKAGE}; \
-@CONFIG_KERNEL_TRUE@ version=${VERSION}-${RELEASE}; \
-@CONFIG_KERNEL_TRUE@ arch=`$(RPM) -qp $${name}-kmod-$${version}.src.rpm --qf %{arch} | tail -1`; \
-@CONFIG_KERNEL_TRUE@ pkg1=kmod-$${name}*$${version}.$${arch}.rpm; \
-@CONFIG_KERNEL_TRUE@ fakeroot $(ALIEN) --bump=0 --scripts --to-deb $$pkg1; \
-@CONFIG_KERNEL_TRUE@ $(RM) $$pkg1
+ name=${PACKAGE}; \
+ version=${VERSION}-${RELEASE}; \
+ arch=`$(RPM) -qp $${name}-kmod-$${version}.src.rpm --qf %{arch} | tail -1`; \
+ pkg1=kmod-$${name}*$${version}.$${arch}.rpm; \
+ fakeroot $(ALIEN) --bump=0 --scripts --to-deb $$pkg1; \
+ $(RM) $$pkg1
+
+deb-dkms: deb-local rpm-dkms
+ name=${PACKAGE}; \
+ version=${VERSION}-${RELEASE}; \
+ arch=`$(RPM) -qp $${name}-dkms-$${version}.src.rpm --qf %{arch} | tail -1`; \
+ pkg1=$${name}-dkms-$${version}.$${arch}.rpm; \
+ fakeroot $(ALIEN) --bump=0 --scripts --to-deb $$pkg1; \
+ $(RM) $$pkg1
deb-utils: deb-local rpm-utils
-@CONFIG_USER_TRUE@ name=${PACKAGE}; \
-@CONFIG_USER_TRUE@ version=${VERSION}-${RELEASE}; \
-@CONFIG_USER_TRUE@ arch=`$(RPM) -qp $${name}-$${version}.src.rpm --qf %{arch} | tail -1`; \
-@CONFIG_USER_TRUE@ pkg1=$${name}-$${version}.$${arch}.rpm; \
-@CONFIG_USER_TRUE@ pkg2=libnvpair1-$${version}.$${arch}.rpm; \
-@CONFIG_USER_TRUE@ pkg3=libuutil1-$${version}.$${arch}.rpm; \
-@CONFIG_USER_TRUE@ pkg4=libzfs2-$${version}.$${arch}.rpm; \
-@CONFIG_USER_TRUE@ pkg5=libzpool2-$${version}.$${arch}.rpm; \
-@CONFIG_USER_TRUE@ pkg6=libzfs2-devel-$${version}.$${arch}.rpm; \
-@CONFIG_USER_TRUE@ pkg7=$${name}-test-$${version}.$${arch}.rpm; \
-@CONFIG_USER_TRUE@ pkg8=$${name}-dracut-$${version}.$${arch}.rpm; \
-@CONFIG_USER_TRUE@ pkg9=$${name}-initramfs-$${version}.$${arch}.rpm; \
-@CONFIG_USER_TRUE@ fakeroot $(ALIEN) --bump=0 --scripts --to-deb \
-@CONFIG_USER_TRUE@ $$pkg1 $$pkg2 $$pkg3 $$pkg4 $$pkg5 $$pkg6 $$pkg7 \
-@CONFIG_USER_TRUE@ $$pkg8 $$pkg9;
-@CONFIG_USER_TRUE@ $(RM) $$pkg1 $$pkg2 $$pkg3 $$pkg4 $$pkg5 $$pkg6 $$pkg7 \
-@CONFIG_USER_TRUE@ $$pkg8 $$pkg9;
-
-deb: deb-kmod deb-utils
+ name=${PACKAGE}; \
+ version=${VERSION}-${RELEASE}; \
+ arch=`$(RPM) -qp $${name}-$${version}.src.rpm --qf %{arch} | tail -1`; \
+ pkg1=$${name}-$${version}.$${arch}.rpm; \
+ pkg2=libnvpair1-$${version}.$${arch}.rpm; \
+ pkg3=libuutil1-$${version}.$${arch}.rpm; \
+ pkg4=libzfs2-$${version}.$${arch}.rpm; \
+ pkg5=libzpool2-$${version}.$${arch}.rpm; \
+ pkg6=libzfs2-devel-$${version}.$${arch}.rpm; \
+ pkg7=$${name}-test-$${version}.$${arch}.rpm; \
+ pkg8=$${name}-dracut-$${version}.$${arch}.rpm; \
+ pkg9=$${name}-initramfs-$${version}.$${arch}.rpm; \
+ path_prepend=`mktemp -d /tmp/intercept.XXX`; \
+ echo "#$(SHELL)" > $${path_prepend}/dh_shlibdeps; \
+ echo "`which dh_shlibdeps` -- \
+ -xlibuutil1linux -xlibnvpair1linux -xlibzfs2linux -xlibzpool2linux" \
+ >> $${path_prepend}/dh_shlibdeps; \
+ chmod +x $${path_prepend}/dh_shlibdeps; \
+ env PATH=$${path_prepend}:$${PATH} \
+ fakeroot $(ALIEN) --bump=0 --scripts --to-deb \
+ $$pkg1 $$pkg2 $$pkg3 $$pkg4 $$pkg5 $$pkg6 $$pkg7 \
+ $$pkg8 $$pkg9; \
+ $(RM) $${path_prepend}/dh_shlibdeps; \
+ rmdir $${path_prepend}; \
+ $(RM) $$pkg1 $$pkg2 $$pkg3 $$pkg4 $$pkg5 $$pkg6 $$pkg7 \
+ $$pkg8 $$pkg9;
+
+deb: deb-kmod deb-dkms deb-utils
tgz-local:
@(if test "${HAVE_ALIEN}" = "no"; then \
echo -e "\n" \
@@ -1277,27 +1318,46 @@ dist-hook:
sed -i 's/Release:[[:print:]]*/Release: $(RELEASE)/' \
$(distdir)/META
-checkstyle: cstyle shellcheck
+checkstyle: cstyle shellcheck flake8 commitcheck
+
+commitcheck:
+ @if git rev-parse --git-dir > /dev/null 2>&1; then \
+ scripts/commitcheck.sh; \
+ fi
cstyle:
@find ${top_srcdir} -name '*.[hc]' ! -name 'zfs_config.*' \
- ! -name '*.mod.c' -type f -exec scripts/cstyle.pl {} \+
+ ! -name '*.mod.c' -type f -exec scripts/cstyle.pl -cpP {} \+
shellcheck:
@if type shellcheck > /dev/null 2>&1; then \
- (find ${top_srcdir} -type f -name '*.sh.in' -o -type f \
- -name '*.sh'; find etc/init.d/zfs*.in -type f) | \
- grep -v 'zfs-script-config' | \
- while read file; do \
- shellcheck --format gcc "$$file"; \
- done; \
- fi
+ shellcheck --exclude=SC1090 --format=gcc scripts/paxcheck.sh \
+ scripts/zloop.sh \
+ scripts/zfs-tests.sh \
+ scripts/zfs.sh \
+ scripts/commitcheck.sh \
+ $$(find cmd/zed/zed.d/*.sh -type f) \
+ $$(find cmd/zpool/zpool.d/* -executable); \
+ fi
-lint: cppcheck
+lint: cppcheck paxcheck
cppcheck:
@if type cppcheck > /dev/null 2>&1; then \
- cppcheck --quiet --force ${top_srcdir}; \
+ cppcheck --quiet --force --error-exitcode=2 \
+ --suppressions-list=.github/suppressions.txt \
+ -UHAVE_SSE2 -UHAVE_AVX512F \
+ ${top_srcdir}; \
+ fi
+
+paxcheck:
+ @if type scanelf > /dev/null 2>&1; then \
+ scripts/paxcheck.sh ${top_srcdir}; \
+ fi
+
+flake8:
+ @if type flake8 > /dev/null 2>&1; then \
+ flake8 ${top_srcdir}; \
fi
ctags:
@@ -1311,6 +1371,7 @@ etags:
tags: ctags etags
pkg: @DEFAULT_PACKAGE@
+pkg-dkms: @DEFAULT_PACKAGE@-dkms
pkg-kmod: @DEFAULT_PACKAGE@-kmod
pkg-utils: @DEFAULT_PACKAGE@-utils
diff --git a/zfs/README.markdown b/zfs/README.markdown
index 3fc88a0e5f21..fd2ca8c86651 100644
--- a/zfs/README.markdown
+++ b/zfs/README.markdown
@@ -1,10 +1,19 @@
-Native ZFS for Linux!
-
+<p align="center"><img src="http://zfsonlinux.org/images/zfs-linux.png"/></p>
ZFS is an advanced file system and volume manager which was originally
developed for Solaris and is now maintained by the Illumos community.
ZFS on Linux, which is also known as ZoL, is currently feature complete. It
-includes fully functional and stable SPA, DMU, ZVOL, and ZPL layers.
+includes fully functional and stable SPA, DMU, ZVOL, and ZPL layers. And it's native!
+
+# Official Resources
+ * [Site](http://zfsonlinux.org)
+ * [Wiki](https://github.com/zfsonlinux/zfs/wiki)
+ * [Mailing lists](https://github.com/zfsonlinux/zfs/wiki/Mailing-Lists)
+ * [OpenZFS site](http://open-zfs.org/)
+# Installation
Full documentation for installing ZoL on your favorite Linux distribution can
-be found at: <http://zfsonlinux.org>
+be found at [our site](http://zfsonlinux.org/).
+
+# Contribute & Develop
+We have a separate document with [contribution guidelines](./.github/CONTRIBUTING.md).
\ No newline at end of file
diff --git a/zfs/aclocal.m4 b/zfs/aclocal.m4
index 4be369660e9c..2694a61e14a2 100644
--- a/zfs/aclocal.m4
+++ b/zfs/aclocal.m4
@@ -1,6 +1,6 @@
-# generated automatically by aclocal 1.15 -*- Autoconf -*-
+# generated automatically by aclocal 1.15.1 -*- Autoconf -*-
-# Copyright (C) 1996-2014 Free Software Foundation, Inc.
+# Copyright (C) 1996-2017 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -20,7 +20,7 @@ You have another version of autoconf. It may work, but is not guaranteed to.
If you have problems, you may need to regenerate the build system entirely.
To do so, use the procedure documented by the package, typically 'autoreconf'.])])
-# Copyright (C) 2002-2014 Free Software Foundation, Inc.
+# Copyright (C) 2002-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -35,7 +35,7 @@ AC_DEFUN([AM_AUTOMAKE_VERSION],
[am__api_version='1.15'
dnl Some users find AM_AUTOMAKE_VERSION and mistake it for a way to
dnl require some minimum version. Point them to the right macro.
-m4_if([$1], [1.15], [],
+m4_if([$1], [1.15.1], [],
[AC_FATAL([Do not call $0, use AM_INIT_AUTOMAKE([$1]).])])dnl
])
@@ -51,14 +51,14 @@ m4_define([_AM_AUTOCONF_VERSION], [])
# Call AM_AUTOMAKE_VERSION and AM_AUTOMAKE_VERSION so they can be traced.
# This function is AC_REQUIREd by AM_INIT_AUTOMAKE.
AC_DEFUN([AM_SET_CURRENT_AUTOMAKE_VERSION],
-[AM_AUTOMAKE_VERSION([1.15])dnl
+[AM_AUTOMAKE_VERSION([1.15.1])dnl
m4_ifndef([AC_AUTOCONF_VERSION],
[m4_copy([m4_PACKAGE_VERSION], [AC_AUTOCONF_VERSION])])dnl
_AM_AUTOCONF_VERSION(m4_defn([AC_AUTOCONF_VERSION]))])
# Figure out how to run the assembler. -*- Autoconf -*-
-# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -78,7 +78,7 @@ _AM_IF_OPTION([no-dependencies],, [_AM_DEPENDENCIES([CCAS])])dnl
# AM_AUX_DIR_EXPAND -*- Autoconf -*-
-# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -130,7 +130,7 @@ am_aux_dir=`cd "$ac_aux_dir" && pwd`
# AM_CONDITIONAL -*- Autoconf -*-
-# Copyright (C) 1997-2014 Free Software Foundation, Inc.
+# Copyright (C) 1997-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -161,7 +161,7 @@ AC_CONFIG_COMMANDS_PRE(
Usually this means the macro was only invoked conditionally.]])
fi])])
-# Copyright (C) 1999-2014 Free Software Foundation, Inc.
+# Copyright (C) 1999-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -352,7 +352,7 @@ _AM_SUBST_NOTMAKE([am__nodep])dnl
# Generate code to set up dependency tracking. -*- Autoconf -*-
-# Copyright (C) 1999-2014 Free Software Foundation, Inc.
+# Copyright (C) 1999-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -428,7 +428,7 @@ AC_DEFUN([AM_OUTPUT_DEPENDENCY_COMMANDS],
# Do all the work for Automake. -*- Autoconf -*-
-# Copyright (C) 1996-2014 Free Software Foundation, Inc.
+# Copyright (C) 1996-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -625,7 +625,7 @@ for _am_header in $config_headers :; do
done
echo "timestamp for $_am_arg" >`AS_DIRNAME(["$_am_arg"])`/stamp-h[]$_am_stamp_count])
-# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -646,7 +646,7 @@ if test x"${install_sh+set}" != xset; then
fi
AC_SUBST([install_sh])])
-# Copyright (C) 2003-2014 Free Software Foundation, Inc.
+# Copyright (C) 2003-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -668,7 +668,7 @@ AC_SUBST([am__leading_dot])])
# Add --enable-maintainer-mode option to configure. -*- Autoconf -*-
# From Jim Meyering
-# Copyright (C) 1996-2014 Free Software Foundation, Inc.
+# Copyright (C) 1996-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -703,7 +703,7 @@ AC_MSG_CHECKING([whether to enable maintainer-specific portions of Makefiles])
# Check to see how 'make' treats includes. -*- Autoconf -*-
-# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -753,7 +753,7 @@ rm -f confinc confmf
# Fake the existence of programs that GNU maintainers use. -*- Autoconf -*-
-# Copyright (C) 1997-2014 Free Software Foundation, Inc.
+# Copyright (C) 1997-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -792,7 +792,7 @@ fi
# Helper functions for option handling. -*- Autoconf -*-
-# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -821,7 +821,7 @@ AC_DEFUN([_AM_SET_OPTIONS],
AC_DEFUN([_AM_IF_OPTION],
[m4_ifset(_AM_MANGLE_OPTION([$1]), [$2], [$3])])
-# Copyright (C) 1999-2014 Free Software Foundation, Inc.
+# Copyright (C) 1999-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -868,7 +868,7 @@ AC_LANG_POP([C])])
# For backward compatibility.
AC_DEFUN_ONCE([AM_PROG_CC_C_O], [AC_REQUIRE([AC_PROG_CC])])
-# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -887,7 +887,7 @@ AC_DEFUN([AM_RUN_LOG],
# Check to make sure that the build environment is sane. -*- Autoconf -*-
-# Copyright (C) 1996-2014 Free Software Foundation, Inc.
+# Copyright (C) 1996-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -968,7 +968,7 @@ AC_CONFIG_COMMANDS_PRE(
rm -f conftest.file
])
-# Copyright (C) 2009-2014 Free Software Foundation, Inc.
+# Copyright (C) 2009-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1028,7 +1028,7 @@ AC_SUBST([AM_BACKSLASH])dnl
_AM_SUBST_NOTMAKE([AM_BACKSLASH])dnl
])
-# Copyright (C) 2001-2014 Free Software Foundation, Inc.
+# Copyright (C) 2001-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1056,7 +1056,7 @@ fi
INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
AC_SUBST([INSTALL_STRIP_PROGRAM])])
-# Copyright (C) 2006-2014 Free Software Foundation, Inc.
+# Copyright (C) 2006-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1075,7 +1075,7 @@ AC_DEFUN([AM_SUBST_NOTMAKE], [_AM_SUBST_NOTMAKE($@)])
# Check how to create a tarball. -*- Autoconf -*-
-# Copyright (C) 2004-2014 Free Software Foundation, Inc.
+# Copyright (C) 2004-2017 Free Software Foundation, Inc.
#
# This file is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -1206,6 +1206,7 @@ AC_SUBST([am__tar])
AC_SUBST([am__untar])
]) # _AM_PROG_TAR
+m4_include([config/always-arch.m4])
m4_include([config/always-no-bool-compare.m4])
m4_include([config/always-no-unused-but-set-variable.m4])
m4_include([config/dkms.m4])
@@ -1222,6 +1223,8 @@ m4_include([config/kernel-bio-failfast.m4])
m4_include([config/kernel-bio-op.m4])
m4_include([config/kernel-bio-rw-barrier.m4])
m4_include([config/kernel-bio-rw-discard.m4])
+m4_include([config/kernel-bio_set_dev.m4])
+m4_include([config/kernel-blk-queue-bdi.m4])
m4_include([config/kernel-blk-queue-flush.m4])
m4_include([config/kernel-blk-queue-max-hw-sectors.m4])
m4_include([config/kernel-blk-queue-max-segments.m4])
@@ -1229,7 +1232,6 @@ m4_include([config/kernel-blk-queue-unplug.m4])
m4_include([config/kernel-blkdev-get-by-path.m4])
m4_include([config/kernel-blkdev-get.m4])
m4_include([config/kernel-block-device-operations-release-void.m4])
-m4_include([config/kernel-check-disk-size-change.m4])
m4_include([config/kernel-clear-inode.m4])
m4_include([config/kernel-commit-metadata.m4])
m4_include([config/kernel-create-nameidata.m4])
@@ -1246,9 +1248,11 @@ m4_include([config/kernel-elevator-change.m4])
m4_include([config/kernel-encode-fh-inode.m4])
m4_include([config/kernel-evict-inode.m4])
m4_include([config/kernel-fallocate.m4])
+m4_include([config/kernel-file-dentry.m4])
m4_include([config/kernel-file-inode.m4])
m4_include([config/kernel-fmode-t.m4])
m4_include([config/kernel-follow-down-one.m4])
+m4_include([config/kernel-fpu.m4])
m4_include([config/kernel-fsync.m4])
m4_include([config/kernel-generic_io_acct.m4])
m4_include([config/kernel-generic_readlink.m4])
@@ -1256,17 +1260,20 @@ m4_include([config/kernel-get-disk-ro.m4])
m4_include([config/kernel-get-gendisk.m4])
m4_include([config/kernel-get-link.m4])
m4_include([config/kernel-inode-getattr.m4])
+m4_include([config/kernel-inode-set-flags.m4])
m4_include([config/kernel-insert-inode-locked.m4])
m4_include([config/kernel-invalidate-bdev-args.m4])
m4_include([config/kernel-is_owner_or_cap.m4])
m4_include([config/kernel-kmap-atomic-args.m4])
-m4_include([config/kernel-kobj-name-len.m4])
+m4_include([config/kernel-kuid-helpers.m4])
m4_include([config/kernel-lookup-bdev.m4])
m4_include([config/kernel-lookup-nameidata.m4])
m4_include([config/kernel-lseek-execute.m4])
m4_include([config/kernel-mk-request-fn.m4])
m4_include([config/kernel-mkdir-umode-t.m4])
+m4_include([config/kernel-mod-param.m4])
m4_include([config/kernel-mount-nodev.m4])
+m4_include([config/kernel-objtool.m4])
m4_include([config/kernel-open-bdev-exclusive.m4])
m4_include([config/kernel-put-link.m4])
m4_include([config/kernel-rename.m4])
@@ -1277,10 +1284,13 @@ m4_include([config/kernel-sget-args.m4])
m4_include([config/kernel-show-options.m4])
m4_include([config/kernel-shrink.m4])
m4_include([config/kernel-submit_bio.m4])
+m4_include([config/kernel-super-userns.m4])
+m4_include([config/kernel-tmpfile.m4])
m4_include([config/kernel-truncate-range.m4])
m4_include([config/kernel-truncate-setsize.m4])
m4_include([config/kernel-vfs-iterate.m4])
m4_include([config/kernel-vfs-rw-iterate.m4])
+m4_include([config/kernel-vm_node_stat.m4])
m4_include([config/kernel-xattr-handler.m4])
m4_include([config/kernel.m4])
m4_include([config/libtool.m4])
@@ -1289,10 +1299,13 @@ m4_include([config/ltsugar.m4])
m4_include([config/ltversion.m4])
m4_include([config/lt~obsolete.m4])
m4_include([config/mount-helper.m4])
-m4_include([config/user-arch.m4])
+m4_include([config/toolchain-simd.m4])
m4_include([config/user-dracut.m4])
m4_include([config/user-frame-larger-than.m4])
+m4_include([config/user-libattr.m4])
m4_include([config/user-libblkid.m4])
+m4_include([config/user-libtirpc.m4])
+m4_include([config/user-libudev.m4])
m4_include([config/user-libuuid.m4])
m4_include([config/user-makedev.m4])
m4_include([config/user-no-format-truncation.m4])
diff --git a/zfs/config/Rules.am b/zfs/config/Rules.am
index 20a21e972bcd..1d39e7779a7e 100644
--- a/zfs/config/Rules.am
+++ b/zfs/config/Rules.am
@@ -5,6 +5,7 @@ AM_CFLAGS = ${DEBUG_CFLAGS} -Wall -Wstrict-prototypes
AM_CFLAGS += ${NO_UNUSED_BUT_SET_VARIABLE}
AM_CFLAGS += ${NO_BOOL_COMPARE}
AM_CFLAGS += -fno-strict-aliasing
+AM_CFLAGS += -std=gnu99
AM_CPPFLAGS = -D_GNU_SOURCE -D__EXTENSIONS__ -D_REENTRANT
AM_CPPFLAGS += -D_POSIX_PTHREAD_SEMANTICS -D_FILE_OFFSET_BITS=64
AM_CPPFLAGS += -D_LARGEFILE64_SOURCE -DHAVE_LARGE_STACKS=1
diff --git a/zfs/config/always-arch.m4 b/zfs/config/always-arch.m4
new file mode 100644
index 000000000000..c3e6b4a9789a
--- /dev/null
+++ b/zfs/config/always-arch.m4
@@ -0,0 +1,22 @@
+dnl #
+dnl # Set the target arch for libspl atomic implementation and the icp
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_ARCH], [
+ AC_MSG_CHECKING(for target asm dir)
+ TARGET_ARCH=`echo ${target_cpu} | sed -e s/i.86/i386/`
+
+ case $TARGET_ARCH in
+ i386|x86_64)
+ TARGET_ASM_DIR=asm-${TARGET_ARCH}
+ ;;
+ *)
+ TARGET_ASM_DIR=asm-generic
+ ;;
+ esac
+
+ AC_SUBST([TARGET_ASM_DIR])
+ AM_CONDITIONAL([TARGET_ASM_X86_64], test $TARGET_ASM_DIR = asm-x86_64)
+ AM_CONDITIONAL([TARGET_ASM_I386], test $TARGET_ASM_DIR = asm-i386)
+ AM_CONDITIONAL([TARGET_ASM_GENERIC], test $TARGET_ASM_DIR = asm-generic)
+ AC_MSG_RESULT([$TARGET_ASM_DIR])
+])
diff --git a/zfs/config/deb.am b/zfs/config/deb.am
index 648417b2adc6..98e98e45f402 100644
--- a/zfs/config/deb.am
+++ b/zfs/config/deb.am
@@ -15,17 +15,23 @@ deb-local:
fi)
deb-kmod: deb-local rpm-kmod
-if CONFIG_KERNEL
name=${PACKAGE}; \
version=${VERSION}-${RELEASE}; \
arch=`$(RPM) -qp $${name}-kmod-$${version}.src.rpm --qf %{arch} | tail -1`; \
pkg1=kmod-$${name}*$${version}.$${arch}.rpm; \
fakeroot $(ALIEN) --bump=0 --scripts --to-deb $$pkg1; \
$(RM) $$pkg1
-endif
+
+
+deb-dkms: deb-local rpm-dkms
+ name=${PACKAGE}; \
+ version=${VERSION}-${RELEASE}; \
+ arch=`$(RPM) -qp $${name}-dkms-$${version}.src.rpm --qf %{arch} | tail -1`; \
+ pkg1=$${name}-dkms-$${version}.$${arch}.rpm; \
+ fakeroot $(ALIEN) --bump=0 --scripts --to-deb $$pkg1; \
+ $(RM) $$pkg1
deb-utils: deb-local rpm-utils
-if CONFIG_USER
name=${PACKAGE}; \
version=${VERSION}-${RELEASE}; \
arch=`$(RPM) -qp $${name}-$${version}.src.rpm --qf %{arch} | tail -1`; \
@@ -38,11 +44,25 @@ if CONFIG_USER
pkg7=$${name}-test-$${version}.$${arch}.rpm; \
pkg8=$${name}-dracut-$${version}.$${arch}.rpm; \
pkg9=$${name}-initramfs-$${version}.$${arch}.rpm; \
+## Arguments need to be passed to dh_shlibdeps. Alien provides no mechanism
+## to do this, so we install a shim onto the path which calls the real
+## dh_shlibdeps with the required arguments.
+ path_prepend=`mktemp -d /tmp/intercept.XXX`; \
+ echo "#$(SHELL)" > $${path_prepend}/dh_shlibdeps; \
+ echo "`which dh_shlibdeps` -- \
+ -xlibuutil1linux -xlibnvpair1linux -xlibzfs2linux -xlibzpool2linux" \
+ >> $${path_prepend}/dh_shlibdeps; \
+## These -x arguments are passed to dpkg-shlibdeps and exclude the
+## Debianized packages from the auto-generated dependencies of the new debs,
+## which should NOT be mixed with the alien-generated debs created here.
+ chmod +x $${path_prepend}/dh_shlibdeps; \
+ env PATH=$${path_prepend}:$${PATH} \
fakeroot $(ALIEN) --bump=0 --scripts --to-deb \
$$pkg1 $$pkg2 $$pkg3 $$pkg4 $$pkg5 $$pkg6 $$pkg7 \
- $$pkg8 $$pkg9;
+ $$pkg8 $$pkg9; \
+ $(RM) $${path_prepend}/dh_shlibdeps; \
+ rmdir $${path_prepend}; \
$(RM) $$pkg1 $$pkg2 $$pkg3 $$pkg4 $$pkg5 $$pkg6 $$pkg7 \
$$pkg8 $$pkg9;
-endif
-deb: deb-kmod deb-utils
+deb: deb-kmod deb-dkms deb-utils
diff --git a/zfs/config/kernel-acl.m4 b/zfs/config/kernel-acl.m4
index b2c33f1c40e3..311484349d4a 100644
--- a/zfs/config/kernel-acl.m4
+++ b/zfs/config/kernel-acl.m4
@@ -16,26 +16,27 @@ AC_DEFUN([ZFS_AC_KERNEL_POSIX_ACL_RELEASE], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_POSIX_ACL_RELEASE, 1,
[posix_acl_release() is available])
- ],[
- AC_MSG_RESULT(no)
- ])
- AC_MSG_CHECKING([whether posix_acl_release() is GPL-only])
- ZFS_LINUX_TRY_COMPILE([
- #include <linux/cred.h>
- #include <linux/fs.h>
- #include <linux/posix_acl.h>
+ AC_MSG_CHECKING([whether posix_acl_release() is GPL-only])
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/module.h>
+ #include <linux/cred.h>
+ #include <linux/fs.h>
+ #include <linux/posix_acl.h>
- MODULE_LICENSE("$ZFS_META_LICENSE");
- ],[
- struct posix_acl* tmp = posix_acl_alloc(1, 0);
- posix_acl_release(tmp);
+ MODULE_LICENSE("$ZFS_META_LICENSE");
+ ],[
+ struct posix_acl* tmp = posix_acl_alloc(1, 0);
+ posix_acl_release(tmp);
+ ],[
+ AC_MSG_RESULT(no)
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_POSIX_ACL_RELEASE_GPL_ONLY, 1,
+ [posix_acl_release() is GPL-only])
+ ])
],[
AC_MSG_RESULT(no)
- ],[
- AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_POSIX_ACL_RELEASE_GPL_ONLY, 1,
- [posix_acl_release() is GPL-only])
])
])
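The restructuring above nests the GPL-only probe inside the availability probe, so HAVE_POSIX_ACL_RELEASE_GPL_ONLY can only be defined when HAVE_POSIX_ACL_RELEASE is. A simplified sketch of how the pair is typically consumed (the real ZFS compat code additionally defers the free through RCU, omitted here):

    static inline void
    zpl_posix_acl_release(struct posix_acl *acl)
    {
            if (acl == NULL)
                    return;
    #ifdef HAVE_POSIX_ACL_RELEASE_GPL_ONLY
            /* cannot call the GPL-only symbol; drop the count by hand */
            if (atomic_dec_and_test(&acl->a_refcount))
                    kfree(acl);
    #else
            posix_acl_release(acl);
    #endif
    }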
diff --git a/zfs/config/kernel-bdi.m4 b/zfs/config/kernel-bdi.m4
index c2a9dd28bf4e..cb7479ee9c46 100644
--- a/zfs/config/kernel-bdi.m4
+++ b/zfs/config/kernel-bdi.m4
@@ -11,8 +11,9 @@ AC_DEFUN([ZFS_AC_KERNEL_BDI], [
struct super_block sb;
], [
char *name = "bdi";
+ atomic_long_t zfs_bdi_seq;
int error __attribute__((unused)) =
- super_setup_bdi_name(&sb, name);
+ super_setup_bdi_name(&sb, "%.28s-%ld", name, atomic_long_inc_return(&zfs_bdi_seq));
], [super_setup_bdi_name], [fs/super.c], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_SUPER_SETUP_BDI_NAME, 1,
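The revised conftest mirrors the intended call site: the 4.12 super_setup_bdi_name() takes a printf-style format, which lets a per-mount sequence number be folded into the bdi name. An illustrative caller, assuming a zfs_bdi_seq counter as in the conftest:

    static atomic_long_t zfs_bdi_seq = ATOMIC_LONG_INIT(0);

    static int
    zpl_bdi_setup(struct super_block *sb, char *name)
    {
    #ifdef HAVE_SUPER_SETUP_BDI_NAME
            return (super_setup_bdi_name(sb, "%.28s-%ld", name,
                atomic_long_inc_return(&zfs_bdi_seq)));
    #else
            return (0);     /* older kernels register a bdi by hand */
    #endif
    }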
diff --git a/zfs/config/kernel-bio_set_dev.m4 b/zfs/config/kernel-bio_set_dev.m4
new file mode 100644
index 000000000000..6be873c56154
--- /dev/null
+++ b/zfs/config/kernel-bio_set_dev.m4
@@ -0,0 +1,22 @@
+dnl #
+dnl # Linux 4.14 API,
+dnl #
+dnl # The bio_set_dev() helper was introduced as part of the transition
+dnl # to have struct gendisk in struct bio.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_BIO_SET_DEV], [
+ AC_MSG_CHECKING([whether bio_set_dev() exists])
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/bio.h>
+ #include <linux/fs.h>
+ ],[
+ struct block_device *bdev = NULL;
+ struct bio *bio = NULL;
+ bio_set_dev(bio, bdev);
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_BIO_SET_DEV, 1, [bio_set_dev() exists])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
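On pre-4.14 kernels, where this check fails, the usual fallback is an inline that assigns bi_bdev directly so callers can use bio_set_dev() unconditionally. A sketch of that shim (assumed pattern, not quoted from this patch):

    #ifndef HAVE_BIO_SET_DEV
    static inline void
    bio_set_dev(struct bio *bio, struct block_device *bdev)
    {
            /* pre-4.14 kernels still carry the block device in the bio */
            bio->bi_bdev = bdev;
    }
    #endif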
diff --git a/zfs/config/kernel-blk-queue-bdi.m4 b/zfs/config/kernel-blk-queue-bdi.m4
new file mode 100644
index 000000000000..816471166a5a
--- /dev/null
+++ b/zfs/config/kernel-blk-queue-bdi.m4
@@ -0,0 +1,20 @@
+dnl #
+dnl # 2.6.32 - 4.11, statically allocated bdi in request_queue
+dnl # 4.12 - x.y, dynamically allocated bdi in request_queue
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_BDI], [
+ AC_MSG_CHECKING([whether blk_queue bdi is dynamic])
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/blkdev.h>
+ ],[
+ struct request_queue q;
+ struct backing_dev_info bdi;
+ q.backing_dev_info = &bdi;
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_BLK_QUEUE_BDI_DYNAMIC, 1,
+ [blk queue backing_dev_info is dynamic])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
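Code touching the queue's backing_dev_info then branches on the new define, for example when setting read-ahead on a zvol queue (illustrative sketch):

    static inline void
    blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages)
    {
    #ifdef HAVE_BLK_QUEUE_BDI_DYNAMIC
            q->backing_dev_info->ra_pages = ra_pages;   /* 4.12+: pointer */
    #else
            q->backing_dev_info.ra_pages = ra_pages;    /* <= 4.11: embedded */
    #endif
    }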
diff --git a/zfs/config/kernel-check-disk-size-change.m4 b/zfs/config/kernel-check-disk-size-change.m4
deleted file mode 100644
index ea5c75f39f20..000000000000
--- a/zfs/config/kernel-check-disk-size-change.m4
+++ /dev/null
@@ -1,18 +0,0 @@
-dnl #
-dnl # 2.6.28 API change
-dnl # Added check_disk_size_change() helper function.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_CHECK_DISK_SIZE_CHANGE],
- [AC_MSG_CHECKING([whether check_disk_size_change() is available])
- ZFS_LINUX_TRY_COMPILE_SYMBOL([
- #include <linux/fs.h>
- ], [
- check_disk_size_change(NULL, NULL);
- ], [check_disk_size_change], [fs/block_dev.c], [
- AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_CHECK_DISK_SIZE_CHANGE, 1,
- [check_disk_size_change() is available])
- ], [
- AC_MSG_RESULT(no)
- ])
-])
diff --git a/zfs/config/kernel-file-dentry.m4 b/zfs/config/kernel-file-dentry.m4
new file mode 100644
index 000000000000..daf742ee1b0a
--- /dev/null
+++ b/zfs/config/kernel-file-dentry.m4
@@ -0,0 +1,20 @@
+dnl #
+dnl # 4.1 API change
+dnl # struct access file->f_path.dentry was replaced by accessor function
+dnl # since fix torvalds/linux@4bacc9c9234c ("overlayfs: Make f_path always
+dnl # point to the overlay and f_inode to the underlay").
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_FILE_DENTRY], [
+ AC_MSG_CHECKING([whether file_dentry() is available])
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/fs.h>
+ ],[
+ struct file *f = NULL;
+ file_dentry(f);
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_FILE_DENTRY, 1, [file_dentry() is available])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
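Consumers wrap the accessor so one call site works on both sides of the 4.1 change; roughly (names illustrative):

    static inline struct dentry *
    zpl_file_dentry(struct file *filp)
    {
    #ifdef HAVE_FILE_DENTRY
            return (file_dentry(filp));     /* 4.1+: overlayfs-safe */
    #else
            return (filp->f_path.dentry);   /* direct access on older kernels */
    #endif
    }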
diff --git a/zfs/config/kernel-fpu.m4 b/zfs/config/kernel-fpu.m4
new file mode 100644
index 000000000000..1c5690969d48
--- /dev/null
+++ b/zfs/config/kernel-fpu.m4
@@ -0,0 +1,18 @@
+dnl #
+dnl # 4.2 API change
+dnl # asm/i387.h is replaced by asm/fpu/api.h
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_FPU], [
+ AC_MSG_CHECKING([whether asm/fpu/api.h exists])
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/kernel.h>
+ #include <asm/fpu/api.h>
+ ],[
+ __kernel_fpu_begin();
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_FPU_API_H, 1, [kernel has <asm/fpu/api.h> interface])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
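HAVE_FPU_API_H then selects which header the SIMD code pulls in; a simplified sketch of the compat pattern (the real code also pairs the calls with preemption control):

    #ifdef HAVE_FPU_API_H
    #include <asm/fpu/api.h>        /* 4.2+ location */
    #else
    #include <asm/i387.h>           /* pre-4.2 location */
    #endif

    #define kfpu_begin()    kernel_fpu_begin()
    #define kfpu_end()      kernel_fpu_end()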
diff --git a/zfs/config/kernel-generic_io_acct.m4 b/zfs/config/kernel-generic_io_acct.m4
index 25bfa384808b..0aa76216226c 100644
--- a/zfs/config/kernel-generic_io_acct.m4
+++ b/zfs/config/kernel-generic_io_acct.m4
@@ -4,8 +4,8 @@ dnl #
dnl # torvalds/linux@394ffa503bc40e32d7f54a9b817264e81ce131b4 allows us to
dnl # increment iostat counters without generic_make_request().
dnl #
-AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT], [
- AC_MSG_CHECKING([whether generic IO accounting symbols are avaliable])
+AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT_3ARG], [
+ AC_MSG_CHECKING([whether 3 arg generic IO accounting symbols are available])
ZFS_LINUX_TRY_COMPILE_SYMBOL([
#include <linux/bio.h>
@@ -18,8 +18,35 @@ AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT], [
generic_end_io_acct(0, NULL, 0);
], [generic_start_io_acct], [block/bio.c], [
AC_MSG_RESULT(yes)
- AC_DEFINE(HAVE_GENERIC_IO_ACCT, 1,
- [generic_start_io_acct()/generic_end_io_acct() avaliable])
+ AC_DEFINE(HAVE_GENERIC_IO_ACCT_3ARG, 1,
+ [generic_start_io_acct()/generic_end_io_acct() available])
+ ], [
+ AC_MSG_RESULT(no)
+ ])
+])
+
+dnl #
+dnl # Linux 4.14 API,
+dnl #
+dnl # generic_start_io_acct/generic_end_io_acct now require request_queue to be
+dnl # provided. No functional changes, but preparation for inflight accounting
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_GENERIC_IO_ACCT_4ARG], [
+ AC_MSG_CHECKING([whether 4 arg generic IO accounting symbols are available])
+ ZFS_LINUX_TRY_COMPILE_SYMBOL([
+ #include <linux/bio.h>
+
+ void (*generic_start_io_acct_f)(struct request_queue *, int,
+ unsigned long, struct hd_struct *) = &generic_start_io_acct;
+ void (*generic_end_io_acct_f)(struct request_queue *, int,
+ struct hd_struct *, unsigned long) = &generic_end_io_acct;
+ ], [
+ generic_start_io_acct(NULL, 0, 0, NULL);
+ generic_end_io_acct(NULL, 0, NULL, 0);
+ ], [generic_start_io_acct], [block/bio.c], [
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_GENERIC_IO_ACCT_4ARG, 1,
+ [generic_start_io_acct()/generic_end_io_acct() 4 arg available])
], [
AC_MSG_RESULT(no)
])
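With both results available, a single wrapper can paper over the 3-arg/4-arg split, and over kernels exporting neither symbol (illustrative):

    static inline void
    blk_generic_start_io_acct(struct request_queue *q, int rw,
        unsigned long sectors, struct hd_struct *part)
    {
    #if defined(HAVE_GENERIC_IO_ACCT_3ARG)
            generic_start_io_acct(rw, sectors, part);
    #elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
            generic_start_io_acct(q, rw, sectors, part);
    #endif
    }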
diff --git a/zfs/config/kernel-inode-set-flags.m4 b/zfs/config/kernel-inode-set-flags.m4
new file mode 100644
index 000000000000..e0ad26796dd0
--- /dev/null
+++ b/zfs/config/kernel-inode-set-flags.m4
@@ -0,0 +1,18 @@
+dnl #
+dnl # 3.15 API change
+dnl # inode_set_flags introduced to set i_flags
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_INODE_SET_FLAGS], [
+ AC_MSG_CHECKING([whether inode_set_flags() exists])
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/fs.h>
+ ],[
+ struct inode inode;
+ inode_set_flags(&inode, S_IMMUTABLE, S_IMMUTABLE);
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_INODE_SET_FLAGS, 1, [inode_set_flags() exists])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
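Where the 3.15 helper is missing, the fallback updates i_flags under the same flags/mask semantics; a sketch, ignoring the atomicity the real helper provides:

    static inline void
    zpl_inode_set_flags(struct inode *ip, unsigned int flags, unsigned int mask)
    {
    #ifdef HAVE_INODE_SET_FLAGS
            inode_set_flags(ip, flags, mask);
    #else
            ip->i_flags = (ip->i_flags & ~mask) | flags;
    #endif
    }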
diff --git a/zfs/config/kernel-kobj-name-len.m4 b/zfs/config/kernel-kobj-name-len.m4
deleted file mode 100644
index 37999fabb81d..000000000000
--- a/zfs/config/kernel-kobj-name-len.m4
+++ /dev/null
@@ -1,21 +0,0 @@
-dnl #
-dnl # 2.6.27 API change,
-dnl # kobject KOBJ_NAME_LEN static limit removed. All users of this
-dnl # constant were removed prior to 2.6.27, but to be on the safe
-dnl # side this check ensures the constant is undefined.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_KOBJ_NAME_LEN], [
- AC_MSG_CHECKING([whether kernel defines KOBJ_NAME_LEN])
- ZFS_LINUX_TRY_COMPILE([
- #include <linux/kobject.h>
- ],[
- int val __attribute__ ((unused));
- val = KOBJ_NAME_LEN;
- ],[
- AC_MSG_RESULT([yes])
- AC_DEFINE(HAVE_KOBJ_NAME_LEN, 1,
- [kernel defines KOBJ_NAME_LEN])
- ],[
- AC_MSG_RESULT([no])
- ])
-])
diff --git a/zfs/config/kernel-kuid-helpers.m4 b/zfs/config/kernel-kuid-helpers.m4
new file mode 100644
index 000000000000..60713b9d3132
--- /dev/null
+++ b/zfs/config/kernel-kuid-helpers.m4
@@ -0,0 +1,22 @@
+dnl #
+dnl # 3.5 API change,
+dnl # Since user namespaces were introduced in kernel version 3.5, it
+dnl # became necessary to go through one more level of indirection
+dnl # when dealing with uid/gid - namely the kuid type.
+dnl #
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_KUID_HELPERS], [
+ AC_MSG_CHECKING([whether i_(uid|gid)_(read|write) exist])
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/fs.h>
+ ],[
+ struct inode *ip = NULL;
+ (void) i_uid_read(ip);
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_KUID_HELPERS, 1,
+ [i_(uid|gid)_(read|write) exist])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
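A short sketch of the indirection the comment describes, assuming HAVE_KUID_HELPERS from the check above; the function name is hypothetical:

    #include <linux/fs.h>

    /* Return the owner of an inode as a plain integer id. */
    static uid_t
    example_inode_owner(struct inode *ip)
    {
    #if defined(HAVE_KUID_HELPERS)
            return (i_uid_read(ip));        /* translate kuid_t -> uid_t */
    #else
            return (ip->i_uid);             /* pre-3.5: already an integer */
    #endif
    }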
diff --git a/zfs/config/kernel-mkdir-umode-t.m4 b/zfs/config/kernel-mkdir-umode-t.m4
index 634260b3ca2b..ebc21be9ec55 100644
--- a/zfs/config/kernel-mkdir-umode-t.m4
+++ b/zfs/config/kernel-mkdir-umode-t.m4
@@ -4,7 +4,7 @@ dnl # The VFS .create, .mkdir and .mknod callbacks were updated to take a
dnl # umode_t type rather than an int. The expectation is that any backport
dnl # would also change all three prototypes. However, if it turns out that
dnl # some distribution doesn't backport the whole thing this could be
-dnl # broken apart in to three seperate checks.
+dnl # broken apart into three separate checks.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_MKDIR_UMODE_T], [
AC_MSG_CHECKING([whether iops->create()/mkdir()/mknod() take umode_t])
diff --git a/zfs/config/kernel-mod-param.m4 b/zfs/config/kernel-mod-param.m4
new file mode 100644
index 000000000000..b72be684a44e
--- /dev/null
+++ b/zfs/config/kernel-mod-param.m4
@@ -0,0 +1,30 @@
+dnl #
+dnl # Grsecurity kernel API change
+dnl # constified parameters of module_param_call() methods
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST], [
+ AC_MSG_CHECKING([whether module_param_call() is hardened])
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/module.h>
+ #include <linux/moduleparam.h>
+
+ int param_get(char *b, const struct kernel_param *kp)
+ {
+ return (0);
+ }
+
+ int param_set(const char *b, const struct kernel_param *kp)
+ {
+ return (0);
+ }
+
+ module_param_call(p, param_set, param_get, NULL, 0644);
+ ],[
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(MODULE_PARAM_CALL_CONST, 1,
+ [hardened module_param_call])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
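Consumers usually hide the result behind a typedef so that get/set handlers compile against either prototype; the typedef and function names below are hypothetical:

    #include <linux/moduleparam.h>

    /* Hardened (and recent mainline) kernels constify the
     * kernel_param argument of module_param_call() handlers. */
    #ifdef MODULE_PARAM_CALL_CONST
    typedef const struct kernel_param example_kernel_param_t;
    #else
    typedef struct kernel_param example_kernel_param_t;
    #endif

    static int
    example_param_set(const char *buf, example_kernel_param_t *kp)
    {
            return (param_set_uint(buf, kp));
    }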
diff --git a/zfs/config/kernel-objtool.m4 b/zfs/config/kernel-objtool.m4
new file mode 100644
index 000000000000..94e7dda2d16d
--- /dev/null
+++ b/zfs/config/kernel-objtool.m4
@@ -0,0 +1,19 @@
+dnl #
+dnl # 4.6 API for compile-time stack validation
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_OBJTOOL], [
+ AC_MSG_CHECKING([for compile-time stack validation (objtool)])
+ ZFS_LINUX_TRY_COMPILE([
+ #undef __ASSEMBLY__
+ #include <asm/frame.h>
+ ],[
+ #if !defined(FRAME_BEGIN)
+ CTASSERT(1);
+ #endif
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_KERNEL_OBJTOOL, 1, [kernel does stack verification])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
diff --git a/zfs/config/kernel-super-userns.m4 b/zfs/config/kernel-super-userns.m4
new file mode 100644
index 000000000000..de94ad967ac3
--- /dev/null
+++ b/zfs/config/kernel-super-userns.m4
@@ -0,0 +1,21 @@
+dnl #
+dnl # 4.8 API change
+dnl # struct user_namespace was added to struct super_block as
+dnl # super->s_user_ns member
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SUPER_USER_NS], [
+ AC_MSG_CHECKING([whether super_block->s_user_ns exists])
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/fs.h>
+ #include <linux/user_namespace.h>
+ ],[
+ struct super_block super;
+ super.s_user_ns = (struct user_namespace *)NULL;
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_SUPER_USER_NS, 1,
+ [super_block->s_user_ns exists])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
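A sketch of why the member matters: id translation should use the namespace that owns the superblock when one exists, falling back to the initial namespace on older kernels. The helper name is illustrative:

    #include <linux/fs.h>
    #include <linux/user_namespace.h>

    static struct user_namespace *
    example_sb_owner_ns(struct super_block *sb)
    {
    #if defined(HAVE_SUPER_USER_NS)
            return (sb->s_user_ns);         /* 4.8+ */
    #else
            return (&init_user_ns);         /* pre-4.8: single namespace */
    #endif
    }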
diff --git a/zfs/config/kernel-tmpfile.m4 b/zfs/config/kernel-tmpfile.m4
new file mode 100644
index 000000000000..5aad90450e8b
--- /dev/null
+++ b/zfs/config/kernel-tmpfile.m4
@@ -0,0 +1,23 @@
+dnl #
+dnl # 3.11 API change
+dnl # Add support for i_op->tmpfile
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_TMPFILE], [
+ AC_MSG_CHECKING([whether i_op->tmpfile() exists])
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/fs.h>
+ int tmpfile(struct inode *inode, struct dentry *dentry,
+ umode_t mode) { return 0; }
+ static struct inode_operations
+ iops __attribute__ ((unused)) = {
+ .tmpfile = tmpfile,
+ };
+ ],[
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(HAVE_TMPFILE, 1,
+ [i_op->tmpfile() exists])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
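The define typically gates an entry in the directory inode_operations table so O_TMPFILE support only appears on kernels that have the callback; handler names here are invented for the sketch:

    #include <linux/fs.h>

    static int example_mkdir(struct inode *, struct dentry *, umode_t);
    static int example_tmpfile(struct inode *, struct dentry *, umode_t);

    static const struct inode_operations example_dir_iops = {
            .mkdir          = example_mkdir,
    #ifdef HAVE_TMPFILE
            .tmpfile        = example_tmpfile,      /* backs O_TMPFILE, 3.11+ */
    #endif
    };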
diff --git a/zfs/config/kernel-vm_node_stat.m4 b/zfs/config/kernel-vm_node_stat.m4
new file mode 100644
index 000000000000..e1c42f884b0b
--- /dev/null
+++ b/zfs/config/kernel-vm_node_stat.m4
@@ -0,0 +1,22 @@
+dnl #
+dnl # 4.8 API change
+dnl # kernel vm counters change
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_VM_NODE_STAT], [
+ AC_MSG_CHECKING([whether to use vm_node_stat based fn's])
+ ZFS_LINUX_TRY_COMPILE([
+ #include <linux/mm.h>
+ #include <linux/vmstat.h>
+ ],[
+ int a __attribute__ ((unused)) = NR_VM_NODE_STAT_ITEMS;
+ long x __attribute__ ((unused)) =
+ atomic_long_read(&vm_node_stat[0]);
+ (void) global_node_page_state(0);
+ ],[
+ AC_MSG_RESULT(yes)
+ AC_DEFINE(ZFS_GLOBAL_NODE_PAGE_STATE, 1,
+ [using global_node_page_state()])
+ ],[
+ AC_MSG_RESULT(no)
+ ])
+])
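For context: 4.8 split the VM statistics into zone- and node-level counter sets, so on newer kernels page-cache pressure is read through the node API. A hedged sketch, with the function name invented:

    #include <linux/vmstat.h>

    /* Count of pages currently in the page cache. */
    static unsigned long
    example_file_pages(void)
    {
    #if defined(ZFS_GLOBAL_NODE_PAGE_STATE)
            return (global_node_page_state(NR_FILE_PAGES));  /* 4.8+ */
    #else
            return (global_page_state(NR_FILE_PAGES));       /* pre-4.8 */
    #endif
    }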
diff --git a/zfs/config/kernel-xattr-handler.m4 b/zfs/config/kernel-xattr-handler.m4
index 4ac08d8e7eeb..0b61b85b1d45 100644
--- a/zfs/config/kernel-xattr-handler.m4
+++ b/zfs/config/kernel-xattr-handler.m4
@@ -114,6 +114,7 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_GET], [
dnl # attr_handler, and handler_flags argument was removed and
dnl # should be accessed by handler->flags.
dnl #
+ AC_MSG_RESULT(no)
AC_MSG_CHECKING([whether xattr_handler->get() wants xattr_handler])
ZFS_LINUX_TRY_COMPILE([
#include <linux/xattr.h>
@@ -217,6 +218,7 @@ AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [
dnl # xattr_handler, and handler_flags argument was removed and
dnl # should be accessed by handler->flags.
dnl #
+ AC_MSG_RESULT(no)
AC_MSG_CHECKING([whether xattr_handler->set() wants xattr_handler])
ZFS_LINUX_TRY_COMPILE([
#include <linux/xattr.h>
@@ -294,7 +296,7 @@ dnl #
AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_LIST], [
dnl # 4.5 API change,
dnl # The xattr_handler->list() callback was changed to take only a
- dnl # dentry and it only needs to return if it's accessable.
+ dnl # dentry and it only needs to return if it's accessible.
AC_MSG_CHECKING([whether xattr_handler->list() wants simple])
ZFS_LINUX_TRY_COMPILE([
#include <linux/xattr.h>
diff --git a/zfs/config/kernel.m4 b/zfs/config/kernel.m4
index 9c6802a6c20b..c3ed5cf3320b 100644
--- a/zfs/config/kernel.m4
+++ b/zfs/config/kernel.m4
@@ -1,18 +1,20 @@
dnl #
-dnl # Default ZFS kernel configuration
+dnl # Default ZFS kernel configuration
dnl #
AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL
ZFS_AC_SPL
+ ZFS_AC_QAT
ZFS_AC_TEST_MODULE
+ ZFS_AC_KERNEL_OBJTOOL
ZFS_AC_KERNEL_CONFIG
ZFS_AC_KERNEL_DECLARE_EVENT_CLASS
ZFS_AC_KERNEL_CURRENT_BIO_TAIL
+ ZFS_AC_KERNEL_SUPER_USER_NS
ZFS_AC_KERNEL_SUBMIT_BIO
ZFS_AC_KERNEL_BDEV_BLOCK_DEVICE_OPERATIONS
ZFS_AC_KERNEL_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
ZFS_AC_KERNEL_TYPE_FMODE_T
- ZFS_AC_KERNEL_KOBJ_NAME_LEN
ZFS_AC_KERNEL_3ARG_BLKDEV_GET
ZFS_AC_KERNEL_BLKDEV_GET_BY_PATH
ZFS_AC_KERNEL_OPEN_BDEV_EXCLUSIVE
@@ -22,6 +24,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_BDEV_PHYSICAL_BLOCK_SIZE
ZFS_AC_KERNEL_BIO_BVEC_ITER
ZFS_AC_KERNEL_BIO_FAILFAST_DTD
+ ZFS_AC_KERNEL_BIO_SET_DEV
ZFS_AC_KERNEL_REQ_FAILFAST_MASK
ZFS_AC_KERNEL_REQ_OP_DISCARD
ZFS_AC_KERNEL_REQ_OP_SECURE_ERASE
@@ -31,6 +34,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_BIO_BI_STATUS
ZFS_AC_KERNEL_BIO_RW_BARRIER
ZFS_AC_KERNEL_BIO_RW_DISCARD
+ ZFS_AC_KERNEL_BLK_QUEUE_BDI
ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
@@ -60,9 +64,11 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_INODE_OPERATIONS_GET_ACL
ZFS_AC_KERNEL_INODE_OPERATIONS_SET_ACL
ZFS_AC_KERNEL_INODE_OPERATIONS_GETATTR
+ ZFS_AC_KERNEL_INODE_SET_FLAGS
ZFS_AC_KERNEL_GET_ACL_HANDLE_CACHE
ZFS_AC_KERNEL_SHOW_OPTIONS
ZFS_AC_KERNEL_FILE_INODE
+ ZFS_AC_KERNEL_FILE_DENTRY
ZFS_AC_KERNEL_FSYNC
ZFS_AC_KERNEL_EVICT_INODE
ZFS_AC_KERNEL_DIRTY_INODE_WITH_FLAGS
@@ -75,6 +81,7 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_CREATE_NAMEIDATA
ZFS_AC_KERNEL_GET_LINK
ZFS_AC_KERNEL_PUT_LINK
+ ZFS_AC_KERNEL_TMPFILE
ZFS_AC_KERNEL_TRUNCATE_RANGE
ZFS_AC_KERNEL_AUTOMOUNT
ZFS_AC_KERNEL_ENCODE_FH_WITH_INODE
@@ -88,7 +95,6 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_D_SET_D_OP
ZFS_AC_KERNEL_D_REVALIDATE_NAMEIDATA
ZFS_AC_KERNEL_CONST_DENTRY_OPERATIONS
- ZFS_AC_KERNEL_CHECK_DISK_SIZE_CHANGE
ZFS_AC_KERNEL_TRUNCATE_SETSIZE
ZFS_AC_KERNEL_6ARGS_SECURITY_INODE_INIT_SECURITY
ZFS_AC_KERNEL_CALLBACK_SECURITY_INODE_INIT_SECURITY
@@ -108,10 +114,15 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
ZFS_AC_KERNEL_KMAP_ATOMIC_ARGS
ZFS_AC_KERNEL_FOLLOW_DOWN_ONE
ZFS_AC_KERNEL_MAKE_REQUEST_FN
- ZFS_AC_KERNEL_GENERIC_IO_ACCT
+ ZFS_AC_KERNEL_GENERIC_IO_ACCT_3ARG
+ ZFS_AC_KERNEL_GENERIC_IO_ACCT_4ARG
+ ZFS_AC_KERNEL_FPU
+ ZFS_AC_KERNEL_KUID_HELPERS
+ ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST
ZFS_AC_KERNEL_RENAME_WANTS_FLAGS
ZFS_AC_KERNEL_HAVE_GENERIC_SETXATTR
ZFS_AC_KERNEL_CURRENT_TIME
+ ZFS_AC_KERNEL_VM_NODE_STAT
AS_IF([test "$LINUX_OBJ" != "$LINUX"], [
KERNELMAKE_PARAMS="$KERNELMAKE_PARAMS O=$LINUX_OBJ"
@@ -121,6 +132,8 @@ AC_DEFUN([ZFS_AC_CONFIG_KERNEL], [
dnl # -Wall -fno-strict-aliasing -Wstrict-prototypes and other
dnl # compiler options are added by the kernel build system.
+ KERNELCPPFLAGS="$KERNELCPPFLAGS -std=gnu99"
+ KERNELCPPFLAGS="$KERNELCPPFLAGS -Wno-declaration-after-statement"
KERNELCPPFLAGS="$KERNELCPPFLAGS $NO_UNUSED_BUT_SET_VARIABLE"
KERNELCPPFLAGS="$KERNELCPPFLAGS $NO_BOOL_COMPARE"
KERNELCPPFLAGS="$KERNELCPPFLAGS -DHAVE_SPL -D_KERNEL"
@@ -193,6 +206,7 @@ AC_DEFUN([ZFS_AC_KERNEL], [
AS_IF([test "$kernelsrc" = "NONE"], [
kernsrcver=NONE
])
+ withlinux=yes
])
AC_MSG_RESULT([$kernelsrc])
@@ -205,7 +219,7 @@ AC_DEFUN([ZFS_AC_KERNEL], [
AC_MSG_CHECKING([kernel build directory])
AS_IF([test -z "$kernelbuild"], [
- AS_IF([test -e "/lib/modules/$(uname -r)/build"], [
+ AS_IF([test x$withlinux != xyes -a -e "/lib/modules/$(uname -r)/build"], [
kernelbuild=`readlink -f /lib/modules/$(uname -r)/build`
], [test -d ${kernelsrc}-obj/${target_cpu}/${target_cpu}], [
kernelbuild=${kernelsrc}-obj/${target_cpu}/${target_cpu}
@@ -273,7 +287,9 @@ AC_DEFUN([ZFS_AC_SPL], [
AC_ARG_WITH([spl],
AS_HELP_STRING([--with-spl=PATH],
[Path to spl source]),
- [splsrc="$withval"])
+ AS_IF([test "$withval" = "yes"],
+ AC_MSG_ERROR([--with-spl=PATH requires a PATH]),
+ [splsrc="$withval"]))
AC_ARG_WITH([spl-obj],
AS_HELP_STRING([--with-spl-obj=PATH],
@@ -299,6 +315,14 @@ AC_DEFUN([ZFS_AC_SPL], [
AC_MSG_CHECKING([spl source directory])
AS_IF([test -z "${splsrc}"], [
+ [all_spl_sources="
+ ${splsrc0}
+ ${splsrc1}
+ ${splsrc2}
+ ${splsrc3}
+ ${splsrc4}
+ ${splsrc5}
+ ${splsrc6}"],
AS_IF([ test -e "${splsrc0}/spl.release.in"], [
splsrc=${splsrc0}
], [ test -e "${splsrc1}/spl.release.in"], [
@@ -317,6 +341,7 @@ AC_DEFUN([ZFS_AC_SPL], [
splsrc="[Not found]"
])
], [
+ [all_spl_sources="$withval"],
AS_IF([test "$splsrc" = "NONE"], [
splbuild=NONE
splsrcver=NONE
@@ -328,7 +353,10 @@ AC_DEFUN([ZFS_AC_SPL], [
AC_MSG_ERROR([
*** Please make sure the kmod spl devel package for your distribution
*** is installed then try again. If that fails you can specify the
- *** location of the spl source with the '--with-spl=PATH' option.])
+ *** location of the spl source with the '--with-spl=PATH' option.
+ *** The spl version must match the version of ZFS you are building,
+ *** ${VERSION}. Failed to find spl.release.in in the following:
+ $all_spl_sources])
])
dnl #
@@ -344,6 +372,10 @@ AC_DEFUN([ZFS_AC_SPL], [
dnl # SPL package.
dnl #
AC_MSG_CHECKING([spl build directory])
+
+ all_spl_config_locs="${splsrc}/${LINUX_VERSION}
+ ${splsrc}"
+
while true; do
AS_IF([test -z "$splbuild"], [
AS_IF([ test -e "${splsrc}/${LINUX_VERSION}/spl_config.h" ], [
@@ -370,7 +402,9 @@ AC_DEFUN([ZFS_AC_SPL], [
*** Please make sure the kmod spl devel <kernel> package for your
*** distribution is installed then try again. If that fails you
*** can specify the location of the spl objects with the
- *** '--with-spl-obj=PATH' option.])
+ *** '--with-spl-obj=PATH' option. Failed to find spl_config.h in
+ *** any of the following:
+ $all_spl_config_locs])
])
AC_MSG_CHECKING([spl source version])
@@ -443,6 +477,95 @@ AC_DEFUN([ZFS_AC_SPL], [
AC_SUBST(SPL_SYMBOLS)
])
+dnl #
+dnl # Detect the QAT module to be built against
+dnl # QAT provides hardware acceleration for data compression:
+dnl # https://01.org/intel-quickassist-technology
+dnl # * Download and install QAT driver from the above link
+dnl # * Start QAT driver in your system:
+dnl # service qat_service start
+dnl # * Enable QAT in ZFS, e.g.:
+dnl # ./configure --with-qat=<qat-driver-path>/QAT1.6
+dnl # make
+dnl # * Set GZIP compression in ZFS dataset:
+dnl #	zfs set compression=gzip <dataset>
+dnl # Then the data written to this ZFS pool is compressed
+dnl #	by the QAT accelerator automatically, and decompressed by
+dnl # QAT when read from the pool.
+dnl # * Get QAT hardware statistics by:
+dnl # cat /proc/icp_dh895xcc_dev/qat
+dnl # * To disable QAT:
+dnl # insmod zfs.ko zfs_qat_disable=1
+dnl #
+AC_DEFUN([ZFS_AC_QAT], [
+ AC_ARG_WITH([qat],
+ AS_HELP_STRING([--with-qat=PATH],
+ [Path to qat source]),
+ AS_IF([test "$withval" = "yes"],
+ AC_MSG_ERROR([--with-qat=PATH requires a PATH]),
+ [qatsrc="$withval"]))
+
+ AC_ARG_WITH([qat-obj],
+ AS_HELP_STRING([--with-qat-obj=PATH],
+ [Path to qat build objects]),
+ [qatbuild="$withval"])
+
+ AS_IF([test ! -z "${qatsrc}"], [
+ AC_MSG_CHECKING([qat source directory])
+ AC_MSG_RESULT([$qatsrc])
+ QAT_SRC="${qatsrc}/quickassist"
+ AS_IF([ test ! -e "$QAT_SRC/include/cpa.h"], [
+ AC_MSG_ERROR([
+ *** Please make sure the qat driver package is installed
+ *** and specify the location of the qat source with the
+ *** '--with-qat=PATH' option then try again. Failed to
+ *** find cpa.h in:
+ ${QAT_SRC}/include])
+ ])
+ ])
+
+ AS_IF([test ! -z "${qatsrc}"], [
+ AC_MSG_CHECKING([qat build directory])
+ AS_IF([test -z "$qatbuild"], [
+ qatbuild="${qatsrc}/build"
+ ])
+
+ AC_MSG_RESULT([$qatbuild])
+ QAT_OBJ=${qatbuild}
+ AS_IF([ ! test -e "$QAT_OBJ/icp_qa_al.ko"], [
+ AC_MSG_ERROR([
+ *** Please make sure the qat driver is installed then try again.
+ *** Failed to find icp_qa_al.ko in:
+ $QAT_OBJ])
+ ])
+
+ AC_SUBST(QAT_SRC)
+ AC_SUBST(QAT_OBJ)
+
+ AC_DEFINE(HAVE_QAT, 1,
+		[qat is enabled and exists])
+ ])
+
+ dnl #
+ dnl # Detect the name used for the QAT Module.symvers file.
+ dnl #
+ AS_IF([test ! -z "${qatsrc}"], [
+ AC_MSG_CHECKING([qat file for module symbols])
+ QAT_SYMBOLS=$QAT_SRC/lookaside/access_layer/src/Module.symvers
+
+ AS_IF([test -r $QAT_SYMBOLS], [
+ AC_MSG_RESULT([$QAT_SYMBOLS])
+ AC_SUBST(QAT_SYMBOLS)
+ ],[
+ AC_MSG_ERROR([
+ *** Please make sure the qat driver is installed then try again.
+ *** Failed to find Module.symvers in:
+ $QAT_SYMBOLS])
+ ])
+ ])
+ ])
+])
+
dnl #
dnl # Basic toolchain sanity check.
dnl #
diff --git a/zfs/config/toolchain-simd.m4 b/zfs/config/toolchain-simd.m4
new file mode 100644
index 000000000000..29abbbb5b6a3
--- /dev/null
+++ b/zfs/config/toolchain-simd.m4
@@ -0,0 +1,361 @@
+dnl #
+dnl # Checks if host toolchain supports SIMD instructions
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD], [
+ case "$host_cpu" in
+ x86_64 | x86 | i686)
+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE
+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE2
+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE3
+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSSE3
+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_1
+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_2
+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX
+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2
+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512F
+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512CD
+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512DQ
+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512BW
+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512IFMA
+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VBMI
+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512PF
+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512ER
+ ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VL
+ ;;
+ esac
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE], [
+ AC_MSG_CHECKING([whether host toolchain supports SSE])
+
+ AC_LINK_IFELSE([AC_LANG_SOURCE([[
+ void main()
+ {
+ __asm__ __volatile__("xorps %xmm0, %xmm1");
+ }
+ ]])], [
+ AC_DEFINE([HAVE_SSE], 1, [Define if host toolchain supports SSE])
+ AC_MSG_RESULT([yes])
+ ], [
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE2
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE2], [
+ AC_MSG_CHECKING([whether host toolchain supports SSE2])
+
+ AC_LINK_IFELSE([AC_LANG_SOURCE([[
+ void main()
+ {
+ __asm__ __volatile__("pxor %xmm0, %xmm1");
+ }
+ ]])], [
+ AC_DEFINE([HAVE_SSE2], 1, [Define if host toolchain supports SSE2])
+ AC_MSG_RESULT([yes])
+ ], [
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE3
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE3], [
+ AC_MSG_CHECKING([whether host toolchain supports SSE3])
+
+ AC_LINK_IFELSE([AC_LANG_SOURCE([[
+ void main()
+ {
+ char v[16];
+ __asm__ __volatile__("lddqu %0,%%xmm0" :: "m"(v[0]));
+ }
+ ]])], [
+ AC_DEFINE([HAVE_SSE3], 1, [Define if host toolchain supports SSE3])
+ AC_MSG_RESULT([yes])
+ ], [
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSSE3
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSSE3], [
+ AC_MSG_CHECKING([whether host toolchain supports SSSE3])
+
+ AC_LINK_IFELSE([AC_LANG_SOURCE([[
+ void main()
+ {
+ __asm__ __volatile__("pshufb %xmm0,%xmm1");
+ }
+ ]])], [
+ AC_DEFINE([HAVE_SSSE3], 1, [Define if host toolchain supports SSSE3])
+ AC_MSG_RESULT([yes])
+ ], [
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_1
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_1], [
+ AC_MSG_CHECKING([whether host toolchain supports SSE4.1])
+
+ AC_LINK_IFELSE([AC_LANG_SOURCE([[
+ void main()
+ {
+ __asm__ __volatile__("pmaxsb %xmm0,%xmm1");
+ }
+ ]])], [
+ AC_DEFINE([HAVE_SSE4_1], 1, [Define if host toolchain supports SSE4.1])
+ AC_MSG_RESULT([yes])
+ ], [
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_2
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_SSE4_2], [
+ AC_MSG_CHECKING([whether host toolchain supports SSE4.2])
+
+ AC_LINK_IFELSE([AC_LANG_SOURCE([[
+ void main()
+ {
+ __asm__ __volatile__("pcmpgtq %xmm0, %xmm1");
+ }
+ ]])], [
+ AC_DEFINE([HAVE_SSE4_2], 1, [Define if host toolchain supports SSE4.2])
+ AC_MSG_RESULT([yes])
+ ], [
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX], [
+ AC_MSG_CHECKING([whether host toolchain supports AVX])
+
+ AC_LINK_IFELSE([AC_LANG_SOURCE([[
+ void main()
+ {
+ char v[32];
+ __asm__ __volatile__("vmovdqa %0,%%ymm0" :: "m"(v[0]));
+ }
+ ]])], [
+ AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_AVX], 1, [Define if host toolchain supports AVX])
+ ], [
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX2], [
+ AC_MSG_CHECKING([whether host toolchain supports AVX2])
+
+ AC_LINK_IFELSE([AC_LANG_SOURCE([
+ [
+ void main()
+ {
+ __asm__ __volatile__("vpshufb %ymm0,%ymm1,%ymm2");
+ }
+ ]])], [
+ AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_AVX2], 1, [Define if host toolchain supports AVX2])
+ ], [
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512F
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512F], [
+ AC_MSG_CHECKING([whether host toolchain supports AVX512F])
+
+ AC_LINK_IFELSE([AC_LANG_SOURCE([
+ [
+ void main()
+ {
+ __asm__ __volatile__("vpandd %zmm0,%zmm1,%zmm2");
+ }
+ ]])], [
+ AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_AVX512F], 1, [Define if host toolchain supports AVX512F])
+ ], [
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512CD
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512CD], [
+ AC_MSG_CHECKING([whether host toolchain supports AVX512CD])
+
+ AC_LINK_IFELSE([AC_LANG_SOURCE([
+ [
+ void main()
+ {
+ __asm__ __volatile__("vplzcntd %zmm0,%zmm1");
+ }
+ ]])], [
+ AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_AVX512CD], 1, [Define if host toolchain supports AVX512CD])
+ ], [
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512DQ
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512DQ], [
+ AC_MSG_CHECKING([whether host toolchain supports AVX512DQ])
+
+ AC_LINK_IFELSE([AC_LANG_SOURCE([
+ [
+ void main()
+ {
+ __asm__ __volatile__("vandpd %zmm0,%zmm1,%zmm2");
+ }
+ ]])], [
+ AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_AVX512DQ], 1, [Define if host toolchain supports AVX512DQ])
+ ], [
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512BW
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512BW], [
+ AC_MSG_CHECKING([whether host toolchain supports AVX512BW])
+
+ AC_LINK_IFELSE([AC_LANG_SOURCE([
+ [
+ void main()
+ {
+ __asm__ __volatile__("vpshufb %zmm0,%zmm1,%zmm2");
+ }
+ ]])], [
+ AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_AVX512BW], 1, [Define if host toolchain supports AVX512BW])
+ ], [
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512IFMA
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512IFMA], [
+ AC_MSG_CHECKING([whether host toolchain supports AVX512IFMA])
+
+ AC_LINK_IFELSE([AC_LANG_SOURCE([
+ [
+ void main()
+ {
+ __asm__ __volatile__("vpmadd52luq %zmm0,%zmm1,%zmm2");
+ }
+ ]])], [
+ AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_AVX512IFMA], 1, [Define if host toolchain supports AVX512IFMA])
+ ], [
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VBMI
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VBMI], [
+ AC_MSG_CHECKING([whether host toolchain supports AVX512VBMI])
+
+ AC_LINK_IFELSE([AC_LANG_SOURCE([
+ [
+ void main()
+ {
+ __asm__ __volatile__("vpermb %zmm0,%zmm1,%zmm2");
+ }
+ ]])], [
+ AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_AVX512VBMI], 1, [Define if host toolchain supports AVX512VBMI])
+ ], [
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512PF
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512PF], [
+ AC_MSG_CHECKING([whether host toolchain supports AVX512PF])
+
+ AC_LINK_IFELSE([AC_LANG_SOURCE([
+ [
+ void main()
+ {
+ __asm__ __volatile__("vgatherpf0dps (%rsi,%zmm0,4){%k1}");
+ }
+ ]])], [
+ AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_AVX512PF], 1, [Define if host toolchain supports AVX512PF])
+ ], [
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512ER
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512ER], [
+ AC_MSG_CHECKING([whether host toolchain supports AVX512ER])
+
+ AC_LINK_IFELSE([AC_LANG_SOURCE([
+ [
+ void main()
+ {
+ __asm__ __volatile__("vexp2pd %zmm0,%zmm1");
+ }
+ ]])], [
+ AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_AVX512ER], 1, [Define if host toolchain supports AVX512ER])
+ ], [
+ AC_MSG_RESULT([no])
+ ])
+])
+
+dnl #
+dnl # ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VL
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_TOOLCHAIN_CAN_BUILD_AVX512VL], [
+ AC_MSG_CHECKING([whether host toolchain supports AVX512VL])
+
+ AC_LINK_IFELSE([AC_LANG_SOURCE([
+ [
+ void main()
+ {
+ __asm__ __volatile__("vpabsq %zmm0,%zmm1");
+ }
+ ]])], [
+ AC_MSG_RESULT([yes])
+ AC_DEFINE([HAVE_AVX512VL], 1, [Define if host toolchain supports AVX512VL])
+ ], [
+ AC_MSG_RESULT([no])
+ ])
+])
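Note these HAVE_* macros only certify that the build toolchain can emit the instructions; run-time dispatch still has to confirm the CPU executes them. A minimal user-space sketch of that split, assuming HAVE_AVX2 from the checks above:

    #include <stdio.h>

    int
    main(void)
    {
    #if defined(HAVE_AVX2)
            /* __builtin_cpu_supports() is a GCC/Clang builtin. */
            if (__builtin_cpu_supports("avx2")) {
                    printf("using the AVX2 implementation\n");
                    return (0);
            }
    #endif
            printf("using the scalar fallback\n");
            return (0);
    }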
diff --git a/zfs/config/user-arch.m4 b/zfs/config/user-arch.m4
deleted file mode 100644
index fcc566fc5db5..000000000000
--- a/zfs/config/user-arch.m4
+++ /dev/null
@@ -1,19 +0,0 @@
-dnl #
-dnl # Set the target arch for libspl atomic implementation
-dnl #
-AC_DEFUN([ZFS_AC_CONFIG_USER_ARCH], [
- AC_MSG_CHECKING(for target asm dir)
- TARGET_ARCH=`echo ${target_cpu} | sed -e s/i.86/i386/`
-
- case $TARGET_ARCH in
- i386|x86_64)
- TARGET_ASM_DIR=asm-${TARGET_ARCH}
- ;;
- *)
- TARGET_ASM_DIR=asm-generic
- ;;
- esac
-
- AC_SUBST([TARGET_ASM_DIR])
- AC_MSG_RESULT([$TARGET_ASM_DIR])
-])
diff --git a/zfs/config/user-libattr.m4 b/zfs/config/user-libattr.m4
new file mode 100644
index 000000000000..3298fd491174
--- /dev/null
+++ b/zfs/config/user-libattr.m4
@@ -0,0 +1,12 @@
+dnl #
+dnl # Check for libattr
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_USER_LIBATTR], [
+ LIBATTR=
+
+ AC_CHECK_HEADER([attr/xattr.h], [], [AC_MSG_FAILURE([
+ *** attr/xattr.h missing, libattr-devel package required])])
+
+ AC_SUBST([LIBATTR], ["-lattr"])
+ AC_DEFINE([HAVE_LIBATTR], 1, [Define if you have libattr])
+])
diff --git a/zfs/config/user-libblkid.m4 b/zfs/config/user-libblkid.m4
index 2dd26238c70e..5bc7f466ae7a 100644
--- a/zfs/config/user-libblkid.m4
+++ b/zfs/config/user-libblkid.m4
@@ -1,113 +1,13 @@
dnl #
-dnl # Check for ZFS support in libblkid. This test needs to check
-dnl # more than if the library exists because we expect there are
-dnl # at least 3 flavors of the library out in the wild:
-dnl #
-dnl # 1) blkid which has no ZFS support
-dnl # 2) blkid with ZFS support and a flawed method of probing
-dnl # 3) blkid with ZFS support and a working method of probing
-dnl #
-dnl # To handle this the check first validates that there is a version
-dnl # of the library installed. If there is it creates a simulated
-dnl # ZFS filesystem and then links a small test app which attempts
-dnl # to detect the simualated filesystem type. If it correctly
-dnl # identifies the filesystem as ZFS we can safely assume case 3).
-dnl # Otherwise we disable blkid support and resort to manual probing.
+dnl # Check for libblkid. Basic support for detecting ZFS pools
+dnl # has existed in blkid since 2008.
dnl #
AC_DEFUN([ZFS_AC_CONFIG_USER_LIBBLKID], [
- AC_ARG_WITH([blkid],
- [AS_HELP_STRING([--with-blkid],
- [support blkid caching @<:@default=check@:>@])],
- [],
- [with_blkid=check])
-
LIBBLKID=
- AS_IF([test "x$with_blkid" = xyes],
- [
- AC_SUBST([LIBBLKID], ["-lblkid"])
- AC_DEFINE([HAVE_LIBBLKID], 1,
- [Define if you have libblkid])
- ])
-
- AS_IF([test "x$with_blkid" = xcheck],
- [
- AC_CHECK_LIB([blkid], [blkid_get_cache],
- [
- AC_MSG_CHECKING([for blkid zfs support])
-
- ZFS_DEV=`mktemp`
- truncate -s 64M $ZFS_DEV
- echo -en "\x0c\xb1\xba\0\0\0\0\0" | \
- dd of=$ZFS_DEV bs=1k count=8 \
- seek=128 conv=notrunc &>/dev/null \
- >/dev/null 2>/dev/null
- echo -en "\x0c\xb1\xba\0\0\0\0\0" | \
- dd of=$ZFS_DEV bs=1k count=8 \
- seek=132 conv=notrunc &>/dev/null \
- >/dev/null 2>/dev/null
- echo -en "\x0c\xb1\xba\0\0\0\0\0" | \
- dd of=$ZFS_DEV bs=1k count=8 \
- seek=136 conv=notrunc &>/dev/null \
- >/dev/null 2>/dev/null
- echo -en "\x0c\xb1\xba\0\0\0\0\0" | \
- dd of=$ZFS_DEV bs=1k count=8 \
- seek=140 conv=notrunc &>/dev/null \
- >/dev/null 2>/dev/null
-
- saved_LIBS="$LIBS"
- LIBS="-lblkid"
-
- AC_RUN_IFELSE([AC_LANG_PROGRAM(
- [
- #include <stdio.h>
- #include <stdlib.h>
- #include <blkid/blkid.h>
- ],
- [
- blkid_cache cache;
- char *value;
-
- if (blkid_get_cache(&cache, NULL) < 0)
- return 1;
-
- value = blkid_get_tag_value(cache, "TYPE",
- "$ZFS_DEV");
- if (!value) {
- blkid_put_cache(cache);
- return 2;
- }
-
- if (strcmp(value, "zfs_member")) {
- free(value);
- blkid_put_cache(cache);
- return 0;
- }
- free(value);
- blkid_put_cache(cache);
- ])],
- [
- rm -f $ZFS_DEV
- AC_MSG_RESULT([yes])
- AC_SUBST([LIBBLKID], ["-lblkid"])
- AC_DEFINE([HAVE_LIBBLKID], 1,
- [Define if you have libblkid])
- ],
- [
- rm -f $ZFS_DEV
- AC_MSG_RESULT([no])
- AS_IF([test "x$with_blkid" != xcheck],
- [AC_MSG_FAILURE(
- [--with-blkid given but unavailable])])
- ])
+ AC_CHECK_HEADER([blkid/blkid.h], [], [AC_MSG_FAILURE([
+ *** blkid.h missing, libblkid-devel package required])])
- LIBS="$saved_LIBS"
- ],
- [
- AS_IF([test "x$with_blkid" != xcheck],
- [AC_MSG_FAILURE(
- [--with-blkid given but unavailable])])
- ]
- [])
- ])
+ AC_SUBST([LIBBLKID], ["-lblkid"])
+ AC_DEFINE([HAVE_LIBBLKID], 1, [Define if you have libblkid])
])
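The simplified check only verifies the development header is present, on the grounds that any blkid shipped in the last decade can identify ZFS members. For reference, a small stand-alone probe using the same calls the deleted test exercised (link with -lblkid):

    #include <stdio.h>
    #include <stdlib.h>
    #include <blkid/blkid.h>

    /* Print the filesystem TYPE blkid reports for a device; members
     * of a ZFS pool report "zfs_member". */
    int
    main(int argc, char **argv)
    {
            blkid_cache cache;
            char *type;

            if (argc < 2 || blkid_get_cache(&cache, NULL) < 0)
                    return (1);

            type = blkid_get_tag_value(cache, "TYPE", argv[1]);
            printf("%s: %s\n", argv[1], type ? type : "unknown");
            free(type);
            blkid_put_cache(cache);
            return (0);
    }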
diff --git a/zfs/config/user-libtirpc.m4 b/zfs/config/user-libtirpc.m4
new file mode 100644
index 000000000000..5f929061f318
--- /dev/null
+++ b/zfs/config/user-libtirpc.m4
@@ -0,0 +1,30 @@
+dnl #
+dnl # Check for libtirpc - may be needed for xdr functionality
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_USER_LIBTIRPC], [
+ AC_ARG_WITH([tirpc],
+ [AS_HELP_STRING([--with-tirpc],
+ [use tirpc for xdr encoding @<:@default=check@:>@])],
+ [],
+ [with_tirpc=check])
+
+ LIBTIRPC=
+ LIBTIRPC_CFLAGS=
+
+ AS_IF([test "x$with_tirpc" != xno],
+ [AC_CHECK_LIB([tirpc], [xdrmem_create],
+ [AC_SUBST([LIBTIRPC], [-ltirpc])
+ AC_SUBST([LIBTIRPC_CFLAGS], [-I/usr/include/tirpc])
+ AC_DEFINE([HAVE_LIBTIRPC], [1], [Define if you have libtirpc])
+ ],
+ [if test "x$with_tirpc" != xcheck; then
+ AC_MSG_FAILURE(
+ [--with-tirpc was given, but test for tirpc failed])
+ fi
+ AC_SEARCH_LIBS([xdrmem_create], [tirpc], [], [
+ AC_MSG_FAILURE([xdrmem_create() requires tirpc or libc])])
+ ])],
+ [AC_SEARCH_LIBS([xdrmem_create], [tirpc], [], [
+ AC_MSG_FAILURE([xdrmem_create() requires libc])])
+ ])
+])
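For context, the probed symbol xdrmem_create() sets up XDR encoding into a caller-supplied buffer; this is the piece of Sun RPC that glibc stopped shipping by default, which is why the tirpc fallback exists. A minimal sketch, compiled with $(LIBTIRPC_CFLAGS) and linked against $(LIBTIRPC) when HAVE_LIBTIRPC is set:

    #include <rpc/xdr.h>

    /* Encode one int into buf in XDR wire format; 0 on success. */
    int
    example_xdr_encode_int(char *buf, unsigned int len, int value)
    {
            XDR xdr;

            xdrmem_create(&xdr, buf, len, XDR_ENCODE);
            return (xdr_int(&xdr, &value) ? 0 : -1);
    }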
diff --git a/zfs/config/user-libudev.m4 b/zfs/config/user-libudev.m4
new file mode 100644
index 000000000000..9b7454927ea7
--- /dev/null
+++ b/zfs/config/user-libudev.m4
@@ -0,0 +1,19 @@
+dnl #
+dnl # Check for libudev - needed for vdev auto-online and auto-replace
+dnl #
+AC_DEFUN([ZFS_AC_CONFIG_USER_LIBUDEV], [
+ LIBUDEV=
+
+ AC_CHECK_HEADER([libudev.h], [
+ user_libudev=yes
+ AC_SUBST([LIBUDEV], ["-ludev"])
+ AC_DEFINE([HAVE_LIBUDEV], 1, [Define if you have libudev])
+ ], [
+ user_libudev=no
+ ])
+
+ AC_SEARCH_LIBS([udev_device_get_is_initialized], [udev], [
+ AC_DEFINE([HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED], 1, [
+ Define if udev_device_get_is_initialized is available])], [])
+
+])
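A sketch of the optional symbol in use: udev_device_get_is_initialized() reports whether udev has finished processing a device, which is what makes auto-online/auto-replace decisions safe to act on. Error handling trimmed for brevity, function name invented:

    #include <libudev.h>

    /* Nonzero once udev has initialized the device at syspath. */
    int
    example_device_ready(const char *syspath)
    {
            struct udev *udev = udev_new();
            struct udev_device *dev = udev == NULL ? NULL :
                udev_device_new_from_syspath(udev, syspath);
            int ready = dev ? udev_device_get_is_initialized(dev) : 0;

            udev_device_unref(dev);
            udev_unref(udev);
            return (ready);
    }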
diff --git a/zfs/config/user-libuuid.m4 b/zfs/config/user-libuuid.m4
index aba375a22820..f0da671a3f61 100644
--- a/zfs/config/user-libuuid.m4
+++ b/zfs/config/user-libuuid.m4
@@ -7,10 +7,10 @@ AC_DEFUN([ZFS_AC_CONFIG_USER_LIBUUID], [
AC_CHECK_HEADER([uuid/uuid.h], [], [AC_MSG_FAILURE([
*** uuid/uuid.h missing, libuuid-devel package required])])
- AC_CHECK_LIB([uuid], [uuid_generate], [], [AC_MSG_FAILURE([
+ AC_SEARCH_LIBS([uuid_generate], [uuid], [], [AC_MSG_FAILURE([
*** uuid_generate() missing, libuuid-devel package required])])
- AC_CHECK_LIB([uuid], [uuid_is_null], [], [AC_MSG_FAILURE([
+ AC_SEARCH_LIBS([uuid_is_null], [uuid], [], [AC_MSG_FAILURE([
*** uuid_is_null() missing, libuuid-devel package required])])
AC_SUBST([LIBUUID], ["-luuid"])
diff --git a/zfs/config/user-zlib.m4 b/zfs/config/user-zlib.m4
index a48361662e4f..82c0962e4517 100644
--- a/zfs/config/user-zlib.m4
+++ b/zfs/config/user-zlib.m4
@@ -7,13 +7,13 @@ AC_DEFUN([ZFS_AC_CONFIG_USER_ZLIB], [
AC_CHECK_HEADER([zlib.h], [], [AC_MSG_FAILURE([
*** zlib.h missing, zlib-devel package required])])
- AC_CHECK_LIB([z], [compress2], [], [AC_MSG_FAILURE([
+ AC_SEARCH_LIBS([compress2], [z], [], [AC_MSG_FAILURE([
*** compress2() missing, zlib-devel package required])])
- AC_CHECK_LIB([z], [uncompress], [], [AC_MSG_FAILURE([
+ AC_SEARCH_LIBS([uncompress], [z], [], [AC_MSG_FAILURE([
*** uncompress() missing, zlib-devel package required])])
- AC_CHECK_LIB([z], [crc32], [], [AC_MSG_FAILURE([
+ AC_SEARCH_LIBS([crc32], [z], [], [AC_MSG_FAILURE([
*** crc32() missing, zlib-devel package required])])
AC_SUBST([ZLIB], ["-lz"])
diff --git a/zfs/config/user.m4 b/zfs/config/user.m4
index 87323937a589..2b033f5a57c5 100644
--- a/zfs/config/user.m4
+++ b/zfs/config/user.m4
@@ -8,16 +8,31 @@ AC_DEFUN([ZFS_AC_CONFIG_USER], [
ZFS_AC_CONFIG_USER_SYSTEMD
ZFS_AC_CONFIG_USER_SYSVINIT
ZFS_AC_CONFIG_USER_DRACUT
- ZFS_AC_CONFIG_USER_ARCH
ZFS_AC_CONFIG_USER_ZLIB
ZFS_AC_CONFIG_USER_LIBUUID
+ ZFS_AC_CONFIG_USER_LIBTIRPC
ZFS_AC_CONFIG_USER_LIBBLKID
+ ZFS_AC_CONFIG_USER_LIBATTR
+ ZFS_AC_CONFIG_USER_LIBUDEV
ZFS_AC_CONFIG_USER_FRAME_LARGER_THAN
ZFS_AC_CONFIG_USER_RUNSTATEDIR
ZFS_AC_CONFIG_USER_MAKEDEV_IN_SYSMACROS
ZFS_AC_CONFIG_USER_MAKEDEV_IN_MKDEV
ZFS_AC_CONFIG_USER_NO_FORMAT_TRUNCATION
-dnl #
-dnl # Checks for library functions
+
+ ZFS_AC_TEST_FRAMEWORK
+
AC_CHECK_FUNCS([mlockall])
])
+
+dnl #
+dnl # Set up the environment for the ZFS Test Suite. Currently only
+dnl # Linux style systems are supported but this infrastructure can
+dnl # be extended to support other platforms if needed.
+dnl #
+AC_DEFUN([ZFS_AC_TEST_FRAMEWORK], [
+ ZONENAME="echo global"
+ AC_SUBST(ZONENAME)
+
+ AC_SUBST(RM)
+])
diff --git a/zfs/config/zfs-build.m4 b/zfs/config/zfs-build.m4
index facd30282701..7651dc2c12e4 100644
--- a/zfs/config/zfs-build.m4
+++ b/zfs/config/zfs-build.m4
@@ -37,38 +37,14 @@ AC_DEFUN([ZFS_AC_DEBUG], [
AC_MSG_RESULT([$enable_debug])
])
-AC_DEFUN([ZFS_AC_DEBUG_DMU_TX], [
- AC_ARG_ENABLE([debug-dmu-tx],
- [AS_HELP_STRING([--enable-debug-dmu-tx],
- [Enable dmu tx validation @<:@default=no@:>@])],
- [],
- [enable_debug_dmu_tx=no])
-
- AS_IF([test "x$enable_debug_dmu_tx" = xyes],
- [
- KERNELCPPFLAGS="${KERNELCPPFLAGS} -DDEBUG_DMU_TX"
- DEBUG_DMU_TX="_with_debug_dmu_tx"
- AC_DEFINE([DEBUG_DMU_TX], [1],
- [Define to 1 to enabled dmu tx validation])
- ],
- [
- DEBUG_DMU_TX="_without_debug_dmu_tx"
- ])
-
- AC_SUBST(DEBUG_DMU_TX)
- AC_MSG_CHECKING([whether dmu tx validation is enabled])
- AC_MSG_RESULT([$enable_debug_dmu_tx])
-])
-
AC_DEFUN([ZFS_AC_CONFIG_ALWAYS], [
ZFS_AC_CONFIG_ALWAYS_NO_UNUSED_BUT_SET_VARIABLE
ZFS_AC_CONFIG_ALWAYS_NO_BOOL_COMPARE
+ ZFS_AC_CONFIG_ALWAYS_TOOLCHAIN_SIMD
+ ZFS_AC_CONFIG_ALWAYS_ARCH
])
AC_DEFUN([ZFS_AC_CONFIG], [
- TARGET_ASM_DIR=asm-generic
- AC_SUBST(TARGET_ASM_DIR)
-
ZFS_CONFIG=all
AC_ARG_WITH([config],
AS_HELP_STRING([--with-config=CONFIG],
@@ -87,10 +63,10 @@ AC_DEFUN([ZFS_AC_CONFIG], [
ZFS_AC_CONFIG_ALWAYS
case "$ZFS_CONFIG" in
- user) ZFS_AC_CONFIG_USER ;;
kernel) ZFS_AC_CONFIG_KERNEL ;;
- all) ZFS_AC_CONFIG_KERNEL
- ZFS_AC_CONFIG_USER ;;
+ user) ZFS_AC_CONFIG_USER ;;
+ all) ZFS_AC_CONFIG_USER
+ ZFS_AC_CONFIG_KERNEL ;;
srpm) ;;
*)
AC_MSG_RESULT([Error!])
@@ -99,10 +75,15 @@ AC_DEFUN([ZFS_AC_CONFIG], [
esac
AM_CONDITIONAL([CONFIG_USER],
- [test "$ZFS_CONFIG" = user -o "$ZFS_CONFIG" = all])
+ [test "$ZFS_CONFIG" = user -o "$ZFS_CONFIG" = all])
AM_CONDITIONAL([CONFIG_KERNEL],
- [test "$ZFS_CONFIG" = kernel -o "$ZFS_CONFIG" = all] &&
- [test "x$enable_linux_builtin" != xyes ])
+ [test "$ZFS_CONFIG" = kernel -o "$ZFS_CONFIG" = all] &&
+ [test "x$enable_linux_builtin" != xyes ])
+ AM_CONDITIONAL([WANT_DEVNAME2DEVID],
+ [test "x$user_libudev" = xyes ])
+ AM_CONDITIONAL([CONFIG_QAT],
+ [test "$ZFS_CONFIG" = kernel -o "$ZFS_CONFIG" = all] &&
+ [test "x$qatsrc" != x ])
])
dnl #
@@ -139,7 +120,7 @@ AC_DEFUN([ZFS_AC_RPM], [
AC_MSG_RESULT([$HAVE_RPMBUILD])
])
- RPM_DEFINE_COMMON='--define "$(DEBUG_ZFS) 1" --define "$(DEBUG_DMU_TX) 1"'
+ RPM_DEFINE_COMMON='--define "$(DEBUG_ZFS) 1"'
RPM_DEFINE_UTIL='--define "_dracutdir $(dracutdir)" --define "_udevdir $(udevdir)" --define "_udevruledir $(udevruledir)" --define "_initconfdir $(DEFAULT_INITCONF_DIR)" $(DEFINE_INITRAMFS)'
RPM_DEFINE_KMOD='--define "kernels $(LINUX_VERSION)" --define "require_spldir $(SPL)" --define "require_splobj $(SPL_OBJ)" --define "ksrc $(LINUX)" --define "kobj $(LINUX_OBJ)"'
RPM_DEFINE_DKMS=
@@ -266,6 +247,8 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [
VENDOR=ubuntu ;
elif test -f /etc/debian_version ; then
VENDOR=debian ;
+ elif test -f /etc/alpine-release ; then
+ VENDOR=alpine ;
else
VENDOR= ;
fi
@@ -278,6 +261,7 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [
redhat) DEFAULT_PACKAGE=rpm ;;
fedora) DEFAULT_PACKAGE=rpm ;;
gentoo) DEFAULT_PACKAGE=tgz ;;
+ alpine) DEFAULT_PACKAGE=tgz ;;
arch) DEFAULT_PACKAGE=tgz ;;
sles) DEFAULT_PACKAGE=rpm ;;
slackware) DEFAULT_PACKAGE=tgz ;;
@@ -299,7 +283,8 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [
toss) DEFAULT_INIT_SCRIPT=redhat ;;
redhat) DEFAULT_INIT_SCRIPT=redhat ;;
fedora) DEFAULT_INIT_SCRIPT=fedora ;;
- gentoo) DEFAULT_INIT_SCRIPT=gentoo ;;
+ gentoo) DEFAULT_INIT_SCRIPT=openrc ;;
+ alpine) DEFAULT_INIT_SCRIPT=openrc ;;
arch) DEFAULT_INIT_SCRIPT=lsb ;;
sles) DEFAULT_INIT_SCRIPT=lsb ;;
slackware) DEFAULT_INIT_SCRIPT=lsb ;;
@@ -313,6 +298,7 @@ AC_DEFUN([ZFS_AC_DEFAULT_PACKAGE], [
AC_MSG_CHECKING([default init config direectory])
case "$VENDOR" in
+ alpine) DEFAULT_INITCONF_DIR=/etc/conf.d ;;
gentoo) DEFAULT_INITCONF_DIR=/etc/conf.d ;;
toss) DEFAULT_INITCONF_DIR=/etc/sysconfig ;;
redhat) DEFAULT_INITCONF_DIR=/etc/sysconfig ;;
diff --git a/zfs/config/zfs-meta.m4 b/zfs/config/zfs-meta.m4
index d174cccc7a1e..b8e26c492ac3 100644
--- a/zfs/config/zfs-meta.m4
+++ b/zfs/config/zfs-meta.m4
@@ -98,6 +98,14 @@ AC_DEFUN([ZFS_AC_META], [
if test -n "${_release}"; then
ZFS_META_RELEASE=${_release}
_zfs_ac_meta_type="git describe"
+ else
+ _match="${ZFS_META_NAME}-${ZFS_META_VERSION}-${ZFS_META_RELEASE}"
+ _alias=$(git describe --match=${_match} 2>/dev/null)
+ _release=$(echo ${_alias}|cut -f3- -d'-'|sed 's/-/_/g')
+ if test -n "${_release}"; then
+ ZFS_META_RELEASE=${_release}
+ _zfs_ac_meta_type="git describe"
+ fi
fi
fi
diff --git a/zfs/configure b/zfs/configure
index 9634569bb611..27191632abc1 100755
--- a/zfs/configure
+++ b/zfs/configure
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for zfs 0.6.5.11.
+# Generated by GNU Autoconf 2.69 for zfs 0.7.3.
#
#
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
@@ -587,8 +587,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='zfs'
PACKAGE_TARNAME='zfs'
-PACKAGE_VERSION='0.6.5.11'
-PACKAGE_STRING='zfs 0.6.5.11'
+PACKAGE_VERSION='0.7.3'
+PACKAGE_STRING='zfs 0.7.3'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''
@@ -632,27 +632,26 @@ ac_subst_vars='am__EXEEXT_FALSE
am__EXEEXT_TRUE
LTLIBOBJS
LIBOBJS
-DEBUG_DMU_TX
DEBUG_ZFS
DEBUG_STACKFLAGS
DEBUG_CFLAGS
+CONFIG_QAT_FALSE
+CONFIG_QAT_TRUE
+WANT_DEVNAME2DEVID_FALSE
+WANT_DEVNAME2DEVID_TRUE
CONFIG_KERNEL_FALSE
CONFIG_KERNEL_TRUE
CONFIG_USER_FALSE
CONFIG_USER_TRUE
-KERNELCPPFLAGS
-KERNELMAKE_PARAMS
-SPL_SYMBOLS
-SPL_VERSION
-SPL_OBJ
-SPL
-LINUX_SYMBOLS
-LINUX_VERSION
-LINUX_OBJ
-LINUX
+RM
+ZONENAME
NO_FORMAT_TRUNCATION
FRAME_LARGER_THAN
+LIBUDEV
+LIBATTR
LIBBLKID
+LIBTIRPC_CFLAGS
+LIBTIRPC
LIBUUID
ZLIB
dracutdir
@@ -665,10 +664,29 @@ ZFS_INIT_SYSTEMD
udevruledir
udevdir
mounthelperdir
+KERNELCPPFLAGS
+KERNELMAKE_PARAMS
+QAT_SYMBOLS
+QAT_OBJ
+QAT_SRC
+SPL_SYMBOLS
+SPL_VERSION
+SPL_OBJ
+SPL
+LINUX_SYMBOLS
+LINUX_VERSION
+LINUX_OBJ
+LINUX
+TARGET_ASM_GENERIC_FALSE
+TARGET_ASM_GENERIC_TRUE
+TARGET_ASM_I386_FALSE
+TARGET_ASM_I386_TRUE
+TARGET_ASM_X86_64_FALSE
+TARGET_ASM_X86_64_TRUE
+TARGET_ASM_DIR
NO_BOOL_COMPARE
NO_UNUSED_BUT_SET_VARIABLE
ZFS_CONFIG
-TARGET_ASM_DIR
ALIEN_VERSION
ALIEN
HAVE_ALIEN
@@ -853,6 +871,13 @@ enable_libtool_lock
with_spec
with_config
enable_linux_builtin
+with_linux
+with_linux_obj
+with_spl
+with_spl_obj
+with_spl_timeout
+with_qat
+with_qat_obj
with_mounthelperdir
with_udevdir
with_udevruledir
@@ -862,14 +887,8 @@ with_systemdpresetdir
with_systemdmodulesloaddir
enable_sysvinit
with_dracutdir
-with_blkid
-with_linux
-with_linux_obj
-with_spl
-with_spl_obj
-with_spl_timeout
+with_tirpc
enable_debug
-enable_debug_dmu_tx
'
ac_precious_vars='build_alias
host_alias
@@ -1433,7 +1452,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
-\`configure' configures zfs 0.6.5.11 to adapt to many kinds of systems.
+\`configure' configures zfs 0.7.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
@@ -1505,7 +1524,7 @@ fi
if test -n "$ac_init_help"; then
case $ac_init_help in
- short | recursive ) echo "Configuration of zfs 0.6.5.11:";;
+ short | recursive ) echo "Configuration of zfs 0.7.3:";;
esac
cat <<\_ACEOF
@@ -1532,7 +1551,6 @@ Optional Features:
--enable-systemd install systemd unit/preset files [[default: yes]]
--enable-sysvinit install SysV init scripts [default: yes]
--enable-debug Enable generic debug support [default=no]
- --enable-debug-dmu-tx Enable dmu tx validation [default=no]
Optional Packages:
--with-PACKAGE[=ARG] use PACKAGE [ARG=yes]
@@ -1547,6 +1565,13 @@ Optional Packages:
compiler's sysroot if not specified).
--with-spec=SPEC Spec files 'generic|redhat'
--with-config=CONFIG Config file 'kernel|user|all|srpm'
+ --with-linux=PATH Path to kernel source
+ --with-linux-obj=PATH Path to kernel build objects
+ --with-spl=PATH Path to spl source
+ --with-spl-obj=PATH Path to spl build objects
+ --with-spl-timeout=SECS Wait SECS for SPL header and symver file [default=0]
+ --with-qat=PATH Path to qat source
+ --with-qat-obj=PATH Path to qat build objects
--with-mounthelperdir=DIR
install mount.zfs in dir [[/sbin]]
--with-udevdir=DIR install udev helpers [default=check]
@@ -1561,12 +1586,7 @@ Optional Packages:
install systemd module load files into dir
[[/usr/lib/modules-load.d]]
--with-dracutdir=DIR install dracut helpers [default=check]
- --with-blkid support blkid caching [default=check]
- --with-linux=PATH Path to kernel source
- --with-linux-obj=PATH Path to kernel build objects
- --with-spl=PATH Path to spl source
- --with-spl-obj=PATH Path to spl build objects
- --with-spl-timeout=SECS Wait SECS for SPL header and symver file [default=0]
+ --with-tirpc use tirpc for xdr encoding [default=check]
Some influential environment variables:
CC C compiler command
@@ -1648,7 +1668,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
-zfs configure 0.6.5.11
+zfs configure 0.7.3
generated by GNU Autoconf 2.69
Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2013,7 +2033,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
-It was created by zfs $as_me 0.6.5.11, which was
+It was created by zfs $as_me 0.7.3, which was
generated by GNU Autoconf 2.69. Invocation command line was
$ $0 $@
@@ -2472,6 +2492,14 @@ _ACEOF
if test -n "${_release}"; then
ZFS_META_RELEASE=${_release}
_zfs_ac_meta_type="git describe"
+ else
+ _match="${ZFS_META_NAME}-${ZFS_META_VERSION}-${ZFS_META_RELEASE}"
+ _alias=$(git describe --match=${_match} 2>/dev/null)
+ _release=$(echo ${_alias}|cut -f3- -d'-'|sed 's/-/_/g')
+ if test -n "${_release}"; then
+ ZFS_META_RELEASE=${_release}
+ _zfs_ac_meta_type="git describe"
+ fi
fi
fi
@@ -3172,7 +3200,7 @@ fi
# Define the identity of the package.
PACKAGE='zfs'
- VERSION='0.6.5.11'
+ VERSION='0.7.3'
cat >>confdefs.h <<_ACEOF
@@ -12457,6 +12485,7 @@ fi
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking zfs author" >&5
$as_echo_n "checking zfs author... " >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ZFS_META_AUTHOR" >&5
@@ -12491,6 +12520,8 @@ $as_echo_n "checking linux distribution... " >&6; }
VENDOR=ubuntu ;
elif test -f /etc/debian_version ; then
VENDOR=debian ;
+ elif test -f /etc/alpine-release ; then
+ VENDOR=alpine ;
else
VENDOR= ;
fi
@@ -12505,6 +12536,7 @@ $as_echo_n "checking default package type... " >&6; }
redhat) DEFAULT_PACKAGE=rpm ;;
fedora) DEFAULT_PACKAGE=rpm ;;
gentoo) DEFAULT_PACKAGE=tgz ;;
+ alpine) DEFAULT_PACKAGE=tgz ;;
arch) DEFAULT_PACKAGE=tgz ;;
sles) DEFAULT_PACKAGE=rpm ;;
slackware) DEFAULT_PACKAGE=tgz ;;
@@ -12530,7 +12562,8 @@ $as_echo_n "checking default init script type... " >&6; }
toss) DEFAULT_INIT_SCRIPT=redhat ;;
redhat) DEFAULT_INIT_SCRIPT=redhat ;;
fedora) DEFAULT_INIT_SCRIPT=fedora ;;
- gentoo) DEFAULT_INIT_SCRIPT=gentoo ;;
+ gentoo) DEFAULT_INIT_SCRIPT=openrc ;;
+ alpine) DEFAULT_INIT_SCRIPT=openrc ;;
arch) DEFAULT_INIT_SCRIPT=lsb ;;
sles) DEFAULT_INIT_SCRIPT=lsb ;;
slackware) DEFAULT_INIT_SCRIPT=lsb ;;
@@ -12546,6 +12579,7 @@ $as_echo "$DEFAULT_INIT_SCRIPT" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking default init config direectory" >&5
$as_echo_n "checking default init config direectory... " >&6; }
case "$VENDOR" in
+ alpine) DEFAULT_INITCONF_DIR=/etc/conf.d ;;
gentoo) DEFAULT_INITCONF_DIR=/etc/conf.d ;;
toss) DEFAULT_INITCONF_DIR=/etc/sysconfig ;;
redhat) DEFAULT_INITCONF_DIR=/etc/sysconfig ;;
@@ -12610,7 +12644,7 @@ $as_echo "$HAVE_RPMBUILD" >&6; }
fi
- RPM_DEFINE_COMMON='--define "$(DEBUG_ZFS) 1" --define "$(DEBUG_DMU_TX) 1"'
+ RPM_DEFINE_COMMON='--define "$(DEBUG_ZFS) 1"'
RPM_DEFINE_UTIL='--define "_dracutdir $(dracutdir)" --define "_udevdir $(udevdir)" --define "_udevruledir $(udevruledir)" --define "_initconfdir $(DEFAULT_INITCONF_DIR)" $(DEFINE_INITRAMFS)'
RPM_DEFINE_KMOD='--define "kernels $(LINUX_VERSION)" --define "require_spldir $(SPL)" --define "require_splobj $(SPL_OBJ)" --define "ksrc $(LINUX)" --define "kobj $(LINUX_OBJ)"'
RPM_DEFINE_DKMS=
@@ -12724,9 +12758,6 @@ fi
- TARGET_ASM_DIR=asm-generic
-
-
ZFS_CONFIG=all
# Check whether --with-config was given.
@@ -12822,750 +12853,488 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ case "$host_cpu" in
+ x86_64 | x86 | i686)
- case "$ZFS_CONFIG" in
- user)
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dkms.conf file" >&5
-$as_echo_n "checking for dkms.conf file... " >&6; }
- if test -e dkms.conf; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether host toolchain supports SSE" >&5
+$as_echo_n "checking whether host toolchain supports SSE... " >&6; }
- as_fn_error $? "
- *** ZFS should not be manually built in the DKMS source tree.
- *** Remove all ZFS packages before compiling the ZoL sources.
- *** Running \"make install\" breaks ZFS packages." "$LINENO" 5
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
-else
+ void main()
+ {
+ __asm__ __volatile__("xorps %xmm0, %xmm1");
+ }
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
-$as_echo "not found" >&6; }
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
-fi
+$as_echo "#define HAVE_SSE 1" >>confdefs.h
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
-# Check whether --with-mounthelperdir was given.
-if test "${with_mounthelperdir+set}" = set; then :
- withval=$with_mounthelperdir; mounthelperdir=$withval
else
- mounthelperdir=/sbin
-fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether host toolchain supports SSE2" >&5
+$as_echo_n "checking whether host toolchain supports SSE2... " >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for udev directories" >&5
-$as_echo_n "checking for udev directories... " >&6; }
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
-# Check whether --with-udevdir was given.
-if test "${with_udevdir+set}" = set; then :
- withval=$with_udevdir; udevdir=$withval
-else
- udevdir=check
-fi
+ void main()
+ {
+ __asm__ __volatile__("pxor %xmm0, %xmm1");
+ }
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
- if test "x$udevdir" = xcheck; then :
- path1=/lib/udev
- path2=/usr/lib/udev
- default=$path2
+$as_echo "#define HAVE_SSE2 1" >>confdefs.h
- if test -d "$path1"; then :
- udevdir="$path1"
-else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
- if test -d "$path2"; then :
- udevdir="$path2"
else
- udevdir="$default"
-fi
-fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
-# Check whether --with-udevruledir was given.
-if test "${with_udevruledir+set}" = set; then :
- withval=$with_udevruledir; udevruledir=$withval
-else
- udevruledir="${udevdir}/rules.d"
-fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether host toolchain supports SSE3" >&5
+$as_echo_n "checking whether host toolchain supports SSE3... " >&6; }
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+ void main()
+ {
+ char v[16];
+ __asm__ __volatile__("lddqu %0,%%xmm0" :: "m"(v[0]));
+ }
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $udevdir;$udevruledir" >&5
-$as_echo "$udevdir;$udevruledir" >&6; }
+$as_echo "#define HAVE_SSE3 1" >>confdefs.h
- # Check whether --enable-systemd was given.
-if test "${enable_systemd+set}" = set; then :
- enableval=$enable_systemd;
-else
- enable_systemd=yes
-fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
-# Check whether --with-systemdunitdir was given.
-if test "${with_systemdunitdir+set}" = set; then :
- withval=$with_systemdunitdir; systemdunitdir=$withval
-else
- systemdunitdir=/usr/lib/systemd/system
fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether host toolchain supports SSSE3" >&5
+$as_echo_n "checking whether host toolchain supports SSSE3... " >&6; }
-# Check whether --with-systemdpresetdir was given.
-if test "${with_systemdpresetdir+set}" = set; then :
- withval=$with_systemdpresetdir; systemdpresetdir=$withval
-else
- systemdpresetdir=/usr/lib/systemd/system-preset
-fi
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+ void main()
+ {
+ __asm__ __volatile__("pshufb %xmm0,%xmm1");
+ }
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
-# Check whether --with-systemdmodulesloaddir was given.
-if test "${with_systemdmodulesloaddir+set}" = set; then :
- withval=$with_systemdmodulesloaddir; systemdmoduleloaddir=$withval
-else
- systemdmodulesloaddir=/usr/lib/modules-load.d
-fi
+$as_echo "#define HAVE_SSSE3 1" >>confdefs.h
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
- if test "x$enable_systemd" = xyes; then :
+else
- ZFS_INIT_SYSTEMD=systemd
- ZFS_MODULE_LOAD=modules-load.d
- modulesloaddir=$systemdmodulesloaddir
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether host toolchain supports SSE4.1" >&5
+$as_echo_n "checking whether host toolchain supports SSE4.1... " >&6; }
+
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+ void main()
+ {
+ __asm__ __volatile__("pmaxsb %xmm0,%xmm1");
+ }
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+$as_echo "#define HAVE_SSE4_1 1" >>confdefs.h
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
- # Check whether --enable-sysvinit was given.
-if test "${enable_sysvinit+set}" = set; then :
- enableval=$enable_sysvinit;
else
- enable_sysvinit=yes
-fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
- if test "x$enable_sysvinit" = xyes; then :
- ZFS_INIT_SYSV=init.d
fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether host toolchain supports SSE4.2" >&5
+$as_echo_n "checking whether host toolchain supports SSE4.2... " >&6; }
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dracut directory" >&5
-$as_echo_n "checking for dracut directory... " >&6; }
-
-# Check whether --with-dracutdir was given.
-if test "${with_dracutdir+set}" = set; then :
- withval=$with_dracutdir; dracutdir=$withval
-else
- dracutdir=check
-fi
+ void main()
+ {
+ __asm__ __volatile__("pcmpgtq %xmm0, %xmm1");
+ }
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
- if test "x$dracutdir" = xcheck; then :
- path1=/usr/share/dracut
- path2=/usr/lib/dracut
- default=$path2
+$as_echo "#define HAVE_SSE4_2 1" >>confdefs.h
- if test -d "$path1"; then :
- dracutdir="$path1"
-else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
- if test -d "$path2"; then :
- dracutdir="$path2"
else
- dracutdir="$default"
-fi
-fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $dracutdir" >&5
-$as_echo "$dracutdir" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether host toolchain supports AVX" >&5
+$as_echo_n "checking whether host toolchain supports AVX... " >&6; }
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for target asm dir" >&5
-$as_echo_n "checking for target asm dir... " >&6; }
- TARGET_ARCH=`echo ${target_cpu} | sed -e s/i.86/i386/`
+ void main()
+ {
+ char v[32];
+ __asm__ __volatile__("vmovdqa %0,%%ymm0" :: "m"(v[0]));
+ }
- case $TARGET_ARCH in
- i386|x86_64)
- TARGET_ASM_DIR=asm-${TARGET_ARCH}
- ;;
- *)
- TARGET_ASM_DIR=asm-generic
- ;;
- esac
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $TARGET_ASM_DIR" >&5
-$as_echo "$TARGET_ASM_DIR" >&6; }
+$as_echo "#define HAVE_AVX 1" >>confdefs.h
- ZLIB=
+else
- ac_fn_c_check_header_mongrel "$LINENO" "zlib.h" "ac_cv_header_zlib_h" "$ac_includes_default"
-if test "x$ac_cv_header_zlib_h" = xyes; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
-else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "
- *** zlib.h missing, zlib-devel package required
-See \`config.log' for more details" "$LINENO" 5; }
fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether host toolchain supports AVX2" >&5
+$as_echo_n "checking whether host toolchain supports AVX2... " >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for compress2 in -lz" >&5
-$as_echo_n "checking for compress2 in -lz... " >&6; }
-if ${ac_cv_lib_z_compress2+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-lz $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char compress2 ();
-int
-main ()
-{
-return compress2 ();
- ;
- return 0;
-}
+
+ void main()
+ {
+ __asm__ __volatile__("vpshufb %ymm0,%ymm1,%ymm2");
+ }
+
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_z_compress2=yes
-else
- ac_cv_lib_z_compress2=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_compress2" >&5
-$as_echo "$ac_cv_lib_z_compress2" >&6; }
-if test "x$ac_cv_lib_z_compress2" = xyes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBZ 1
-_ACEOF
- LIBS="-lz $LIBS"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
-else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "
- *** compress2() missing, zlib-devel package required
-See \`config.log' for more details" "$LINENO" 5; }
-fi
+$as_echo "#define HAVE_AVX2 1" >>confdefs.h
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for uncompress in -lz" >&5
-$as_echo_n "checking for uncompress in -lz... " >&6; }
-if ${ac_cv_lib_z_uncompress+:} false; then :
- $as_echo_n "(cached) " >&6
else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-lz $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char uncompress ();
-int
-main ()
-{
-return uncompress ();
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_z_uncompress=yes
-else
- ac_cv_lib_z_uncompress=no
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_uncompress" >&5
-$as_echo "$ac_cv_lib_z_uncompress" >&6; }
-if test "x$ac_cv_lib_z_uncompress" = xyes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBZ 1
-_ACEOF
- LIBS="-lz $LIBS"
-
-else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "
- *** uncompress() missing, zlib-devel package required
-See \`config.log' for more details" "$LINENO" 5; }
-fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether host toolchain supports AVX512F" >&5
+$as_echo_n "checking whether host toolchain supports AVX512F... " >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for crc32 in -lz" >&5
-$as_echo_n "checking for crc32 in -lz... " >&6; }
-if ${ac_cv_lib_z_crc32+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-lz $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char crc32 ();
-int
-main ()
-{
-return crc32 ();
- ;
- return 0;
-}
+
+ void main()
+ {
+ __asm__ __volatile__("vpandd %zmm0,%zmm1,%zmm2");
+ }
+
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_z_crc32=yes
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_AVX512F 1" >>confdefs.h
+
+
else
- ac_cv_lib_z_crc32=no
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_crc32" >&5
-$as_echo "$ac_cv_lib_z_crc32" >&6; }
-if test "x$ac_cv_lib_z_crc32" = xyes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBZ 1
-_ACEOF
-
- LIBS="-lz $LIBS"
-else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "
- *** crc32() missing, zlib-devel package required
-See \`config.log' for more details" "$LINENO" 5; }
-fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether host toolchain supports AVX512CD" >&5
+$as_echo_n "checking whether host toolchain supports AVX512CD... " >&6; }
- ZLIB="-lz"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
-$as_echo "#define HAVE_ZLIB 1" >>confdefs.h
+ void main()
+ {
+ __asm__ __volatile__("vplzcntd %zmm0,%zmm1");
+ }
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
- LIBUUID=
+$as_echo "#define HAVE_AVX512CD 1" >>confdefs.h
- ac_fn_c_check_header_mongrel "$LINENO" "uuid/uuid.h" "ac_cv_header_uuid_uuid_h" "$ac_includes_default"
-if test "x$ac_cv_header_uuid_uuid_h" = xyes; then :
else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "
- *** uuid/uuid.h missing, libuuid-devel package required
-See \`config.log' for more details" "$LINENO" 5; }
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether host toolchain supports AVX512DQ" >&5
+$as_echo_n "checking whether host toolchain supports AVX512DQ... " >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for uuid_generate in -luuid" >&5
-$as_echo_n "checking for uuid_generate in -luuid... " >&6; }
-if ${ac_cv_lib_uuid_uuid_generate+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-luuid $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char uuid_generate ();
-int
-main ()
-{
-return uuid_generate ();
- ;
- return 0;
-}
+
+ void main()
+ {
+ __asm__ __volatile__("vandpd %zmm0,%zmm1,%zmm2");
+ }
+
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_uuid_uuid_generate=yes
-else
- ac_cv_lib_uuid_uuid_generate=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_uuid_uuid_generate" >&5
-$as_echo "$ac_cv_lib_uuid_uuid_generate" >&6; }
-if test "x$ac_cv_lib_uuid_uuid_generate" = xyes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBUUID 1
-_ACEOF
- LIBS="-luuid $LIBS"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
-else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "
- *** uuid_generate() missing, libuuid-devel package required
-See \`config.log' for more details" "$LINENO" 5; }
-fi
+$as_echo "#define HAVE_AVX512DQ 1" >>confdefs.h
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for uuid_is_null in -luuid" >&5
-$as_echo_n "checking for uuid_is_null in -luuid... " >&6; }
-if ${ac_cv_lib_uuid_uuid_is_null+:} false; then :
- $as_echo_n "(cached) " >&6
else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-luuid $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char uuid_is_null ();
-int
-main ()
-{
-return uuid_is_null ();
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_uuid_uuid_is_null=yes
-else
- ac_cv_lib_uuid_uuid_is_null=no
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_uuid_uuid_is_null" >&5
-$as_echo "$ac_cv_lib_uuid_uuid_is_null" >&6; }
-if test "x$ac_cv_lib_uuid_uuid_is_null" = xyes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBUUID 1
-_ACEOF
- LIBS="-luuid $LIBS"
-else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "
- *** uuid_is_null() missing, libuuid-devel package required
-See \`config.log' for more details" "$LINENO" 5; }
-fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether host toolchain supports AVX512BW" >&5
+$as_echo_n "checking whether host toolchain supports AVX512BW... " >&6; }
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
- LIBUUID="-luuid"
+ void main()
+ {
+ __asm__ __volatile__("vpshufb %zmm0,%zmm1,%zmm2");
+ }
-$as_echo "#define HAVE_LIBUUID 1" >>confdefs.h
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+$as_echo "#define HAVE_AVX512BW 1" >>confdefs.h
-# Check whether --with-blkid was given.
-if test "${with_blkid+set}" = set; then :
- withval=$with_blkid;
else
- with_blkid=check
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
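
(Note on the hunks above: the `-` lines carry the old zlib/libuuid/libblkid userspace probes out of this spot, while the `+` lines add host-toolchain SIMD detection. For each extension, configure links a tiny program whose inline assembly contains one representative instruction, and defines the matching HAVE_* macro only when the compiler and assembler accept it. A minimal standalone sketch of the same probe; the compiler name and temp-file handling are illustrative assumptions, not patch content:

    cat > conftest.c <<'EOF'
    /* One representative AVX-512BW instruction; if the toolchain can
       assemble and link this, the extension is usable at build time. */
    void main()
    {
            __asm__ __volatile__("vpshufb %zmm0,%zmm1,%zmm2");
    }
    EOF
    if cc conftest.c -o conftest 2>/dev/null; then
            echo "#define HAVE_AVX512BW 1" >> confdefs.h
    fi
    rm -f conftest conftest.c
)
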
- LIBBLKID=
- if test "x$with_blkid" = xyes; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether host toolchain supports AVX512IFMA" >&5
+$as_echo_n "checking whether host toolchain supports AVX512IFMA... " >&6; }
- LIBBLKID="-lblkid"
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
-$as_echo "#define HAVE_LIBBLKID 1" >>confdefs.h
+ void main()
+ {
+ __asm__ __volatile__("vpmadd52luq %zmm0,%zmm1,%zmm2");
+ }
+
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
-fi
+$as_echo "#define HAVE_AVX512IFMA 1" >>confdefs.h
- if test "x$with_blkid" = xcheck; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for blkid_get_cache in -lblkid" >&5
-$as_echo_n "checking for blkid_get_cache in -lblkid... " >&6; }
-if ${ac_cv_lib_blkid_blkid_get_cache+:} false; then :
- $as_echo_n "(cached) " >&6
else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-lblkid $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char blkid_get_cache ();
-int
-main ()
-{
-return blkid_get_cache ();
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_blkid_blkid_get_cache=yes
-else
- ac_cv_lib_blkid_blkid_get_cache=no
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_blkid_blkid_get_cache" >&5
-$as_echo "$ac_cv_lib_blkid_blkid_get_cache" >&6; }
-if test "x$ac_cv_lib_blkid_blkid_get_cache" = xyes; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for blkid zfs support" >&5
-$as_echo_n "checking for blkid zfs support... " >&6; }
-
- ZFS_DEV=`mktemp`
- truncate -s 64M $ZFS_DEV
- echo -en "\x0c\xb1\xba\0\0\0\0\0" | \
- dd of=$ZFS_DEV bs=1k count=8 \
- seek=128 conv=notrunc &>/dev/null \
- >/dev/null 2>/dev/null
- echo -en "\x0c\xb1\xba\0\0\0\0\0" | \
- dd of=$ZFS_DEV bs=1k count=8 \
- seek=132 conv=notrunc &>/dev/null \
- >/dev/null 2>/dev/null
- echo -en "\x0c\xb1\xba\0\0\0\0\0" | \
- dd of=$ZFS_DEV bs=1k count=8 \
- seek=136 conv=notrunc &>/dev/null \
- >/dev/null 2>/dev/null
- echo -en "\x0c\xb1\xba\0\0\0\0\0" | \
- dd of=$ZFS_DEV bs=1k count=8 \
- seek=140 conv=notrunc &>/dev/null \
- >/dev/null 2>/dev/null
-
- saved_LIBS="$LIBS"
- LIBS="-lblkid"
-
- if test "$cross_compiling" = yes; then :
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot run test program while cross compiling
-See \`config.log' for more details" "$LINENO" 5; }
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
- #include <stdio.h>
- #include <stdlib.h>
- #include <blkid/blkid.h>
-int
-main ()
-{
- blkid_cache cache;
- char *value;
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether host toolchain supports AVX512VBMI" >&5
+$as_echo_n "checking whether host toolchain supports AVX512VBMI... " >&6; }
- if (blkid_get_cache(&cache, NULL) < 0)
- return 1;
-
- value = blkid_get_tag_value(cache, "TYPE",
- "$ZFS_DEV");
- if (!value) {
- blkid_put_cache(cache);
- return 2;
- }
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
- if (strcmp(value, "zfs_member")) {
- free(value);
- blkid_put_cache(cache);
- return 0;
- }
- free(value);
- blkid_put_cache(cache);
+ void main()
+ {
+ __asm__ __volatile__("vpermb %zmm0,%zmm1,%zmm2");
+ }
- ;
- return 0;
-}
_ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
+if ac_fn_c_try_link "$LINENO"; then :
- rm -f $ZFS_DEV
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
- LIBBLKID="-lblkid"
-
-$as_echo "#define HAVE_LIBBLKID 1" >>confdefs.h
+$as_echo "#define HAVE_AVX512VBMI 1" >>confdefs.h
else
- rm -f $ZFS_DEV
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- if test "x$with_blkid" != xcheck; then :
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "--with-blkid given but unavailable
-See \`config.log' for more details" "$LINENO" 5; }
-fi
-
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
-
- LIBS="$saved_LIBS"
-
-else
-
- if test "x$with_blkid" != xcheck; then :
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "--with-blkid given but unavailable
-See \`config.log' for more details" "$LINENO" 5; }
-fi
-
fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
-fi
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -Wframe-larger-than=<size> support" >&5
-$as_echo_n "checking for -Wframe-larger-than=<size> support... " >&6; }
-
- saved_flags="$CFLAGS"
- CFLAGS="$CFLAGS -Wframe-larger-than=1024"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether host toolchain supports AVX512PF" >&5
+$as_echo_n "checking whether host toolchain supports AVX512PF... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
-int
-main ()
-{
- ;
- return 0;
-}
+ void main()
+ {
+ __asm__ __volatile__("vgatherpf0dps (%rsi,%zmm0,4){%k1}");
+ }
+
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+if ac_fn_c_try_link "$LINENO"; then :
- FRAME_LARGER_THAN=-Wframe-larger-than=1024
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
+$as_echo "#define HAVE_AVX512PF 1" >>confdefs.h
+
+
else
- FRAME_LARGER_THAN=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-
- CFLAGS="$saved_flags"
-
-
-
- if test "x$runstatedir" = x; then
- runstatedir='${localstatedir}/run'
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
- fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether host toolchain supports AVX512ER" >&5
+$as_echo_n "checking whether host toolchain supports AVX512ER... " >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking makedev() is declared in sys/sysmacros.h" >&5
-$as_echo_n "checking makedev() is declared in sys/sysmacros.h... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
- #include <sys/sysmacros.h>
-int
-main ()
-{
-
- int k;
- k = makedev(0,0);
+ void main()
+ {
+ __asm__ __volatile__("vexp2pd %zmm0,%zmm1");
+ }
- ;
- return 0;
-}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+if ac_fn_c_try_link "$LINENO"; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_MAKEDEV_IN_SYSMACROS 1" >>confdefs.h
+$as_echo "#define HAVE_AVX512ER 1" >>confdefs.h
else
@@ -13574,33 +13343,29 @@ else
$as_echo "no" >&6; }
fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking makedev() is declared in sys/mkdev.h" >&5
-$as_echo_n "checking makedev() is declared in sys/mkdev.h... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether host toolchain supports AVX512VL" >&5
+$as_echo_n "checking whether host toolchain supports AVX512VL... " >&6; }
+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
- #include <sys/mkdev.h>
-
-int
-main ()
-{
- int k;
- k = makedev(0,0);
+ void main()
+ {
+ __asm__ __volatile__("vpabsq %zmm0,%zmm1");
+ }
- ;
- return 0;
-}
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+if ac_fn_c_try_link "$LINENO"; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_MAKEDEV_IN_MKDEV 1" >>confdefs.h
+$as_echo "#define HAVE_AVX512VL 1" >>confdefs.h
else
@@ -13609,56 +13374,57 @@ else
$as_echo "no" >&6; }
fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -Wno-format-truncation support" >&5
-$as_echo_n "checking for -Wno-format-truncation support... " >&6; }
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
- saved_flags="$CFLAGS"
- CFLAGS="$CFLAGS -Wno-format-truncation"
+ ;;
+ esac
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-int
-main ()
-{
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for target asm dir" >&5
+$as_echo_n "checking for target asm dir... " >&6; }
+ TARGET_ARCH=`echo ${target_cpu} | sed -e s/i.86/i386/`
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+ case $TARGET_ARCH in
+ i386|x86_64)
+ TARGET_ASM_DIR=asm-${TARGET_ARCH}
+ ;;
+ *)
+ TARGET_ASM_DIR=asm-generic
+ ;;
+ esac
- NO_FORMAT_TRUNCATION=-Wno-format-truncation
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
+ if test $TARGET_ASM_DIR = asm-x86_64; then
+ TARGET_ASM_X86_64_TRUE=
+ TARGET_ASM_X86_64_FALSE='#'
else
+ TARGET_ASM_X86_64_TRUE='#'
+ TARGET_ASM_X86_64_FALSE=
+fi
- NO_FORMAT_TRUNCATION=
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-
+ if test $TARGET_ASM_DIR = asm-i386; then
+ TARGET_ASM_I386_TRUE=
+ TARGET_ASM_I386_FALSE='#'
+else
+ TARGET_ASM_I386_TRUE='#'
+ TARGET_ASM_I386_FALSE=
fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- CFLAGS="$saved_flags"
+ if test $TARGET_ASM_DIR = asm-generic; then
+ TARGET_ASM_GENERIC_TRUE=
+ TARGET_ASM_GENERIC_FALSE='#'
+else
+ TARGET_ASM_GENERIC_TRUE='#'
+ TARGET_ASM_GENERIC_FALSE=
+fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $TARGET_ASM_DIR" >&5
+$as_echo "$TARGET_ASM_DIR" >&6; }
- for ac_func in mlockall
-do :
- ac_fn_c_check_func "$LINENO" "mlockall" "ac_cv_func_mlockall"
-if test "x$ac_cv_func_mlockall" = xyes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE_MLOCKALL 1
-_ACEOF
-fi
-done
- ;;
+ case "$ZFS_CONFIG" in
kernel)
@@ -13714,6 +13480,7 @@ else
kernsrcver=NONE
fi
+ withlinux=yes
fi
@@ -13732,7 +13499,7 @@ fi
$as_echo_n "checking kernel build directory... " >&6; }
if test -z "$kernelbuild"; then :
- if test -e "/lib/modules/$(uname -r)/build"; then :
+ if test x$withlinux != xyes -a -e "/lib/modules/$(uname -r)/build"; then :
kernelbuild=`readlink -f /lib/modules/$(uname -r)/build`
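
(The one-line change above alters the fallback order: kernelbuild now defaults to the running kernel's build directory only when --with-linux was not given; withlinux is set in the preceding hunk. Illustrated with sample values, which are assumptions for the example:

    withlinux=yes          # set when --with-linux=PATH was passed
    kernelbuild=
    if test x$withlinux != xyes -a -e "/lib/modules/$(uname -r)/build"; then
            kernelbuild=`readlink -f /lib/modules/$(uname -r)/build`
    fi
    # kernelbuild stays empty, so configure derives it from the
    # --with-linux source tree instead of the running kernel
)
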
@@ -13857,7 +13624,11 @@ $as_echo "$LINUX_SYMBOLS" >&6; }
# Check whether --with-spl was given.
if test "${with_spl+set}" = set; then :
- withval=$with_spl; splsrc="$withval"
+ withval=$with_spl; if test "$withval" = "yes"; then :
+ as_fn_error $? "--with-spl=PATH requires a PATH" "$LINENO" 5
+else
+ splsrc="$withval"
+fi
fi
@@ -13889,6 +13660,14 @@ fi
$as_echo_n "checking spl source directory... " >&6; }
if test -z "${splsrc}"; then :
+ all_spl_sources="
+ ${splsrc0}
+ ${splsrc1}
+ ${splsrc2}
+ ${splsrc3}
+ ${splsrc4}
+ ${splsrc5}
+ ${splsrc6}",
if test -e "${splsrc0}/spl.release.in"; then :
splsrc=${splsrc0}
@@ -13925,6 +13704,7 @@ fi
else
+ all_spl_sources="$withval",
if test "$splsrc" = "NONE"; then :
splbuild=NONE
@@ -13941,12 +13721,19 @@ $as_echo "$splsrc" >&6; }
as_fn_error $? "
*** Please make sure the kmod spl devel package for your distribution
*** is installed then try again. If that fails you can specify the
- *** location of the spl source with the '--with-spl=PATH' option." "$LINENO" 5
+ *** location of the spl source with the '--with-spl=PATH' option.
+ *** The spl version must match the version of ZFS you are building,
+ *** ${VERSION}. Failed to find spl.release.in in the following:
+ $all_spl_sources" "$LINENO" 5
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking spl build directory" >&5
$as_echo_n "checking spl build directory... " >&6; }
+
+ all_spl_config_locs="${splsrc}/${LINUX_VERSION}
+ ${splsrc}"
+
while true; do
if test -z "$splbuild"; then :
@@ -13989,7 +13776,9 @@ $as_echo "$splbuild" >&6; }
*** Please make sure the kmod spl devel <kernel> package for your
*** distribution is installed then try again. If that fails you
*** can specify the location of the spl objects with the
- *** '--with-spl-obj=PATH' option." "$LINENO" 5
+ *** '--with-spl-obj=PATH' option. Failed to find spl_config.h in
+ *** any of the following:
+ $all_spl_config_locs" "$LINENO" 5
fi
@@ -14073,128 +13862,289 @@ $as_echo "$SPL_SYMBOLS" >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether modules can be built" >&5
-$as_echo_n "checking whether modules can be built... " >&6; }
-
-cat confdefs.h - <<_ACEOF >conftest.c
+# Check whether --with-qat was given.
+if test "${with_qat+set}" = set; then :
+ withval=$with_qat; if test "$withval" = "yes"; then :
+ as_fn_error $? "--with-qat=PATH requires a PATH" "$LINENO" 5
+else
+ qatsrc="$withval"
+fi
+fi
-int
-main (void)
-{
- ;
- return 0;
-}
+# Check whether --with-qat-obj was given.
+if test "${with_qat_obj+set}" = set; then :
+ withval=$with_qat_obj; qatbuild="$withval"
+fi
-_ACEOF
+ if test ! -z "${qatsrc}"; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking qat source directory" >&5
+$as_echo_n "checking qat source directory... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $qatsrc" >&5
+$as_echo "$qatsrc" >&6; }
+ QAT_SRC="${qatsrc}/quickassist"
+ if test ! -e "$QAT_SRC/include/cpa.h"; then :
-cat - <<_ACEOF >conftest.h
+ as_fn_error $? "
+ *** Please make sure the qat driver package is installed
+ *** and specify the location of the qat source with the
+ *** '--with-qat=PATH' option then try again. Failed to
+ *** find cpa.h in:
+ ${QAT_SRC}/include" "$LINENO" 5
-_ACEOF
+fi
+fi
- rm -Rf build && mkdir -p build && touch build/conftest.mod.c
- echo "obj-m := conftest.o" >build/Makefile
- modpost_flag=''
- test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
- if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
+ if test ! -z "${qatsrc}"; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking qat build directory" >&5
+$as_echo_n "checking qat build directory... " >&6; }
+ if test -z "$qatbuild"; then :
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
+ qatbuild="${qatsrc}/build"
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
- if test "x$enable_linux_builtin" != xyes; then
- as_fn_error $? "*** Unable to build an empty module." "$LINENO" 5
- else
- as_fn_error $? "
- *** Unable to build an empty module.
- *** Please run 'make scripts' inside the kernel source tree." "$LINENO" 5
- fi
+fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $qatbuild" >&5
+$as_echo "$qatbuild" >&6; }
+ QAT_OBJ=${qatbuild}
+ if ! test -e "$QAT_OBJ/icp_qa_al.ko"; then :
+ as_fn_error $? "
+ *** Please make sure the qat driver is installed then try again.
+ *** Failed to find icp_qa_al.ko in:
+ $QAT_OBJ" "$LINENO" 5
fi
- rm -Rf build
- if test "x$cross_compiling" != xyes; then :
- if test "$cross_compiling" = yes; then :
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot run test program while cross compiling
-See \`config.log' for more details" "$LINENO" 5; }
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
+$as_echo "#define HAVE_QAT 1" >>confdefs.h
- #include "$LINUX/include/linux/license.h"
+fi
-int
-main ()
-{
+ if test ! -z "${qatsrc}"; then :
- return !license_is_gpl_compatible("$ZFS_META_LICENSE");
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking qat file for module symbols" >&5
+$as_echo_n "checking qat file for module symbols... " >&6; }
+ QAT_SYMBOLS=$QAT_SRC/lookaside/access_layer/src/Module.symvers
- ;
- return 0;
-}
+ if test -r $QAT_SYMBOLS; then :
-_ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $QAT_SYMBOLS" >&5
+$as_echo "$QAT_SYMBOLS" >&6; }
-$as_echo "#define ZFS_IS_GPL_COMPATIBLE 1" >>confdefs.h
+else
+ as_fn_error $? "
+ *** Please make sure the qat driver is installed then try again.
+ *** Failed to find Module.symvers in:
+ $QAT_SYMBOLS" "$LINENO" 5
fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
-
fi
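
(The new --with-qat/--with-qat-obj handling above validates an out-of-tree Intel QuickAssist driver before defining HAVE_QAT: cpa.h must exist under quickassist/include, icp_qa_al.ko under the build directory, and a readable Module.symvers under lookaside/access_layer/src. The same gatekeeping, condensed; QAT_ROOT is a sample path, while the relative layout is the patch's own:

    QAT_ROOT=/opt/qat                    # sample --with-qat=PATH
    QAT_SRC="$QAT_ROOT/quickassist"
    QAT_OBJ="$QAT_ROOT/build"            # default if --with-qat-obj is omitted
    test -e "$QAT_SRC/include/cpa.h"  || exit 1
    test -e "$QAT_OBJ/icp_qa_al.ko"   || exit 1
    test -r "$QAT_SRC/lookaside/access_layer/src/Module.symvers" || exit 1
    echo "#define HAVE_QAT 1" >> confdefs.h
)
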
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether kernel was built with 16K or larger stacks" >&5
-$as_echo_n "checking whether kernel was built with 16K or larger stacks... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether modules can be built" >&5
+$as_echo_n "checking whether modules can be built... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/module.h>
-
int
main (void)
{
- #if (THREAD_SIZE < 16384)
- #error "THREAD_SIZE is less than 16K"
- #endif
-
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ if test "x$enable_linux_builtin" != xyes; then
+ as_fn_error $? "*** Unable to build an empty module." "$LINENO" 5
+ else
+ as_fn_error $? "
+ *** Unable to build an empty module.
+ *** Please run 'make scripts' inside the kernel source tree." "$LINENO" 5
+ fi
+
+
+
+fi
+ rm -Rf build
+
+
+
+
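
(The relocated block above is the template for every kernel-interface probe that follows: write conftest.c, give kbuild a one-line Makefile, and build it as an out-of-tree module against $LINUX_OBJ with -Werror so that any warning counts as "no". Stripped of the autoconf logging, the recipe is roughly this; KDIR stands in for $LINUX_OBJ and is an assumption:

    KDIR=/lib/modules/$(uname -r)/build
    rm -rf build && mkdir -p build && touch build/conftest.mod.c
    printf 'int\nmain (void)\n{\n\treturn 0;\n}\n' > build/conftest.c
    echo "obj-m := conftest.o" > build/Makefile
    if make -C "$KDIR" M="$PWD/build" modules \
        EXTRA_CFLAGS="-Werror" >/dev/null 2>&1 &&
       test -s build/conftest.o; then
            echo "modules can be built"
    fi
    rm -rf build
)
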
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for compile-time stack validation (objtool)" >&5
+$as_echo_n "checking for compile-time stack validation (objtool)... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #undef __ASSEMBLY__
+ #include <asm/frame.h>
+
+int
+main (void)
+{
+
+ #if !defined(FRAME_BEGIN)
+ CTASSERT(1);
+ #endif
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_KERNEL_OBJTOOL 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+
+
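
(The new objtool probe just above relies on a header-visibility trick: with __ASSEMBLY__ undefined, <asm/frame.h> provides FRAME_BEGIN on kernels new enough to carry objtool's frame annotations, and when it is absent the conftest falls through to a call to the undeclared CTASSERT(), which -Werror turns into a hard failure. So the module compiles if, and only if, the annotation is present. The conftest body, built with the kbuild recipe sketched above:

    cat > build/conftest.c <<'EOF'
    #undef __ASSEMBLY__
    #include <asm/frame.h>
    int
    main (void)
    {
    #if !defined(FRAME_BEGIN)
            CTASSERT(1);    /* implicit declaration: fails under -Werror */
    #endif
            return 0;
    }
    EOF
)
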
+ if test "x$cross_compiling" != xyes; then :
+
+ if test "$cross_compiling" = yes; then :
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot run test program while cross compiling
+See \`config.log' for more details" "$LINENO" 5; }
+else
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+
+ #include "$LINUX/include/linux/license.h"
+
+int
+main ()
+{
+
+ return !license_is_gpl_compatible("$ZFS_META_LICENSE");
+
+ ;
+ return 0;
+}
+
+_ACEOF
+if ac_fn_c_try_run "$LINENO"; then :
+
+
+$as_echo "#define ZFS_IS_GPL_COMPATIBLE 1" >>confdefs.h
+
+
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+ conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+
+fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether kernel was built with 16K or larger stacks" >&5
+$as_echo_n "checking whether kernel was built with 16K or larger stacks... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/module.h>
+
+int
+main (void)
+{
+
+ #if (THREAD_SIZE < 16384)
+ #error "THREAD_SIZE is less than 16K"
+ #endif
+
;
return 0;
}
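
(The stack-size probe above needs nothing at runtime: THREAD_SIZE is a compile-time constant, so a preprocessor #error lets the module build itself answer the question, and a successful compile means stacks are at least 16K. The same pattern as conftest source for the kbuild recipe:

    cat > build/conftest.c <<'EOF'
    #include <linux/module.h>
    int
    main (void)
    {
    #if (THREAD_SIZE < 16384)
    #error "THREAD_SIZE is less than 16K"
    #endif
            return 0;
    }
    EOF
)
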
@@ -14624,22 +14574,22 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether submit_bio() wants 1 arg" >&5
-$as_echo_n "checking whether submit_bio() wants 1 arg... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether super_block->s_user_ns exists" >&5
+$as_echo_n "checking whether super_block->s_user_ns exists... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/bio.h>
+ #include <linux/fs.h>
+ #include <linux/user_namespace.h>
int
main (void)
{
- blk_qc_t blk_qc;
- struct bio *bio = NULL;
- blk_qc = submit_bio(bio);
+ struct super_block super;
+ super.s_user_ns = (struct user_namespace *)NULL;
;
return 0;
@@ -14673,7 +14623,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_1ARG_SUBMIT_BIO 1" >>confdefs.h
+$as_echo "#define HAVE_SUPER_USER_NS 1" >>confdefs.h
else
@@ -14691,36 +14641,22 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking block device operation prototypes" >&5
-$as_echo_n "checking block device operation prototypes... " >&6; }
- tmp_flags="$EXTRA_KCFLAGS"
- EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether submit_bio() wants 1 arg" >&5
+$as_echo_n "checking whether submit_bio() wants 1 arg... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/blkdev.h>
-
- int blk_open(struct block_device *bdev, fmode_t mode)
- { return 0; }
- int blk_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned x, unsigned long y) { return 0; }
- int blk_compat_ioctl(struct block_device * bdev, fmode_t mode,
- unsigned x, unsigned long y) { return 0; }
-
- static const struct block_device_operations
- bops __attribute__ ((unused)) = {
- .open = blk_open,
- .release = NULL,
- .ioctl = blk_ioctl,
- .compat_ioctl = blk_compat_ioctl,
- };
+ #include <linux/bio.h>
int
main (void)
{
+ blk_qc_t blk_qc;
+ struct bio *bio = NULL;
+ blk_qc = submit_bio(bio);
;
return 0;
@@ -14751,18 +14687,18 @@ _ACEOF
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: struct block_device" >&5
-$as_echo "struct block_device" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
-$as_echo "#define HAVE_BDEV_BLOCK_DEVICE_OPERATIONS 1" >>confdefs.h
+$as_echo "#define HAVE_1ARG_SUBMIT_BIO 1" >>confdefs.h
else
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: struct inode" >&5
-$as_echo "struct inode" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
@@ -14770,11 +14706,10 @@ fi
rm -Rf build
- EXTRA_KCFLAGS="$tmp_flags"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether block_device_operations.release is void" >&5
-$as_echo_n "checking whether block_device_operations.release is void... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking block device operation prototypes" >&5
+$as_echo_n "checking block device operation prototypes... " >&6; }
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
@@ -14784,14 +14719,19 @@ cat confdefs.h - <<_ACEOF >conftest.c
#include <linux/blkdev.h>
- void blk_release(struct gendisk *g, fmode_t mode) { return; }
+ int blk_open(struct block_device *bdev, fmode_t mode)
+ { return 0; }
+ int blk_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned x, unsigned long y) { return 0; }
+ int blk_compat_ioctl(struct block_device * bdev, fmode_t mode,
+ unsigned x, unsigned long y) { return 0; }
static const struct block_device_operations
bops __attribute__ ((unused)) = {
- .open = NULL,
- .release = blk_release,
- .ioctl = NULL,
- .compat_ioctl = NULL,
+ .open = blk_open,
+ .release = NULL,
+ .ioctl = blk_ioctl,
+ .compat_ioctl = blk_compat_ioctl,
};
int
@@ -14828,18 +14768,18 @@ _ACEOF
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: void" >&5
-$as_echo "void" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: struct block_device" >&5
+$as_echo "struct block_device" >&6; }
-$as_echo "#define HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID 1" >>confdefs.h
+$as_echo "#define HAVE_BDEV_BLOCK_DEVICE_OPERATIONS 1" >>confdefs.h
else
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: int" >&5
-$as_echo "int" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: struct inode" >&5
+$as_echo "struct inode" >&6; }
@@ -14849,20 +14789,32 @@ fi
EXTRA_KCFLAGS="$tmp_flags"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether kernel defines fmode_t" >&5
-$as_echo_n "checking whether kernel defines fmode_t... " >&6; }
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether block_device_operations.release is void" >&5
+$as_echo_n "checking whether block_device_operations.release is void... " >&6; }
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/types.h>
+ #include <linux/blkdev.h>
+
+ void blk_release(struct gendisk *g, fmode_t mode) { return; }
+
+ static const struct block_device_operations
+ bops __attribute__ ((unused)) = {
+ .open = NULL,
+ .release = blk_release,
+ .ioctl = NULL,
+ .compat_ioctl = NULL,
+ };
int
main (void)
{
- fmode_t *ptr __attribute__ ((unused));
;
return 0;
@@ -14893,18 +14845,18 @@ _ACEOF
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: void" >&5
+$as_echo "void" >&6; }
-$as_echo "#define HAVE_FMODE_T 1" >>confdefs.h
+$as_echo "#define HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID 1" >>confdefs.h
else
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: int" >&5
+$as_echo "int" >&6; }
@@ -14912,23 +14864,22 @@ fi
rm -Rf build
+ EXTRA_KCFLAGS="$tmp_flags"
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether kernel defines KOBJ_NAME_LEN" >&5
-$as_echo_n "checking whether kernel defines KOBJ_NAME_LEN... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether kernel defines fmode_t" >&5
+$as_echo_n "checking whether kernel defines fmode_t... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/kobject.h>
+ #include <linux/types.h>
int
main (void)
{
- int val __attribute__ ((unused));
- val = KOBJ_NAME_LEN;
+ fmode_t *ptr __attribute__ ((unused));
;
return 0;
@@ -14962,7 +14913,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_KOBJ_NAME_LEN 1" >>confdefs.h
+$as_echo "#define HAVE_FMODE_T 1" >>confdefs.h
else
@@ -15902,6 +15853,74 @@ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether bio_set_dev() exists" >&5
+$as_echo_n "checking whether bio_set_dev() exists... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/bio.h>
+ #include <linux/fs.h>
+
+int
+main (void)
+{
+
+ struct block_device *bdev = NULL;
+ struct bio *bio = NULL;
+ bio_set_dev(bio, bdev);
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_BIO_SET_DEV 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether REQ_FAILFAST_MASK is defined" >&5
$as_echo_n "checking whether REQ_FAILFAST_MASK is defined... " >&6; }
@@ -16497,10 +16516,8 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether blk_queue_flush() is available" >&5
-$as_echo_n "checking whether blk_queue_flush() is available... " >&6; }
- tmp_flags="$EXTRA_KCFLAGS"
- EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether blk_queue bdi is dynamic" >&5
+$as_echo_n "checking whether blk_queue bdi is dynamic... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
@@ -16512,8 +16529,9 @@ int
main (void)
{
- struct request_queue *q = NULL;
- (void) blk_queue_flush(q, REQ_FLUSH);
+ struct request_queue q;
+ struct backing_dev_info bdi;
+ q.backing_dev_info = &bdi;
;
return 0;
@@ -16547,75 +16565,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_BLK_QUEUE_FLUSH 1" >>confdefs.h
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether blk_queue_flush() is GPL-only" >&5
-$as_echo_n "checking whether blk_queue_flush() is GPL-only... " >&6; }
-
-
-cat confdefs.h - <<_ACEOF >conftest.c
-
-
- #include <linux/module.h>
- #include <linux/blkdev.h>
-
- MODULE_LICENSE("$ZFS_META_LICENSE");
-
-int
-main (void)
-{
-
- struct request_queue *q = NULL;
- (void) blk_queue_flush(q, REQ_FLUSH);
-
- ;
- return 0;
-}
-
-_ACEOF
-
-
-
-cat - <<_ACEOF >conftest.h
-
-_ACEOF
-
-
- rm -Rf build && mkdir -p build && touch build/conftest.mod.c
- echo "obj-m := conftest.o" >build/Makefile
- modpost_flag=''
- test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
- if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-
-$as_echo "#define HAVE_BLK_QUEUE_FLUSH_GPL_ONLY 1" >>confdefs.h
-
-
-
-
-fi
- rm -Rf build
-
+$as_echo "#define HAVE_BLK_QUEUE_BDI_DYNAMIC 1" >>confdefs.h
else
@@ -16632,23 +16582,24 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether blk_queue_write_cache() exists" >&5
-$as_echo_n "checking whether blk_queue_write_cache() exists... " >&6; }
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether blk_queue_flush() is available" >&5
+$as_echo_n "checking whether blk_queue_flush() is available... " >&6; }
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/kernel.h>
#include <linux/blkdev.h>
-
int
main (void)
{
struct request_queue *q = NULL;
- blk_queue_write_cache(q, true, true);
+ (void) blk_queue_flush(q, REQ_FLUSH);
;
return 0;
@@ -16682,17 +16633,16 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_BLK_QUEUE_WRITE_CACHE 1" >>confdefs.h
+$as_echo "#define HAVE_BLK_QUEUE_FLUSH 1" >>confdefs.h
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether blk_queue_write_cache() is GPL-only" >&5
-$as_echo_n "checking whether blk_queue_write_cache() is GPL-only... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether blk_queue_flush() is GPL-only" >&5
+$as_echo_n "checking whether blk_queue_flush() is GPL-only... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/kernel.h>
#include <linux/module.h>
#include <linux/blkdev.h>
@@ -16703,7 +16653,7 @@ main (void)
{
struct request_queue *q = NULL;
- blk_queue_write_cache(q, true, true);
+ (void) blk_queue_flush(q, REQ_FLUSH);
;
return 0;
@@ -16744,7 +16694,7 @@ sed 's/^/| /' conftest.$ac_ext >&5
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY 1" >>confdefs.h
+$as_echo "#define HAVE_BLK_QUEUE_FLUSH_GPL_ONLY 1" >>confdefs.h
@@ -16768,95 +16718,23 @@ fi
- EXTRA_KCFLAGS="$tmp_flags"
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether blk_queue_max_hw_sectors() is available" >&5
-$as_echo_n "checking whether blk_queue_max_hw_sectors() is available... " >&6; }
- tmp_flags="$EXTRA_KCFLAGS"
- EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether blk_queue_write_cache() exists" >&5
+$as_echo_n "checking whether blk_queue_write_cache() exists... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
+ #include <linux/kernel.h>
#include <linux/blkdev.h>
-int
-main (void)
-{
-
- struct request_queue *q = NULL;
- (void) blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
-
- ;
- return 0;
-}
-
-_ACEOF
-
-
-
-cat - <<_ACEOF >conftest.h
-
-_ACEOF
-
-
- rm -Rf build && mkdir -p build && touch build/conftest.mod.c
- echo "obj-m := conftest.o" >build/Makefile
- modpost_flag=''
- test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
- if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-
-$as_echo "#define HAVE_BLK_QUEUE_MAX_HW_SECTORS 1" >>confdefs.h
-
-
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-
-
-
-fi
- rm -Rf build
-
-
- EXTRA_KCFLAGS="$tmp_flags"
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether blk_queue_max_segments() is available" >&5
-$as_echo_n "checking whether blk_queue_max_segments() is available... " >&6; }
- tmp_flags="$EXTRA_KCFLAGS"
- EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
-
-
-cat confdefs.h - <<_ACEOF >conftest.c
-
-
- #include <linux/blkdev.h>
int
main (void)
{
struct request_queue *q = NULL;
- (void) blk_queue_max_segments(q, BLK_MAX_SEGMENTS);
+ blk_queue_write_cache(q, true, true);
;
return 0;
@@ -16890,43 +16768,28 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_BLK_QUEUE_MAX_SEGMENTS 1" >>confdefs.h
-
-
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-
-
-
-fi
- rm -Rf build
-
-
- EXTRA_KCFLAGS="$tmp_flags"
+$as_echo "#define HAVE_BLK_QUEUE_WRITE_CACHE 1" >>confdefs.h
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the BIO_RW_UNPLUG enum is available" >&5
-$as_echo_n "checking whether the BIO_RW_UNPLUG enum is available... " >&6; }
- tmp_flags="$EXTRA_KCFLAGS"
- EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether blk_queue_write_cache() is GPL-only" >&5
+$as_echo_n "checking whether blk_queue_write_cache() is GPL-only... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/blkdev.h>
+ #include <linux/kernel.h>
+ #include <linux/module.h>
+ #include <linux/blkdev.h>
+
+ MODULE_LICENSE("$ZFS_META_LICENSE");
int
main (void)
{
- extern enum bio_rw_flags rw;
-
- rw = BIO_RW_UNPLUG;
+ struct request_queue *q = NULL;
+ blk_queue_write_cache(q, true, true);
;
return 0;
@@ -16957,10 +16820,24 @@ _ACEOF
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_BLK_QUEUE_HAVE_BIO_RW_UNPLUG 1" >>confdefs.h
+$as_echo "#define HAVE_BLK_QUEUE_WRITE_CACHE_GPL_ONLY 1" >>confdefs.h
+
+
+
+
+fi
+ rm -Rf build
+
else
@@ -16976,11 +16853,12 @@ fi
rm -Rf build
+
EXTRA_KCFLAGS="$tmp_flags"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether struct blk_plug is available" >&5
-$as_echo_n "checking whether struct blk_plug is available... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether blk_queue_max_hw_sectors() is available" >&5
+$as_echo_n "checking whether blk_queue_max_hw_sectors() is available... " >&6; }
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
@@ -16994,10 +16872,8 @@ int
main (void)
{
- struct blk_plug plug;
-
- blk_start_plug(&plug);
- blk_finish_plug(&plug);
+ struct request_queue *q = NULL;
+ (void) blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
;
return 0;
@@ -17031,7 +16907,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_BLK_QUEUE_HAVE_BLK_PLUG 1" >>confdefs.h
+$as_echo "#define HAVE_BLK_QUEUE_MAX_HW_SECTORS 1" >>confdefs.h
else
@@ -17050,8 +16926,8 @@ fi
EXTRA_KCFLAGS="$tmp_flags"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether get_disk_ro() is available" >&5
-$as_echo_n "checking whether get_disk_ro() is available... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether blk_queue_max_segments() is available" >&5
+$as_echo_n "checking whether blk_queue_max_segments() is available... " >&6; }
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
@@ -17065,8 +16941,8 @@ int
main (void)
{
- struct gendisk *disk = NULL;
- (void) get_disk_ro(disk);
+ struct request_queue *q = NULL;
+ (void) blk_queue_max_segments(q, BLK_MAX_SEGMENTS);
;
return 0;
@@ -17100,7 +16976,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_GET_DISK_RO 1" >>confdefs.h
+$as_echo "#define HAVE_BLK_QUEUE_MAX_SEGMENTS 1" >>confdefs.h
else
@@ -17118,21 +16994,25 @@ fi
EXTRA_KCFLAGS="$tmp_flags"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether get_gendisk() is available" >&5
-$as_echo_n "checking whether get_gendisk() is available... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the BIO_RW_UNPLUG enum is available" >&5
+$as_echo_n "checking whether the BIO_RW_UNPLUG enum is available... " >&6; }
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/genhd.h>
+ #include <linux/blkdev.h>
int
main (void)
{
- get_gendisk(0, NULL);
+ extern enum bio_rw_flags rw;
+
+ rw = BIO_RW_UNPLUG;
;
return 0;
@@ -17162,83 +17042,48 @@ _ACEOF
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
- rc=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
- rc=1
-
-
-fi
- rm -Rf build
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
- if test $rc -ne 0; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-
- else
- if test "x$enable_linux_builtin" != xyes; then
+$as_echo "#define HAVE_BLK_QUEUE_HAVE_BIO_RW_UNPLUG 1" >>confdefs.h
- grep -q -E '[[:space:]]get_gendisk[[:space:]]' \
- $LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
- rc=$?
- if test $rc -ne 0; then
- export=0
- for file in block/genhd.c; do
- grep -q -E "EXPORT_SYMBOL.*(get_gendisk)" \
- "$LINUX/$file" 2>/dev/null
- rc=$?
- if test $rc -eq 0; then
- export=1
- break;
- fi
- done
- if test $export -eq 0; then :
- rc=1
- else :
- rc=0
- fi
- else :
- rc=0
- fi
- fi
- if test $rc -ne 0; then :
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- else :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-
-$as_echo "#define HAVE_GET_GENDISK 1" >>confdefs.h
+fi
+ rm -Rf build
- fi
- fi
+ EXTRA_KCFLAGS="$tmp_flags"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether bio_set_op_attrs is available" >&5
-$as_echo_n "checking whether bio_set_op_attrs is available... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether struct blk_plug is available" >&5
+$as_echo_n "checking whether struct blk_plug is available... " >&6; }
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/bio.h>
+ #include <linux/blkdev.h>
int
main (void)
{
- struct bio *bio __attribute__ ((unused)) = NULL;
+ struct blk_plug plug;
- bio_set_op_attrs(bio, 0, 0);
+ blk_start_plug(&plug);
+ blk_finish_plug(&plug);
;
return 0;
@@ -17272,7 +17117,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_BIO_SET_OP_ATTRS 1" >>confdefs.h
+$as_echo "#define HAVE_BLK_QUEUE_HAVE_BLK_PLUG 1" >>confdefs.h
else
@@ -17288,24 +17133,26 @@ fi
rm -Rf build
+ EXTRA_KCFLAGS="$tmp_flags"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether generic_readlink is global" >&5
-$as_echo_n "checking whether generic_readlink is global... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether get_disk_ro() is available" >&5
+$as_echo_n "checking whether get_disk_ro() is available... " >&6; }
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/fs.h>
+ #include <linux/blkdev.h>
int
main (void)
{
- int i __attribute__ ((unused));
-
- i = generic_readlink(NULL, NULL, 0);
+ struct gendisk *disk = NULL;
+ (void) get_disk_ro(disk);
;
return 0;
@@ -17339,7 +17186,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_GENERIC_READLINK 1" >>confdefs.h
+$as_echo "#define HAVE_GET_DISK_RO 1" >>confdefs.h
else
@@ -17355,24 +17202,23 @@ fi
rm -Rf build
+ EXTRA_KCFLAGS="$tmp_flags"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether get_gendisk() is available" >&5
+$as_echo_n "checking whether get_gendisk() is available... " >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ql->discard_granularity is available" >&5
-$as_echo_n "checking whether ql->discard_granularity is available... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/blkdev.h>
+ #include <linux/genhd.h>
int
main (void)
{
- struct queue_limits ql __attribute__ ((unused));
-
- ql.discard_granularity = 0;
+ get_gendisk(0, NULL);
;
return 0;
@@ -17402,51 +17248,291 @@ _ACEOF
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
+ rc=0
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+ rc=1
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-$as_echo "#define HAVE_DISCARD_GRANULARITY 1" >>confdefs.h
+fi
+ rm -Rf build
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
+ if test $rc -ne 0; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
+ else
+ if test "x$enable_linux_builtin" != xyes; then
+ grep -q -E '[[:space:]]get_gendisk[[:space:]]' \
+ $LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
+ rc=$?
+ if test $rc -ne 0; then
+ export=0
+ for file in block/genhd.c; do
+ grep -q -E "EXPORT_SYMBOL.*(get_gendisk)" \
+ "$LINUX/$file" 2>/dev/null
+ rc=$?
+ if test $rc -eq 0; then
+ export=1
+ break;
+ fi
+ done
+ if test $export -eq 0; then :
+ rc=1
+ else :
+ rc=0
+ fi
+ else :
+ rc=0
+ fi
-fi
- rm -Rf build
+ fi
+ if test $rc -ne 0; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ else :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether super_block uses const struct xattr_handler" >&5
-$as_echo_n "checking whether super_block uses const struct xattr_handler... " >&6; }
+$as_echo "#define HAVE_GET_GENDISK 1" >>confdefs.h
-cat confdefs.h - <<_ACEOF >conftest.c
+ fi
+ fi
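
(The compile test for get_gendisk() succeeds whenever the declaration is visible, which proves nothing about export status, so the relocated block above asks a second question: is the symbol exported to modules? Configure greps the kernel's symbol table, falling back to EXPORT_SYMBOL lines in block/genhd.c for built-in configurations, before defining HAVE_GET_GENDISK. The core of that probe in isolation; LINUX_OBJ is an assumption standing in for configure's cached value:

    LINUX_OBJ=/lib/modules/$(uname -r)/build
    if grep -q -E '[[:space:]]get_gendisk[[:space:]]' \
        "$LINUX_OBJ/Module.symvers" 2>/dev/null; then
            echo "#define HAVE_GET_GENDISK 1" >> confdefs.h
    fi
)
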
- #include <linux/fs.h>
- #include <linux/xattr.h>
- const struct xattr_handler xattr_test_handler = {
- .prefix = "test",
- .get = NULL,
- .set = NULL,
- };
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether bio_set_op_attrs is available" >&5
+$as_echo_n "checking whether bio_set_op_attrs is available... " >&6; }
- const struct xattr_handler *xattr_handlers[] = {
- &xattr_test_handler,
- };
- const struct super_block sb __attribute__ ((unused)) = {
- .s_xattr = xattr_handlers,
- };
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/bio.h>
+
+int
+main (void)
+{
+
+ struct bio *bio __attribute__ ((unused)) = NULL;
+
+ bio_set_op_attrs(bio, 0, 0);
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_BIO_SET_OP_ATTRS 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+
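HAVE_BIO_SET_OP_ATTRS, defined by the check above, is the usual switch for a compat shim: bio_set_op_attrs() appeared in 4.8, and on older kernels the op and flags still live in bio->bi_rw. A plausible fallback, sketched here rather than taken from this diff:

    #include <linux/bio.h>

    #ifndef HAVE_BIO_SET_OP_ATTRS
    /* Pre-4.8 kernels: fold the op and flags into bi_rw ourselves. */
    static inline void
    bio_set_op_attrs(struct bio *bio, unsigned rw, unsigned flags)
    {
            bio->bi_rw |= (rw | flags);
    }
    #endif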
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether generic_readlink is global" >&5
+$as_echo_n "checking whether generic_readlink is global... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/fs.h>
+
+int
+main (void)
+{
+
+ int i __attribute__ ((unused));
+
+ i = generic_readlink(NULL, NULL, 0);
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_GENERIC_READLINK 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether ql->discard_granularity is available" >&5
+$as_echo_n "checking whether ql->discard_granularity is available... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/blkdev.h>
+
+int
+main (void)
+{
+
+ struct queue_limits ql __attribute__ ((unused));
+
+ ql.discard_granularity = 0;
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_DISCARD_GRANULARITY 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+
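The HAVE_DISCARD_GRANULARITY result above typically gates how a block driver advertises its smallest discard unit during queue setup; a minimal sketch (the function name is assumed for illustration):

    #include <linux/blkdev.h>

    static void
    zvol_setup_discard(struct request_queue *q, unsigned int blocksize)
    {
    #if defined(HAVE_DISCARD_GRANULARITY)
            /* Report the volume block size as the minimum TRIM unit. */
            q->limits.discard_granularity = blocksize;
    #endif
    }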
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether super_block uses const struct xattr_handler" >&5
+$as_echo_n "checking whether super_block uses const struct xattr_handler... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/fs.h>
+ #include <linux/xattr.h>
+
+ const struct xattr_handler xattr_test_handler = {
+ .prefix = "test",
+ .get = NULL,
+ .set = NULL,
+ };
+
+ const struct xattr_handler *xattr_handlers[] = {
+ &xattr_test_handler,
+ };
+
+ const struct super_block sb __attribute__ ((unused)) = {
+ .s_xattr = xattr_handlers,
+ };
int
main (void)
@@ -17633,7 +17719,9 @@ else
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether xattr_handler->get() wants xattr_handler" >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether xattr_handler->get() wants xattr_handler" >&5
$as_echo_n "checking whether xattr_handler->get() wants xattr_handler... " >&6; }
@@ -17913,7 +18001,9 @@ else
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether xattr_handler->set() wants xattr_handler" >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether xattr_handler->set() wants xattr_handler" >&5
$as_echo_n "checking whether xattr_handler->set() wants xattr_handler... " >&6; }
@@ -18663,39 +18753,26 @@ $as_echo "yes" >&6; }
$as_echo "#define HAVE_POSIX_ACL_RELEASE 1" >>confdefs.h
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-
-
-
-fi
- rm -Rf build
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether posix_acl_release() is GPL-only" >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether posix_acl_release() is GPL-only" >&5
$as_echo_n "checking whether posix_acl_release() is GPL-only... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/cred.h>
- #include <linux/fs.h>
- #include <linux/posix_acl.h>
+ #include <linux/module.h>
+ #include <linux/cred.h>
+ #include <linux/fs.h>
+ #include <linux/posix_acl.h>
- MODULE_LICENSE("$ZFS_META_LICENSE");
+ MODULE_LICENSE("$ZFS_META_LICENSE");
int
main (void)
{
- struct posix_acl* tmp = posix_acl_alloc(1, 0);
- posix_acl_release(tmp);
+ struct posix_acl* tmp = posix_acl_alloc(1, 0);
+ posix_acl_release(tmp);
;
return 0;
@@ -18726,14 +18803,14 @@ _ACEOF
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
else
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
$as_echo "#define HAVE_POSIX_ACL_RELEASE_GPL_ONLY 1" >>confdefs.h
@@ -18741,6 +18818,20 @@ $as_echo "#define HAVE_POSIX_ACL_RELEASE_GPL_ONLY 1" >>confdefs.h
+fi
+ rm -Rf build
+
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
fi
rm -Rf build
@@ -19674,6 +19765,72 @@ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether inode_set_flags() exists" >&5
+$as_echo_n "checking whether inode_set_flags() exists... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/fs.h>
+
+int
+main (void)
+{
+
+ struct inode inode;
+ inode_set_flags(&inode, S_IMMUTABLE, S_IMMUTABLE);
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_INODE_SET_FLAGS 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+
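HAVE_INODE_SET_FLAGS from the check above lets callers prefer the 3.15+ helper, which updates i_flags under the required memory barriers; older kernels need the open-coded store. A minimal sketch (wrapper name assumed):

    #include <linux/fs.h>

    static inline void
    zpl_set_immutable(struct inode *ip)
    {
    #if defined(HAVE_INODE_SET_FLAGS)
            /* 3.15+: atomic helper, (inode, flags, mask). */
            inode_set_flags(ip, S_IMMUTABLE, S_IMMUTABLE);
    #else
            /* Older kernels: plain read-modify-write of i_flags. */
            ip->i_flags |= S_IMMUTABLE;
    #endif
    }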
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether uncached_acl_sentinel() exists" >&5
$as_echo_n "checking whether uncached_acl_sentinel() exists... " >&6; }
@@ -19875,6 +20032,72 @@ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether file_dentry() is available" >&5
+$as_echo_n "checking whether file_dentry() is available... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/fs.h>
+
+int
+main (void)
+{
+
+ struct file *f = NULL;
+ file_dentry(f);
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_FILE_DENTRY 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+
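file_dentry() was added in 4.6 so overlayfs can hand back the correct dentry; where the check above fails, the conventional fallback is a direct f_path lookup. A sketch of that compat definition:

    #ifndef HAVE_FILE_DENTRY
    /* Pre-4.6 kernels: the dentry is reachable through f_path. */
    #define file_dentry(f)  ((f)->f_path.dentry)
    #endif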
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether fops->fsync() wants" >&5
$as_echo_n "checking whether fops->fsync() wants... " >&6; }
@@ -21271,18 +21494,19 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether iops->truncate_range() exists" >&5
-$as_echo_n "checking whether iops->truncate_range() exists... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether i_op->tmpfile() exists" >&5
+$as_echo_n "checking whether i_op->tmpfile() exists... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
#include <linux/fs.h>
- void truncate_range(struct inode *inode, loff_t start,
- loff_t end) { return; }
- static struct inode_operations iops __attribute__ ((unused)) = {
- .truncate_range = truncate_range,
+ int tmpfile(struct inode *inode, struct dentry *dentry,
+ umode_t mode) { return 0; }
+ static struct inode_operations
+ iops __attribute__ ((unused)) = {
+ .tmpfile = tmpfile,
};
int
@@ -21322,7 +21546,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_INODE_TRUNCATE_RANGE 1" >>confdefs.h
+$as_echo "#define HAVE_TMPFILE 1" >>confdefs.h
else
@@ -21340,17 +21564,18 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether dops->d_automount() exists" >&5
-$as_echo_n "checking whether dops->d_automount() exists... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether iops->truncate_range() exists" >&5
+$as_echo_n "checking whether iops->truncate_range() exists... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/dcache.h>
- struct vfsmount *d_automount(struct path *p) { return NULL; }
- struct dentry_operations dops __attribute__ ((unused)) = {
- .d_automount = d_automount,
+ #include <linux/fs.h>
+ void truncate_range(struct inode *inode, loff_t start,
+ loff_t end) { return; }
+ static struct inode_operations iops __attribute__ ((unused)) = {
+ .truncate_range = truncate_range,
};
int
@@ -21390,7 +21615,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_AUTOMOUNT 1" >>confdefs.h
+$as_echo "#define HAVE_INODE_TRUNCATE_RANGE 1" >>confdefs.h
else
@@ -21408,18 +21633,17 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether eops->encode_fh() wants inode" >&5
-$as_echo_n "checking whether eops->encode_fh() wants inode... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether dops->d_automount() exists" >&5
+$as_echo_n "checking whether dops->d_automount() exists... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/exportfs.h>
- int encode_fh(struct inode *inode, __u32 *fh, int *max_len,
- struct inode *parent) { return 0; }
- static struct export_operations eops __attribute__ ((unused))={
- .encode_fh = encode_fh,
+ #include <linux/dcache.h>
+ struct vfsmount *d_automount(struct path *p) { return NULL; }
+ struct dentry_operations dops __attribute__ ((unused)) = {
+ .d_automount = d_automount,
};
int
@@ -21459,7 +21683,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_ENCODE_FH_WITH_INODE 1" >>confdefs.h
+$as_echo "#define HAVE_AUTOMOUNT 1" >>confdefs.h
else
@@ -21477,17 +21701,18 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether eops->commit_metadata() exists" >&5
-$as_echo_n "checking whether eops->commit_metadata() exists... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether eops->encode_fh() wants inode" >&5
+$as_echo_n "checking whether eops->encode_fh() wants inode... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
#include <linux/exportfs.h>
- int commit_metadata(struct inode *inode) { return 0; }
+ int encode_fh(struct inode *inode, __u32 *fh, int *max_len,
+ struct inode *parent) { return 0; }
static struct export_operations eops __attribute__ ((unused))={
- .commit_metadata = commit_metadata,
+ .encode_fh = encode_fh,
};
int
@@ -21527,7 +21752,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_COMMIT_METADATA 1" >>confdefs.h
+$as_echo "#define HAVE_ENCODE_FH_WITH_INODE 1" >>confdefs.h
else
@@ -21544,21 +21769,24 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether clear_inode() is available" >&5
-$as_echo_n "checking whether clear_inode() is available... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether eops->commit_metadata() exists" >&5
+$as_echo_n "checking whether eops->commit_metadata() exists... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/fs.h>
+ #include <linux/exportfs.h>
+ int commit_metadata(struct inode *inode) { return 0; }
+ static struct export_operations eops __attribute__ ((unused))={
+ .commit_metadata = commit_metadata,
+ };
int
main (void)
{
- clear_inode(NULL);
;
return 0;
@@ -21588,68 +21816,29 @@ _ACEOF
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
- rc=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
- rc=1
-
-
-fi
- rm -Rf build
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
- if test $rc -ne 0; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-
- else
- if test "x$enable_linux_builtin" != xyes; then
+$as_echo "#define HAVE_COMMIT_METADATA 1" >>confdefs.h
- grep -q -E '[[:space:]]clear_inode[[:space:]]' \
- $LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
- rc=$?
- if test $rc -ne 0; then
- export=0
- for file in fs/inode.c; do
- grep -q -E "EXPORT_SYMBOL.*(clear_inode)" \
- "$LINUX/$file" 2>/dev/null
- rc=$?
- if test $rc -eq 0; then
- export=1
- break;
- fi
- done
- if test $export -eq 0; then :
- rc=1
- else :
- rc=0
- fi
- else :
- rc=0
- fi
- fi
- if test $rc -ne 0; then :
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- else :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-$as_echo "#define HAVE_CLEAR_INODE 1" >>confdefs.h
+fi
+ rm -Rf build
- fi
- fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether setattr_prepare() is available" >&5
-$as_echo_n "checking whether setattr_prepare() is available... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether clear_inode() is available" >&5
+$as_echo_n "checking whether clear_inode() is available... " >&6; }
@@ -21662,11 +21851,7 @@ int
main (void)
{
- struct dentry *dentry = NULL;
- struct iattr *attr = NULL;
- int error;
-
- error = setattr_prepare(dentry, attr);
+ clear_inode(NULL);
;
return 0;
@@ -21715,13 +21900,13 @@ $as_echo "no" >&6; }
else
if test "x$enable_linux_builtin" != xyes; then
- grep -q -E '[[:space:]]setattr_prepare[[:space:]]' \
+ grep -q -E '[[:space:]]clear_inode[[:space:]]' \
$LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
rc=$?
if test $rc -ne 0; then
export=0
- for file in fs/attr.c; do
- grep -q -E "EXPORT_SYMBOL.*(setattr_prepare)" \
+ for file in fs/inode.c; do
+ grep -q -E "EXPORT_SYMBOL.*(clear_inode)" \
"$LINUX/$file" 2>/dev/null
rc=$?
if test $rc -eq 0; then
@@ -21749,15 +21934,15 @@ $as_echo "no" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_SETATTR_PREPARE 1" >>confdefs.h
+$as_echo "#define HAVE_CLEAR_INODE 1" >>confdefs.h
fi
fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether insert_inode_locked() is available" >&5
-$as_echo_n "checking whether insert_inode_locked() is available... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether setattr_prepare() is available" >&5
+$as_echo_n "checking whether setattr_prepare() is available... " >&6; }
@@ -21770,7 +21955,11 @@ int
main (void)
{
- insert_inode_locked(NULL);
+ struct dentry *dentry = NULL;
+ struct iattr *attr = NULL;
+ int error;
+
+ error = setattr_prepare(dentry, attr);
;
return 0;
@@ -21819,13 +22008,13 @@ $as_echo "no" >&6; }
else
if test "x$enable_linux_builtin" != xyes; then
- grep -q -E '[[:space:]]insert_inode_locked[[:space:]]' \
+ grep -q -E '[[:space:]]setattr_prepare[[:space:]]' \
$LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
rc=$?
if test $rc -ne 0; then
export=0
- for file in fs/inode.c; do
- grep -q -E "EXPORT_SYMBOL.*(insert_inode_locked)" \
+ for file in fs/attr.c; do
+ grep -q -E "EXPORT_SYMBOL.*(setattr_prepare)" \
"$LINUX/$file" 2>/dev/null
rc=$?
if test $rc -eq 0; then
@@ -21853,28 +22042,28 @@ $as_echo "no" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_INSERT_INODE_LOCKED 1" >>confdefs.h
+$as_echo "#define HAVE_SETATTR_PREPARE 1" >>confdefs.h
fi
fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether d_make_root() is available" >&5
-$as_echo_n "checking whether d_make_root() is available... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether insert_inode_locked() is available" >&5
+$as_echo_n "checking whether insert_inode_locked() is available... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/dcache.h>
+ #include <linux/fs.h>
int
main (void)
{
- d_make_root(NULL);
+ insert_inode_locked(NULL);
;
return 0;
@@ -21923,13 +22112,13 @@ $as_echo "no" >&6; }
else
if test "x$enable_linux_builtin" != xyes; then
- grep -q -E '[[:space:]]d_make_root[[:space:]]' \
+ grep -q -E '[[:space:]]insert_inode_locked[[:space:]]' \
$LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
rc=$?
if test $rc -ne 0; then
export=0
- for file in fs/dcache.c; do
- grep -q -E "EXPORT_SYMBOL.*(d_make_root)" \
+ for file in fs/inode.c; do
+ grep -q -E "EXPORT_SYMBOL.*(insert_inode_locked)" \
"$LINUX/$file" 2>/dev/null
rc=$?
if test $rc -eq 0; then
@@ -21957,15 +22146,15 @@ $as_echo "no" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_D_MAKE_ROOT 1" >>confdefs.h
+$as_echo "#define HAVE_INSERT_INODE_LOCKED 1" >>confdefs.h
fi
fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether d_obtain_alias() is available" >&5
-$as_echo_n "checking whether d_obtain_alias() is available... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether d_make_root() is available" >&5
+$as_echo_n "checking whether d_make_root() is available... " >&6; }
@@ -21978,7 +22167,7 @@ int
main (void)
{
- d_obtain_alias(NULL);
+ d_make_root(NULL);
;
return 0;
@@ -22027,13 +22216,13 @@ $as_echo "no" >&6; }
else
if test "x$enable_linux_builtin" != xyes; then
- grep -q -E '[[:space:]]d_obtain_alias[[:space:]]' \
+ grep -q -E '[[:space:]]d_make_root[[:space:]]' \
$LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
rc=$?
if test $rc -ne 0; then
export=0
for file in fs/dcache.c; do
- grep -q -E "EXPORT_SYMBOL.*(d_obtain_alias)" \
+ grep -q -E "EXPORT_SYMBOL.*(d_make_root)" \
"$LINUX/$file" 2>/dev/null
rc=$?
if test $rc -eq 0; then
@@ -22061,15 +22250,15 @@ $as_echo "no" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_D_OBTAIN_ALIAS 1" >>confdefs.h
+$as_echo "#define HAVE_D_MAKE_ROOT 1" >>confdefs.h
fi
fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether d_prune_aliases() is available" >&5
-$as_echo_n "checking whether d_prune_aliases() is available... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether d_obtain_alias() is available" >&5
+$as_echo_n "checking whether d_obtain_alias() is available... " >&6; }
@@ -22082,8 +22271,7 @@ int
main (void)
{
- struct inode *ip = NULL;
- d_prune_aliases(ip);
+ d_obtain_alias(NULL);
;
return 0;
@@ -22132,13 +22320,13 @@ $as_echo "no" >&6; }
else
if test "x$enable_linux_builtin" != xyes; then
- grep -q -E '[[:space:]]d_prune_aliases[[:space:]]' \
+ grep -q -E '[[:space:]]d_obtain_alias[[:space:]]' \
$LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
rc=$?
if test $rc -ne 0; then
export=0
for file in fs/dcache.c; do
- grep -q -E "EXPORT_SYMBOL.*(d_prune_aliases)" \
+ grep -q -E "EXPORT_SYMBOL.*(d_obtain_alias)" \
"$LINUX/$file" 2>/dev/null
rc=$?
if test $rc -eq 0; then
@@ -22166,15 +22354,15 @@ $as_echo "no" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_D_PRUNE_ALIASES 1" >>confdefs.h
+$as_echo "#define HAVE_D_OBTAIN_ALIAS 1" >>confdefs.h
fi
fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether d_set_d_op() is available" >&5
-$as_echo_n "checking whether d_set_d_op() is available... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether d_prune_aliases() is available" >&5
+$as_echo_n "checking whether d_prune_aliases() is available... " >&6; }
@@ -22187,7 +22375,8 @@ int
main (void)
{
- d_set_d_op(NULL, NULL);
+ struct inode *ip = NULL;
+ d_prune_aliases(ip);
;
return 0;
@@ -22236,13 +22425,13 @@ $as_echo "no" >&6; }
else
if test "x$enable_linux_builtin" != xyes; then
- grep -q -E '[[:space:]]d_set_d_op[[:space:]]' \
+ grep -q -E '[[:space:]]d_prune_aliases[[:space:]]' \
$LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
rc=$?
if test $rc -ne 0; then
export=0
for file in fs/dcache.c; do
- grep -q -E "EXPORT_SYMBOL.*(d_set_d_op)" \
+ grep -q -E "EXPORT_SYMBOL.*(d_prune_aliases)" \
"$LINUX/$file" 2>/dev/null
rc=$?
if test $rc -eq 0; then
@@ -22270,16 +22459,16 @@ $as_echo "no" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_D_SET_D_OP 1" >>confdefs.h
+$as_echo "#define HAVE_D_PRUNE_ALIASES 1" >>confdefs.h
fi
fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether d_set_d_op() is available" >&5
+$as_echo_n "checking whether d_set_d_op() is available... " >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether dops->d_revalidate() takes struct nameidata" >&5
-$as_echo_n "checking whether dops->d_revalidate() takes struct nameidata... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
@@ -22287,18 +22476,11 @@ cat confdefs.h - <<_ACEOF >conftest.c
#include <linux/dcache.h>
- int revalidate (struct dentry *dentry,
- struct nameidata *nidata) { return 0; }
-
- static const struct dentry_operations
- dops __attribute__ ((unused)) = {
- .d_revalidate = revalidate,
- };
-
int
main (void)
{
+ d_set_d_op(NULL, NULL);
;
return 0;
@@ -22328,113 +22510,88 @@ _ACEOF
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-
-$as_echo "#define HAVE_D_REVALIDATE_NAMEIDATA 1" >>confdefs.h
-
-
+ rc=0
else
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-
+ rc=1
fi
rm -Rf build
+ if test $rc -ne 0; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether dentry uses const struct dentry_operations" >&5
-$as_echo_n "checking whether dentry uses const struct dentry_operations... " >&6; }
-
-
-cat confdefs.h - <<_ACEOF >conftest.c
-
-
- #include <linux/dcache.h>
-
- const struct dentry_operations test_d_op = {
- .d_revalidate = NULL,
- };
-
-int
-main (void)
-{
-
- struct dentry d __attribute__ ((unused));
-
- d.d_op = &test_d_op;
-
- ;
- return 0;
-}
-
-_ACEOF
-
-
+ else
+ if test "x$enable_linux_builtin" != xyes; then
-cat - <<_ACEOF >conftest.h
+ grep -q -E '[[:space:]]d_set_d_op[[:space:]]' \
+ $LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
+ rc=$?
+ if test $rc -ne 0; then
+ export=0
+ for file in fs/dcache.c; do
+ grep -q -E "EXPORT_SYMBOL.*(d_set_d_op)" \
+ "$LINUX/$file" 2>/dev/null
+ rc=$?
+ if test $rc -eq 0; then
+ export=1
+ break;
+ fi
+ done
+ if test $export -eq 0; then :
+ rc=1
+ else :
+ rc=0
+ fi
+ else :
+ rc=0
+ fi
-_ACEOF
+ fi
+ if test $rc -ne 0; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
- rm -Rf build && mkdir -p build && touch build/conftest.mod.c
- echo "obj-m := conftest.o" >build/Makefile
- modpost_flag=''
- test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
- if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
+ else :
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_CONST_DENTRY_OPERATIONS 1" >>confdefs.h
-
-
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
+$as_echo "#define HAVE_D_SET_D_OP 1" >>confdefs.h
+ fi
+ fi
-fi
- rm -Rf build
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether dops->d_revalidate() takes struct nameidata" >&5
+$as_echo_n "checking whether dops->d_revalidate() takes struct nameidata... " >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether check_disk_size_change() is available" >&5
-$as_echo_n "checking whether check_disk_size_change() is available... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.c
-cat confdefs.h - <<_ACEOF >conftest.c
+ #include <linux/dcache.h>
+ int revalidate (struct dentry *dentry,
+ struct nameidata *nidata) { return 0; }
- #include <linux/fs.h>
+ static const struct dentry_operations
+ dops __attribute__ ((unused)) = {
+ .d_revalidate = revalidate,
+ };
int
main (void)
{
- check_disk_size_change(NULL, NULL);
;
return 0;
@@ -22464,81 +22621,113 @@ _ACEOF
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
- rc=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
- rc=1
-
-
-fi
- rm -Rf build
-
- if test $rc -ne 0; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
- else
- if test "x$enable_linux_builtin" != xyes; then
+$as_echo "#define HAVE_D_REVALIDATE_NAMEIDATA 1" >>confdefs.h
- grep -q -E '[[:space:]]check_disk_size_change[[:space:]]' \
- $LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
- rc=$?
- if test $rc -ne 0; then
- export=0
- for file in fs/block_dev.c; do
- grep -q -E "EXPORT_SYMBOL.*(check_disk_size_change)" \
- "$LINUX/$file" 2>/dev/null
- rc=$?
- if test $rc -eq 0; then
- export=1
- break;
- fi
- done
- if test $export -eq 0; then :
- rc=1
- else :
- rc=0
- fi
- else :
- rc=0
- fi
- fi
- if test $rc -ne 0; then :
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- else :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-$as_echo "#define HAVE_CHECK_DISK_SIZE_CHANGE 1" >>confdefs.h
+fi
+ rm -Rf build
- fi
- fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether truncate_setsize() is available" >&5
-$as_echo_n "checking whether truncate_setsize() is available... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether dentry uses const struct dentry_operations" >&5
+$as_echo_n "checking whether dentry uses const struct dentry_operations... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/mm.h>
+ #include <linux/dcache.h>
+
+ const struct dentry_operations test_d_op = {
+ .d_revalidate = NULL,
+ };
int
main (void)
{
- truncate_setsize(NULL, 0);
+ struct dentry d __attribute__ ((unused));
+
+ d.d_op = &test_d_op;
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_CONST_DENTRY_OPERATIONS 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether truncate_setsize() is available" >&5
+$as_echo_n "checking whether truncate_setsize() is available... " >&6; }
+
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/mm.h>
+
+int
+main (void)
+{
+
+ truncate_setsize(NULL, 0);
;
return 0;
@@ -23165,8 +23354,9 @@ main (void)
{
char *name = "bdi";
+ atomic_long_t zfs_bdi_seq;
int error __attribute__((unused)) =
- super_setup_bdi_name(&sb, name);
+ super_setup_bdi_name(&sb, "%.28s-%ld", name, atomic_long_inc_return(&zfs_bdi_seq));
;
return 0;
@@ -24974,8 +25164,8 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether generic IO accounting symbols are avaliable" >&5
-$as_echo_n "checking whether generic IO accounting symbols are avaliable... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether 3 arg generic IO accounting symbols are available" >&5
+$as_echo_n "checking whether 3 arg generic IO accounting symbols are available... " >&6; }
@@ -25077,7 +25267,7 @@ $as_echo "no" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_GENERIC_IO_ACCT 1" >>confdefs.h
+$as_echo "#define HAVE_GENERIC_IO_ACCT_3ARG 1" >>confdefs.h
fi
@@ -25085,27 +25275,27 @@ $as_echo "#define HAVE_GENERIC_IO_ACCT 1" >>confdefs.h
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether iops->rename() wants flags" >&5
-$as_echo_n "checking whether iops->rename() wants flags... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether 4 arg generic IO accounting symbols are available" >&5
+$as_echo_n "checking whether 4 arg generic IO accounting symbols are available... " >&6; }
+
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/fs.h>
- int rename_fn(struct inode *sip, struct dentry *sdp,
- struct inode *tip, struct dentry *tdp,
- unsigned int flags) { return 0; }
+ #include <linux/bio.h>
- static const struct inode_operations
- iops __attribute__ ((unused)) = {
- .rename = rename_fn,
- };
+ void (*generic_start_io_acct_f)(struct request_queue *, int,
+ unsigned long, struct hd_struct *) = &generic_start_io_acct;
+ void (*generic_end_io_acct_f)(struct request_queue *, int,
+ struct hd_struct *, unsigned long) = &generic_end_io_acct;
int
main (void)
{
+ generic_start_io_acct(NULL, 0, 0, NULL);
+ generic_end_io_acct(NULL, 0, NULL, 0);
;
return 0;
@@ -25135,11 +25325,116 @@ _ACEOF
ac_status=$?
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
+ rc=0
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+ rc=1
+
+
+fi
+ rm -Rf build
+
+
+ if test $rc -ne 0; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+ else
+ if test "x$enable_linux_builtin" != xyes; then
+
+ grep -q -E '[[:space:]]generic_start_io_acct[[:space:]]' \
+ $LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
+ rc=$?
+ if test $rc -ne 0; then
+ export=0
+ for file in block/bio.c; do
+ grep -q -E "EXPORT_SYMBOL.*(generic_start_io_acct)" \
+ "$LINUX/$file" 2>/dev/null
+ rc=$?
+ if test $rc -eq 0; then
+ export=1
+ break;
+ fi
+ done
+ if test $export -eq 0; then :
+ rc=1
+ else :
+ rc=0
+ fi
+ else :
+ rc=0
+ fi
+
+ fi
+ if test $rc -ne 0; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+ else :
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_RENAME_WANTS_FLAGS 1" >>confdefs.h
+$as_echo "#define HAVE_GENERIC_IO_ACCT_4ARG 1" >>confdefs.h
+
+
+ fi
+ fi
+
+
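The paired 3-arg/4-arg IO accounting checks feed a single wrapper in practice: 4.14 added the request_queue argument to generic_start_io_acct()/generic_end_io_acct(). A sketch of how both defines might be consumed (wrapper name assumed, not taken from this patch):

    #include <linux/bio.h>

    static inline void
    blk_generic_start_io_acct(struct request_queue *q, int rw,
        unsigned long sectors, struct hd_struct *part)
    {
    #if defined(HAVE_GENERIC_IO_ACCT_3ARG)
            /* 3.19 - 4.13: no request_queue argument. */
            generic_start_io_acct(rw, sectors, part);
    #elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
            /* 4.14+: queue pointer added for BDI accounting. */
            generic_start_io_acct(q, rw, sectors, part);
    #endif
    }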
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether asm/fpu/api.h exists" >&5
+$as_echo_n "checking whether asm/fpu/api.h exists... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/kernel.h>
+ #include <asm/fpu/api.h>
+
+int
+main (void)
+{
+
+ __kernel_fpu_begin();
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_FPU_API_H 1" >>confdefs.h
else
@@ -25157,20 +25452,93 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether generic_setxattr() exists" >&5
-$as_echo_n "checking whether generic_setxattr() exists... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether i_(uid|gid)_(read|write) exist" >&5
+$as_echo_n "checking whether i_(uid|gid)_(read|write) exist... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
#include <linux/fs.h>
- #include <linux/xattr.h>
- static const struct inode_operations
- iops __attribute__ ((unused)) = {
- .setxattr = generic_setxattr
- };
+int
+main (void)
+{
+
+ struct inode *ip = NULL;
+ (void) i_uid_read(ip);
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_KUID_HELPERS 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether module_param_call() is hardened" >&5
+$as_echo_n "checking whether module_param_call() is hardened... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/module.h>
+ #include <linux/moduleparam.h>
+
+ int param_get(char *b, const struct kernel_param *kp)
+ {
+ return (0);
+ }
+
+ int param_set(const char *b, const struct kernel_param *kp)
+ {
+ return (0);
+ }
+
+ module_param_call(p, param_set, param_get, NULL, 0644);
int
main (void)
@@ -25209,7 +25577,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_GENERIC_SETXATTR 1" >>confdefs.h
+$as_echo "#define MODULE_PARAM_CALL_CONST 1" >>confdefs.h
else
@@ -25226,24 +25594,166 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether current_time() exists" >&5
-$as_echo_n "checking whether current_time() exists... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether iops->rename() wants flags" >&5
+$as_echo_n "checking whether iops->rename() wants flags... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
#include <linux/fs.h>
+ int rename_fn(struct inode *sip, struct dentry *sdp,
+ struct inode *tip, struct dentry *tdp,
+ unsigned int flags) { return 0; }
+
+ static const struct inode_operations
+ iops __attribute__ ((unused)) = {
+ .rename = rename_fn,
+ };
int
main (void)
{
- struct inode ip;
- struct timespec now __attribute__ ((unused));
-
- now = current_time(&ip);
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_RENAME_WANTS_FLAGS 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether generic_setxattr() exists" >&5
+$as_echo_n "checking whether generic_setxattr() exists... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/fs.h>
+ #include <linux/xattr.h>
+
+ static const struct inode_operations
+ iops __attribute__ ((unused)) = {
+ .setxattr = generic_setxattr
+ };
+
+int
+main (void)
+{
+
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_GENERIC_SETXATTR 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether current_time() exists" >&5
+$as_echo_n "checking whether current_time() exists... " >&6; }
+
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/fs.h>
+
+int
+main (void)
+{
+
+ struct inode ip;
+ struct timespec now __attribute__ ((unused));
+
+ now = current_time(&ip);
;
return 0;
@@ -25275,81 +25785,2208 @@ _ACEOF
test $ac_status = 0; }; }; then :
rc=0
else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
- rc=1
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+ rc=1
+
+
+fi
+ rm -Rf build
+
+
+ if test $rc -ne 0; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+ else
+ if test "x$enable_linux_builtin" != xyes; then
+
+ grep -q -E '[[:space:]]current_time[[:space:]]' \
+ $LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
+ rc=$?
+ if test $rc -ne 0; then
+ export=0
+ for file in fs/inode.c; do
+ grep -q -E "EXPORT_SYMBOL.*(current_time)" \
+ "$LINUX/$file" 2>/dev/null
+ rc=$?
+ if test $rc -eq 0; then
+ export=1
+ break;
+ fi
+ done
+ if test $export -eq 0; then :
+ rc=1
+ else :
+ rc=0
+ fi
+ else :
+ rc=0
+ fi
+
+ fi
+ if test $rc -ne 0; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+ else :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_CURRENT_TIME 1" >>confdefs.h
+
+
+ fi
+ fi
+
+
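Where HAVE_CURRENT_TIME is not defined by the check above, a pre-4.9 kernel can emulate the helper by truncating the kernel clock to the superblock's timestamp granularity. A minimal sketch, assuming timespec_trunc() and current_kernel_time() are available on such kernels:

    #include <linux/fs.h>

    #if !defined(HAVE_CURRENT_TIME)
    static inline struct timespec
    current_time(struct inode *ip)
    {
            return (timespec_trunc(current_kernel_time(),
                ip->i_sb->s_time_gran));
    }
    #endif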
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to use vm_node_stat based fn's" >&5
+$as_echo_n "checking whether to use vm_node_stat based fn's... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/mm.h>
+ #include <linux/vmstat.h>
+
+int
+main (void)
+{
+
+ int a __attribute__ ((unused)) = NR_VM_NODE_STAT_ITEMS;
+ long x __attribute__ ((unused)) =
+ atomic_long_read(&vm_node_stat[0]);
+ (void) global_node_page_state(0);
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define ZFS_GLOBAL_NODE_PAGE_STATE 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+
+
+ if test "$LINUX_OBJ" != "$LINUX"; then :
+
+ KERNELMAKE_PARAMS="$KERNELMAKE_PARAMS O=$LINUX_OBJ"
+
+fi
+
+
+
+ KERNELCPPFLAGS="$KERNELCPPFLAGS -std=gnu99"
+ KERNELCPPFLAGS="$KERNELCPPFLAGS -Wno-declaration-after-statement"
+ KERNELCPPFLAGS="$KERNELCPPFLAGS $NO_UNUSED_BUT_SET_VARIABLE"
+ KERNELCPPFLAGS="$KERNELCPPFLAGS $NO_BOOL_COMPARE"
+ KERNELCPPFLAGS="$KERNELCPPFLAGS -DHAVE_SPL -D_KERNEL"
+ KERNELCPPFLAGS="$KERNELCPPFLAGS -DTEXT_DOMAIN=\\\"zfs-linux-kernel\\\""
+
+
+ ;;
+ user)
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dkms.conf file" >&5
+$as_echo_n "checking for dkms.conf file... " >&6; }
+ if test -e dkms.conf; then :
+
+ as_fn_error $? "
+ *** ZFS should not be manually built in the DKMS source tree.
+ *** Remove all ZFS packages before compiling the ZoL sources.
+ *** Running \"make install\" breaks ZFS packages." "$LINENO" 5
+
+else
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
+$as_echo "not found" >&6; }
+
+fi
+
+
+
+# Check whether --with-mounthelperdir was given.
+if test "${with_mounthelperdir+set}" = set; then :
+ withval=$with_mounthelperdir; mounthelperdir=$withval
+else
+ mounthelperdir=/sbin
+fi
+
+
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for udev directories" >&5
+$as_echo_n "checking for udev directories... " >&6; }
+
+# Check whether --with-udevdir was given.
+if test "${with_udevdir+set}" = set; then :
+ withval=$with_udevdir; udevdir=$withval
+else
+ udevdir=check
+fi
+
+
+ if test "x$udevdir" = xcheck; then :
+
+ path1=/lib/udev
+ path2=/usr/lib/udev
+ default=$path2
+
+ if test -d "$path1"; then :
+ udevdir="$path1"
+else
+
+ if test -d "$path2"; then :
+ udevdir="$path2"
+else
+ udevdir="$default"
+fi
+
+fi
+
+fi
+
+
+# Check whether --with-udevruledir was given.
+if test "${with_udevruledir+set}" = set; then :
+ withval=$with_udevruledir; udevruledir=$withval
+else
+ udevruledir="${udevdir}/rules.d"
+fi
+
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $udevdir;$udevruledir" >&5
+$as_echo "$udevdir;$udevruledir" >&6; }
+
+
+ # Check whether --enable-systemd was given.
+if test "${enable_systemd+set}" = set; then :
+ enableval=$enable_systemd;
+else
+ enable_systemd=yes
+fi
+
+
+
+# Check whether --with-systemdunitdir was given.
+if test "${with_systemdunitdir+set}" = set; then :
+ withval=$with_systemdunitdir; systemdunitdir=$withval
+else
+ systemdunitdir=/usr/lib/systemd/system
+fi
+
+
+
+# Check whether --with-systemdpresetdir was given.
+if test "${with_systemdpresetdir+set}" = set; then :
+ withval=$with_systemdpresetdir; systemdpresetdir=$withval
+else
+ systemdpresetdir=/usr/lib/systemd/system-preset
+fi
+
+
+
+# Check whether --with-systemdmodulesloaddir was given.
+if test "${with_systemdmodulesloaddir+set}" = set; then :
+ withval=$with_systemdmodulesloaddir; systemdmodulesloaddir=$withval
+else
+ systemdmodulesloaddir=/usr/lib/modules-load.d
+fi
+
+
+
+ if test "x$enable_systemd" = xyes; then :
+
+ ZFS_INIT_SYSTEMD=systemd
+ ZFS_MODULE_LOAD=modules-load.d
+ modulesloaddir=$systemdmodulesloaddir
+
+fi
+
+
+
+
+
+
+
+
+ # Check whether --enable-sysvinit was given.
+if test "${enable_sysvinit+set}" = set; then :
+ enableval=$enable_sysvinit;
+else
+ enable_sysvinit=yes
+fi
+
+
+ if test "x$enable_sysvinit" = xyes; then :
+ ZFS_INIT_SYSV=init.d
+fi
+
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dracut directory" >&5
+$as_echo_n "checking for dracut directory... " >&6; }
+
+# Check whether --with-dracutdir was given.
+if test "${with_dracutdir+set}" = set; then :
+ withval=$with_dracutdir; dracutdir=$withval
+else
+ dracutdir=check
+fi
+
+
+ if test "x$dracutdir" = xcheck; then :
+
+ path1=/usr/share/dracut
+ path2=/usr/lib/dracut
+ default=$path2
+
+ if test -d "$path1"; then :
+ dracutdir="$path1"
+else
+
+ if test -d "$path2"; then :
+ dracutdir="$path2"
+else
+ dracutdir="$default"
+fi
+
+fi
+
+fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $dracutdir" >&5
+$as_echo "$dracutdir" >&6; }
+
+
+ ZLIB=
+
+ ac_fn_c_check_header_mongrel "$LINENO" "zlib.h" "ac_cv_header_zlib_h" "$ac_includes_default"
+if test "x$ac_cv_header_zlib_h" = xyes; then :
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** zlib.h missing, zlib-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing compress2" >&5
+$as_echo_n "checking for library containing compress2... " >&6; }
+if ${ac_cv_search_compress2+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char compress2 ();
+int
+main ()
+{
+return compress2 ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' z; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_compress2=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_compress2+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_compress2+:} false; then :
+
+else
+ ac_cv_search_compress2=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_compress2" >&5
+$as_echo "$ac_cv_search_compress2" >&6; }
+ac_res=$ac_cv_search_compress2
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** compress2() missing, zlib-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing uncompress" >&5
+$as_echo_n "checking for library containing uncompress... " >&6; }
+if ${ac_cv_search_uncompress+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char uncompress ();
+int
+main ()
+{
+return uncompress ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' z; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_uncompress=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_uncompress+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_uncompress+:} false; then :
+
+else
+ ac_cv_search_uncompress=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_uncompress" >&5
+$as_echo "$ac_cv_search_uncompress" >&6; }
+ac_res=$ac_cv_search_uncompress
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** uncompress() missing, zlib-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing crc32" >&5
+$as_echo_n "checking for library containing crc32... " >&6; }
+if ${ac_cv_search_crc32+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char crc32 ();
+int
+main ()
+{
+return crc32 ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' z; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_crc32=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_crc32+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_crc32+:} false; then :
+
+else
+ ac_cv_search_crc32=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_crc32" >&5
+$as_echo "$ac_cv_search_crc32" >&6; }
+ac_res=$ac_cv_search_crc32
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** crc32() missing, zlib-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+ ZLIB="-lz"
+
+
+$as_echo "#define HAVE_ZLIB 1" >>confdefs.h
+
+
+
+ LIBUUID=
+
+ ac_fn_c_check_header_mongrel "$LINENO" "uuid/uuid.h" "ac_cv_header_uuid_uuid_h" "$ac_includes_default"
+if test "x$ac_cv_header_uuid_uuid_h" = xyes; then :
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** uuid/uuid.h missing, libuuid-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing uuid_generate" >&5
+$as_echo_n "checking for library containing uuid_generate... " >&6; }
+if ${ac_cv_search_uuid_generate+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char uuid_generate ();
+int
+main ()
+{
+return uuid_generate ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' uuid; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_uuid_generate=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_uuid_generate+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_uuid_generate+:} false; then :
+
+else
+ ac_cv_search_uuid_generate=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_uuid_generate" >&5
+$as_echo "$ac_cv_search_uuid_generate" >&6; }
+ac_res=$ac_cv_search_uuid_generate
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** uuid_generate() missing, libuuid-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing uuid_is_null" >&5
+$as_echo_n "checking for library containing uuid_is_null... " >&6; }
+if ${ac_cv_search_uuid_is_null+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char uuid_is_null ();
+int
+main ()
+{
+return uuid_is_null ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' uuid; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_uuid_is_null=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_uuid_is_null+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_uuid_is_null+:} false; then :
+
+else
+ ac_cv_search_uuid_is_null=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_uuid_is_null" >&5
+$as_echo "$ac_cv_search_uuid_is_null" >&6; }
+ac_res=$ac_cv_search_uuid_is_null
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** uuid_is_null() missing, libuuid-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+ LIBUUID="-luuid"
+
+
+$as_echo "#define HAVE_LIBUUID 1" >>confdefs.h
+
+
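configure insists on both uuid_generate() and uuid_is_null() before setting LIBUUID="-luuid" and defining HAVE_LIBUUID. A minimal sketch of those two calls, assuming the libuuid-devel headers found above (file name illustrative):

    /* uuid_probe.c — exercise the two symbols configure requires.
     * Build: cc uuid_probe.c -luuid
     */
    #include <stdio.h>
    #include <uuid/uuid.h>

    int main(void)
    {
    	uuid_t id;
    	char text[37];		/* 36 characters plus NUL */

    	uuid_clear(id);
    	printf("cleared uuid is null: %d\n", uuid_is_null(id));

    	uuid_generate(id);	/* library picks random- or time-based */
    	uuid_unparse(id, text);
    	printf("generated %s (null: %d)\n", text, uuid_is_null(id));
    	return 0;
    }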
+
+
+# Check whether --with-tirpc was given.
+if test "${with_tirpc+set}" = set; then :
+ withval=$with_tirpc;
+else
+ with_tirpc=check
+fi
+
+
+ LIBTIRPC=
+ LIBTIRPC_CFLAGS=
+
+ if test "x$with_tirpc" != xno; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for xdrmem_create in -ltirpc" >&5
+$as_echo_n "checking for xdrmem_create in -ltirpc... " >&6; }
+if ${ac_cv_lib_tirpc_xdrmem_create+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-ltirpc $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char xdrmem_create ();
+int
+main ()
+{
+return xdrmem_create ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_lib_tirpc_xdrmem_create=yes
+else
+ ac_cv_lib_tirpc_xdrmem_create=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_tirpc_xdrmem_create" >&5
+$as_echo "$ac_cv_lib_tirpc_xdrmem_create" >&6; }
+if test "x$ac_cv_lib_tirpc_xdrmem_create" = xyes; then :
+ LIBTIRPC=-ltirpc
+
+ LIBTIRPC_CFLAGS=-I/usr/include/tirpc
+
+
+$as_echo "#define HAVE_LIBTIRPC 1" >>confdefs.h
+
+
+else
+ if test "x$with_tirpc" != xcheck; then
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "--with-tirpc was given, but test for tirpc failed
+See \`config.log' for more details" "$LINENO" 5; }
+ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing xdrmem_create" >&5
+$as_echo_n "checking for library containing xdrmem_create... " >&6; }
+if ${ac_cv_search_xdrmem_create+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char xdrmem_create ();
+int
+main ()
+{
+return xdrmem_create ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' tirpc; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_xdrmem_create=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_xdrmem_create+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_xdrmem_create+:} false; then :
+
+else
+ ac_cv_search_xdrmem_create=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_xdrmem_create" >&5
+$as_echo "$ac_cv_search_xdrmem_create" >&6; }
+ac_res=$ac_cv_search_xdrmem_create
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "xdrmem_create() requires tirpc or libc
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+fi
+
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing xdrmem_create" >&5
+$as_echo_n "checking for library containing xdrmem_create... " >&6; }
+if ${ac_cv_search_xdrmem_create+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char xdrmem_create ();
+int
+main ()
+{
+return xdrmem_create ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' tirpc; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_xdrmem_create=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_xdrmem_create+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_xdrmem_create+:} false; then :
+
+else
+ ac_cv_search_xdrmem_create=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_xdrmem_create" >&5
+$as_echo "$ac_cv_search_xdrmem_create" >&6; }
+ac_res=$ac_cv_search_xdrmem_create
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "xdrmem_create() requires libc
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+fi
+
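The --with-tirpc handling above prefers an explicit -ltirpc (adding -I/usr/include/tirpc to LIBTIRPC_CFLAGS), falls back to searching libc when tirpc is absent, and hard-fails only if xdrmem_create() resolves nowhere. A sketch of the XDR round trip that symbol enables — this assumes either glibc's legacy SunRPC or libtirpc is present, matching whichever branch succeeded above:

    /* xdr_probe.c — round-trip an int through xdrmem_create().
     * With glibc's legacy SunRPC:  cc xdr_probe.c
     * With libtirpc:  cc -I/usr/include/tirpc xdr_probe.c -ltirpc
     */
    #include <stdio.h>
    #include <rpc/types.h>
    #include <rpc/xdr.h>

    int main(void)
    {
    	char buf[64];
    	XDR xdrs;
    	int value = 42, decoded = 0;

    	/* Encode into the fixed memory stream... */
    	xdrmem_create(&xdrs, buf, sizeof(buf), XDR_ENCODE);
    	if (!xdr_int(&xdrs, &value))
    		return 1;
    	xdr_destroy(&xdrs);

    	/* ...then decode it back out. */
    	xdrmem_create(&xdrs, buf, sizeof(buf), XDR_DECODE);
    	if (!xdr_int(&xdrs, &decoded))
    		return 1;
    	xdr_destroy(&xdrs);

    	printf("decoded %d\n", decoded);
    	return decoded != 42;
    }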
+
+ LIBBLKID=
+
+ ac_fn_c_check_header_mongrel "$LINENO" "blkid/blkid.h" "ac_cv_header_blkid_blkid_h" "$ac_includes_default"
+if test "x$ac_cv_header_blkid_blkid_h" = xyes; then :
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** blkid.h missing, libblkid-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+
+ LIBBLKID="-lblkid"
+
+
+$as_echo "#define HAVE_LIBBLKID 1" >>confdefs.h
+
+
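Unlike the link searches above, the libblkid check is header-only: blkid/blkid.h must exist and -lblkid is then assumed to work. A short sketch of the kind of tag lookup the library provides (device path illustrative; assumes libblkid-devel):

    /* blkid_probe.c — look up a device's TYPE tag via libblkid.
     * Build: cc blkid_probe.c -lblkid
     */
    #include <stdio.h>
    #include <stdlib.h>
    #include <blkid/blkid.h>

    int main(int argc, char **argv)
    {
    	const char *dev = argc > 1 ? argv[1] : "/dev/sda1";
    	/* A NULL cache makes libblkid probe on the fly; the returned
    	 * string is malloc'd, or NULL when the type is unknown. */
    	char *type = blkid_get_tag_value(NULL, "TYPE", dev);

    	printf("%s: TYPE=%s\n", dev, type != NULL ? type : "(unknown)");
    	free(type);
    	return 0;
    }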
+
+ LIBATTR=
+
+ ac_fn_c_check_header_mongrel "$LINENO" "attr/xattr.h" "ac_cv_header_attr_xattr_h" "$ac_includes_default"
+if test "x$ac_cv_header_attr_xattr_h" = xyes; then :
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** attr/xattr.h missing, libattr-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+
+ LIBATTR="-lattr"
+
+
+$as_echo "#define HAVE_LIBATTR 1" >>confdefs.h
+
+
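The libattr check is likewise header-only. The sketch below exercises the extended-attribute calls that attr/xattr.h declares; it includes glibc's equivalent sys/xattr.h header, and the file path is illustrative:

    /* xattr_probe.c — set and read back a user.* extended attribute. */
    #include <stdio.h>
    #include <string.h>
    #include <sys/xattr.h>

    int main(void)
    {
    	const char *path = "/tmp/xattr-demo";
    	const char *val = "demo";
    	char buf[16];
    	ssize_t n;

    	FILE *f = fopen(path, "w");
    	if (f == NULL)
    		return 1;
    	fclose(f);

    	/* user.* attributes are writable by the file's owner. */
    	if (setxattr(path, "user.test", val, strlen(val), 0) != 0) {
    		perror("setxattr");	/* e.g. filesystem without xattrs */
    		return 1;
    	}
    	n = getxattr(path, "user.test", buf, sizeof(buf));
    	if (n < 0) {
    		perror("getxattr");
    		return 1;
    	}
    	printf("user.test = %.*s\n", (int)n, buf);
    	return 0;
    }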
+
+ LIBUDEV=
+
+ ac_fn_c_check_header_mongrel "$LINENO" "libudev.h" "ac_cv_header_libudev_h" "$ac_includes_default"
+if test "x$ac_cv_header_libudev_h" = xyes; then :
+
+ user_libudev=yes
+ LIBUDEV="-ludev"
+
+
+$as_echo "#define HAVE_LIBUDEV 1" >>confdefs.h
+
+
+else
+
+ user_libudev=no
+
+fi
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing udev_device_get_is_initialized" >&5
+$as_echo_n "checking for library containing udev_device_get_is_initialized... " >&6; }
+if ${ac_cv_search_udev_device_get_is_initialized+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char udev_device_get_is_initialized ();
+int
+main ()
+{
+return udev_device_get_is_initialized ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' udev; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_udev_device_get_is_initialized=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_udev_device_get_is_initialized+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_udev_device_get_is_initialized+:} false; then :
+
+else
+ ac_cv_search_udev_device_get_is_initialized=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_udev_device_get_is_initialized" >&5
+$as_echo "$ac_cv_search_udev_device_get_is_initialized" >&6; }
+ac_res=$ac_cv_search_udev_device_get_is_initialized
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+
+$as_echo "#define HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED 1" >>confdefs.h
+
+fi
+
+
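libudev is the one optional dependency in this group: a missing header just sets user_libudev=no instead of aborting, and udev_device_get_is_initialized() gets its own feature define on top of HAVE_LIBUDEV. A sketch of that call, with an illustrative device name and assuming libudev-devel:

    /* udev_probe.c — query udev's view of a block device.
     * Build: cc udev_probe.c -ludev
     */
    #include <stdio.h>
    #include <libudev.h>

    int main(void)
    {
    	struct udev *udev = udev_new();
    	struct udev_device *dev;

    	if (udev == NULL)
    		return 1;
    	dev = udev_device_new_from_subsystem_sysname(udev, "block", "sda");
    	if (dev != NULL) {
    		/* Nonzero once udev has finished processing the device. */
    		printf("sda initialized: %d\n",
    		    udev_device_get_is_initialized(dev));
    		udev_device_unref(dev);
    	}
    	udev_unref(udev);
    	return 0;
    }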
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -Wframe-larger-than=<size> support" >&5
+$as_echo_n "checking for -Wframe-larger-than=<size> support... " >&6; }
+
+ saved_flags="$CFLAGS"
+ CFLAGS="$CFLAGS -Wframe-larger-than=1024"
+
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+ FRAME_LARGER_THAN=-Wframe-larger-than=1024
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+else
+
+ FRAME_LARGER_THAN=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+ CFLAGS="$saved_flags"
+
+
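The flag probe above only confirms that the compiler accepts -Wframe-larger-than=1024: it compiles an empty program with the flag and records the result in FRAME_LARGER_THAN. This sketch shows what the warning catches once the flag is actually applied — large_frame() exceeds the 1024-byte stack budget while small_frame() stays quiet (function names are illustrative):

    /* frame_probe.c — what -Wframe-larger-than=1024 diagnoses.
     * Build: cc -Wframe-larger-than=1024 -c frame_probe.c
     */
    #include <string.h>

    int large_frame(void)
    {
    	char buf[4096];		/* over the 1024-byte budget: warns */
    	memset(buf, 1, sizeof(buf));
    	return buf[0];
    }

    int small_frame(void)
    {
    	char buf[64];		/* comfortably under: silent */
    	memset(buf, 1, sizeof(buf));
    	return buf[0];
    }

    int main(void)
    {
    	return large_frame() + small_frame();
    }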
+
+ if test "x$runstatedir" = x; then
+ runstatedir='${localstatedir}/run'
+
+ fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking makedev() is declared in sys/sysmacros.h" >&5
+$as_echo_n "checking makedev() is declared in sys/sysmacros.h... " >&6; }
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+ #include <sys/sysmacros.h>
+
+int
+main ()
+{
+
+ int k;
+ k = makedev(0,0);
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_MAKEDEV_IN_SYSMACROS 1" >>confdefs.h
+
+
+else
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking makedev() is declared in sys/mkdev.h" >&5
+$as_echo_n "checking makedev() is declared in sys/mkdev.h... " >&6; }
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+ #include <sys/mkdev.h>
+
+int
+main ()
+{
+
+ int k;
+ k = makedev(0,0);
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_MAKEDEV_IN_MKDEV 1" >>confdefs.h
+
+
+else
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
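The two conftest programs above differ only in their include line: newer glibc wants makedev() from sys/sysmacros.h, while Solaris-style layouts declare it in sys/mkdev.h, and the HAVE_MAKEDEV_IN_* defines record which header worked. A userland sketch of the call itself:

    /* makedev_probe.c — compose and split a device number. */
    #include <stdio.h>
    #include <sys/sysmacros.h>	/* or <sys/mkdev.h>, per the probe */
    #include <sys/types.h>

    int main(void)
    {
    	dev_t dev = makedev(8, 1);	/* e.g. major 8, minor 1 (sda1) */

    	printf("dev 0x%llx -> major %u minor %u\n",
    	    (unsigned long long)dev, (unsigned)major(dev),
    	    (unsigned)minor(dev));
    	return 0;
    }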
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -Wno-format-truncation support" >&5
+$as_echo_n "checking for -Wno-format-truncation support... " >&6; }
+
+ saved_flags="$CFLAGS"
+ CFLAGS="$CFLAGS -Wno-format-truncation"
+
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+ NO_FORMAT_TRUNCATION=-Wno-format-truncation
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+else
+
+ NO_FORMAT_TRUNCATION=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+ CFLAGS="$saved_flags"
+
+
+
+
+ ZONENAME="echo global"
+
+
+
+
+
+ for ac_func in mlockall
+do :
+ ac_fn_c_check_func "$LINENO" "mlockall" "ac_cv_func_mlockall"
+if test "x$ac_cv_func_mlockall" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_MLOCKALL 1
+_ACEOF
+
+fi
+done
+
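This is a plain function-existence probe: if mlockall() links, HAVE_MLOCKALL is defined so user-space callers can pin their pages when the call is available. A minimal sketch — note it degrades gracefully, since locking can fail without CAP_IPC_LOCK or a sufficient RLIMIT_MEMLOCK:

    /* mlockall_probe.c — pin the process address space when permitted. */
    #include <stdio.h>
    #include <sys/mman.h>

    int main(void)
    {
    	/* Lock current and future mappings; treat failure as
    	 * non-fatal, as an opportunistic caller would. */
    	if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
    		perror("mlockall");
    		return 0;
    	}
    	puts("address space locked");
    	munlockall();
    	return 0;
    }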
+ ;;
+ all)
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dkms.conf file" >&5
+$as_echo_n "checking for dkms.conf file... " >&6; }
+ if test -e dkms.conf; then :
+
+ as_fn_error $? "
+ *** ZFS should not be manually built in the DKMS source tree.
+ *** Remove all ZFS packages before compiling the ZoL sources.
+ *** Running \"make install\" breaks ZFS packages." "$LINENO" 5
+
+else
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
+$as_echo "not found" >&6; }
+
+fi
+
+
+
+# Check whether --with-mounthelperdir was given.
+if test "${with_mounthelperdir+set}" = set; then :
+ withval=$with_mounthelperdir; mounthelperdir=$withval
+else
+ mounthelperdir=/sbin
+fi
+
+
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for udev directories" >&5
+$as_echo_n "checking for udev directories... " >&6; }
+
+# Check whether --with-udevdir was given.
+if test "${with_udevdir+set}" = set; then :
+ withval=$with_udevdir; udevdir=$withval
+else
+ udevdir=check
+fi
+
+
+ if test "x$udevdir" = xcheck; then :
+
+ path1=/lib/udev
+ path2=/usr/lib/udev
+ default=$path2
+
+ if test -d "$path1"; then :
+ udevdir="$path1"
+else
+
+ if test -d "$path2"; then :
+ udevdir="$path2"
+else
+ udevdir="$default"
+fi
+
+fi
+
+fi
+
+
+# Check whether --with-udevruledir was given.
+if test "${with_udevruledir+set}" = set; then :
+ withval=$with_udevruledir; udevruledir=$withval
+else
+ udevruledir="${udevdir}/rules.d"
+fi
+
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $udevdir;$udevruledir" >&5
+$as_echo "$udevdir;$udevruledir" >&6; }
+
+
+ # Check whether --enable-systemd was given.
+if test "${enable_systemd+set}" = set; then :
+ enableval=$enable_systemd;
+else
+ enable_systemd=yes
+fi
+
+
+
+# Check whether --with-systemdunitdir was given.
+if test "${with_systemdunitdir+set}" = set; then :
+ withval=$with_systemdunitdir; systemdunitdir=$withval
+else
+ systemdunitdir=/usr/lib/systemd/system
+fi
+
+
+
+# Check whether --with-systemdpresetdir was given.
+if test "${with_systemdpresetdir+set}" = set; then :
+ withval=$with_systemdpresetdir; systemdpresetdir=$withval
+else
+ systemdpresetdir=/usr/lib/systemd/system-preset
+fi
+
+
+
+# Check whether --with-systemdmodulesloaddir was given.
+if test "${with_systemdmodulesloaddir+set}" = set; then :
+ withval=$with_systemdmodulesloaddir; systemdmodulesloaddir=$withval
+else
+ systemdmodulesloaddir=/usr/lib/modules-load.d
+fi
+
+
+
+ if test "x$enable_systemd" = xyes; then :
+
+ ZFS_INIT_SYSTEMD=systemd
+ ZFS_MODULE_LOAD=modules-load.d
+ modulesloaddir=$systemdmodulesloaddir
+
+fi
+
+
+
+
+
+
+
+
+ # Check whether --enable-sysvinit was given.
+if test "${enable_sysvinit+set}" = set; then :
+ enableval=$enable_sysvinit;
+else
+ enable_sysvinit=yes
+fi
+
+
+ if test "x$enable_sysvinit" = xyes; then :
+ ZFS_INIT_SYSV=init.d
+fi
+
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dracut directory" >&5
+$as_echo_n "checking for dracut directory... " >&6; }
+
+# Check whether --with-dracutdir was given.
+if test "${with_dracutdir+set}" = set; then :
+ withval=$with_dracutdir; dracutdir=$withval
+else
+ dracutdir=check
+fi
+
+
+ if test "x$dracutdir" = xcheck; then :
+
+ path1=/usr/share/dracut
+ path2=/usr/lib/dracut
+ default=$path2
+
+ if test -d "$path1"; then :
+ dracutdir="$path1"
+else
+
+ if test -d "$path2"; then :
+ dracutdir="$path2"
+else
+ dracutdir="$default"
+fi
+
+fi
+
+fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $dracutdir" >&5
+$as_echo "$dracutdir" >&6; }
+
+
+ ZLIB=
+
+ ac_fn_c_check_header_mongrel "$LINENO" "zlib.h" "ac_cv_header_zlib_h" "$ac_includes_default"
+if test "x$ac_cv_header_zlib_h" = xyes; then :
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** zlib.h missing, zlib-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing compress2" >&5
+$as_echo_n "checking for library containing compress2... " >&6; }
+if ${ac_cv_search_compress2+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char compress2 ();
+int
+main ()
+{
+return compress2 ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' z; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_compress2=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_compress2+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_compress2+:} false; then :
+
+else
+ ac_cv_search_compress2=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_compress2" >&5
+$as_echo "$ac_cv_search_compress2" >&6; }
+ac_res=$ac_cv_search_compress2
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** compress2() missing, zlib-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing uncompress" >&5
+$as_echo_n "checking for library containing uncompress... " >&6; }
+if ${ac_cv_search_uncompress+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char uncompress ();
+int
+main ()
+{
+return uncompress ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' z; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_uncompress=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_uncompress+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_uncompress+:} false; then :
+
+else
+ ac_cv_search_uncompress=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_uncompress" >&5
+$as_echo "$ac_cv_search_uncompress" >&6; }
+ac_res=$ac_cv_search_uncompress
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** uncompress() missing, zlib-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing crc32" >&5
+$as_echo_n "checking for library containing crc32... " >&6; }
+if ${ac_cv_search_crc32+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char crc32 ();
+int
+main ()
+{
+return crc32 ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' z; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_crc32=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_crc32+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_crc32+:} false; then :
+
+else
+ ac_cv_search_crc32=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_crc32" >&5
+$as_echo "$ac_cv_search_crc32" >&6; }
+ac_res=$ac_cv_search_crc32
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** crc32() missing, zlib-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+ ZLIB="-lz"
+
+
+$as_echo "#define HAVE_ZLIB 1" >>confdefs.h
+
+
+
+ LIBUUID=
+
+ ac_fn_c_check_header_mongrel "$LINENO" "uuid/uuid.h" "ac_cv_header_uuid_uuid_h" "$ac_includes_default"
+if test "x$ac_cv_header_uuid_uuid_h" = xyes; then :
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** uuid/uuid.h missing, libuuid-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing uuid_generate" >&5
+$as_echo_n "checking for library containing uuid_generate... " >&6; }
+if ${ac_cv_search_uuid_generate+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char uuid_generate ();
+int
+main ()
+{
+return uuid_generate ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' uuid; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_uuid_generate=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_uuid_generate+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_uuid_generate+:} false; then :
+
+else
+ ac_cv_search_uuid_generate=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_uuid_generate" >&5
+$as_echo "$ac_cv_search_uuid_generate" >&6; }
+ac_res=$ac_cv_search_uuid_generate
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** uuid_generate() missing, libuuid-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing uuid_is_null" >&5
+$as_echo_n "checking for library containing uuid_is_null... " >&6; }
+if ${ac_cv_search_uuid_is_null+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char uuid_is_null ();
+int
+main ()
+{
+return uuid_is_null ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' uuid; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_uuid_is_null=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_uuid_is_null+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_uuid_is_null+:} false; then :
+
+else
+ ac_cv_search_uuid_is_null=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_uuid_is_null" >&5
+$as_echo "$ac_cv_search_uuid_is_null" >&6; }
+ac_res=$ac_cv_search_uuid_is_null
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** uuid_is_null() missing, libuuid-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+ LIBUUID="-luuid"
+
+
+$as_echo "#define HAVE_LIBUUID 1" >>confdefs.h
+
+
+
+
+# Check whether --with-tirpc was given.
+if test "${with_tirpc+set}" = set; then :
+ withval=$with_tirpc;
+else
+ with_tirpc=check
+fi
+
+
+ LIBTIRPC=
+ LIBTIRPC_CFLAGS=
+
+ if test "x$with_tirpc" != xno; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for xdrmem_create in -ltirpc" >&5
+$as_echo_n "checking for xdrmem_create in -ltirpc... " >&6; }
+if ${ac_cv_lib_tirpc_xdrmem_create+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_check_lib_save_LIBS=$LIBS
+LIBS="-ltirpc $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char xdrmem_create ();
+int
+main ()
+{
+return xdrmem_create ();
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_lib_tirpc_xdrmem_create=yes
+else
+ ac_cv_lib_tirpc_xdrmem_create=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_tirpc_xdrmem_create" >&5
+$as_echo "$ac_cv_lib_tirpc_xdrmem_create" >&6; }
+if test "x$ac_cv_lib_tirpc_xdrmem_create" = xyes; then :
+ LIBTIRPC=-ltirpc
+
+ LIBTIRPC_CFLAGS=-I/usr/include/tirpc
+
+
+$as_echo "#define HAVE_LIBTIRPC 1" >>confdefs.h
+
+
+else
+ if test "x$with_tirpc" != xcheck; then
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "--with-tirpc was given, but test for tirpc failed
+See \`config.log' for more details" "$LINENO" 5; }
+ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing xdrmem_create" >&5
+$as_echo_n "checking for library containing xdrmem_create... " >&6; }
+if ${ac_cv_search_xdrmem_create+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char xdrmem_create ();
+int
+main ()
+{
+return xdrmem_create ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' tirpc; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_xdrmem_create=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_xdrmem_create+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_xdrmem_create+:} false; then :
+
+else
+ ac_cv_search_xdrmem_create=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_xdrmem_create" >&5
+$as_echo "$ac_cv_search_xdrmem_create" >&6; }
+ac_res=$ac_cv_search_xdrmem_create
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "xdrmem_create() requires tirpc or libc
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+fi
+
+else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing xdrmem_create" >&5
+$as_echo_n "checking for library containing xdrmem_create... " >&6; }
+if ${ac_cv_search_xdrmem_create+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char xdrmem_create ();
+int
+main ()
+{
+return xdrmem_create ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' tirpc; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_xdrmem_create=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_xdrmem_create+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_xdrmem_create+:} false; then :
+
+else
+ ac_cv_search_xdrmem_create=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_xdrmem_create" >&5
+$as_echo "$ac_cv_search_xdrmem_create" >&6; }
+ac_res=$ac_cv_search_xdrmem_create
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else
+
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "xdrmem_create() requires libc
+See \`config.log' for more details" "$LINENO" 5; }
+fi
fi
- rm -Rf build
- if test $rc -ne 0; then :
+ LIBBLKID=
+
+ ac_fn_c_check_header_mongrel "$LINENO" "blkid/blkid.h" "ac_cv_header_blkid_blkid_h" "$ac_includes_default"
+if test "x$ac_cv_header_blkid_blkid_h" = xyes; then :
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** blkid.h missing, libblkid-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+
+ LIBBLKID="-lblkid"
+
+
+$as_echo "#define HAVE_LIBBLKID 1" >>confdefs.h
+
+
+
+ LIBATTR=
+
+ ac_fn_c_check_header_mongrel "$LINENO" "attr/xattr.h" "ac_cv_header_attr_xattr_h" "$ac_includes_default"
+if test "x$ac_cv_header_attr_xattr_h" = xyes; then :
+
+else
+ { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "
+ *** attr/xattr.h missing, libattr-devel package required
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+
+
+ LIBATTR="-lattr"
+
+
+$as_echo "#define HAVE_LIBATTR 1" >>confdefs.h
+
+
+
+ LIBUDEV=
+
+ ac_fn_c_check_header_mongrel "$LINENO" "libudev.h" "ac_cv_header_libudev_h" "$ac_includes_default"
+if test "x$ac_cv_header_libudev_h" = xyes; then :
+
+ user_libudev=yes
+ LIBUDEV="-ludev"
+
+
+$as_echo "#define HAVE_LIBUDEV 1" >>confdefs.h
+
+
+else
+
+ user_libudev=no
+
+fi
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for library containing udev_device_get_is_initialized" >&5
+$as_echo_n "checking for library containing udev_device_get_is_initialized... " >&6; }
+if ${ac_cv_search_udev_device_get_is_initialized+:} false; then :
+ $as_echo_n "(cached) " >&6
+else
+ ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+/* Override any GCC internal prototype to avoid an error.
+ Use char because int might match the return type of a GCC
+ builtin and then its argument prototype would still apply. */
+#ifdef __cplusplus
+extern "C"
+#endif
+char udev_device_get_is_initialized ();
+int
+main ()
+{
+return udev_device_get_is_initialized ();
+ ;
+ return 0;
+}
+_ACEOF
+for ac_lib in '' udev; do
+ if test -z "$ac_lib"; then
+ ac_res="none required"
+ else
+ ac_res=-l$ac_lib
+ LIBS="-l$ac_lib $ac_func_search_save_LIBS"
+ fi
+ if ac_fn_c_try_link "$LINENO"; then :
+ ac_cv_search_udev_device_get_is_initialized=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext \
+ conftest$ac_exeext
+ if ${ac_cv_search_udev_device_get_is_initialized+:} false; then :
+ break
+fi
+done
+if ${ac_cv_search_udev_device_get_is_initialized+:} false; then :
+
+else
+ ac_cv_search_udev_device_get_is_initialized=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_udev_device_get_is_initialized" >&5
+$as_echo "$ac_cv_search_udev_device_get_is_initialized" >&6; }
+ac_res=$ac_cv_search_udev_device_get_is_initialized
+if test "$ac_res" != no; then :
+ test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+
+$as_echo "#define HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED 1" >>confdefs.h
+
+fi
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -Wframe-larger-than=<size> support" >&5
+$as_echo_n "checking for -Wframe-larger-than=<size> support... " >&6; }
+
+ saved_flags="$CFLAGS"
+ CFLAGS="$CFLAGS -Wframe-larger-than=1024"
+
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+ FRAME_LARGER_THAN=-Wframe-larger-than=1024
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+else
+
+ FRAME_LARGER_THAN=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- else
- if test "x$enable_linux_builtin" != xyes; then
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+ CFLAGS="$saved_flags"
+
+
+
+ if test "x$runstatedir" = x; then
+ runstatedir='${localstatedir}/run'
- grep -q -E '[[:space:]]current_time[[:space:]]' \
- $LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
- rc=$?
- if test $rc -ne 0; then
- export=0
- for file in fs/inode.c; do
- grep -q -E "EXPORT_SYMBOL.*(current_time)" \
- "$LINUX/$file" 2>/dev/null
- rc=$?
- if test $rc -eq 0; then
- export=1
- break;
- fi
- done
- if test $export -eq 0; then :
- rc=1
- else :
- rc=0
- fi
- else :
- rc=0
fi
- fi
- if test $rc -ne 0; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking makedev() is declared in sys/sysmacros.h" >&5
+$as_echo_n "checking makedev() is declared in sys/sysmacros.h... " >&6; }
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+ #include <sys/sysmacros.h>
+
+int
+main ()
+{
+
+ int k;
+ k = makedev(0,0);
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_MAKEDEV_IN_SYSMACROS 1" >>confdefs.h
+
+
+else
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- else :
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking makedev() is declared in sys/mkdev.h" >&5
+$as_echo_n "checking makedev() is declared in sys/mkdev.h... " >&6; }
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+ #include <sys/mkdev.h>
+
+int
+main ()
+{
+
+ int k;
+ k = makedev(0,0);
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_CURRENT_TIME 1" >>confdefs.h
+$as_echo "#define HAVE_MAKEDEV_IN_MKDEV 1" >>confdefs.h
- fi
- fi
+else
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
- if test "$LINUX_OBJ" != "$LINUX"; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -Wno-format-truncation support" >&5
+$as_echo_n "checking for -Wno-format-truncation support... " >&6; }
- KERNELMAKE_PARAMS="$KERNELMAKE_PARAMS O=$LINUX_OBJ"
+ saved_flags="$CFLAGS"
+ CFLAGS="$CFLAGS -Wno-format-truncation"
+
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h. */
+
+int
+main ()
+{
+
+ ;
+ return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+
+ NO_FORMAT_TRUNCATION=-Wno-format-truncation
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+else
+
+ NO_FORMAT_TRUNCATION=
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ CFLAGS="$saved_flags"
- KERNELCPPFLAGS="$KERNELCPPFLAGS $NO_UNUSED_BUT_SET_VARIABLE"
- KERNELCPPFLAGS="$KERNELCPPFLAGS $NO_BOOL_COMPARE"
- KERNELCPPFLAGS="$KERNELCPPFLAGS -DHAVE_SPL -D_KERNEL"
- KERNELCPPFLAGS="$KERNELCPPFLAGS -DTEXT_DOMAIN=\\\"zfs-linux-kernel\\\""
- ;;
- all)
+ ZONENAME="echo global"
+
+
+
+
+
+ for ac_func in mlockall
+do :
+ ac_fn_c_check_func "$LINENO" "mlockall" "ac_cv_func_mlockall"
+if test "x$ac_cv_func_mlockall" = xyes; then :
+ cat >>confdefs.h <<_ACEOF
+#define HAVE_MLOCKALL 1
+_ACEOF
+
+fi
+done
+
+
+
# Check whether --with-linux was given.
@@ -25404,6 +28041,7 @@ else
kernsrcver=NONE
fi
+ withlinux=yes
fi
@@ -25422,7 +28060,7 @@ fi
$as_echo_n "checking kernel build directory... " >&6; }
if test -z "$kernelbuild"; then :
- if test -e "/lib/modules/$(uname -r)/build"; then :
+ if test x$withlinux != xyes -a -e "/lib/modules/$(uname -r)/build"; then :
kernelbuild=`readlink -f /lib/modules/$(uname -r)/build`
@@ -25547,7 +28185,11 @@ $as_echo "$LINUX_SYMBOLS" >&6; }
# Check whether --with-spl was given.
if test "${with_spl+set}" = set; then :
- withval=$with_spl; splsrc="$withval"
+ withval=$with_spl; if test "$withval" = "yes"; then :
+ as_fn_error $? "--with-spl=PATH requires a PATH" "$LINENO" 5
+else
+ splsrc="$withval"
+fi
fi
@@ -25579,6 +28221,14 @@ fi
$as_echo_n "checking spl source directory... " >&6; }
if test -z "${splsrc}"; then :
+ all_spl_sources="
+ ${splsrc0}
+ ${splsrc1}
+ ${splsrc2}
+ ${splsrc3}
+ ${splsrc4}
+ ${splsrc5}
+ ${splsrc6}"
if test -e "${splsrc0}/spl.release.in"; then :
splsrc=${splsrc0}
@@ -25615,6 +28265,7 @@ fi
else
+ all_spl_sources="$withval"
if test "$splsrc" = "NONE"; then :
splbuild=NONE
@@ -25631,12 +28282,19 @@ $as_echo "$splsrc" >&6; }
as_fn_error $? "
*** Please make sure the kmod spl devel package for your distribution
*** is installed then try again. If that fails you can specify the
- *** location of the spl source with the '--with-spl=PATH' option." "$LINENO" 5
+ *** location of the spl source with the '--with-spl=PATH' option.
+ *** The spl version must match the version of ZFS you are building,
+ *** ${VERSION}. Failed to find spl.release.in in the following:
+ $all_spl_sources" "$LINENO" 5
fi
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking spl build directory" >&5
$as_echo_n "checking spl build directory... " >&6; }
+
+ all_spl_config_locs="${splsrc}/${LINUX_VERSION}
+ ${splsrc}"
+
while true; do
if test -z "$splbuild"; then :
@@ -25679,7 +28337,9 @@ $as_echo "$splbuild" >&6; }
*** Please make sure the kmod spl devel <kernel> package for your
*** distribution is installed then try again. If that fails you
*** can specify the location of the spl objects with the
- *** '--with-spl-obj=PATH' option." "$LINENO" 5
+ *** '--with-spl-obj=PATH' option. Failed to find spl_config.h in
+ *** any of the following:
+ $all_spl_config_locs" "$LINENO" 5
fi
@@ -25763,6 +28423,99 @@ $as_echo "$SPL_SYMBOLS" >&6; }
+
+# Check whether --with-qat was given.
+if test "${with_qat+set}" = set; then :
+ withval=$with_qat; if test "$withval" = "yes"; then :
+ as_fn_error $? "--with-qat=PATH requires a PATH" "$LINENO" 5
+else
+ qatsrc="$withval"
+fi
+fi
+
+
+
+# Check whether --with-qat-obj was given.
+if test "${with_qat_obj+set}" = set; then :
+ withval=$with_qat_obj; qatbuild="$withval"
+fi
+
+
+ if test ! -z "${qatsrc}"; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking qat source directory" >&5
+$as_echo_n "checking qat source directory... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $qatsrc" >&5
+$as_echo "$qatsrc" >&6; }
+ QAT_SRC="${qatsrc}/quickassist"
+ if test ! -e "$QAT_SRC/include/cpa.h"; then :
+
+ as_fn_error $? "
+ *** Please make sure the qat driver package is installed
+ *** and specify the location of the qat source with the
+ *** '--with-qat=PATH' option then try again. Failed to
+ *** find cpa.h in:
+ ${QAT_SRC}/include" "$LINENO" 5
+
+fi
+
+fi
+
+ if test ! -z "${qatsrc}"; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking qat build directory" >&5
+$as_echo_n "checking qat build directory... " >&6; }
+ if test -z "$qatbuild"; then :
+
+ qatbuild="${qatsrc}/build"
+
+fi
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $qatbuild" >&5
+$as_echo "$qatbuild" >&6; }
+ QAT_OBJ=${qatbuild}
+ if ! test -e "$QAT_OBJ/icp_qa_al.ko"; then :
+
+ as_fn_error $? "
+ *** Please make sure the qat driver is installed then try again.
+ *** Failed to find icp_qa_al.ko in:
+ $QAT_OBJ" "$LINENO" 5
+
+fi
+
+
+
+
+
+$as_echo "#define HAVE_QAT 1" >>confdefs.h
+
+
+fi
+
+ if test ! -z "${qatsrc}"; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking qat file for module symbols" >&5
+$as_echo_n "checking qat file for module symbols... " >&6; }
+ QAT_SYMBOLS=$QAT_SRC/lookaside/access_layer/src/Module.symvers
+
+ if test -r $QAT_SYMBOLS; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: $QAT_SYMBOLS" >&5
+$as_echo "$QAT_SYMBOLS" >&6; }
+
+
+else
+
+ as_fn_error $? "
+ *** Please make sure the qat driver is installed then try again.
+ *** Failed to find Module.symvers in:
+ $QAT_SYMBOLS" "$LINENO" 5
+
+fi
+
+fi
+
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether modules can be built" >&5
$as_echo_n "checking whether modules can be built... " >&6; }
@@ -25828,6 +28581,74 @@ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for compile-time stack validation (objtool)" >&5
+$as_echo_n "checking for compile-time stack validation (objtool)... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #undef __ASSEMBLY__
+ #include <asm/frame.h>
+
+int
+main (void)
+{
+
+ #if !defined(FRAME_BEGIN)
+ CTASSERT(1);
+ #endif
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_KERNEL_OBJTOOL 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+
+
if test "x$cross_compiling" != xyes; then :
if test "$cross_compiling" = yes; then :
@@ -26314,22 +29135,22 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether submit_bio() wants 1 arg" >&5
-$as_echo_n "checking whether submit_bio() wants 1 arg... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether super_block->s_user_ns exists" >&5
+$as_echo_n "checking whether super_block->s_user_ns exists... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/bio.h>
+ #include <linux/fs.h>
+ #include <linux/user_namespace.h>
int
main (void)
{
- blk_qc_t blk_qc;
- struct bio *bio = NULL;
- blk_qc = submit_bio(bio);
+ struct super_block super;
+ super.s_user_ns = (struct user_namespace *)NULL;
;
return 0;
@@ -26363,7 +29184,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_1ARG_SUBMIT_BIO 1" >>confdefs.h
+$as_echo "#define HAVE_SUPER_USER_NS 1" >>confdefs.h
else
@@ -26381,36 +29202,22 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking block device operation prototypes" >&5
-$as_echo_n "checking block device operation prototypes... " >&6; }
- tmp_flags="$EXTRA_KCFLAGS"
- EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether submit_bio() wants 1 arg" >&5
+$as_echo_n "checking whether submit_bio() wants 1 arg... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/blkdev.h>
-
- int blk_open(struct block_device *bdev, fmode_t mode)
- { return 0; }
- int blk_ioctl(struct block_device *bdev, fmode_t mode,
- unsigned x, unsigned long y) { return 0; }
- int blk_compat_ioctl(struct block_device * bdev, fmode_t mode,
- unsigned x, unsigned long y) { return 0; }
-
- static const struct block_device_operations
- bops __attribute__ ((unused)) = {
- .open = blk_open,
- .release = NULL,
- .ioctl = blk_ioctl,
- .compat_ioctl = blk_compat_ioctl,
- };
+ #include <linux/bio.h>
int
main (void)
{
+ blk_qc_t blk_qc;
+ struct bio *bio = NULL;
+ blk_qc = submit_bio(bio);
;
return 0;
@@ -26441,18 +29248,18 @@ _ACEOF
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: struct block_device" >&5
-$as_echo "struct block_device" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
-$as_echo "#define HAVE_BDEV_BLOCK_DEVICE_OPERATIONS 1" >>confdefs.h
+$as_echo "#define HAVE_1ARG_SUBMIT_BIO 1" >>confdefs.h
else
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: struct inode" >&5
-$as_echo "struct inode" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
@@ -26460,11 +29267,10 @@ fi
rm -Rf build
- EXTRA_KCFLAGS="$tmp_flags"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether block_device_operations.release is void" >&5
-$as_echo_n "checking whether block_device_operations.release is void... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking block device operation prototypes" >&5
+$as_echo_n "checking block device operation prototypes... " >&6; }
tmp_flags="$EXTRA_KCFLAGS"
EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
@@ -26474,14 +29280,19 @@ cat confdefs.h - <<_ACEOF >conftest.c
#include <linux/blkdev.h>
- void blk_release(struct gendisk *g, fmode_t mode) { return; }
+ int blk_open(struct block_device *bdev, fmode_t mode)
+ { return 0; }
+ int blk_ioctl(struct block_device *bdev, fmode_t mode,
+ unsigned x, unsigned long y) { return 0; }
+ int blk_compat_ioctl(struct block_device * bdev, fmode_t mode,
+ unsigned x, unsigned long y) { return 0; }
static const struct block_device_operations
bops __attribute__ ((unused)) = {
- .open = NULL,
- .release = blk_release,
- .ioctl = NULL,
- .compat_ioctl = NULL,
+ .open = blk_open,
+ .release = NULL,
+ .ioctl = blk_ioctl,
+ .compat_ioctl = blk_compat_ioctl,
};
int
@@ -26518,18 +29329,18 @@ _ACEOF
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: void" >&5
-$as_echo "void" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: struct block_device" >&5
+$as_echo "struct block_device" >&6; }
-$as_echo "#define HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID 1" >>confdefs.h
+$as_echo "#define HAVE_BDEV_BLOCK_DEVICE_OPERATIONS 1" >>confdefs.h
else
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: int" >&5
-$as_echo "int" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: struct inode" >&5
+$as_echo "struct inode" >&6; }
@@ -26539,20 +29350,32 @@ fi
EXTRA_KCFLAGS="$tmp_flags"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether kernel defines fmode_t" >&5
-$as_echo_n "checking whether kernel defines fmode_t... " >&6; }
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether block_device_operations.release is void" >&5
+$as_echo_n "checking whether block_device_operations.release is void... " >&6; }
+ tmp_flags="$EXTRA_KCFLAGS"
+ EXTRA_KCFLAGS="${NO_UNUSED_BUT_SET_VARIABLE}"
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/types.h>
+ #include <linux/blkdev.h>
+
+ void blk_release(struct gendisk *g, fmode_t mode) { return; }
+
+ static const struct block_device_operations
+ bops __attribute__ ((unused)) = {
+ .open = NULL,
+ .release = blk_release,
+ .ioctl = NULL,
+ .compat_ioctl = NULL,
+ };
int
main (void)
{
- fmode_t *ptr __attribute__ ((unused));
;
return 0;
@@ -26583,18 +29406,18 @@ _ACEOF
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: void" >&5
+$as_echo "void" >&6; }
-$as_echo "#define HAVE_FMODE_T 1" >>confdefs.h
+$as_echo "#define HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID 1" >>confdefs.h
else
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: int" >&5
+$as_echo "int" >&6; }
@@ -26602,23 +29425,22 @@ fi
rm -Rf build
+ EXTRA_KCFLAGS="$tmp_flags"
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether kernel defines KOBJ_NAME_LEN" >&5
-$as_echo_n "checking whether kernel defines KOBJ_NAME_LEN... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether kernel defines fmode_t" >&5
+$as_echo_n "checking whether kernel defines fmode_t... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/kobject.h>
+ #include <linux/types.h>
int
main (void)
{
- int val __attribute__ ((unused));
- val = KOBJ_NAME_LEN;
+ fmode_t *ptr __attribute__ ((unused));
;
return 0;
@@ -26652,7 +29474,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_KOBJ_NAME_LEN 1" >>confdefs.h
+$as_echo "#define HAVE_FMODE_T 1" >>confdefs.h
else
@@ -27592,6 +30414,74 @@ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether bio_set_dev() exists" >&5
+$as_echo_n "checking whether bio_set_dev() exists... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/bio.h>
+ #include <linux/fs.h>
+
+int
+main (void)
+{
+
+ struct block_device *bdev = NULL;
+ struct bio *bio = NULL;
+ bio_set_dev(bio, bdev);
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_BIO_SET_DEV 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether REQ_FAILFAST_MASK is defined" >&5
$as_echo_n "checking whether REQ_FAILFAST_MASK is defined... " >&6; }
@@ -28187,6 +31077,73 @@ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether blk_queue bdi is dynamic" >&5
+$as_echo_n "checking whether blk_queue bdi is dynamic... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/blkdev.h>
+
+int
+main (void)
+{
+
+ struct request_queue q;
+ struct backing_dev_info bdi;
+ q.backing_dev_info = &bdi;
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_BLK_QUEUE_BDI_DYNAMIC 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether blk_queue_flush() is available" >&5
$as_echo_n "checking whether blk_queue_flush() is available... " >&6; }
tmp_flags="$EXTRA_KCFLAGS"
@@ -29323,7 +32280,9 @@ else
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether xattr_handler->get() wants xattr_handler" >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether xattr_handler->get() wants xattr_handler" >&5
$as_echo_n "checking whether xattr_handler->get() wants xattr_handler... " >&6; }
@@ -29603,7 +32562,9 @@ else
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether xattr_handler->set() wants xattr_handler" >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether xattr_handler->set() wants xattr_handler" >&5
$as_echo_n "checking whether xattr_handler->set() wants xattr_handler... " >&6; }
@@ -30353,39 +33314,26 @@ $as_echo "yes" >&6; }
$as_echo "#define HAVE_POSIX_ACL_RELEASE 1" >>confdefs.h
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-
-
-
-fi
- rm -Rf build
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether posix_acl_release() is GPL-only" >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether posix_acl_release() is GPL-only" >&5
$as_echo_n "checking whether posix_acl_release() is GPL-only... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/cred.h>
- #include <linux/fs.h>
- #include <linux/posix_acl.h>
+ #include <linux/module.h>
+ #include <linux/cred.h>
+ #include <linux/fs.h>
+ #include <linux/posix_acl.h>
- MODULE_LICENSE("$ZFS_META_LICENSE");
+ MODULE_LICENSE("$ZFS_META_LICENSE");
int
main (void)
{
- struct posix_acl* tmp = posix_acl_alloc(1, 0);
- posix_acl_release(tmp);
+ struct posix_acl* tmp = posix_acl_alloc(1, 0);
+ posix_acl_release(tmp);
;
return 0;
@@ -30416,14 +33364,14 @@ _ACEOF
$as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
test $ac_status = 0; }; }; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
else
$as_echo "$as_me: failed program was:" >&5
sed 's/^/| /' conftest.$ac_ext >&5
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
$as_echo "#define HAVE_POSIX_ACL_RELEASE_GPL_ONLY 1" >>confdefs.h
@@ -30431,6 +33379,20 @@ $as_echo "#define HAVE_POSIX_ACL_RELEASE_GPL_ONLY 1" >>confdefs.h
+fi
+ rm -Rf build
+
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
fi
rm -Rf build
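Note the inverted result sense in the hunk above: the conftest is tagged MODULE_LICENSE("$ZFS_META_LICENSE"), so if posix_acl_release() is GPL-only and that license is not GPL-compatible the build fails, and it is the failure branch that defines HAVE_POSIX_ACL_RELEASE_GPL_ONLY. A compile-only illustration of the same idea; the license string here is an assumption:

    #include <linux/module.h>
    #include <linux/cred.h>
    #include <linux/fs.h>
    #include <linux/posix_acl.h>

    MODULE_LICENSE("CDDL");    /* non-GPL: may only link non-GPL exports */

    static void __attribute__ ((unused))
    probe(void)
    {
        struct posix_acl *tmp = posix_acl_alloc(1, 0);
        posix_acl_release(tmp);
    }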
@@ -31124,7 +34086,79 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_GET_ACL 1" >>confdefs.h
+$as_echo "#define HAVE_GET_ACL 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether iops->set_acl() exists" >&5
+$as_echo_n "checking whether iops->set_acl() exists... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/fs.h>
+
+ int set_acl_fn(struct inode *inode, struct posix_acl *acl, int type)
+ { return 0; }
+
+ static const struct inode_operations
+ iops __attribute__ ((unused)) = {
+ .set_acl = set_acl_fn,
+ };
+
+int
+main (void)
+{
+
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_SET_ACL 1" >>confdefs.h
else
@@ -31142,8 +34176,9 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether iops->set_acl() exists" >&5
-$as_echo_n "checking whether iops->set_acl() exists... " >&6; }
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether iops->getattr() takes a path" >&5
+$as_echo_n "checking whether iops->getattr() takes a path... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
@@ -31151,12 +34186,14 @@ cat confdefs.h - <<_ACEOF >conftest.c
#include <linux/fs.h>
- int set_acl_fn(struct inode *inode, struct posix_acl *acl, int type)
+ int test_getattr(
+ const struct path *p, struct kstat *k,
+ u32 request_mask, unsigned int query_flags)
{ return 0; }
static const struct inode_operations
iops __attribute__ ((unused)) = {
- .set_acl = set_acl_fn,
+ .getattr = test_getattr,
};
int
@@ -31196,7 +34233,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_SET_ACL 1" >>confdefs.h
+$as_echo "#define HAVE_PATH_IOPS_GETATTR 1" >>confdefs.h
else
@@ -31214,9 +34251,8 @@ fi
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether iops->getattr() takes a path" >&5
-$as_echo_n "checking whether iops->getattr() takes a path... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether iops->getattr() takes a vfsmount" >&5
+$as_echo_n "checking whether iops->getattr() takes a vfsmount... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
@@ -31225,8 +34261,8 @@ cat confdefs.h - <<_ACEOF >conftest.c
#include <linux/fs.h>
int test_getattr(
- const struct path *p, struct kstat *k,
- u32 request_mask, unsigned int query_flags)
+ struct vfsmount *mnt, struct dentry *d,
+ struct kstat *k)
{ return 0; }
static const struct inode_operations
@@ -31271,7 +34307,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_PATH_IOPS_GETATTR 1" >>confdefs.h
+$as_echo "#define HAVE_VFSMOUNT_IOPS_GETATTR 1" >>confdefs.h
else
@@ -31289,8 +34325,9 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether iops->getattr() takes a vfsmount" >&5
-$as_echo_n "checking whether iops->getattr() takes a vfsmount... " >&6; }
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether inode_set_flags() exists" >&5
+$as_echo_n "checking whether inode_set_flags() exists... " >&6; }
cat confdefs.h - <<_ACEOF >conftest.c
@@ -31298,20 +34335,12 @@ cat confdefs.h - <<_ACEOF >conftest.c
#include <linux/fs.h>
- int test_getattr(
- struct vfsmount *mnt, struct dentry *d,
- struct kstat *k)
- { return 0; }
-
- static const struct inode_operations
- iops __attribute__ ((unused)) = {
- .getattr = test_getattr,
- };
-
int
main (void)
{
+ struct inode inode;
+ inode_set_flags(&inode, S_IMMUTABLE, S_IMMUTABLE);
;
return 0;
@@ -31345,7 +34374,7 @@ _ACEOF
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_VFSMOUNT_IOPS_GETATTR 1" >>confdefs.h
+$as_echo "#define HAVE_INODE_SET_FLAGS 1" >>confdefs.h
else
@@ -31363,7 +34392,6 @@ fi
-
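The two getattr() probes above (path-based vs. vfsmount-based) exist because the prototype changed in 4.11; dependent code would dispatch on the resulting defines along these lines, with my_getattr a hypothetical handler whose signatures are copied from the probes:

    #include <linux/fs.h>

    #if defined(HAVE_PATH_IOPS_GETATTR)
    int my_getattr(const struct path *p, struct kstat *k,
        u32 request_mask, unsigned int query_flags)
    { return 0; }
    #elif defined(HAVE_VFSMOUNT_IOPS_GETATTR)
    int my_getattr(struct vfsmount *mnt, struct dentry *d,
        struct kstat *k)
    { return 0; }
    #endif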
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether uncached_acl_sentinel() exists" >&5
$as_echo_n "checking whether uncached_acl_sentinel() exists... " >&6; }
@@ -31565,6 +34593,72 @@ fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether file_dentry() is available" >&5
+$as_echo_n "checking whether file_dentry() is available... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/fs.h>
+
+int
+main (void)
+{
+
+ struct file *f = NULL;
+ file_dentry(f);
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_FILE_DENTRY 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
+fi
+ rm -Rf build
+
+
+
+
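file_dentry() was introduced (around 4.6, for filesystems stacked under overlayfs) to map a struct file to the dentry of the underlying layer. Where the probe fails, the customary compat fallback is a direct f_path.dentry read, as in this sketch; the fallback is an assumption mirroring the common shim:

    #include <linux/fs.h>

    #ifndef HAVE_FILE_DENTRY
    static inline struct dentry *
    file_dentry(const struct file *f)
    {
        return (f->f_path.dentry);
    }
    #endif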
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether fops->fsync() wants" >&5
$as_echo_n "checking whether fops->fsync() wants... " >&6; }
@@ -32955,6 +36049,76 @@ fi
+fi
+ rm -Rf build
+
+
+
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether i_op->tmpfile() exists" >&5
+$as_echo_n "checking whether i_op->tmpfile() exists... " >&6; }
+
+
+cat confdefs.h - <<_ACEOF >conftest.c
+
+
+ #include <linux/fs.h>
+ int tmpfile(struct inode *inode, struct dentry *dentry,
+ umode_t mode) { return 0; }
+ static struct inode_operations
+ iops __attribute__ ((unused)) = {
+ .tmpfile = tmpfile,
+ };
+
+int
+main (void)
+{
+
+
+ ;
+ return 0;
+}
+
+_ACEOF
+
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_TMPFILE 1" >>confdefs.h
+
+
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
fi
rm -Rf build
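i_op->tmpfile() (3.11+) is the hook behind O_TMPFILE. A hedged sketch of registering a handler only when the probe succeeded; my_tmpfile and my_dir_iops are hypothetical names, with the signature copied from the probe:

    #include <linux/fs.h>

    #ifdef HAVE_TMPFILE
    int my_tmpfile(struct inode *dir, struct dentry *d, umode_t mode)
    { return 0; }

    static const struct inode_operations
    my_dir_iops __attribute__ ((unused)) = {
        .tmpfile = my_tmpfile,
    };
    #endif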
@@ -34110,110 +37274,6 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether check_disk_size_change() is available" >&5
-$as_echo_n "checking whether check_disk_size_change() is available... " >&6; }
-
-
-
-cat confdefs.h - <<_ACEOF >conftest.c
-
-
- #include <linux/fs.h>
-
-int
-main (void)
-{
-
- check_disk_size_change(NULL, NULL);
-
- ;
- return 0;
-}
-
-_ACEOF
-
-
-
-cat - <<_ACEOF >conftest.h
-
-_ACEOF
-
-
- rm -Rf build && mkdir -p build && touch build/conftest.mod.c
- echo "obj-m := conftest.o" >build/Makefile
- modpost_flag=''
- test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
- if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
- rc=0
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
- rc=1
-
-
-fi
- rm -Rf build
-
-
- if test $rc -ne 0; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-
- else
- if test "x$enable_linux_builtin" != xyes; then
-
- grep -q -E '[[:space:]]check_disk_size_change[[:space:]]' \
- $LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
- rc=$?
- if test $rc -ne 0; then
- export=0
- for file in fs/block_dev.c; do
- grep -q -E "EXPORT_SYMBOL.*(check_disk_size_change)" \
- "$LINUX/$file" 2>/dev/null
- rc=$?
- if test $rc -eq 0; then
- export=1
- break;
- fi
- done
- if test $export -eq 0; then :
- rc=1
- else :
- rc=0
- fi
- else :
- rc=0
- fi
-
- fi
- if test $rc -ne 0; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-
- else :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-
-$as_echo "#define HAVE_CHECK_DISK_SIZE_CHANGE 1" >>confdefs.h
-
-
- fi
- fi
-
-
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether truncate_setsize() is available" >&5
$as_echo_n "checking whether truncate_setsize() is available... " >&6; }
@@ -34855,8 +37915,9 @@ main (void)
{
char *name = "bdi";
+ atomic_long_t zfs_bdi_seq;
int error __attribute__((unused)) =
- super_setup_bdi_name(&sb, name);
+ super_setup_bdi_name(&sb, "%.28s-%ld", name, atomic_long_inc_return(&zfs_bdi_seq));
;
return 0;
@@ -36664,8 +39725,8 @@ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether generic IO accounting symbols are avaliable" >&5
-$as_echo_n "checking whether generic IO accounting symbols are avaliable... " >&6; }
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether 3 arg generic IO accounting symbols are available" >&5
+$as_echo_n "checking whether 3 arg generic IO accounting symbols are available... " >&6; }
@@ -36767,7 +39828,7 @@ $as_echo "no" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_GENERIC_IO_ACCT 1" >>confdefs.h
+$as_echo "#define HAVE_GENERIC_IO_ACCT_3ARG 1" >>confdefs.h
fi
@@ -36775,165 +39836,27 @@ $as_echo "#define HAVE_GENERIC_IO_ACCT 1" >>confdefs.h
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether iops->rename() wants flags" >&5
-$as_echo_n "checking whether iops->rename() wants flags... " >&6; }
-
-
-cat confdefs.h - <<_ACEOF >conftest.c
-
-
- #include <linux/fs.h>
- int rename_fn(struct inode *sip, struct dentry *sdp,
- struct inode *tip, struct dentry *tdp,
- unsigned int flags) { return 0; }
-
- static const struct inode_operations
- iops __attribute__ ((unused)) = {
- .rename = rename_fn,
- };
-
-int
-main (void)
-{
-
-
- ;
- return 0;
-}
-
-_ACEOF
-
-
-
-cat - <<_ACEOF >conftest.h
-
-_ACEOF
-
-
- rm -Rf build && mkdir -p build && touch build/conftest.mod.c
- echo "obj-m := conftest.o" >build/Makefile
- modpost_flag=''
- test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
- if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-
-$as_echo "#define HAVE_RENAME_WANTS_FLAGS 1" >>confdefs.h
-
-
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-
-
-
-fi
- rm -Rf build
-
-
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether 4 arg generic IO accounting symbols are available" >&5
+$as_echo_n "checking whether 4 arg generic IO accounting symbols are available... " >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether generic_setxattr() exists" >&5
-$as_echo_n "checking whether generic_setxattr() exists... " >&6; }
-
cat confdefs.h - <<_ACEOF >conftest.c
- #include <linux/fs.h>
- #include <linux/xattr.h>
-
- static const struct inode_operations
- iops __attribute__ ((unused)) = {
- .setxattr = generic_setxattr
- };
-
-int
-main (void)
-{
-
-
- ;
- return 0;
-}
-
-_ACEOF
-
-
-
-cat - <<_ACEOF >conftest.h
-
-_ACEOF
-
-
- rm -Rf build && mkdir -p build && touch build/conftest.mod.c
- echo "obj-m := conftest.o" >build/Makefile
- modpost_flag=''
- test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
- if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
- { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
- (eval $ac_try) 2>&5
- ac_status=$?
- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
- test $ac_status = 0; }; }; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-
-$as_echo "#define HAVE_GENERIC_SETXATTR 1" >>confdefs.h
-
-
-else
- $as_echo "$as_me: failed program was:" >&5
-sed 's/^/| /' conftest.$ac_ext >&5
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-
-
-
-fi
- rm -Rf build
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether current_time() exists" >&5
-$as_echo_n "checking whether current_time() exists... " >&6; }
-
-
-
-cat confdefs.h - <<_ACEOF >conftest.c
-
+ #include <linux/bio.h>
- #include <linux/fs.h>
+ void (*generic_start_io_acct_f)(struct request_queue *, int,
+ unsigned long, struct hd_struct *) = &generic_start_io_acct;
+ void (*generic_end_io_acct_f)(struct request_queue *, int,
+ struct hd_struct *, unsigned long) = &generic_end_io_acct;
int
main (void)
{
- struct inode ip;
- struct timespec now __attribute__ ((unused));
-
- now = current_time(&ip);
+ generic_start_io_acct(NULL, 0, 0, NULL);
+ generic_end_io_acct(NULL, 0, NULL, 0);
;
return 0;
@@ -36982,13 +39905,13 @@ $as_echo "no" >&6; }
else
if test "x$enable_linux_builtin" != xyes; then
- grep -q -E '[[:space:]]current_time[[:space:]]' \
+ grep -q -E '[[:space:]]generic_start_io_acct[[:space:]]' \
$LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
rc=$?
if test $rc -ne 0; then
export=0
- for file in fs/inode.c; do
- grep -q -E "EXPORT_SYMBOL.*(current_time)" \
+ for file in block/bio.c; do
+ grep -q -E "EXPORT_SYMBOL.*(generic_start_io_acct)" \
"$LINUX/$file" 2>/dev/null
rc=$?
if test $rc -eq 0; then
@@ -37016,7 +39939,7 @@ $as_echo "no" >&6; }
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_CURRENT_TIME 1" >>confdefs.h
+$as_echo "#define HAVE_GENERIC_IO_ACCT_4ARG 1" >>confdefs.h
fi
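The split into 3-arg and 4-arg checks reflects generic_start_io_acct()/generic_end_io_acct() gaining a leading struct request_queue * argument (around 4.14). A hedged wrapper sketch; my_start_io_acct is hypothetical and the 3-arg prototype is assumed from the older API:

    #include <linux/bio.h>

    static inline void
    my_start_io_acct(struct request_queue *q, int rw,
        unsigned long sectors, struct hd_struct *part)
    {
    #if defined(HAVE_GENERIC_IO_ACCT_4ARG)
        generic_start_io_acct(q, rw, sectors, part);
    #elif defined(HAVE_GENERIC_IO_ACCT_3ARG)
        generic_start_io_acct(rw, sectors, part);
    #endif
    }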
@@ -37024,856 +39947,550 @@ $as_echo "#define HAVE_CURRENT_TIME 1" >>confdefs.h
- if test "$LINUX_OBJ" != "$LINUX"; then :
-
- KERNELMAKE_PARAMS="$KERNELMAKE_PARAMS O=$LINUX_OBJ"
-
-fi
-
-
-
- KERNELCPPFLAGS="$KERNELCPPFLAGS $NO_UNUSED_BUT_SET_VARIABLE"
- KERNELCPPFLAGS="$KERNELCPPFLAGS $NO_BOOL_COMPARE"
- KERNELCPPFLAGS="$KERNELCPPFLAGS -DHAVE_SPL -D_KERNEL"
- KERNELCPPFLAGS="$KERNELCPPFLAGS -DTEXT_DOMAIN=\\\"zfs-linux-kernel\\\""
-
-
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dkms.conf file" >&5
-$as_echo_n "checking for dkms.conf file... " >&6; }
- if test -e dkms.conf; then :
-
- as_fn_error $? "
- *** ZFS should not be manually built in the DKMS source tree.
- *** Remove all ZFS packages before compiling the ZoL sources.
- *** Running \"make install\" breaks ZFS packages." "$LINENO" 5
-
-else
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found" >&5
-$as_echo "not found" >&6; }
-
-fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether asm/fpu/api.h exists" >&5
+$as_echo_n "checking whether asm/fpu/api.h exists... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.c
-# Check whether --with-mounthelperdir was given.
-if test "${with_mounthelperdir+set}" = set; then :
- withval=$with_mounthelperdir; mounthelperdir=$withval
-else
- mounthelperdir=/sbin
-fi
-
-
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for udev directories" >&5
-$as_echo_n "checking for udev directories... " >&6; }
-
-# Check whether --with-udevdir was given.
-if test "${with_udevdir+set}" = set; then :
- withval=$with_udevdir; udevdir=$withval
-else
- udevdir=check
-fi
-
-
- if test "x$udevdir" = xcheck; then :
-
- path1=/lib/udev
- path2=/usr/lib/udev
- default=$path2
-
- if test -d "$path1"; then :
- udevdir="$path1"
-else
-
- if test -d "$path2"; then :
- udevdir="$path2"
-else
- udevdir="$default"
-fi
-
-fi
-
-fi
-
-
-# Check whether --with-udevruledir was given.
-if test "${with_udevruledir+set}" = set; then :
- withval=$with_udevruledir; udevruledir=$withval
-else
- udevruledir="${udevdir}/rules.d"
-fi
-
-
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $udevdir;$udevruledir" >&5
-$as_echo "$udevdir;$udevruledir" >&6; }
-
-
- # Check whether --enable-systemd was given.
-if test "${enable_systemd+set}" = set; then :
- enableval=$enable_systemd;
-else
- enable_systemd=yes
-fi
-
-
-
-# Check whether --with-systemdunitdir was given.
-if test "${with_systemdunitdir+set}" = set; then :
- withval=$with_systemdunitdir; systemdunitdir=$withval
-else
- systemdunitdir=/usr/lib/systemd/system
-fi
-
-
-
-# Check whether --with-systemdpresetdir was given.
-if test "${with_systemdpresetdir+set}" = set; then :
- withval=$with_systemdpresetdir; systemdpresetdir=$withval
-else
- systemdpresetdir=/usr/lib/systemd/system-preset
-fi
-
-
-
-# Check whether --with-systemdmodulesloaddir was given.
-if test "${with_systemdmodulesloaddir+set}" = set; then :
- withval=$with_systemdmodulesloaddir; systemdmoduleloaddir=$withval
-else
- systemdmodulesloaddir=/usr/lib/modules-load.d
-fi
-
-
-
- if test "x$enable_systemd" = xyes; then :
-
- ZFS_INIT_SYSTEMD=systemd
- ZFS_MODULE_LOAD=modules-load.d
- modulesloaddir=$systemdmodulesloaddir
-
-fi
+ #include <linux/kernel.h>
+ #include <asm/fpu/api.h>
+int
+main (void)
+{
+ __kernel_fpu_begin();
+ ;
+ return 0;
+}
+_ACEOF
- # Check whether --enable-sysvinit was given.
-if test "${enable_sysvinit+set}" = set; then :
- enableval=$enable_sysvinit;
-else
- enable_sysvinit=yes
-fi
+cat - <<_ACEOF >conftest.h
+_ACEOF
- if test "x$enable_sysvinit" = xyes; then :
- ZFS_INIT_SYSV=init.d
-fi
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+$as_echo "#define HAVE_FPU_API_H 1" >>confdefs.h
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for dracut directory" >&5
-$as_echo_n "checking for dracut directory... " >&6; }
-# Check whether --with-dracutdir was given.
-if test "${with_dracutdir+set}" = set; then :
- withval=$with_dracutdir; dracutdir=$withval
else
- dracutdir=check
-fi
-
-
- if test "x$dracutdir" = xcheck; then :
-
- path1=/usr/share/dracut
- path2=/usr/lib/dracut
- default=$path2
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
- if test -d "$path1"; then :
- dracutdir="$path1"
-else
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
- if test -d "$path2"; then :
- dracutdir="$path2"
-else
- dracutdir="$default"
-fi
-fi
fi
+ rm -Rf build
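HAVE_FPU_API_H tracks the move of the kernel FPU interface into asm/fpu/api.h (around 4.2, x86). A sketch of the include-side dispatch; the pre-4.2 header named below is an assumption based on where these declarations previously lived:

    #include <linux/kernel.h>

    #ifdef HAVE_FPU_API_H
    #include <asm/fpu/api.h>    /* newer home of kernel_fpu_begin() */
    #else
    #include <asm/i387.h>       /* assumed older location (x86) */
    #endif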
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $dracutdir" >&5
-$as_echo "$dracutdir" >&6; }
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for target asm dir" >&5
-$as_echo_n "checking for target asm dir... " >&6; }
- TARGET_ARCH=`echo ${target_cpu} | sed -e s/i.86/i386/`
-
- case $TARGET_ARCH in
- i386|x86_64)
- TARGET_ASM_DIR=asm-${TARGET_ARCH}
- ;;
- *)
- TARGET_ASM_DIR=asm-generic
- ;;
- esac
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $TARGET_ASM_DIR" >&5
-$as_echo "$TARGET_ASM_DIR" >&6; }
-
- ZLIB=
- ac_fn_c_check_header_mongrel "$LINENO" "zlib.h" "ac_cv_header_zlib_h" "$ac_includes_default"
-if test "x$ac_cv_header_zlib_h" = xyes; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether i_(uid|gid)_(read|write) exist" >&5
+$as_echo_n "checking whether i_(uid|gid)_(read|write) exist... " >&6; }
-else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "
- *** zlib.h missing, zlib-devel package required
-See \`config.log' for more details" "$LINENO" 5; }
-fi
+cat confdefs.h - <<_ACEOF >conftest.c
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for compress2 in -lz" >&5
-$as_echo_n "checking for compress2 in -lz... " >&6; }
-if ${ac_cv_lib_z_compress2+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-lz $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
+ #include <linux/fs.h>
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char compress2 ();
int
-main ()
+main (void)
{
-return compress2 ();
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_z_compress2=yes
-else
- ac_cv_lib_z_compress2=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_compress2" >&5
-$as_echo "$ac_cv_lib_z_compress2" >&6; }
-if test "x$ac_cv_lib_z_compress2" = xyes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBZ 1
-_ACEOF
-
- LIBS="-lz $LIBS"
-else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "
- *** compress2() missing, zlib-devel package required
-See \`config.log' for more details" "$LINENO" 5; }
-fi
-
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for uncompress in -lz" >&5
-$as_echo_n "checking for uncompress in -lz... " >&6; }
-if ${ac_cv_lib_z_uncompress+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-lz $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
+ struct inode *ip = NULL;
+ (void) i_uid_read(ip);
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char uncompress ();
-int
-main ()
-{
-return uncompress ();
;
return 0;
}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_z_uncompress=yes
-else
- ac_cv_lib_z_uncompress=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_uncompress" >&5
-$as_echo "$ac_cv_lib_z_uncompress" >&6; }
-if test "x$ac_cv_lib_z_uncompress" = xyes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBZ 1
+
_ACEOF
- LIBS="-lz $LIBS"
-else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "
- *** uncompress() missing, zlib-devel package required
-See \`config.log' for more details" "$LINENO" 5; }
-fi
+cat - <<_ACEOF >conftest.h
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for crc32 in -lz" >&5
-$as_echo_n "checking for crc32 in -lz... " >&6; }
-if ${ac_cv_lib_z_crc32+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-lz $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char crc32 ();
-int
-main ()
-{
-return crc32 ();
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_z_crc32=yes
-else
- ac_cv_lib_z_crc32=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_z_crc32" >&5
-$as_echo "$ac_cv_lib_z_crc32" >&6; }
-if test "x$ac_cv_lib_z_crc32" = xyes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBZ 1
_ACEOF
- LIBS="-lz $LIBS"
-else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "
- *** crc32() missing, zlib-devel package required
-See \`config.log' for more details" "$LINENO" 5; }
-fi
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+$as_echo "#define HAVE_KUID_HELPERS 1" >>confdefs.h
- ZLIB="-lz"
+else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
-$as_echo "#define HAVE_ZLIB 1" >>confdefs.h
+fi
+ rm -Rf build
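The i_uid_read()/i_uid_write() helpers arrived with the kuid_t namespace work (around 3.5) and replace direct access to inode->i_uid. A hedged accessor sketch; the pre-kuid branch is an assumption:

    #include <linux/fs.h>

    static inline uid_t
    my_inode_uid(struct inode *ip)
    {
    #ifdef HAVE_KUID_HELPERS
        return (i_uid_read(ip));    /* converts kuid_t for us */
    #else
        return (ip->i_uid);         /* assumed plain uid_t field */
    #endif
    }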
- LIBUUID=
- ac_fn_c_check_header_mongrel "$LINENO" "uuid/uuid.h" "ac_cv_header_uuid_uuid_h" "$ac_includes_default"
-if test "x$ac_cv_header_uuid_uuid_h" = xyes; then :
-else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "
- *** uuid/uuid.h missing, libuuid-devel package required
-See \`config.log' for more details" "$LINENO" 5; }
-fi
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether module_param_call() is hardened" >&5
+$as_echo_n "checking whether module_param_call() is hardened... " >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for uuid_generate in -luuid" >&5
-$as_echo_n "checking for uuid_generate in -luuid... " >&6; }
-if ${ac_cv_lib_uuid_uuid_generate+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-luuid $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
+cat confdefs.h - <<_ACEOF >conftest.c
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char uuid_generate ();
-int
-main ()
-{
-return uuid_generate ();
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_uuid_uuid_generate=yes
-else
- ac_cv_lib_uuid_uuid_generate=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_uuid_uuid_generate" >&5
-$as_echo "$ac_cv_lib_uuid_uuid_generate" >&6; }
-if test "x$ac_cv_lib_uuid_uuid_generate" = xyes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBUUID 1
-_ACEOF
- LIBS="-luuid $LIBS"
+ #include <linux/module.h>
+ #include <linux/moduleparam.h>
-else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "
- *** uuid_generate() missing, libuuid-devel package required
-See \`config.log' for more details" "$LINENO" 5; }
-fi
+ int param_get(char *b, const struct kernel_param *kp)
+ {
+ return (0);
+ }
+ int param_set(const char *b, const struct kernel_param *kp)
+ {
+ return (0);
+ }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for uuid_is_null in -luuid" >&5
-$as_echo_n "checking for uuid_is_null in -luuid... " >&6; }
-if ${ac_cv_lib_uuid_uuid_is_null+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-luuid $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
+ module_param_call(p, param_set, param_get, NULL, 0644);
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char uuid_is_null ();
int
-main ()
+main (void)
{
-return uuid_is_null ();
+
+
;
return 0;
}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_uuid_uuid_is_null=yes
-else
- ac_cv_lib_uuid_uuid_is_null=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_uuid_uuid_is_null" >&5
-$as_echo "$ac_cv_lib_uuid_uuid_is_null" >&6; }
-if test "x$ac_cv_lib_uuid_uuid_is_null" = xyes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE_LIBUUID 1
+
_ACEOF
- LIBS="-luuid $LIBS"
-else
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "
- *** uuid_is_null() missing, libuuid-devel package required
-See \`config.log' for more details" "$LINENO" 5; }
-fi
+cat - <<_ACEOF >conftest.h
- LIBUUID="-luuid"
+_ACEOF
-$as_echo "#define HAVE_LIBUUID 1" >>confdefs.h
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+$as_echo "#define MODULE_PARAM_CALL_CONST 1" >>confdefs.h
-# Check whether --with-blkid was given.
-if test "${with_blkid+set}" = set; then :
- withval=$with_blkid;
else
- with_blkid=check
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+
+
fi
+ rm -Rf build
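MODULE_PARAM_CALL_CONST detects the hardened moduleparam API, where module_param_call() get/set handlers take const struct kernel_param *. The prototypes below are copied from the probe; my_param_set is a hypothetical handler name:

    #include <linux/module.h>
    #include <linux/moduleparam.h>

    #ifdef MODULE_PARAM_CALL_CONST
    int my_param_set(const char *buf, const struct kernel_param *kp)
    { return (0); }
    #else
    int my_param_set(const char *buf, struct kernel_param *kp)
    { return (0); }
    #endif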
- LIBBLKID=
- if test "x$with_blkid" = xyes; then :
- LIBBLKID="-lblkid"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether iops->rename() wants flags" >&5
+$as_echo_n "checking whether iops->rename() wants flags... " >&6; }
-$as_echo "#define HAVE_LIBBLKID 1" >>confdefs.h
+cat confdefs.h - <<_ACEOF >conftest.c
-fi
- if test "x$with_blkid" = xcheck; then :
+ #include <linux/fs.h>
+ int rename_fn(struct inode *sip, struct dentry *sdp,
+ struct inode *tip, struct dentry *tdp,
+ unsigned int flags) { return 0; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for blkid_get_cache in -lblkid" >&5
-$as_echo_n "checking for blkid_get_cache in -lblkid... " >&6; }
-if ${ac_cv_lib_blkid_blkid_get_cache+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- ac_check_lib_save_LIBS=$LIBS
-LIBS="-lblkid $LIBS"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
+ static const struct inode_operations
+ iops __attribute__ ((unused)) = {
+ .rename = rename_fn,
+ };
-/* Override any GCC internal prototype to avoid an error.
- Use char because int might match the return type of a GCC
- builtin and then its argument prototype would still apply. */
-#ifdef __cplusplus
-extern "C"
-#endif
-char blkid_get_cache ();
int
-main ()
+main (void)
{
-return blkid_get_cache ();
+
+
;
return 0;
}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- ac_cv_lib_blkid_blkid_get_cache=yes
-else
- ac_cv_lib_blkid_blkid_get_cache=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-LIBS=$ac_check_lib_save_LIBS
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_blkid_blkid_get_cache" >&5
-$as_echo "$ac_cv_lib_blkid_blkid_get_cache" >&6; }
-if test "x$ac_cv_lib_blkid_blkid_get_cache" = xyes; then :
-
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for blkid zfs support" >&5
-$as_echo_n "checking for blkid zfs support... " >&6; }
-
- ZFS_DEV=`mktemp`
- truncate -s 64M $ZFS_DEV
- echo -en "\x0c\xb1\xba\0\0\0\0\0" | \
- dd of=$ZFS_DEV bs=1k count=8 \
- seek=128 conv=notrunc &>/dev/null \
- >/dev/null 2>/dev/null
- echo -en "\x0c\xb1\xba\0\0\0\0\0" | \
- dd of=$ZFS_DEV bs=1k count=8 \
- seek=132 conv=notrunc &>/dev/null \
- >/dev/null 2>/dev/null
- echo -en "\x0c\xb1\xba\0\0\0\0\0" | \
- dd of=$ZFS_DEV bs=1k count=8 \
- seek=136 conv=notrunc &>/dev/null \
- >/dev/null 2>/dev/null
- echo -en "\x0c\xb1\xba\0\0\0\0\0" | \
- dd of=$ZFS_DEV bs=1k count=8 \
- seek=140 conv=notrunc &>/dev/null \
- >/dev/null 2>/dev/null
-
- saved_LIBS="$LIBS"
- LIBS="-lblkid"
-
- if test "$cross_compiling" = yes; then :
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "cannot run test program while cross compiling
-See \`config.log' for more details" "$LINENO" 5; }
-else
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
- #include <stdio.h>
- #include <stdlib.h>
- #include <blkid/blkid.h>
-
-int
-main ()
-{
+_ACEOF
- blkid_cache cache;
- char *value;
- if (blkid_get_cache(&cache, NULL) < 0)
- return 1;
- value = blkid_get_tag_value(cache, "TYPE",
- "$ZFS_DEV");
- if (!value) {
- blkid_put_cache(cache);
- return 2;
- }
+cat - <<_ACEOF >conftest.h
- if (strcmp(value, "zfs_member")) {
- free(value);
- blkid_put_cache(cache);
- return 0;
- }
+_ACEOF
- free(value);
- blkid_put_cache(cache);
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_run "$LINENO"; then :
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
- rm -f $ZFS_DEV
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
- LIBBLKID="-lblkid"
-
-$as_echo "#define HAVE_LIBBLKID 1" >>confdefs.h
+$as_echo "#define HAVE_RENAME_WANTS_FLAGS 1" >>confdefs.h
else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
- rm -f $ZFS_DEV
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
- if test "x$with_blkid" != xcheck; then :
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "--with-blkid given but unavailable
-See \`config.log' for more details" "$LINENO" 5; }
-fi
-fi
-rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
- conftest.$ac_objext conftest.beam conftest.$ac_ext
-fi
- LIBS="$saved_LIBS"
-
-else
-
- if test "x$with_blkid" != xcheck; then :
- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
-as_fn_error $? "--with-blkid given but unavailable
-See \`config.log' for more details" "$LINENO" 5; }
fi
+ rm -Rf build
-fi
-fi
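HAVE_RENAME_WANTS_FLAGS (the probe relocated above) selects between the flag-bearing rename() prototype (around 4.9) and the classic four-argument form. Signatures per the probe; my_rename is hypothetical:

    #include <linux/fs.h>

    #ifdef HAVE_RENAME_WANTS_FLAGS
    int my_rename(struct inode *sip, struct dentry *sdp,
        struct inode *tip, struct dentry *tdp, unsigned int flags)
    { return 0; }
    #else
    int my_rename(struct inode *sip, struct dentry *sdp,
        struct inode *tip, struct dentry *tdp)
    { return 0; }
    #endif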
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether generic_setxattr() exists" >&5
+$as_echo_n "checking whether generic_setxattr() exists... " >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -Wframe-larger-than=<size> support" >&5
-$as_echo_n "checking for -Wframe-larger-than=<size> support... " >&6; }
+cat confdefs.h - <<_ACEOF >conftest.c
- saved_flags="$CFLAGS"
- CFLAGS="$CFLAGS -Wframe-larger-than=1024"
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
+ #include <linux/fs.h>
+ #include <linux/xattr.h>
+
+ static const struct inode_operations
+ iops __attribute__ ((unused)) = {
+ .setxattr = generic_setxattr
+ };
int
-main ()
+main (void)
{
+
;
return 0;
}
+
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- FRAME_LARGER_THAN=-Wframe-larger-than=1024
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
+$as_echo "#define HAVE_GENERIC_SETXATTR 1" >>confdefs.h
+
+
else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
- FRAME_LARGER_THAN=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
+
+
fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ rm -Rf build
- CFLAGS="$saved_flags"
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether current_time() exists" >&5
+$as_echo_n "checking whether current_time() exists... " >&6; }
- if test "x$runstatedir" = x; then
- runstatedir='${localstatedir}/run'
- fi
+cat confdefs.h - <<_ACEOF >conftest.c
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking makedev() is declared in sys/sysmacros.h" >&5
-$as_echo_n "checking makedev() is declared in sys/sysmacros.h... " >&6; }
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
- #include <sys/sysmacros.h>
+ #include <linux/fs.h>
int
-main ()
+main (void)
{
- int k;
- k = makedev(0,0);
+ struct inode ip;
+ struct timespec now __attribute__ ((unused));
+
+ now = current_time(&ip);
;
return 0;
}
+
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
-$as_echo "yes" >&6; }
-$as_echo "#define HAVE_MAKEDEV_IN_SYSMACROS 1" >>confdefs.h
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+ rc=0
else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+ rc=1
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ rm -Rf build
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking makedev() is declared in sys/mkdev.h" >&5
-$as_echo_n "checking makedev() is declared in sys/mkdev.h... " >&6; }
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
+ if test $rc -ne 0; then :
- #include <sys/mkdev.h>
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
-int
-main ()
-{
+ else
+ if test "x$enable_linux_builtin" != xyes; then
- int k;
- k = makedev(0,0);
+ grep -q -E '[[:space:]]current_time[[:space:]]' \
+ $LINUX_OBJ/$LINUX_SYMBOLS 2>/dev/null
+ rc=$?
+ if test $rc -ne 0; then
+ export=0
+ for file in fs/inode.c; do
+ grep -q -E "EXPORT_SYMBOL.*(current_time)" \
+ "$LINUX/$file" 2>/dev/null
+ rc=$?
+ if test $rc -eq 0; then
+ export=1
+ break;
+ fi
+ done
+ if test $export -eq 0; then :
+ rc=1
+ else :
+ rc=0
+ fi
+ else :
+ rc=0
+ fi
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
+ fi
+ if test $rc -ne 0; then :
+
+ { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+ else :
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
-$as_echo "#define HAVE_MAKEDEV_IN_MKDEV 1" >>confdefs.h
+$as_echo "#define HAVE_CURRENT_TIME 1" >>confdefs.h
-else
+ fi
+ fi
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to use vm_node_stat based fn's" >&5
+$as_echo_n "checking whether to use vm_node_stat based fn's... " >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for -Wno-format-truncation support" >&5
-$as_echo_n "checking for -Wno-format-truncation support... " >&6; }
- saved_flags="$CFLAGS"
- CFLAGS="$CFLAGS -Wno-format-truncation"
+cat confdefs.h - <<_ACEOF >conftest.c
- cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
+
+ #include <linux/mm.h>
+ #include <linux/vmstat.h>
int
-main ()
+main (void)
{
+ int a __attribute__ ((unused)) = NR_VM_NODE_STAT_ITEMS;
+ long x __attribute__ ((unused)) =
+ atomic_long_read(&vm_node_stat[0]);
+ (void) global_node_page_state(0);
+
;
return 0;
}
+
_ACEOF
-if ac_fn_c_try_compile "$LINENO"; then :
- NO_FORMAT_TRUNCATION=-Wno-format-truncation
+
+
+cat - <<_ACEOF >conftest.h
+
+_ACEOF
+
+
+ rm -Rf build && mkdir -p build && touch build/conftest.mod.c
+ echo "obj-m := conftest.o" >build/Makefile
+ modpost_flag=''
+ test "x$enable_linux_builtin" = xyes && modpost_flag='modpost=true' # fake modpost stage
+ if { ac_try='cp conftest.c conftest.h build && make modules -C $LINUX_OBJ EXTRA_CFLAGS="-Werror $EXTRA_KCFLAGS" $ARCH_UM M=$PWD/build $modpost_flag'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; } >/dev/null && { ac_try='test -s build/conftest.o'
+ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+ (eval $ac_try) 2>&5
+ ac_status=$?
+ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+ test $ac_status = 0; }; }; then :
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
$as_echo "yes" >&6; }
+$as_echo "#define ZFS_GLOBAL_NODE_PAGE_STATE 1" >>confdefs.h
+
+
else
+ $as_echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
- NO_FORMAT_TRUNCATION=
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
$as_echo "no" >&6; }
+
+
fi
-rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+ rm -Rf build
- CFLAGS="$saved_flags"
- for ac_func in mlockall
-do :
- ac_fn_c_check_func "$LINENO" "mlockall" "ac_cv_func_mlockall"
-if test "x$ac_cv_func_mlockall" = xyes; then :
- cat >>confdefs.h <<_ACEOF
-#define HAVE_MLOCKALL 1
-_ACEOF
+
+ if test "$LINUX_OBJ" != "$LINUX"; then :
+
+ KERNELMAKE_PARAMS="$KERNELMAKE_PARAMS O=$LINUX_OBJ"
fi
-done
- ;;
+
+
+ KERNELCPPFLAGS="$KERNELCPPFLAGS -std=gnu99"
+ KERNELCPPFLAGS="$KERNELCPPFLAGS -Wno-declaration-after-statement"
+ KERNELCPPFLAGS="$KERNELCPPFLAGS $NO_UNUSED_BUT_SET_VARIABLE"
+ KERNELCPPFLAGS="$KERNELCPPFLAGS $NO_BOOL_COMPARE"
+ KERNELCPPFLAGS="$KERNELCPPFLAGS -DHAVE_SPL -D_KERNEL"
+ KERNELCPPFLAGS="$KERNELCPPFLAGS -DTEXT_DOMAIN=\\\"zfs-linux-kernel\\\""
+
+
+ ;;
srpm) ;;
*)
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: Error!" >&5
@@ -37891,7 +40508,7 @@ else
fi
if test "$ZFS_CONFIG" = kernel -o "$ZFS_CONFIG" = all &&
- test "x$enable_linux_builtin" != xyes ; then
+ test "x$enable_linux_builtin" != xyes ; then
CONFIG_KERNEL_TRUE=
CONFIG_KERNEL_FALSE='#'
else
@@ -37899,6 +40516,23 @@ else
CONFIG_KERNEL_FALSE=
fi
+ if test "x$user_libudev" = xyes ; then
+ WANT_DEVNAME2DEVID_TRUE=
+ WANT_DEVNAME2DEVID_FALSE='#'
+else
+ WANT_DEVNAME2DEVID_TRUE='#'
+ WANT_DEVNAME2DEVID_FALSE=
+fi
+
+ if test "$ZFS_CONFIG" = kernel -o "$ZFS_CONFIG" = all &&
+ test "x$qatsrc" != x ; then
+ CONFIG_QAT_TRUE=
+ CONFIG_QAT_FALSE='#'
+else
+ CONFIG_QAT_TRUE='#'
+ CONFIG_QAT_FALSE=
+fi
+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether debugging is enabled" >&5
@@ -37939,36 +40573,8 @@ fi
$as_echo "$enable_debug" >&6; }
- # Check whether --enable-debug-dmu-tx was given.
-if test "${enable_debug_dmu_tx+set}" = set; then :
- enableval=$enable_debug_dmu_tx;
-else
- enable_debug_dmu_tx=no
-fi
-
-
- if test "x$enable_debug_dmu_tx" = xyes; then :
-
- KERNELCPPFLAGS="${KERNELCPPFLAGS} -DDEBUG_DMU_TX"
- DEBUG_DMU_TX="_with_debug_dmu_tx"
-
-$as_echo "#define DEBUG_DMU_TX 1" >>confdefs.h
-
-
-else
-
- DEBUG_DMU_TX="_without_debug_dmu_tx"
-
-fi
-
+ac_config_files="$ac_config_files Makefile module/Makefile module/avl/Makefile module/nvpair/Makefile module/unicode/Makefile module/zcommon/Makefile module/zfs/Makefile module/zpios/Makefile module/icp/Makefile include/Makefile include/linux/Makefile include/sys/Makefile include/sys/fs/Makefile include/sys/fm/Makefile include/sys/fm/fs/Makefile include/sys/crypto/Makefile include/sys/sysevent/Makefile zfs.release"
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether dmu tx validation is enabled" >&5
-$as_echo_n "checking whether dmu tx validation is enabled... " >&6; }
- { $as_echo "$as_me:${as_lineno-$LINENO}: result: $enable_debug_dmu_tx" >&5
-$as_echo "$enable_debug_dmu_tx" >&6; }
-
-
-ac_config_files="$ac_config_files Makefile module/Makefile module/avl/Makefile module/nvpair/Makefile module/unicode/Makefile module/zcommon/Makefile module/zfs/Makefile module/zpios/Makefile include/Makefile include/linux/Makefile include/sys/Makefile include/sys/fs/Makefile include/sys/fm/Makefile include/sys/fm/fs/Makefile zfs.release"
cat >confcache <<\_ACEOF
@@ -38112,6 +40718,18 @@ if test -z "${am__fastdepCCAS_TRUE}" && test -z "${am__fastdepCCAS_FALSE}"; then
as_fn_error $? "conditional \"am__fastdepCCAS\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
+if test -z "${TARGET_ASM_X86_64_TRUE}" && test -z "${TARGET_ASM_X86_64_FALSE}"; then
+ as_fn_error $? "conditional \"TARGET_ASM_X86_64\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${TARGET_ASM_I386_TRUE}" && test -z "${TARGET_ASM_I386_FALSE}"; then
+ as_fn_error $? "conditional \"TARGET_ASM_I386\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${TARGET_ASM_GENERIC_TRUE}" && test -z "${TARGET_ASM_GENERIC_FALSE}"; then
+ as_fn_error $? "conditional \"TARGET_ASM_GENERIC\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
if test -z "${CONFIG_USER_TRUE}" && test -z "${CONFIG_USER_FALSE}"; then
as_fn_error $? "conditional \"CONFIG_USER\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
@@ -38120,6 +40738,14 @@ if test -z "${CONFIG_KERNEL_TRUE}" && test -z "${CONFIG_KERNEL_FALSE}"; then
as_fn_error $? "conditional \"CONFIG_KERNEL\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
+if test -z "${WANT_DEVNAME2DEVID_TRUE}" && test -z "${WANT_DEVNAME2DEVID_FALSE}"; then
+ as_fn_error $? "conditional \"WANT_DEVNAME2DEVID\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${CONFIG_QAT_TRUE}" && test -z "${CONFIG_QAT_FALSE}"; then
+ as_fn_error $? "conditional \"CONFIG_QAT\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
: "${CONFIG_STATUS=./config.status}"
ac_write_fail=0
@@ -38517,7 +41143,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
-This file was extended by zfs $as_me 0.6.5.11, which was
+This file was extended by zfs $as_me 0.7.3, which was
generated by GNU Autoconf 2.69. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
@@ -38583,7 +41209,7 @@ _ACEOF
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
ac_cs_version="\\
-zfs config.status 0.6.5.11
+zfs config.status 0.7.3
configured by $0, generated by GNU Autoconf 2.69,
with options \\"\$ac_cs_config\\"
@@ -39006,12 +41632,15 @@ do
"module/zcommon/Makefile") CONFIG_FILES="$CONFIG_FILES module/zcommon/Makefile" ;;
"module/zfs/Makefile") CONFIG_FILES="$CONFIG_FILES module/zfs/Makefile" ;;
"module/zpios/Makefile") CONFIG_FILES="$CONFIG_FILES module/zpios/Makefile" ;;
+ "module/icp/Makefile") CONFIG_FILES="$CONFIG_FILES module/icp/Makefile" ;;
"include/Makefile") CONFIG_FILES="$CONFIG_FILES include/Makefile" ;;
"include/linux/Makefile") CONFIG_FILES="$CONFIG_FILES include/linux/Makefile" ;;
"include/sys/Makefile") CONFIG_FILES="$CONFIG_FILES include/sys/Makefile" ;;
"include/sys/fs/Makefile") CONFIG_FILES="$CONFIG_FILES include/sys/fs/Makefile" ;;
"include/sys/fm/Makefile") CONFIG_FILES="$CONFIG_FILES include/sys/fm/Makefile" ;;
"include/sys/fm/fs/Makefile") CONFIG_FILES="$CONFIG_FILES include/sys/fm/fs/Makefile" ;;
+ "include/sys/crypto/Makefile") CONFIG_FILES="$CONFIG_FILES include/sys/crypto/Makefile" ;;
+ "include/sys/sysevent/Makefile") CONFIG_FILES="$CONFIG_FILES include/sys/sysevent/Makefile" ;;
"zfs.release") CONFIG_FILES="$CONFIG_FILES zfs.release" ;;
*) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
diff --git a/zfs/configure.ac b/zfs/configure.ac
index 8bf4f6496adf..a2d7520dd587 100644
--- a/zfs/configure.ac
+++ b/zfs/configure.ac
@@ -39,7 +39,7 @@ AC_CONFIG_MACRO_DIR([config])
AC_CANONICAL_SYSTEM
AM_MAINTAINER_MODE
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
-AM_INIT_AUTOMAKE
+AM_INIT_AUTOMAKE([subdir-objects])
AC_CONFIG_HEADERS([zfs_config.h], [
(mv zfs_config.h zfs_config.h.tmp &&
awk -f ${ac_srcdir}/config/config.awk zfs_config.h.tmp >zfs_config.h &&
@@ -49,14 +49,14 @@ AC_PROG_INSTALL
AC_PROG_CC
AC_PROG_LIBTOOL
AM_PROG_AS
+AM_PROG_CC_C_O
ZFS_AC_LICENSE
ZFS_AC_PACKAGE
ZFS_AC_CONFIG
ZFS_AC_DEBUG
-ZFS_AC_DEBUG_DMU_TX
-AC_CONFIG_FILES([
+AC_CONFIG_FILES([
Makefile
module/Makefile
module/avl/Makefile
@@ -65,13 +65,17 @@ AC_CONFIG_FILES([
module/zcommon/Makefile
module/zfs/Makefile
module/zpios/Makefile
+ module/icp/Makefile
include/Makefile
include/linux/Makefile
include/sys/Makefile
include/sys/fs/Makefile
include/sys/fm/Makefile
include/sys/fm/fs/Makefile
+ include/sys/crypto/Makefile
+ include/sys/sysevent/Makefile
zfs.release
])
+
AC_OUTPUT
diff --git a/zfs/dkms.conf b/zfs/dkms.conf
index 134ff3bd0555..1109bffb1055 100644
--- a/zfs/dkms.conf
+++ b/zfs/dkms.conf
@@ -1,7 +1,7 @@
BUILD_DEPENDS[0]="spl"
AUTOINSTALL="yes"
PACKAGE_NAME="zfs"
-PACKAGE_VERSION="0.6.5.11"
+PACKAGE_VERSION="0.7.3"
PRE_BUILD="configure
--prefix=/usr
--with-config=kernel
@@ -69,6 +69,7 @@ STRIP[2]="${STRIP[0]}"
STRIP[3]="${STRIP[0]}"
STRIP[4]="${STRIP[0]}"
STRIP[5]="${STRIP[0]}"
+STRIP[6]="${STRIP[0]}"
BUILT_MODULE_NAME[0]="zavl"
BUILT_MODULE_LOCATION[0]="module/avl/"
DEST_MODULE_LOCATION[0]="/extra/zfs/zavl"
@@ -87,3 +88,6 @@ DEST_MODULE_LOCATION[4]="/extra/zfs/zunicode"
BUILT_MODULE_NAME[5]="zfs"
BUILT_MODULE_LOCATION[5]="module/zfs/"
DEST_MODULE_LOCATION[5]="/extra/zfs/zfs"
+BUILT_MODULE_NAME[6]="icp"
+BUILT_MODULE_LOCATION[6]="module/icp/"
+DEST_MODULE_LOCATION[6]="/extra/zfs/icp"
diff --git a/zfs/include/Makefile.in b/zfs/include/Makefile.in
index bd90322d2ac2..7ebd92b94128 100644
--- a/zfs/include/Makefile.in
+++ b/zfs/include/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -91,7 +91,8 @@ host_triplet = @host@
target_triplet = @target@
subdir = include
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/config/always-arch.m4 \
+ $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/always-no-unused-but-set-variable.m4 \
$(top_srcdir)/config/dkms.m4 \
$(top_srcdir)/config/kernel-acl.m4 \
@@ -107,6 +108,8 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-bio-op.m4 \
$(top_srcdir)/config/kernel-bio-rw-barrier.m4 \
$(top_srcdir)/config/kernel-bio-rw-discard.m4 \
+ $(top_srcdir)/config/kernel-bio_set_dev.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-bdi.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
@@ -114,7 +117,6 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \
$(top_srcdir)/config/kernel-blkdev-get.m4 \
$(top_srcdir)/config/kernel-block-device-operations-release-void.m4 \
- $(top_srcdir)/config/kernel-check-disk-size-change.m4 \
$(top_srcdir)/config/kernel-clear-inode.m4 \
$(top_srcdir)/config/kernel-commit-metadata.m4 \
$(top_srcdir)/config/kernel-create-nameidata.m4 \
@@ -131,9 +133,11 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-encode-fh-inode.m4 \
$(top_srcdir)/config/kernel-evict-inode.m4 \
$(top_srcdir)/config/kernel-fallocate.m4 \
+ $(top_srcdir)/config/kernel-file-dentry.m4 \
$(top_srcdir)/config/kernel-file-inode.m4 \
$(top_srcdir)/config/kernel-fmode-t.m4 \
$(top_srcdir)/config/kernel-follow-down-one.m4 \
+ $(top_srcdir)/config/kernel-fpu.m4 \
$(top_srcdir)/config/kernel-fsync.m4 \
$(top_srcdir)/config/kernel-generic_io_acct.m4 \
$(top_srcdir)/config/kernel-generic_readlink.m4 \
@@ -141,17 +145,20 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-get-gendisk.m4 \
$(top_srcdir)/config/kernel-get-link.m4 \
$(top_srcdir)/config/kernel-inode-getattr.m4 \
+ $(top_srcdir)/config/kernel-inode-set-flags.m4 \
$(top_srcdir)/config/kernel-insert-inode-locked.m4 \
$(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \
$(top_srcdir)/config/kernel-is_owner_or_cap.m4 \
$(top_srcdir)/config/kernel-kmap-atomic-args.m4 \
- $(top_srcdir)/config/kernel-kobj-name-len.m4 \
+ $(top_srcdir)/config/kernel-kuid-helpers.m4 \
$(top_srcdir)/config/kernel-lookup-bdev.m4 \
$(top_srcdir)/config/kernel-lookup-nameidata.m4 \
$(top_srcdir)/config/kernel-lseek-execute.m4 \
$(top_srcdir)/config/kernel-mk-request-fn.m4 \
$(top_srcdir)/config/kernel-mkdir-umode-t.m4 \
+ $(top_srcdir)/config/kernel-mod-param.m4 \
$(top_srcdir)/config/kernel-mount-nodev.m4 \
+ $(top_srcdir)/config/kernel-objtool.m4 \
$(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \
$(top_srcdir)/config/kernel-put-link.m4 \
$(top_srcdir)/config/kernel-rename.m4 \
@@ -162,10 +169,13 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-show-options.m4 \
$(top_srcdir)/config/kernel-shrink.m4 \
$(top_srcdir)/config/kernel-submit_bio.m4 \
+ $(top_srcdir)/config/kernel-super-userns.m4 \
+ $(top_srcdir)/config/kernel-tmpfile.m4 \
$(top_srcdir)/config/kernel-truncate-range.m4 \
$(top_srcdir)/config/kernel-truncate-setsize.m4 \
$(top_srcdir)/config/kernel-vfs-iterate.m4 \
$(top_srcdir)/config/kernel-vfs-rw-iterate.m4 \
+ $(top_srcdir)/config/kernel-vm_node_stat.m4 \
$(top_srcdir)/config/kernel-xattr-handler.m4 \
$(top_srcdir)/config/kernel.m4 $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/ltoptions.m4 \
@@ -173,10 +183,13 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/ltversion.m4 \
$(top_srcdir)/config/lt~obsolete.m4 \
$(top_srcdir)/config/mount-helper.m4 \
- $(top_srcdir)/config/user-arch.m4 \
+ $(top_srcdir)/config/toolchain-simd.m4 \
$(top_srcdir)/config/user-dracut.m4 \
$(top_srcdir)/config/user-frame-larger-than.m4 \
+ $(top_srcdir)/config/user-libattr.m4 \
$(top_srcdir)/config/user-libblkid.m4 \
+ $(top_srcdir)/config/user-libtirpc.m4 \
+ $(top_srcdir)/config/user-libudev.m4 \
$(top_srcdir)/config/user-libuuid.m4 \
$(top_srcdir)/config/user-makedev.m4 \
$(top_srcdir)/config/user-no-format-truncation.m4 \
@@ -348,7 +361,6 @@ CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEBUG_DMU_TX = @DEBUG_DMU_TX@
DEBUG_STACKFLAGS = @DEBUG_STACKFLAGS@
DEBUG_ZFS = @DEBUG_ZFS@
DEFAULT_INITCONF_DIR = @DEFAULT_INITCONF_DIR@
@@ -387,10 +399,14 @@ KERNELCPPFLAGS = @KERNELCPPFLAGS@
KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@
LD = @LD@
LDFLAGS = @LDFLAGS@
+LIBATTR = @LIBATTR@
LIBBLKID = @LIBBLKID@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
+LIBTIRPC = @LIBTIRPC@
+LIBTIRPC_CFLAGS = @LIBTIRPC_CFLAGS@
LIBTOOL = @LIBTOOL@
+LIBUDEV = @LIBUDEV@
LIBUUID = @LIBUUID@
LINUX = @LINUX@
LINUX_OBJ = @LINUX_OBJ@
@@ -421,8 +437,12 @@ PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
+QAT_OBJ = @QAT_OBJ@
+QAT_SRC = @QAT_SRC@
+QAT_SYMBOLS = @QAT_SYMBOLS@
RANLIB = @RANLIB@
RELEASE = @RELEASE@
+RM = @RM@
RPM = @RPM@
RPMBUILD = @RPMBUILD@
RPMBUILD_VERSION = @RPMBUILD_VERSION@
@@ -462,6 +482,7 @@ ZFS_META_RELEASE = @ZFS_META_RELEASE@
ZFS_META_VERSION = @ZFS_META_VERSION@
ZFS_MODULE_LOAD = @ZFS_MODULE_LOAD@
ZLIB = @ZLIB@
+ZONENAME = @ZONENAME@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
diff --git a/zfs/include/libuutil.h b/zfs/include/libuutil.h
index 667542446672..71337159a7e0 100644
--- a/zfs/include/libuutil.h
+++ b/zfs/include/libuutil.h
@@ -98,12 +98,6 @@ extern int *uu_exit_ok(void);
extern int *uu_exit_fatal(void);
extern int *uu_exit_usage(void);
-/*
- * string->number conversions
- */
-extern int uu_strtoint(const char *, void *, size_t, int, int64_t, int64_t);
-extern int uu_strtouint(const char *, void *, size_t, int, uint64_t, uint64_t);
-
/*
* Debug print facility functions.
*/
diff --git a/zfs/include/libzfs.h b/zfs/include/libzfs.h
index 26847e0653e0..147589bbfdad 100644
--- a/zfs/include/libzfs.h
+++ b/zfs/include/libzfs.h
@@ -21,10 +21,12 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
- * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2016, Intel Corporation.
+ * Copyright 2016 Nexenta Systems, Inc.
+ * Copyright (c) 2017 Datto Inc.
*/
#ifndef _LIBZFS_H
@@ -48,8 +50,6 @@ extern "C" {
/*
* Miscellaneous ZFS constants
*/
-#define ZFS_MAXNAMELEN MAXNAMELEN
-#define ZPOOL_MAXNAMELEN MAXNAMELEN
#define ZFS_MAXPROPLEN MAXPATHLEN
#define ZPOOL_MAXPROPLEN MAXPATHLEN
@@ -58,13 +58,18 @@ extern "C" {
*/
#define DISK_ROOT "/dev"
#define UDISK_ROOT "/dev/disk"
+#define ZVOL_ROOT "/dev/zvol"
/*
* Default wait time for a device name to be created.
*/
#define DISK_LABEL_WAIT (30 * 1000) /* 30 seconds */
-#define DEFAULT_IMPORT_PATH_SIZE 7
+#define IMPORT_ORDER_PREFERRED_1 1
+#define IMPORT_ORDER_PREFERRED_2 2
+#define IMPORT_ORDER_SCAN_OFFSET 10
+#define IMPORT_ORDER_DEFAULT 100
+#define DEFAULT_IMPORT_PATH_SIZE 9
extern char *zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE];
/*
@@ -142,6 +147,8 @@ typedef enum zfs_error {
EZFS_DIFF, /* general failure of zfs diff */
EZFS_DIFFDATA, /* bad zfs diff data */
EZFS_POOLREADONLY, /* pool is in read-only mode */
+ EZFS_SCRUB_PAUSED, /* scrub currently paused */
+ EZFS_ACTIVE_POOL, /* pool is imported on a different system */
EZFS_UNKNOWN
} zfs_error_t;
@@ -233,6 +240,7 @@ extern void zpool_free_handles(libzfs_handle_t *);
*/
typedef int (*zpool_iter_f)(zpool_handle_t *, void *);
extern int zpool_iter(libzfs_handle_t *, zpool_iter_f, void *);
+extern boolean_t zpool_skip_pool(const char *);
/*
* Functions to create and destroy pools
@@ -254,11 +262,13 @@ typedef struct splitflags {
/*
* Functions to manipulate pool and vdev state
*/
-extern int zpool_scan(zpool_handle_t *, pool_scan_func_t);
+extern int zpool_scan(zpool_handle_t *, pool_scan_func_t, pool_scrub_cmd_t);
extern int zpool_clear(zpool_handle_t *, const char *, nvlist_t *);
extern int zpool_reguid(zpool_handle_t *);
extern int zpool_reopen(zpool_handle_t *);
+extern int zpool_sync_one(zpool_handle_t *, void *);
+
extern int zpool_vdev_online(zpool_handle_t *, const char *, int,
vdev_state_t *);
extern int zpool_vdev_offline(zpool_handle_t *, const char *, boolean_t);
@@ -279,14 +289,18 @@ extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *,
boolean_t *, boolean_t *, boolean_t *);
extern int zpool_label_disk_wait(char *, int);
extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, char *);
+extern uint64_t zpool_vdev_path_to_guid(zpool_handle_t *zhp, const char *path);
+
+int zfs_dev_is_dm(char *dev_name);
+int zfs_dev_is_whole_disk(char *dev_name);
+char *zfs_get_underlying_path(char *dev_name);
+char *zfs_get_enclosure_sysfs_path(char *dev_name);
/*
* Functions to manage pool properties
*/
extern int zpool_set_prop(zpool_handle_t *, const char *, const char *);
extern int zpool_get_prop(zpool_handle_t *, zpool_prop_t, char *,
- size_t proplen, zprop_source_t *);
-extern int zpool_get_prop_literal(zpool_handle_t *, zpool_prop_t, char *,
size_t proplen, zprop_source_t *, boolean_t literal);
extern uint64_t zpool_get_prop_int(zpool_handle_t *, zpool_prop_t,
zprop_source_t *);
@@ -313,6 +327,8 @@ typedef enum {
ZPOOL_STATUS_FAILING_DEV, /* device experiencing errors */
ZPOOL_STATUS_VERSION_NEWER, /* newer on-disk version */
ZPOOL_STATUS_HOSTID_MISMATCH, /* last accessed by another system */
+ ZPOOL_STATUS_HOSTID_ACTIVE, /* currently active on another system */
+ ZPOOL_STATUS_HOSTID_REQUIRED, /* multihost=on and hostid=0 */
ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */
ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */
ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */
@@ -343,7 +359,7 @@ typedef enum {
ZPOOL_STATUS_VERSION_OLDER, /* older legacy on-disk version */
ZPOOL_STATUS_FEAT_DISABLED, /* supported features are disabled */
ZPOOL_STATUS_RESILVERING, /* device being resilvered */
- ZPOOL_STATUS_OFFLINE_DEV, /* device online */
+ ZPOOL_STATUS_OFFLINE_DEV, /* device offline */
ZPOOL_STATUS_REMOVED_DEV, /* removed device */
/*
@@ -391,9 +407,12 @@ typedef struct importargs {
int can_be_active : 1; /* can the pool be active? */
int unique : 1; /* does 'poolname' already exist? */
int exists : 1; /* set on return if pool already exists */
+ int scan : 1; /* prefer scanning to libblkid cache */
} importargs_t;
extern nvlist_t *zpool_search_import(libzfs_handle_t *, importargs_t *);
+extern int zpool_tryimport(libzfs_handle_t *hdl, char *target,
+ nvlist_t **configp, importargs_t *args);
/* legacy pool search routines */
extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **);
@@ -441,6 +460,7 @@ extern void zfs_close(zfs_handle_t *);
extern zfs_type_t zfs_get_type(const zfs_handle_t *);
extern const char *zfs_get_name(const zfs_handle_t *);
extern zpool_handle_t *zfs_get_pool_handle(const zfs_handle_t *);
+extern const char *zfs_get_pool_name(const zfs_handle_t *);
/*
* Property management functions. Some functions are shared with the kernel,
@@ -456,10 +476,11 @@ extern const char *zfs_prop_column_name(zfs_prop_t);
extern boolean_t zfs_prop_align_right(zfs_prop_t);
extern nvlist_t *zfs_valid_proplist(libzfs_handle_t *, zfs_type_t,
- nvlist_t *, uint64_t, zfs_handle_t *, const char *);
+ nvlist_t *, uint64_t, zfs_handle_t *, zpool_handle_t *, const char *);
extern const char *zfs_prop_to_name(zfs_prop_t);
extern int zfs_prop_set(zfs_handle_t *, const char *, const char *);
+extern int zfs_prop_set_list(zfs_handle_t *, nvlist_t *);
extern int zfs_prop_get(zfs_handle_t *, zfs_prop_t, char *, size_t,
zprop_source_t *, char *, size_t, boolean_t);
extern int zfs_prop_get_recvd(zfs_handle_t *, const char *, char *, size_t,
@@ -631,6 +652,9 @@ typedef struct sendflags {
/* WRITE_EMBEDDED records of type DATA are permitted */
boolean_t embed_data;
+
+ /* compressed WRITE records are permitted */
+ boolean_t compress;
} sendflags_t;
typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
@@ -638,6 +662,10 @@ typedef boolean_t (snapfilter_cb_t)(zfs_handle_t *, void *);
extern int zfs_send(zfs_handle_t *, const char *, const char *,
sendflags_t *, int, snapfilter_cb_t, void *, nvlist_t **);
extern int zfs_send_one(zfs_handle_t *, const char *, int, enum lzc_send_flags);
+extern int zfs_send_resume(libzfs_handle_t *, sendflags_t *, int outfd,
+ const char *);
+extern nvlist_t *zfs_send_resume_token_to_nvlist(libzfs_handle_t *hdl,
+ const char *token);
extern int zfs_promote(zfs_handle_t *);
extern int zfs_hold(zfs_handle_t *, const char *, const char *,
@@ -678,6 +706,12 @@ typedef struct recvflags {
/* set "canmount=off" on all modified filesystems */
boolean_t canmountoff;
+ /*
+ * Mark the file systems as "resumable" and do not destroy them if the
+ * receive is interrupted
+ */
+ boolean_t resumable;
+
/* byteswap flag is used internally; callers need not specify */
boolean_t byteswap;
@@ -685,8 +719,8 @@ typedef struct recvflags {
boolean_t nomount;
} recvflags_t;
-extern int zfs_receive(libzfs_handle_t *, const char *, recvflags_t *,
- int, avl_tree_t *);
+extern int zfs_receive(libzfs_handle_t *, const char *, nvlist_t *,
+ recvflags_t *, int, avl_tree_t *);
typedef enum diff_flags {
ZFS_DIFF_PARSEABLE = 0x1,
@@ -711,6 +745,7 @@ extern boolean_t zfs_bookmark_exists(const char *path);
extern int zfs_append_partition(char *path, size_t max_len);
extern int zfs_resolve_shortname(const char *name, char *path, size_t pathlen);
extern int zfs_strcmp_pathname(char *name, char *cmp_name, int wholedisk);
+extern int zfs_path_order(char *path, int *order);
/*
* Mount support functions.
@@ -741,14 +776,38 @@ extern int zfs_unshare_smb(zfs_handle_t *, const char *);
extern int zfs_unshareall_nfs(zfs_handle_t *);
extern int zfs_unshareall_smb(zfs_handle_t *);
extern int zfs_unshareall_bypath(zfs_handle_t *, const char *);
+extern int zfs_unshareall_bytype(zfs_handle_t *, const char *, const char *);
extern int zfs_unshareall(zfs_handle_t *);
extern int zfs_deleg_share_nfs(libzfs_handle_t *, char *, char *, char *,
void *, void *, int, zfs_share_op_t);
+/*
+ * Formats for iostat numbers. Examples: "12K", "30ms", "4B", "2321234", "-".
+ *
+ * ZFS_NICENUM_1024: Print kilo, mega, tera, peta, exa...
+ * ZFS_NICENUM_BYTES: Print single bytes ("13B"), kilo, mega, tera...
+ * ZFS_NICENUM_TIME: Print nanosecs, microsecs, millisecs, seconds...
+ * ZFS_NICENUM_RAW: Print the raw number without any formatting
+ * ZFS_NICENUM_RAWTIME: Same as RAW, but print dashes ('-') for zero.
+ */
+enum zfs_nicenum_format {
+ ZFS_NICENUM_1024 = 0,
+ ZFS_NICENUM_BYTES = 1,
+ ZFS_NICENUM_TIME = 2,
+ ZFS_NICENUM_RAW = 3,
+ ZFS_NICENUM_RAWTIME = 4
+};
+
/*
* Utility function to convert a number to a human-readable form.
*/
extern void zfs_nicenum(uint64_t, char *, size_t);
+extern void zfs_nicenum_format(uint64_t num, char *buf, size_t buflen,
+ enum zfs_nicenum_format type);
+
+
+extern void zfs_nicetime(uint64_t, char *, size_t);
+extern void zfs_nicebytes(uint64_t, char *, size_t);
extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *);
/*
@@ -756,8 +815,17 @@ extern int zfs_nicestrtonum(libzfs_handle_t *, const char *, uint64_t *);
*/
#define STDOUT_VERBOSE 0x01
#define STDERR_VERBOSE 0x02
+#define NO_DEFAULT_PATH 0x04 /* Don't use $PATH to lookup the command */
int libzfs_run_process(const char *, char **, int flags);
+int libzfs_run_process_get_stdout(const char *path, char *argv[], char *env[],
+ char **lines[], int *lines_cnt);
+int libzfs_run_process_get_stdout_nopath(const char *path, char *argv[],
+ char *env[], char **lines[], int *lines_cnt);
+
+void libzfs_free_str_array(char **strs, int count);
+
+int libzfs_envvar_is_set(char *envvar);
/*
* Given a device or file, determine if it is part of a pool.
@@ -798,6 +866,22 @@ extern boolean_t libzfs_fru_compare(libzfs_handle_t *, const char *,
extern boolean_t libzfs_fru_notself(libzfs_handle_t *, const char *);
extern int zpool_fru_set(zpool_handle_t *, uint64_t, const char *);
+/*
+ * Support for Linux libudev derived persistent device strings
+ */
+extern boolean_t is_mpath_whole_disk(const char *);
+extern void update_vdev_config_dev_strs(nvlist_t *);
+extern char *zfs_strip_partition(char *);
+extern char *zfs_strip_partition_path(char *);
+
+#ifdef HAVE_LIBUDEV
+struct udev_device;
+
+extern boolean_t udev_is_mpath(struct udev_device *dev);
+extern int zfs_device_get_devid(struct udev_device *, char *, size_t);
+extern int zfs_device_get_physical(struct udev_device *, char *, size_t);
+#endif
+
#ifdef __cplusplus
}
#endif
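The zfs_nicenum_format() declaration above generalizes zfs_nicenum() to the
iostat formats documented in the enum. A minimal user-space sketch of the
intended call pattern; the expected output strings are taken from the format
descriptions above and are illustrative only:

#include <stdio.h>
#include <libzfs.h>

int
main(void)
{
	char buf[32];

	zfs_nicenum_format(12288, buf, sizeof (buf), ZFS_NICENUM_1024);
	(void) printf("%s\n", buf);	/* e.g. "12K" */

	zfs_nicenum_format(12288, buf, sizeof (buf), ZFS_NICENUM_RAW);
	(void) printf("%s\n", buf);	/* "12288", no formatting */

	zfs_nicenum_format(0, buf, sizeof (buf), ZFS_NICENUM_RAWTIME);
	(void) printf("%s\n", buf);	/* "-" for zero */

	return (0);
}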
diff --git a/zfs/include/libzfs_core.h b/zfs/include/libzfs_core.h
index bdd6c951ee49..b4f61151c472 100644
--- a/zfs/include/libzfs_core.h
+++ b/zfs/include/libzfs_core.h
@@ -20,7 +20,9 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2017 Datto Inc.
+ * Copyright 2017 RackTop Systems.
*/
#ifndef _LIBZFS_CORE_H
@@ -38,9 +40,18 @@ extern "C" {
int libzfs_core_init(void);
void libzfs_core_fini(void);
+/*
+ * NB: this type should be kept binary compatible with dmu_objset_type_t.
+ */
+enum lzc_dataset_type {
+ LZC_DATSET_TYPE_ZFS = 2,
+ LZC_DATSET_TYPE_ZVOL
+};
+
int lzc_snapshot(nvlist_t *, nvlist_t *, nvlist_t **);
-int lzc_create(const char *, dmu_objset_type_t, nvlist_t *);
+int lzc_create(const char *, enum lzc_dataset_type, nvlist_t *);
int lzc_clone(const char *, const char *, nvlist_t *);
+int lzc_promote(const char *, char *, int);
int lzc_destroy_snaps(nvlist_t *, boolean_t, nvlist_t **);
int lzc_bookmark(nvlist_t *, nvlist_t **);
int lzc_get_bookmarks(const char *, nvlist_t *, nvlist_t **);
@@ -54,16 +65,35 @@ int lzc_get_holds(const char *, nvlist_t **);
enum lzc_send_flags {
LZC_SEND_FLAG_EMBED_DATA = 1 << 0,
- LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1
+ LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1,
+ LZC_SEND_FLAG_COMPRESS = 1 << 2
};
int lzc_send(const char *, const char *, int, enum lzc_send_flags);
+int lzc_send_resume(const char *, const char *, int,
+ enum lzc_send_flags, uint64_t, uint64_t);
+int lzc_send_space(const char *, const char *, enum lzc_send_flags, uint64_t *);
+
+struct dmu_replay_record;
+
int lzc_receive(const char *, nvlist_t *, const char *, boolean_t, int);
-int lzc_send_space(const char *, const char *, uint64_t *);
+int lzc_receive_resumable(const char *, nvlist_t *, const char *,
+ boolean_t, int);
+int lzc_receive_with_header(const char *, nvlist_t *, const char *, boolean_t,
+ boolean_t, int, const struct dmu_replay_record *);
+int lzc_receive_one(const char *, nvlist_t *, const char *, boolean_t,
+ boolean_t, int, const struct dmu_replay_record *, int, uint64_t *,
+ uint64_t *, uint64_t *, nvlist_t **);
+int lzc_receive_with_cmdprops(const char *, nvlist_t *, nvlist_t *,
+ const char *, boolean_t, boolean_t, int, const struct dmu_replay_record *,
+ int, uint64_t *, uint64_t *, uint64_t *, nvlist_t **);
boolean_t lzc_exists(const char *);
int lzc_rollback(const char *, char *, int);
+int lzc_rollback_to(const char *, const char *);
+
+int lzc_sync(const char *, nvlist_t *, nvlist_t **);
#ifdef __cplusplus
}
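lzc_send() now accepts LZC_SEND_FLAG_COMPRESS alongside the existing flags.
A minimal sketch, assuming libzfs_core_init() has already been called;
send_full_compressed() is a hypothetical helper, not part of this patch:

#include <libzfs_core.h>

static int
send_full_compressed(const char *snapname, int outfd)
{
	enum lzc_send_flags flags =
	    LZC_SEND_FLAG_EMBED_DATA | LZC_SEND_FLAG_COMPRESS;

	/* A NULL "from" snapshot requests a full (non-incremental) stream. */
	return (lzc_send(snapname, NULL, outfd, flags));
}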
diff --git a/zfs/include/libzfs_impl.h b/zfs/include/libzfs_impl.h
index ff02fa7949bd..2efd85e31a75 100644
--- a/zfs/include/libzfs_impl.h
+++ b/zfs/include/libzfs_impl.h
@@ -21,17 +21,17 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
*/
#ifndef _LIBZFS_IMPL_H
#define _LIBZFS_IMPL_H
-#include <sys/dmu.h>
#include <sys/fs/zfs.h>
-#include <sys/zfs_ioctl.h>
#include <sys/spa.h>
#include <sys/nvpair.h>
+#include <sys/dmu.h>
+#include <sys/zfs_ioctl.h>
#include <libuutil.h>
#include <libzfs.h>
@@ -85,7 +85,7 @@ struct libzfs_handle {
struct zfs_handle {
libzfs_handle_t *zfs_hdl;
zpool_handle_t *zpool_hdl;
- char zfs_name[ZFS_MAXNAMELEN];
+ char zfs_name[ZFS_MAX_DATASET_NAME_LEN];
zfs_type_t zfs_type; /* type including snapshot */
zfs_type_t zfs_head_type; /* type excluding snapshot */
dmu_objset_stats_t zfs_dmustats;
@@ -106,7 +106,7 @@ struct zfs_handle {
struct zpool_handle {
libzfs_handle_t *zpool_hdl;
zpool_handle_t *zpool_next;
- char zpool_name[ZPOOL_MAXNAMELEN];
+ char zpool_name[ZFS_MAX_DATASET_NAME_LEN];
int zpool_state;
size_t zpool_config_size;
nvlist_t *zpool_config;
@@ -131,6 +131,8 @@ typedef enum {
SHARED_SMB = 0x4
} zfs_share_type_t;
+#define CONFIG_BUF_MINSIZE 262144
+
int zfs_error(libzfs_handle_t *, int, const char *);
int zfs_error_fmt(libzfs_handle_t *, int, const char *, ...);
void zfs_error_aux(libzfs_handle_t *, const char *, ...);
@@ -145,8 +147,6 @@ int zfs_standard_error_fmt(libzfs_handle_t *, int, const char *, ...);
int zpool_standard_error(libzfs_handle_t *, int, const char *);
int zpool_standard_error_fmt(libzfs_handle_t *, int, const char *, ...);
-int get_dependents(libzfs_handle_t *, boolean_t, const char *, char ***,
- size_t *);
zfs_handle_t *make_dataset_handle_zc(libzfs_handle_t *, zfs_cmd_t *);
zfs_handle_t *make_dataset_simple_handle_zc(zfs_handle_t *, zfs_cmd_t *);
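With ZFS_MAXNAMELEN gone, name buffers in the handle structs are sized by
ZFS_MAX_DATASET_NAME_LEN from sys/fs/zfs.h. A hypothetical sketch of the
sizing pattern callers are expected to follow:

#include <stdio.h>
#include <sys/fs/zfs.h>		/* ZFS_MAX_DATASET_NAME_LEN */
#include <libzfs.h>

/* Hypothetical helper: copy a dataset name into a right-sized buffer. */
static void
copy_dataset_name(zfs_handle_t *zhp, char *out)
{
	/* Callers declare: char name[ZFS_MAX_DATASET_NAME_LEN]; */
	(void) snprintf(out, ZFS_MAX_DATASET_NAME_LEN, "%s",
	    zfs_get_name(zhp));
}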
diff --git a/zfs/include/linux/Makefile.am b/zfs/include/linux/Makefile.am
index 595d1db01128..9bb0b3493e5d 100644
--- a/zfs/include/linux/Makefile.am
+++ b/zfs/include/linux/Makefile.am
@@ -6,7 +6,10 @@ KERNEL_H = \
$(top_srcdir)/include/linux/vfs_compat.h \
$(top_srcdir)/include/linux/blkdev_compat.h \
$(top_srcdir)/include/linux/utsname_compat.h \
- $(top_srcdir)/include/linux/kmap_compat.h
+ $(top_srcdir)/include/linux/kmap_compat.h \
+ $(top_srcdir)/include/linux/simd_x86.h \
+ $(top_srcdir)/include/linux/simd_aarch64.h \
+ $(top_srcdir)/include/linux/mod_compat.h
USER_H =
diff --git a/zfs/include/linux/Makefile.in b/zfs/include/linux/Makefile.in
index 9deb2b6f136d..ba923de564d9 100644
--- a/zfs/include/linux/Makefile.in
+++ b/zfs/include/linux/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -91,7 +91,8 @@ host_triplet = @host@
target_triplet = @target@
subdir = include/linux
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/config/always-arch.m4 \
+ $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/always-no-unused-but-set-variable.m4 \
$(top_srcdir)/config/dkms.m4 \
$(top_srcdir)/config/kernel-acl.m4 \
@@ -107,6 +108,8 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-bio-op.m4 \
$(top_srcdir)/config/kernel-bio-rw-barrier.m4 \
$(top_srcdir)/config/kernel-bio-rw-discard.m4 \
+ $(top_srcdir)/config/kernel-bio_set_dev.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-bdi.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
@@ -114,7 +117,6 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \
$(top_srcdir)/config/kernel-blkdev-get.m4 \
$(top_srcdir)/config/kernel-block-device-operations-release-void.m4 \
- $(top_srcdir)/config/kernel-check-disk-size-change.m4 \
$(top_srcdir)/config/kernel-clear-inode.m4 \
$(top_srcdir)/config/kernel-commit-metadata.m4 \
$(top_srcdir)/config/kernel-create-nameidata.m4 \
@@ -131,9 +133,11 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-encode-fh-inode.m4 \
$(top_srcdir)/config/kernel-evict-inode.m4 \
$(top_srcdir)/config/kernel-fallocate.m4 \
+ $(top_srcdir)/config/kernel-file-dentry.m4 \
$(top_srcdir)/config/kernel-file-inode.m4 \
$(top_srcdir)/config/kernel-fmode-t.m4 \
$(top_srcdir)/config/kernel-follow-down-one.m4 \
+ $(top_srcdir)/config/kernel-fpu.m4 \
$(top_srcdir)/config/kernel-fsync.m4 \
$(top_srcdir)/config/kernel-generic_io_acct.m4 \
$(top_srcdir)/config/kernel-generic_readlink.m4 \
@@ -141,17 +145,20 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-get-gendisk.m4 \
$(top_srcdir)/config/kernel-get-link.m4 \
$(top_srcdir)/config/kernel-inode-getattr.m4 \
+ $(top_srcdir)/config/kernel-inode-set-flags.m4 \
$(top_srcdir)/config/kernel-insert-inode-locked.m4 \
$(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \
$(top_srcdir)/config/kernel-is_owner_or_cap.m4 \
$(top_srcdir)/config/kernel-kmap-atomic-args.m4 \
- $(top_srcdir)/config/kernel-kobj-name-len.m4 \
+ $(top_srcdir)/config/kernel-kuid-helpers.m4 \
$(top_srcdir)/config/kernel-lookup-bdev.m4 \
$(top_srcdir)/config/kernel-lookup-nameidata.m4 \
$(top_srcdir)/config/kernel-lseek-execute.m4 \
$(top_srcdir)/config/kernel-mk-request-fn.m4 \
$(top_srcdir)/config/kernel-mkdir-umode-t.m4 \
+ $(top_srcdir)/config/kernel-mod-param.m4 \
$(top_srcdir)/config/kernel-mount-nodev.m4 \
+ $(top_srcdir)/config/kernel-objtool.m4 \
$(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \
$(top_srcdir)/config/kernel-put-link.m4 \
$(top_srcdir)/config/kernel-rename.m4 \
@@ -162,10 +169,13 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-show-options.m4 \
$(top_srcdir)/config/kernel-shrink.m4 \
$(top_srcdir)/config/kernel-submit_bio.m4 \
+ $(top_srcdir)/config/kernel-super-userns.m4 \
+ $(top_srcdir)/config/kernel-tmpfile.m4 \
$(top_srcdir)/config/kernel-truncate-range.m4 \
$(top_srcdir)/config/kernel-truncate-setsize.m4 \
$(top_srcdir)/config/kernel-vfs-iterate.m4 \
$(top_srcdir)/config/kernel-vfs-rw-iterate.m4 \
+ $(top_srcdir)/config/kernel-vm_node_stat.m4 \
$(top_srcdir)/config/kernel-xattr-handler.m4 \
$(top_srcdir)/config/kernel.m4 $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/ltoptions.m4 \
@@ -173,10 +183,13 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/ltversion.m4 \
$(top_srcdir)/config/lt~obsolete.m4 \
$(top_srcdir)/config/mount-helper.m4 \
- $(top_srcdir)/config/user-arch.m4 \
+ $(top_srcdir)/config/toolchain-simd.m4 \
$(top_srcdir)/config/user-dracut.m4 \
$(top_srcdir)/config/user-frame-larger-than.m4 \
+ $(top_srcdir)/config/user-libattr.m4 \
$(top_srcdir)/config/user-libblkid.m4 \
+ $(top_srcdir)/config/user-libtirpc.m4 \
+ $(top_srcdir)/config/user-libudev.m4 \
$(top_srcdir)/config/user-libuuid.m4 \
$(top_srcdir)/config/user-makedev.m4 \
$(top_srcdir)/config/user-no-format-truncation.m4 \
@@ -219,7 +232,10 @@ am__kernel_HEADERS_DIST = $(top_srcdir)/include/linux/dcache_compat.h \
$(top_srcdir)/include/linux/vfs_compat.h \
$(top_srcdir)/include/linux/blkdev_compat.h \
$(top_srcdir)/include/linux/utsname_compat.h \
- $(top_srcdir)/include/linux/kmap_compat.h
+ $(top_srcdir)/include/linux/kmap_compat.h \
+ $(top_srcdir)/include/linux/simd_x86.h \
+ $(top_srcdir)/include/linux/simd_aarch64.h \
+ $(top_srcdir)/include/linux/mod_compat.h
am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
am__vpath_adj = case $$p in \
$(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
@@ -290,7 +306,6 @@ CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEBUG_DMU_TX = @DEBUG_DMU_TX@
DEBUG_STACKFLAGS = @DEBUG_STACKFLAGS@
DEBUG_ZFS = @DEBUG_ZFS@
DEFAULT_INITCONF_DIR = @DEFAULT_INITCONF_DIR@
@@ -329,10 +344,14 @@ KERNELCPPFLAGS = @KERNELCPPFLAGS@
KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@
LD = @LD@
LDFLAGS = @LDFLAGS@
+LIBATTR = @LIBATTR@
LIBBLKID = @LIBBLKID@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
+LIBTIRPC = @LIBTIRPC@
+LIBTIRPC_CFLAGS = @LIBTIRPC_CFLAGS@
LIBTOOL = @LIBTOOL@
+LIBUDEV = @LIBUDEV@
LIBUUID = @LIBUUID@
LINUX = @LINUX@
LINUX_OBJ = @LINUX_OBJ@
@@ -363,8 +382,12 @@ PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
+QAT_OBJ = @QAT_OBJ@
+QAT_SRC = @QAT_SRC@
+QAT_SYMBOLS = @QAT_SYMBOLS@
RANLIB = @RANLIB@
RELEASE = @RELEASE@
+RM = @RM@
RPM = @RPM@
RPMBUILD = @RPMBUILD@
RPMBUILD_VERSION = @RPMBUILD_VERSION@
@@ -404,6 +427,7 @@ ZFS_META_RELEASE = @ZFS_META_RELEASE@
ZFS_META_VERSION = @ZFS_META_VERSION@
ZFS_MODULE_LOAD = @ZFS_MODULE_LOAD@
ZLIB = @ZLIB@
+ZONENAME = @ZONENAME@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
@@ -475,7 +499,10 @@ KERNEL_H = \
$(top_srcdir)/include/linux/vfs_compat.h \
$(top_srcdir)/include/linux/blkdev_compat.h \
$(top_srcdir)/include/linux/utsname_compat.h \
- $(top_srcdir)/include/linux/kmap_compat.h
+ $(top_srcdir)/include/linux/kmap_compat.h \
+ $(top_srcdir)/include/linux/simd_x86.h \
+ $(top_srcdir)/include/linux/simd_aarch64.h \
+ $(top_srcdir)/include/linux/mod_compat.h
USER_H =
EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H)
diff --git a/zfs/include/linux/blkdev_compat.h b/zfs/include/linux/blkdev_compat.h
index f3054a375e0d..c8a8e856dee5 100644
--- a/zfs/include/linux/blkdev_compat.h
+++ b/zfs/include/linux/blkdev_compat.h
@@ -31,6 +31,7 @@
#include <linux/blkdev.h>
#include <linux/elevator.h>
+#include <linux/backing-dev.h>
#ifndef HAVE_FMODE_T
typedef unsigned __bitwise__ fmode_t;
@@ -128,6 +129,16 @@ __blk_queue_max_segments(struct request_queue *q, unsigned short max_segments)
}
#endif
+static inline void
+blk_queue_set_read_ahead(struct request_queue *q, unsigned long ra_pages)
+{
+#ifdef HAVE_BLK_QUEUE_BDI_DYNAMIC
+ q->backing_dev_info->ra_pages = ra_pages;
+#else
+ q->backing_dev_info.ra_pages = ra_pages;
+#endif
+}
+
#ifndef HAVE_GET_DISK_RO
static inline int
get_disk_ro(struct gendisk *disk)
@@ -145,6 +156,7 @@ get_disk_ro(struct gendisk *disk)
#define BIO_BI_SECTOR(bio) (bio)->bi_iter.bi_sector
#define BIO_BI_SIZE(bio) (bio)->bi_iter.bi_size
#define BIO_BI_IDX(bio) (bio)->bi_iter.bi_idx
+#define BIO_BI_SKIP(bio) (bio)->bi_iter.bi_bvec_done
#define bio_for_each_segment4(bv, bvp, b, i) \
bio_for_each_segment((bv), (b), (i))
typedef struct bvec_iter bvec_iterator_t;
@@ -152,6 +164,7 @@ typedef struct bvec_iter bvec_iterator_t;
#define BIO_BI_SECTOR(bio) (bio)->bi_sector
#define BIO_BI_SIZE(bio) (bio)->bi_size
#define BIO_BI_IDX(bio) (bio)->bi_idx
+#define BIO_BI_SKIP(bio) (0)
#define bio_for_each_segment4(bv, bvp, b, i) \
bio_for_each_segment((bvp), (b), (i))
typedef int bvec_iterator_t;
@@ -439,8 +452,6 @@ bio_set_flush(struct bio *bio)
bio_set_op_attrs(bio, 0, WRITE_BARRIER);
#else
#error "Allowing the build will cause bio_set_flush requests to be ignored."
- "Please file an issue report at: "
- "https://github.com/zfsonlinux/zfs/issues/new"
#endif
}
@@ -478,8 +489,7 @@ bio_is_flush(struct bio *bio)
#elif defined(HAVE_BIO_RW_BARRIER)
return (bio->bi_rw & (1 << BIO_RW_BARRIER));
#else
-#error "Allowing the build will cause flush requests to be ignored. Please "
- "file an issue report at: https://github.com/zfsonlinux/zfs/issues/new"
+#error "Allowing the build will cause flush requests to be ignored."
#endif
}
@@ -498,8 +508,7 @@ bio_is_fua(struct bio *bio)
#elif defined(REQ_FUA)
return (bio->bi_rw & REQ_FUA);
#else
-#error "Allowing the build will cause fua requests to be ignored. Please "
- "file an issue report at: https://github.com/zfsonlinux/zfs/issues/new"
+#error "Allowing the build will cause fua requests to be ignored."
#endif
}
@@ -530,9 +539,8 @@ bio_is_discard(struct bio *bio)
#elif defined(REQ_DISCARD)
return (bio->bi_rw & REQ_DISCARD);
#else
-#error "Allowing the build will cause discard requests to become writes "
- "potentially triggering the DMU_MAX_ACCESS assertion. Please file "
- "an issue report at: https://github.com/zfsonlinux/zfs/issues/new"
+/* potentially triggering the DMU_MAX_ACCESS assertion. */
+#error "Allowing the build will cause discard requests to become writes."
#endif
}
@@ -590,9 +598,26 @@ blk_queue_discard_granularity(struct request_queue *q, unsigned int dg)
*/
#define VDEV_HOLDER ((void *)0x2401de7)
-#ifndef HAVE_GENERIC_IO_ACCT
-#define generic_start_io_acct(rw, slen, part) ((void)0)
-#define generic_end_io_acct(rw, part, start_jiffies) ((void)0)
+static inline void
+blk_generic_start_io_acct(struct request_queue *q, int rw,
+ unsigned long sectors, struct hd_struct *part)
+{
+#if defined(HAVE_GENERIC_IO_ACCT_3ARG)
+ generic_start_io_acct(rw, sectors, part);
+#elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
+ generic_start_io_acct(q, rw, sectors, part);
+#endif
+}
+
+static inline void
+blk_generic_end_io_acct(struct request_queue *q, int rw,
+ struct hd_struct *part, unsigned long start_time)
+{
+#if defined(HAVE_GENERIC_IO_ACCT_3ARG)
+ generic_end_io_acct(rw, part, start_time);
+#elif defined(HAVE_GENERIC_IO_ACCT_4ARG)
+ generic_end_io_acct(q, rw, part, start_time);
#endif
+}
#endif /* _ZFS_BLKDEV_H */
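The new blk_generic_start_io_acct()/blk_generic_end_io_acct() wrappers paper
over the 3-argument and 4-argument kernel variants selected by the
HAVE_GENERIC_IO_ACCT_{3,4}ARG configure checks, and blk_queue_set_read_ahead()
does the same for the dynamic backing_dev_info. A sketch of the intended
accounting bracket around bio submission; submit_with_accounting() and its
arguments are hypothetical:

#include <linux/blkdev_compat.h>

/* Hypothetical submit path; only the accounting calls are from this patch. */
static void
submit_with_accounting(struct request_queue *q, int rw, struct bio *bio,
    struct hd_struct *part)
{
	unsigned long start_time = jiffies;

	blk_generic_start_io_acct(q, rw, bio_sectors(bio), part);
	/* ... issue the bio and wait for completion ... */
	blk_generic_end_io_acct(q, rw, part, start_time);
}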
diff --git a/zfs/include/linux/mod_compat.h b/zfs/include/linux/mod_compat.h
new file mode 100644
index 000000000000..32aea4471bfc
--- /dev/null
+++ b/zfs/include/linux/mod_compat.h
@@ -0,0 +1,39 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (C) 2016 Gvozden Neskovic <neskovic at gmail.com>.
+ */
+
+#ifndef _MOD_COMPAT_H
+#define _MOD_COMPAT_H
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+
+/* Grsecurity kernel API change */
+#ifdef MODULE_PARAM_CALL_CONST
+typedef const struct kernel_param zfs_kernel_param_t;
+#else
+typedef struct kernel_param zfs_kernel_param_t;
+#endif
+
+#endif /* _MOD_COMPAT_H */
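A sketch of how zfs_kernel_param_t is meant to be used: a setter written
against the typedef compiles whether or not the kernel declares the
kernel_param callback argument const. The parameter name and variable below
are hypothetical:

#include <linux/mod_compat.h>

static unsigned long zfs_example_limit = 16;	/* hypothetical tunable */

static int
zfs_example_limit_set(const char *val, zfs_kernel_param_t *kp)
{
	/* Range checks or side effects would go here before delegating. */
	return (param_set_ulong(val, kp));
}

module_param_call(zfs_example_limit, zfs_example_limit_set,
    param_get_ulong, &zfs_example_limit, 0644);
MODULE_PARM_DESC(zfs_example_limit, "Hypothetical example tunable");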
diff --git a/zfs/include/linux/simd_aarch64.h b/zfs/include/linux/simd_aarch64.h
new file mode 100644
index 000000000000..155ef6205599
--- /dev/null
+++ b/zfs/include/linux/simd_aarch64.h
@@ -0,0 +1,62 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Romain Dolbeau <romain at dolbeau.org>.
+ */
+
+/*
+ * USER API:
+ *
+ * Kernel fpu methods:
+ * kfpu_begin()
+ * kfpu_end()
+ */
+
+#ifndef _SIMD_AARCH64_H
+#define _SIMD_AARCH64_H
+
+#include <sys/isa_defs.h>
+
+#if defined(__aarch64__)
+
+#include <sys/types.h>
+
+#if defined(_KERNEL)
+#include <asm/neon.h>
+#define kfpu_begin() \
+{ \
+ kernel_neon_begin(); \
+}
+#define kfpu_end() \
+{ \
+ kernel_neon_end(); \
+}
+#else
+/*
+ * fpu dummy methods for userspace
+ */
+#define kfpu_begin() do {} while (0)
+#define kfpu_end() do {} while (0)
+#endif /* defined(_KERNEL) */
+
+#endif /* __aarch64__ */
+
+#endif /* _SIMD_AARCH64_H */
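A sketch of the kfpu_begin()/kfpu_end() bracket this header provides;
neon_xor_impl() is a hypothetical NEON routine, not part of this patch:

#include <linux/simd_aarch64.h>

static void
xor_blocks_neon(uint64_t *dst, const uint64_t *src, size_t len)
{
	kfpu_begin();			/* kernel_neon_begin() in-kernel */
	neon_xor_impl(dst, src, len);	/* hypothetical NEON routine */
	kfpu_end();			/* kernel_neon_end() */
}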
diff --git a/zfs/include/linux/simd_x86.h b/zfs/include/linux/simd_x86.h
new file mode 100644
index 000000000000..c9e3970c0cf3
--- /dev/null
+++ b/zfs/include/linux/simd_x86.h
@@ -0,0 +1,609 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Neskovic <neskovic at compeng.uni-frankfurt.de>.
+ */
+
+/*
+ * USER API:
+ *
+ * Kernel fpu methods:
+ * kfpu_begin()
+ * kfpu_end()
+ *
+ * SIMD support:
+ *
+ * The following functions should be called to determine whether a CPU
+ * feature is supported. All functions are usable in kernel and user space.
+ * If a SIMD algorithm uses more than one instruction set, all relevant
+ * feature test functions should be called.
+ *
+ * Supported features:
+ * zfs_sse_available()
+ * zfs_sse2_available()
+ * zfs_sse3_available()
+ * zfs_ssse3_available()
+ * zfs_sse4_1_available()
+ * zfs_sse4_2_available()
+ *
+ * zfs_avx_available()
+ * zfs_avx2_available()
+ *
+ * zfs_bmi1_available()
+ * zfs_bmi2_available()
+ *
+ * zfs_avx512f_available()
+ * zfs_avx512cd_available()
+ * zfs_avx512er_available()
+ * zfs_avx512pf_available()
+ * zfs_avx512bw_available()
+ * zfs_avx512dq_available()
+ * zfs_avx512vl_available()
+ * zfs_avx512ifma_available()
+ * zfs_avx512vbmi_available()
+ *
+ * NOTE(AVX-512VL): If using AVX-512 instructions with 128-bit registers,
+ * also add zfs_avx512vl_available() to the feature check.
+ */
+
+#ifndef _SIMD_X86_H
+#define _SIMD_X86_H
+
+#include <sys/isa_defs.h>
+
+/* only for __x86 */
+#if defined(__x86)
+
+#include <sys/types.h>
+
+#if defined(_KERNEL)
+#include <asm/cpufeature.h>
+#else
+#include <cpuid.h>
+#endif
+
+#if defined(_KERNEL)
+#if defined(HAVE_FPU_API_H)
+#include <asm/fpu/api.h>
+#include <asm/fpu/internal.h>
+#define kfpu_begin() \
+{ \
+ preempt_disable(); \
+ __kernel_fpu_begin(); \
+}
+#define kfpu_end() \
+{ \
+ __kernel_fpu_end(); \
+ preempt_enable(); \
+}
+#else
+#include <asm/i387.h>
+#include <asm/xcr.h>
+#define kfpu_begin() kernel_fpu_begin()
+#define kfpu_end() kernel_fpu_end()
+#endif /* defined(HAVE_FPU_API_H) */
+#else
+/*
+ * fpu dummy methods for userspace
+ */
+#define kfpu_begin() do {} while (0)
+#define kfpu_end() do {} while (0)
+#endif /* defined(_KERNEL) */
+
+/*
+ * CPUID feature tests for user space. The Linux kernel provides its own
+ * interface for CPU feature testing, so these helpers are compiled only
+ * for user space.
+ */
+#if !defined(_KERNEL)
+
+/*
+ * x86 registers used implicitly by CPUID
+ */
+typedef enum cpuid_regs {
+ EAX = 0,
+ EBX,
+ ECX,
+ EDX,
+ CPUID_REG_CNT = 4
+} cpuid_regs_t;
+
+/*
+ * List of instruction sets identified by CPUID
+ */
+typedef enum cpuid_inst_sets {
+ SSE = 0,
+ SSE2,
+ SSE3,
+ SSSE3,
+ SSE4_1,
+ SSE4_2,
+ OSXSAVE,
+ AVX,
+ AVX2,
+ BMI1,
+ BMI2,
+ AVX512F,
+ AVX512CD,
+ AVX512DQ,
+ AVX512BW,
+ AVX512IFMA,
+ AVX512VBMI,
+ AVX512PF,
+ AVX512ER,
+ AVX512VL
+} cpuid_inst_sets_t;
+
+/*
+ * Instruction set descriptor.
+ */
+typedef struct cpuid_feature_desc {
+ uint32_t leaf; /* CPUID leaf */
+ uint32_t subleaf; /* CPUID sub-leaf */
+ uint32_t flag; /* bit mask of the feature */
+ cpuid_regs_t reg; /* which CPUID return register to test */
+} cpuid_feature_desc_t;
+
+#define _AVX512F_BIT (1U << 16)
+#define _AVX512CD_BIT (_AVX512F_BIT | (1U << 28))
+#define _AVX512DQ_BIT (_AVX512F_BIT | (1U << 17))
+#define _AVX512BW_BIT (_AVX512F_BIT | (1U << 30))
+#define _AVX512IFMA_BIT (_AVX512F_BIT | (1U << 21))
+#define _AVX512VBMI_BIT (1U << 1) /* AVX512F_BIT is on another leaf */
+#define _AVX512PF_BIT (_AVX512F_BIT | (1U << 26))
+#define _AVX512ER_BIT (_AVX512F_BIT | (1U << 27))
+#define _AVX512VL_BIT (1U << 31) /* if used also check other levels */
+
+/*
+ * Descriptions of supported instruction sets
+ */
+static const cpuid_feature_desc_t cpuid_features[] = {
+ [SSE] = {1U, 0U, 1U << 25, EDX },
+ [SSE2] = {1U, 0U, 1U << 26, EDX },
+ [SSE3] = {1U, 0U, 1U << 0, ECX },
+ [SSSE3] = {1U, 0U, 1U << 9, ECX },
+ [SSE4_1] = {1U, 0U, 1U << 19, ECX },
+ [SSE4_2] = {1U, 0U, 1U << 20, ECX },
+ [OSXSAVE] = {1U, 0U, 1U << 27, ECX },
+ [AVX] = {1U, 0U, 1U << 28, ECX },
+ [AVX2] = {7U, 0U, 1U << 5, EBX },
+ [BMI1] = {7U, 0U, 1U << 3, EBX },
+ [BMI2] = {7U, 0U, 1U << 8, EBX },
+ [AVX512F] = {7U, 0U, _AVX512F_BIT, EBX },
+ [AVX512CD] = {7U, 0U, _AVX512CD_BIT, EBX },
+ [AVX512DQ] = {7U, 0U, _AVX512DQ_BIT, EBX },
+ [AVX512BW] = {7U, 0U, _AVX512BW_BIT, EBX },
+ [AVX512IFMA] = {7U, 0U, _AVX512IFMA_BIT, EBX },
+ [AVX512VBMI] = {7U, 0U, _AVX512VBMI_BIT, ECX },
+ [AVX512PF] = {7U, 0U, _AVX512PF_BIT, EBX },
+ [AVX512ER] = {7U, 0U, _AVX512ER_BIT, EBX },
+ [AVX512VL] = {7U, 0U, _AVX512VL_BIT, EBX }
+};
+
+/*
+ * Check if the OS supports AVX and AVX2 by checking XCR0.
+ * Only call this function if CPUID indicates that the AVX feature is
+ * supported by the CPU; otherwise executing XGETBV may raise an
+ * illegal-instruction fault.
+ */
+static inline uint64_t
+xgetbv(uint32_t index)
+{
+ uint32_t eax, edx;
+ /* xgetbv - instruction byte code */
+ __asm__ __volatile__(".byte 0x0f; .byte 0x01; .byte 0xd0"
+ : "=a" (eax), "=d" (edx)
+ : "c" (index));
+
+ return ((((uint64_t)edx)<<32) | (uint64_t)eax);
+}
+
+/*
+ * Check if CPU supports a feature
+ */
+static inline boolean_t
+__cpuid_check_feature(const cpuid_feature_desc_t *desc)
+{
+ uint32_t r[CPUID_REG_CNT];
+
+ if (__get_cpuid_max(0, NULL) >= desc->leaf) {
+ /*
+ * __cpuid_count is needed to properly check
+ * for AVX2. It is a macro, so return parameters
+ * are passed by value.
+ */
+ __cpuid_count(desc->leaf, desc->subleaf,
+ r[EAX], r[EBX], r[ECX], r[EDX]);
+ return ((r[desc->reg] & desc->flag) == desc->flag);
+ }
+ return (B_FALSE);
+}
+
+#define CPUID_FEATURE_CHECK(name, id) \
+static inline boolean_t \
+__cpuid_has_ ## name(void) \
+{ \
+ return (__cpuid_check_feature(&cpuid_features[id])); \
+}
+
+/*
+ * Define functions for user-space CPUID features testing
+ */
+CPUID_FEATURE_CHECK(sse, SSE);
+CPUID_FEATURE_CHECK(sse2, SSE2);
+CPUID_FEATURE_CHECK(sse3, SSE3);
+CPUID_FEATURE_CHECK(ssse3, SSSE3);
+CPUID_FEATURE_CHECK(sse4_1, SSE4_1);
+CPUID_FEATURE_CHECK(sse4_2, SSE4_2);
+CPUID_FEATURE_CHECK(avx, AVX);
+CPUID_FEATURE_CHECK(avx2, AVX2);
+CPUID_FEATURE_CHECK(osxsave, OSXSAVE);
+CPUID_FEATURE_CHECK(bmi1, BMI1);
+CPUID_FEATURE_CHECK(bmi2, BMI2);
+CPUID_FEATURE_CHECK(avx512f, AVX512F);
+CPUID_FEATURE_CHECK(avx512cd, AVX512CD);
+CPUID_FEATURE_CHECK(avx512dq, AVX512DQ);
+CPUID_FEATURE_CHECK(avx512bw, AVX512BW);
+CPUID_FEATURE_CHECK(avx512ifma, AVX512IFMA);
+CPUID_FEATURE_CHECK(avx512vbmi, AVX512VBMI);
+CPUID_FEATURE_CHECK(avx512pf, AVX512PF);
+CPUID_FEATURE_CHECK(avx512er, AVX512ER);
+CPUID_FEATURE_CHECK(avx512vl, AVX512VL);
+
+#endif /* !defined(_KERNEL) */
+
+
+/*
+ * Detect register set support
+ */
+static inline boolean_t
+__simd_state_enabled(const uint64_t state)
+{
+ boolean_t has_osxsave;
+ uint64_t xcr0;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_OSXSAVE)
+ has_osxsave = !!boot_cpu_has(X86_FEATURE_OSXSAVE);
+#elif defined(_KERNEL) && !defined(X86_FEATURE_OSXSAVE)
+ has_osxsave = B_FALSE;
+#else
+ has_osxsave = __cpuid_has_osxsave();
+#endif
+
+ if (!has_osxsave)
+ return (B_FALSE);
+
+ xcr0 = xgetbv(0);
+ return ((xcr0 & state) == state);
+}
+
+#define _XSTATE_SSE_AVX (0x2 | 0x4)
+#define _XSTATE_AVX512 (0xE0 | _XSTATE_SSE_AVX)
+
+#define __ymm_enabled() __simd_state_enabled(_XSTATE_SSE_AVX)
+#define __zmm_enabled() __simd_state_enabled(_XSTATE_AVX512)
+
+
+/*
+ * Check if SSE instruction set is available
+ */
+static inline boolean_t
+zfs_sse_available(void)
+{
+#if defined(_KERNEL)
+ return (!!boot_cpu_has(X86_FEATURE_XMM));
+#else
+ return (__cpuid_has_sse());
+#endif
+}
+
+/*
+ * Check if SSE2 instruction set is available
+ */
+static inline boolean_t
+zfs_sse2_available(void)
+{
+#if defined(_KERNEL)
+ return (!!boot_cpu_has(X86_FEATURE_XMM2));
+#else
+ return (__cpuid_has_sse2());
+#endif
+}
+
+/*
+ * Check if SSE3 instruction set is available
+ */
+static inline boolean_t
+zfs_sse3_available(void)
+{
+#if defined(_KERNEL)
+ return (!!boot_cpu_has(X86_FEATURE_XMM3));
+#else
+ return (__cpuid_has_sse3());
+#endif
+}
+
+/*
+ * Check if SSSE3 instruction set is available
+ */
+static inline boolean_t
+zfs_ssse3_available(void)
+{
+#if defined(_KERNEL)
+ return (!!boot_cpu_has(X86_FEATURE_SSSE3));
+#else
+ return (__cpuid_has_ssse3());
+#endif
+}
+
+/*
+ * Check if SSE4.1 instruction set is available
+ */
+static inline boolean_t
+zfs_sse4_1_available(void)
+{
+#if defined(_KERNEL)
+ return (!!boot_cpu_has(X86_FEATURE_XMM4_1));
+#else
+ return (__cpuid_has_sse4_1());
+#endif
+}
+
+/*
+ * Check if SSE4.2 instruction set is available
+ */
+static inline boolean_t
+zfs_sse4_2_available(void)
+{
+#if defined(_KERNEL)
+ return (!!boot_cpu_has(X86_FEATURE_XMM4_2));
+#else
+ return (__cpuid_has_sse4_2());
+#endif
+}
+
+/*
+ * Check if AVX instruction set is available
+ */
+static inline boolean_t
+zfs_avx_available(void)
+{
+ boolean_t has_avx;
+#if defined(_KERNEL)
+ has_avx = !!boot_cpu_has(X86_FEATURE_AVX);
+#else
+ has_avx = __cpuid_has_avx();
+#endif
+
+ return (has_avx && __ymm_enabled());
+}
+
+/*
+ * Check if AVX2 instruction set is available
+ */
+static inline boolean_t
+zfs_avx2_available(void)
+{
+ boolean_t has_avx2;
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX2)
+ has_avx2 = !!boot_cpu_has(X86_FEATURE_AVX2);
+#elif defined(_KERNEL) && !defined(X86_FEATURE_AVX2)
+ has_avx2 = B_FALSE;
+#else
+ has_avx2 = __cpuid_has_avx2();
+#endif
+
+ return (has_avx2 && __ymm_enabled());
+}
+
+/*
+ * Check if BMI1 instruction set is available
+ */
+static inline boolean_t
+zfs_bmi1_available(void)
+{
+#if defined(_KERNEL) && defined(X86_FEATURE_BMI1)
+ return (!!boot_cpu_has(X86_FEATURE_BMI1));
+#elif defined(_KERNEL) && !defined(X86_FEATURE_BMI1)
+ return (B_FALSE);
+#else
+ return (__cpuid_has_bmi1());
+#endif
+}
+
+/*
+ * Check if BMI2 instruction set is available
+ */
+static inline boolean_t
+zfs_bmi2_available(void)
+{
+#if defined(_KERNEL) && defined(X86_FEATURE_BMI2)
+ return (!!boot_cpu_has(X86_FEATURE_BMI2));
+#elif defined(_KERNEL) && !defined(X86_FEATURE_BMI2)
+ return (B_FALSE);
+#else
+ return (__cpuid_has_bmi2());
+#endif
+}
+
+
+/*
+ * AVX-512 family of instruction sets:
+ *
+ * AVX512F Foundation
+ * AVX512CD Conflict Detection Instructions
+ * AVX512ER Exponential and Reciprocal Instructions
+ * AVX512PF Prefetch Instructions
+ *
+ * AVX512BW Byte and Word Instructions
+ * AVX512DQ Double-word and Quadword Instructions
+ * AVX512VL Vector Length Extensions
+ *
+ * AVX512IFMA Integer Fused Multiply Add (Not supported by kernel 4.4)
+ * AVX512VBMI Vector Byte Manipulation Instructions
+ */
+
+
+/* Check if AVX512F instruction set is available */
+static inline boolean_t
+zfs_avx512f_available(void)
+{
+ boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512F)
+ has_avx512 = !!boot_cpu_has(X86_FEATURE_AVX512F);
+#elif !defined(_KERNEL)
+ has_avx512 = __cpuid_has_avx512f();
+#endif
+
+ return (has_avx512 && __zmm_enabled());
+}
+
+/* Check if AVX512CD instruction set is available */
+static inline boolean_t
+zfs_avx512cd_available(void)
+{
+ boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512CD)
+ has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
+ boot_cpu_has(X86_FEATURE_AVX512CD);
+#elif !defined(_KERNEL)
+ has_avx512 = __cpuid_has_avx512cd();
+#endif
+
+ return (has_avx512 && __zmm_enabled());
+}
+
+/* Check if AVX512ER instruction set is available */
+static inline boolean_t
+zfs_avx512er_available(void)
+{
+ boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512ER)
+ has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
+ boot_cpu_has(X86_FEATURE_AVX512ER);
+#elif !defined(_KERNEL)
+ has_avx512 = __cpuid_has_avx512er();
+#endif
+
+ return (has_avx512 && __zmm_enabled());
+}
+
+/* Check if AVX512PF instruction set is available */
+static inline boolean_t
+zfs_avx512pf_available(void)
+{
+ boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512PF)
+ has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
+ boot_cpu_has(X86_FEATURE_AVX512PF);
+#elif !defined(_KERNEL)
+ has_avx512 = __cpuid_has_avx512pf();
+#endif
+
+ return (has_avx512 && __zmm_enabled());
+}
+
+/* Check if AVX512BW instruction set is available */
+static inline boolean_t
+zfs_avx512bw_available(void)
+{
+ boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512BW)
+ has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
+ boot_cpu_has(X86_FEATURE_AVX512BW);
+#elif !defined(_KERNEL)
+ has_avx512 = __cpuid_has_avx512bw();
+#endif
+
+ return (has_avx512 && __zmm_enabled());
+}
+
+/* Check if AVX512DQ instruction set is available */
+static inline boolean_t
+zfs_avx512dq_available(void)
+{
+ boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512DQ)
+ has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
+ boot_cpu_has(X86_FEATURE_AVX512DQ);
+#elif !defined(_KERNEL)
+ has_avx512 = __cpuid_has_avx512dq();
+#endif
+
+ return (has_avx512 && __zmm_enabled());
+}
+
+/* Check if AVX512VL instruction set is available */
+static inline boolean_t
+zfs_avx512vl_available(void)
+{
+ boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512VL)
+ has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
+ boot_cpu_has(X86_FEATURE_AVX512VL);
+#elif !defined(_KERNEL)
+ has_avx512 = __cpuid_has_avx512vl();
+#endif
+
+ return (has_avx512 && __zmm_enabled());
+}
+
+/* Check if AVX512IFMA instruction set is available */
+static inline boolean_t
+zfs_avx512ifma_available(void)
+{
+ boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512IFMA)
+ has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
+ boot_cpu_has(X86_FEATURE_AVX512IFMA);
+#elif !defined(_KERNEL)
+ has_avx512 = __cpuid_has_avx512ifma();
+#endif
+
+ return (has_avx512 && __zmm_enabled());
+}
+
+/* Check if AVX512VBMI instruction set is available */
+static inline boolean_t
+zfs_avx512vbmi_available(void)
+{
+ boolean_t has_avx512 = B_FALSE;
+
+#if defined(_KERNEL) && defined(X86_FEATURE_AVX512VBMI)
+ has_avx512 = boot_cpu_has(X86_FEATURE_AVX512F) &&
+ boot_cpu_has(X86_FEATURE_AVX512VBMI);
+#elif !defined(_KERNEL)
+ has_avx512 = __cpuid_has_avx512f() &&
+ __cpuid_has_avx512vbmi();
+#endif
+
+ return (has_avx512 && __zmm_enabled());
+}
+
+#endif /* defined(__x86) */
+
+#endif /* _SIMD_X86_H */
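A sketch of the run-time dispatch pattern these feature tests enable;
fletcher_4_avx2() and fletcher_4_scalar() are hypothetical implementations.
Note that zfs_avx2_available() already folds in the __ymm_enabled() XCR0
check, so no separate OS-support test is needed:

#include <linux/simd_x86.h>

static void
fletcher_4_compute(const void *buf, uint64_t size)
{
	if (zfs_avx2_available()) {
		kfpu_begin();
		fletcher_4_avx2(buf, size);	/* hypothetical SIMD kernel */
		kfpu_end();
	} else {
		fletcher_4_scalar(buf, size);	/* hypothetical fallback */
	}
}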
diff --git a/zfs/include/linux/vfs_compat.h b/zfs/include/linux/vfs_compat.h
index a4432e0f051d..6111f0afca1d 100644
--- a/zfs/include/linux/vfs_compat.h
+++ b/zfs/include/linux/vfs_compat.h
@@ -28,6 +28,7 @@
#define _ZFS_VFS_H
#include <sys/taskq.h>
+#include <sys/cred.h>
#include <linux/backing-dev.h>
/*
@@ -294,7 +295,8 @@ zpl_posix_acl_release(struct posix_acl *acl)
#define zpl_forget_cached_acl(ip, ty) forget_cached_acl(ip, ty)
#else
static inline void
-zpl_set_cached_acl(struct inode *ip, int type, struct posix_acl *newer) {
+zpl_set_cached_acl(struct inode *ip, int type, struct posix_acl *newer)
+{
struct posix_acl *older = NULL;
spin_lock(&ip->i_lock);
@@ -319,7 +321,8 @@ zpl_set_cached_acl(struct inode *ip, int type, struct posix_acl *newer) {
}
static inline void
-zpl_forget_cached_acl(struct inode *ip, int type) {
+zpl_forget_cached_acl(struct inode *ip, int type)
+{
zpl_set_cached_acl(ip, type, (struct posix_acl *)ACL_NOT_CACHED);
}
#endif /* HAVE_SET_CACHED_ACL_USABLE */
@@ -330,7 +333,8 @@ zpl_forget_cached_acl(struct inode *ip, int type) {
#define __posix_acl_create(acl, gfp, mode) posix_acl_create(acl, gfp, mode)
#else
static inline int
-__posix_acl_chmod(struct posix_acl **acl, int flags, umode_t umode) {
+__posix_acl_chmod(struct posix_acl **acl, int flags, umode_t umode)
+{
struct posix_acl *oldacl = *acl;
mode_t mode = umode;
int error;
@@ -351,7 +355,8 @@ __posix_acl_chmod(struct posix_acl **acl, int flags, umode_t umode) {
}
static inline int
-__posix_acl_create(struct posix_acl **acl, int flags, umode_t *umodep) {
+__posix_acl_create(struct posix_acl **acl, int flags, umode_t *umodep)
+{
struct posix_acl *oldacl = *acl;
mode_t mode = *umodep;
int error;
@@ -416,6 +421,87 @@ static inline struct inode *file_inode(const struct file *f)
}
#endif /* HAVE_FILE_INODE */
+/*
+ * 4.1 API change
+ * struct access file->f_path.dentry was replaced by accessor function
+ * file_dentry(f)
+ */
+#ifndef HAVE_FILE_DENTRY
+static inline struct dentry *file_dentry(const struct file *f)
+{
+ return (f->f_path.dentry);
+}
+#endif /* HAVE_FILE_DENTRY */
+
+#ifdef HAVE_KUID_HELPERS
+static inline uid_t zfs_uid_read_impl(struct inode *ip)
+{
+#ifdef HAVE_SUPER_USER_NS
+ return (from_kuid(ip->i_sb->s_user_ns, ip->i_uid));
+#else
+ return (from_kuid(kcred->user_ns, ip->i_uid));
+#endif
+}
+
+static inline uid_t zfs_uid_read(struct inode *ip)
+{
+ return (zfs_uid_read_impl(ip));
+}
+
+static inline gid_t zfs_gid_read_impl(struct inode *ip)
+{
+#ifdef HAVE_SUPER_USER_NS
+ return (from_kgid(ip->i_sb->s_user_ns, ip->i_gid));
+#else
+ return (from_kgid(kcred->user_ns, ip->i_gid));
+#endif
+}
+
+static inline gid_t zfs_gid_read(struct inode *ip)
+{
+ return (zfs_gid_read_impl(ip));
+}
+
+static inline void zfs_uid_write(struct inode *ip, uid_t uid)
+{
+#ifdef HAVE_SUPER_USER_NS
+ ip->i_uid = make_kuid(ip->i_sb->s_user_ns, uid);
+#else
+ ip->i_uid = make_kuid(kcred->user_ns, uid);
+#endif
+}
+
+static inline void zfs_gid_write(struct inode *ip, gid_t gid)
+{
+#ifdef HAVE_SUPER_USER_NS
+ ip->i_gid = make_kgid(ip->i_sb->s_user_ns, gid);
+#else
+ ip->i_gid = make_kgid(kcred->user_ns, gid);
+#endif
+}
+
+#else
+static inline uid_t zfs_uid_read(struct inode *ip)
+{
+ return (ip->i_uid);
+}
+
+static inline gid_t zfs_gid_read(struct inode *ip)
+{
+ return (ip->i_gid);
+}
+
+static inline void zfs_uid_write(struct inode *ip, uid_t uid)
+{
+ ip->i_uid = uid;
+}
+
+static inline void zfs_gid_write(struct inode *ip, gid_t gid)
+{
+ ip->i_gid = gid;
+}
+#endif
+
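The wrappers above exist because kernels with kuid helpers store namespace-aware kuid_t/kgid_t values in the inode, while older kernels store plain integers. A hedged sketch of the intended call pattern (the function name is illustrative):

static void
example_chown(struct inode *ip, uid_t new_uid)
{
        /* zfs_uid_read() applies from_kuid() when the kernel needs it... */
        uid_t cur = zfs_uid_read(ip);

        /* ...and zfs_uid_write() applies make_kuid() on the way back. */
        if (cur != new_uid)
                zfs_uid_write(ip, new_uid);
}
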
/*
* 2.6.38 API change
*/
diff --git a/zfs/include/sys/Makefile.am b/zfs/include/sys/Makefile.am
index 77ecfb2dcf8e..be606b8c6060 100644
--- a/zfs/include/sys/Makefile.am
+++ b/zfs/include/sys/Makefile.am
@@ -1,6 +1,7 @@
-SUBDIRS = fm fs
+SUBDIRS = fm fs crypto sysevent
COMMON_H = \
+ $(top_srcdir)/include/sys/abd.h \
$(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/arc_impl.h \
$(top_srcdir)/include/sys/avl.h \
@@ -9,6 +10,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/bplist.h \
$(top_srcdir)/include/sys/bpobj.h \
$(top_srcdir)/include/sys/bptree.h \
+ $(top_srcdir)/include/sys/bqueue.h \
$(top_srcdir)/include/sys/dbuf.h \
$(top_srcdir)/include/sys/ddt.h \
$(top_srcdir)/include/sys/dmu.h \
@@ -30,27 +32,36 @@ COMMON_H = \
$(top_srcdir)/include/sys/dsl_scan.h \
$(top_srcdir)/include/sys/dsl_synctask.h \
$(top_srcdir)/include/sys/dsl_userhold.h \
+ $(top_srcdir)/include/sys/edonr.h \
$(top_srcdir)/include/sys/efi_partition.h \
$(top_srcdir)/include/sys/metaslab.h \
$(top_srcdir)/include/sys/metaslab_impl.h \
+ $(top_srcdir)/include/sys/mmp.h \
$(top_srcdir)/include/sys/mntent.h \
$(top_srcdir)/include/sys/multilist.h \
$(top_srcdir)/include/sys/nvpair.h \
$(top_srcdir)/include/sys/nvpair_impl.h \
+ $(top_srcdir)/include/sys/pathname.h \
+ $(top_srcdir)/include/sys/policy.h \
$(top_srcdir)/include/sys/range_tree.h \
$(top_srcdir)/include/sys/refcount.h \
$(top_srcdir)/include/sys/rrwlock.h \
$(top_srcdir)/include/sys/sa.h \
$(top_srcdir)/include/sys/sa_impl.h \
$(top_srcdir)/include/sys/sdt.h \
+ $(top_srcdir)/include/sys/sha2.h \
+ $(top_srcdir)/include/sys/skein.h \
$(top_srcdir)/include/sys/spa_boot.h \
$(top_srcdir)/include/sys/space_map.h \
$(top_srcdir)/include/sys/space_reftree.h \
$(top_srcdir)/include/sys/spa.h \
$(top_srcdir)/include/sys/spa_impl.h \
+ $(top_srcdir)/include/sys/spa_checksum.h \
+ $(top_srcdir)/include/sys/sysevent.h \
$(top_srcdir)/include/sys/trace.h \
$(top_srcdir)/include/sys/trace_acl.h \
$(top_srcdir)/include/sys/trace_arc.h \
+ $(top_srcdir)/include/sys/trace_common.h \
$(top_srcdir)/include/sys/trace_dbgmsg.h \
$(top_srcdir)/include/sys/trace_dbuf.h \
$(top_srcdir)/include/sys/trace_dmu.h \
@@ -58,6 +69,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/trace_multilist.h \
$(top_srcdir)/include/sys/trace_txg.h \
$(top_srcdir)/include/sys/trace_zil.h \
+ $(top_srcdir)/include/sys/trace_zio.h \
$(top_srcdir)/include/sys/trace_zrlock.h \
$(top_srcdir)/include/sys/txg.h \
$(top_srcdir)/include/sys/txg_impl.h \
@@ -72,6 +84,8 @@ COMMON_H = \
$(top_srcdir)/include/sys/vdev_file.h \
$(top_srcdir)/include/sys/vdev.h \
$(top_srcdir)/include/sys/vdev_impl.h \
+ $(top_srcdir)/include/sys/vdev_raidz.h \
+ $(top_srcdir)/include/sys/vdev_raidz_impl.h \
$(top_srcdir)/include/sys/xvattr.h \
$(top_srcdir)/include/sys/zap.h \
$(top_srcdir)/include/sys/zap_impl.h \
@@ -84,6 +98,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/zfs_delay.h \
$(top_srcdir)/include/sys/zfs_dir.h \
$(top_srcdir)/include/sys/zfs_fuid.h \
+ $(top_srcdir)/include/sys/zfs_ratelimit.h \
$(top_srcdir)/include/sys/zfs_rlock.h \
$(top_srcdir)/include/sys/zfs_sa.h \
$(top_srcdir)/include/sys/zfs_stat.h \
@@ -96,6 +111,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/zio_compress.h \
$(top_srcdir)/include/sys/zio.h \
$(top_srcdir)/include/sys/zio_impl.h \
+ $(top_srcdir)/include/sys/zio_priority.h \
$(top_srcdir)/include/sys/zrlock.h
KERNEL_H = \
diff --git a/zfs/include/sys/Makefile.in b/zfs/include/sys/Makefile.in
index 68f4a7bb5330..ac8262f0d535 100644
--- a/zfs/include/sys/Makefile.in
+++ b/zfs/include/sys/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -91,7 +91,8 @@ host_triplet = @host@
target_triplet = @target@
subdir = include/sys
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/config/always-arch.m4 \
+ $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/always-no-unused-but-set-variable.m4 \
$(top_srcdir)/config/dkms.m4 \
$(top_srcdir)/config/kernel-acl.m4 \
@@ -107,6 +108,8 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-bio-op.m4 \
$(top_srcdir)/config/kernel-bio-rw-barrier.m4 \
$(top_srcdir)/config/kernel-bio-rw-discard.m4 \
+ $(top_srcdir)/config/kernel-bio_set_dev.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-bdi.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
@@ -114,7 +117,6 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \
$(top_srcdir)/config/kernel-blkdev-get.m4 \
$(top_srcdir)/config/kernel-block-device-operations-release-void.m4 \
- $(top_srcdir)/config/kernel-check-disk-size-change.m4 \
$(top_srcdir)/config/kernel-clear-inode.m4 \
$(top_srcdir)/config/kernel-commit-metadata.m4 \
$(top_srcdir)/config/kernel-create-nameidata.m4 \
@@ -131,9 +133,11 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-encode-fh-inode.m4 \
$(top_srcdir)/config/kernel-evict-inode.m4 \
$(top_srcdir)/config/kernel-fallocate.m4 \
+ $(top_srcdir)/config/kernel-file-dentry.m4 \
$(top_srcdir)/config/kernel-file-inode.m4 \
$(top_srcdir)/config/kernel-fmode-t.m4 \
$(top_srcdir)/config/kernel-follow-down-one.m4 \
+ $(top_srcdir)/config/kernel-fpu.m4 \
$(top_srcdir)/config/kernel-fsync.m4 \
$(top_srcdir)/config/kernel-generic_io_acct.m4 \
$(top_srcdir)/config/kernel-generic_readlink.m4 \
@@ -141,17 +145,20 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-get-gendisk.m4 \
$(top_srcdir)/config/kernel-get-link.m4 \
$(top_srcdir)/config/kernel-inode-getattr.m4 \
+ $(top_srcdir)/config/kernel-inode-set-flags.m4 \
$(top_srcdir)/config/kernel-insert-inode-locked.m4 \
$(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \
$(top_srcdir)/config/kernel-is_owner_or_cap.m4 \
$(top_srcdir)/config/kernel-kmap-atomic-args.m4 \
- $(top_srcdir)/config/kernel-kobj-name-len.m4 \
+ $(top_srcdir)/config/kernel-kuid-helpers.m4 \
$(top_srcdir)/config/kernel-lookup-bdev.m4 \
$(top_srcdir)/config/kernel-lookup-nameidata.m4 \
$(top_srcdir)/config/kernel-lseek-execute.m4 \
$(top_srcdir)/config/kernel-mk-request-fn.m4 \
$(top_srcdir)/config/kernel-mkdir-umode-t.m4 \
+ $(top_srcdir)/config/kernel-mod-param.m4 \
$(top_srcdir)/config/kernel-mount-nodev.m4 \
+ $(top_srcdir)/config/kernel-objtool.m4 \
$(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \
$(top_srcdir)/config/kernel-put-link.m4 \
$(top_srcdir)/config/kernel-rename.m4 \
@@ -162,10 +169,13 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-show-options.m4 \
$(top_srcdir)/config/kernel-shrink.m4 \
$(top_srcdir)/config/kernel-submit_bio.m4 \
+ $(top_srcdir)/config/kernel-super-userns.m4 \
+ $(top_srcdir)/config/kernel-tmpfile.m4 \
$(top_srcdir)/config/kernel-truncate-range.m4 \
$(top_srcdir)/config/kernel-truncate-setsize.m4 \
$(top_srcdir)/config/kernel-vfs-iterate.m4 \
$(top_srcdir)/config/kernel-vfs-rw-iterate.m4 \
+ $(top_srcdir)/config/kernel-vm_node_stat.m4 \
$(top_srcdir)/config/kernel-xattr-handler.m4 \
$(top_srcdir)/config/kernel.m4 $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/ltoptions.m4 \
@@ -173,10 +183,13 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/ltversion.m4 \
$(top_srcdir)/config/lt~obsolete.m4 \
$(top_srcdir)/config/mount-helper.m4 \
- $(top_srcdir)/config/user-arch.m4 \
+ $(top_srcdir)/config/toolchain-simd.m4 \
$(top_srcdir)/config/user-dracut.m4 \
$(top_srcdir)/config/user-frame-larger-than.m4 \
+ $(top_srcdir)/config/user-libattr.m4 \
$(top_srcdir)/config/user-libblkid.m4 \
+ $(top_srcdir)/config/user-libtirpc.m4 \
+ $(top_srcdir)/config/user-libudev.m4 \
$(top_srcdir)/config/user-libuuid.m4 \
$(top_srcdir)/config/user-makedev.m4 \
$(top_srcdir)/config/user-no-format-truncation.m4 \
@@ -222,7 +235,8 @@ am__can_run_installinfo = \
n|no|NO) false;; \
*) (install-info --version) >/dev/null 2>&1;; \
esac
-am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \
+am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/abd.h \
+ $(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/arc_impl.h \
$(top_srcdir)/include/sys/avl.h \
$(top_srcdir)/include/sys/avl_impl.h \
@@ -230,6 +244,7 @@ am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/bplist.h \
$(top_srcdir)/include/sys/bpobj.h \
$(top_srcdir)/include/sys/bptree.h \
+ $(top_srcdir)/include/sys/bqueue.h \
$(top_srcdir)/include/sys/dbuf.h \
$(top_srcdir)/include/sys/ddt.h \
$(top_srcdir)/include/sys/dmu.h \
@@ -251,27 +266,36 @@ am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/dsl_scan.h \
$(top_srcdir)/include/sys/dsl_synctask.h \
$(top_srcdir)/include/sys/dsl_userhold.h \
+ $(top_srcdir)/include/sys/edonr.h \
$(top_srcdir)/include/sys/efi_partition.h \
$(top_srcdir)/include/sys/metaslab.h \
$(top_srcdir)/include/sys/metaslab_impl.h \
+ $(top_srcdir)/include/sys/mmp.h \
$(top_srcdir)/include/sys/mntent.h \
$(top_srcdir)/include/sys/multilist.h \
$(top_srcdir)/include/sys/nvpair.h \
$(top_srcdir)/include/sys/nvpair_impl.h \
+ $(top_srcdir)/include/sys/pathname.h \
+ $(top_srcdir)/include/sys/policy.h \
$(top_srcdir)/include/sys/range_tree.h \
$(top_srcdir)/include/sys/refcount.h \
$(top_srcdir)/include/sys/rrwlock.h \
$(top_srcdir)/include/sys/sa.h \
$(top_srcdir)/include/sys/sa_impl.h \
$(top_srcdir)/include/sys/sdt.h \
+ $(top_srcdir)/include/sys/sha2.h \
+ $(top_srcdir)/include/sys/skein.h \
$(top_srcdir)/include/sys/spa_boot.h \
$(top_srcdir)/include/sys/space_map.h \
$(top_srcdir)/include/sys/space_reftree.h \
$(top_srcdir)/include/sys/spa.h \
$(top_srcdir)/include/sys/spa_impl.h \
+ $(top_srcdir)/include/sys/spa_checksum.h \
+ $(top_srcdir)/include/sys/sysevent.h \
$(top_srcdir)/include/sys/trace.h \
$(top_srcdir)/include/sys/trace_acl.h \
$(top_srcdir)/include/sys/trace_arc.h \
+ $(top_srcdir)/include/sys/trace_common.h \
$(top_srcdir)/include/sys/trace_dbgmsg.h \
$(top_srcdir)/include/sys/trace_dbuf.h \
$(top_srcdir)/include/sys/trace_dmu.h \
@@ -279,6 +303,7 @@ am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/trace_multilist.h \
$(top_srcdir)/include/sys/trace_txg.h \
$(top_srcdir)/include/sys/trace_zil.h \
+ $(top_srcdir)/include/sys/trace_zio.h \
$(top_srcdir)/include/sys/trace_zrlock.h \
$(top_srcdir)/include/sys/txg.h \
$(top_srcdir)/include/sys/txg_impl.h \
@@ -293,6 +318,8 @@ am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/vdev_file.h \
$(top_srcdir)/include/sys/vdev.h \
$(top_srcdir)/include/sys/vdev_impl.h \
+ $(top_srcdir)/include/sys/vdev_raidz.h \
+ $(top_srcdir)/include/sys/vdev_raidz_impl.h \
$(top_srcdir)/include/sys/xvattr.h \
$(top_srcdir)/include/sys/zap.h \
$(top_srcdir)/include/sys/zap_impl.h \
@@ -305,6 +332,7 @@ am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/zfs_delay.h \
$(top_srcdir)/include/sys/zfs_dir.h \
$(top_srcdir)/include/sys/zfs_fuid.h \
+ $(top_srcdir)/include/sys/zfs_ratelimit.h \
$(top_srcdir)/include/sys/zfs_rlock.h \
$(top_srcdir)/include/sys/zfs_sa.h \
$(top_srcdir)/include/sys/zfs_stat.h \
@@ -317,6 +345,7 @@ am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/zio_compress.h \
$(top_srcdir)/include/sys/zio.h \
$(top_srcdir)/include/sys/zio_impl.h \
+ $(top_srcdir)/include/sys/zio_priority.h \
$(top_srcdir)/include/sys/zrlock.h \
$(top_srcdir)/include/sys/zfs_ioctl.h \
$(top_srcdir)/include/sys/zfs_onexit.h \
@@ -350,7 +379,8 @@ am__uninstall_files_from_dir = { \
$(am__cd) "$$dir" && rm -f $$files; }; \
}
am__installdirs = "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)"
-am__libzfs_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \
+am__libzfs_HEADERS_DIST = $(top_srcdir)/include/sys/abd.h \
+ $(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/arc_impl.h \
$(top_srcdir)/include/sys/avl.h \
$(top_srcdir)/include/sys/avl_impl.h \
@@ -358,6 +388,7 @@ am__libzfs_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/bplist.h \
$(top_srcdir)/include/sys/bpobj.h \
$(top_srcdir)/include/sys/bptree.h \
+ $(top_srcdir)/include/sys/bqueue.h \
$(top_srcdir)/include/sys/dbuf.h \
$(top_srcdir)/include/sys/ddt.h \
$(top_srcdir)/include/sys/dmu.h \
@@ -379,27 +410,36 @@ am__libzfs_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/dsl_scan.h \
$(top_srcdir)/include/sys/dsl_synctask.h \
$(top_srcdir)/include/sys/dsl_userhold.h \
+ $(top_srcdir)/include/sys/edonr.h \
$(top_srcdir)/include/sys/efi_partition.h \
$(top_srcdir)/include/sys/metaslab.h \
$(top_srcdir)/include/sys/metaslab_impl.h \
+ $(top_srcdir)/include/sys/mmp.h \
$(top_srcdir)/include/sys/mntent.h \
$(top_srcdir)/include/sys/multilist.h \
$(top_srcdir)/include/sys/nvpair.h \
$(top_srcdir)/include/sys/nvpair_impl.h \
+ $(top_srcdir)/include/sys/pathname.h \
+ $(top_srcdir)/include/sys/policy.h \
$(top_srcdir)/include/sys/range_tree.h \
$(top_srcdir)/include/sys/refcount.h \
$(top_srcdir)/include/sys/rrwlock.h \
$(top_srcdir)/include/sys/sa.h \
$(top_srcdir)/include/sys/sa_impl.h \
$(top_srcdir)/include/sys/sdt.h \
+ $(top_srcdir)/include/sys/sha2.h \
+ $(top_srcdir)/include/sys/skein.h \
$(top_srcdir)/include/sys/spa_boot.h \
$(top_srcdir)/include/sys/space_map.h \
$(top_srcdir)/include/sys/space_reftree.h \
$(top_srcdir)/include/sys/spa.h \
$(top_srcdir)/include/sys/spa_impl.h \
+ $(top_srcdir)/include/sys/spa_checksum.h \
+ $(top_srcdir)/include/sys/sysevent.h \
$(top_srcdir)/include/sys/trace.h \
$(top_srcdir)/include/sys/trace_acl.h \
$(top_srcdir)/include/sys/trace_arc.h \
+ $(top_srcdir)/include/sys/trace_common.h \
$(top_srcdir)/include/sys/trace_dbgmsg.h \
$(top_srcdir)/include/sys/trace_dbuf.h \
$(top_srcdir)/include/sys/trace_dmu.h \
@@ -407,6 +447,7 @@ am__libzfs_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/trace_multilist.h \
$(top_srcdir)/include/sys/trace_txg.h \
$(top_srcdir)/include/sys/trace_zil.h \
+ $(top_srcdir)/include/sys/trace_zio.h \
$(top_srcdir)/include/sys/trace_zrlock.h \
$(top_srcdir)/include/sys/txg.h \
$(top_srcdir)/include/sys/txg_impl.h \
@@ -421,6 +462,8 @@ am__libzfs_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/vdev_file.h \
$(top_srcdir)/include/sys/vdev.h \
$(top_srcdir)/include/sys/vdev_impl.h \
+ $(top_srcdir)/include/sys/vdev_raidz.h \
+ $(top_srcdir)/include/sys/vdev_raidz_impl.h \
$(top_srcdir)/include/sys/xvattr.h \
$(top_srcdir)/include/sys/zap.h \
$(top_srcdir)/include/sys/zap_impl.h \
@@ -433,6 +476,7 @@ am__libzfs_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/zfs_delay.h \
$(top_srcdir)/include/sys/zfs_dir.h \
$(top_srcdir)/include/sys/zfs_fuid.h \
+ $(top_srcdir)/include/sys/zfs_ratelimit.h \
$(top_srcdir)/include/sys/zfs_rlock.h \
$(top_srcdir)/include/sys/zfs_sa.h \
$(top_srcdir)/include/sys/zfs_stat.h \
@@ -445,6 +489,7 @@ am__libzfs_HEADERS_DIST = $(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/zio_compress.h \
$(top_srcdir)/include/sys/zio.h \
$(top_srcdir)/include/sys/zio_impl.h \
+ $(top_srcdir)/include/sys/zio_priority.h \
$(top_srcdir)/include/sys/zrlock.h
HEADERS = $(kernel_HEADERS) $(libzfs_HEADERS)
RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
@@ -522,7 +567,6 @@ CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEBUG_DMU_TX = @DEBUG_DMU_TX@
DEBUG_STACKFLAGS = @DEBUG_STACKFLAGS@
DEBUG_ZFS = @DEBUG_ZFS@
DEFAULT_INITCONF_DIR = @DEFAULT_INITCONF_DIR@
@@ -561,10 +605,14 @@ KERNELCPPFLAGS = @KERNELCPPFLAGS@
KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@
LD = @LD@
LDFLAGS = @LDFLAGS@
+LIBATTR = @LIBATTR@
LIBBLKID = @LIBBLKID@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
+LIBTIRPC = @LIBTIRPC@
+LIBTIRPC_CFLAGS = @LIBTIRPC_CFLAGS@
LIBTOOL = @LIBTOOL@
+LIBUDEV = @LIBUDEV@
LIBUUID = @LIBUUID@
LINUX = @LINUX@
LINUX_OBJ = @LINUX_OBJ@
@@ -595,8 +643,12 @@ PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
+QAT_OBJ = @QAT_OBJ@
+QAT_SRC = @QAT_SRC@
+QAT_SYMBOLS = @QAT_SYMBOLS@
RANLIB = @RANLIB@
RELEASE = @RELEASE@
+RM = @RM@
RPM = @RPM@
RPMBUILD = @RPMBUILD@
RPMBUILD_VERSION = @RPMBUILD_VERSION@
@@ -636,6 +688,7 @@ ZFS_META_RELEASE = @ZFS_META_RELEASE@
ZFS_META_VERSION = @ZFS_META_VERSION@
ZFS_MODULE_LOAD = @ZFS_MODULE_LOAD@
ZLIB = @ZLIB@
+ZONENAME = @ZONENAME@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
@@ -700,8 +753,9 @@ top_builddir = @top_builddir@
top_srcdir = @top_srcdir@
udevdir = @udevdir@
udevruledir = @udevruledir@
-SUBDIRS = fm fs
+SUBDIRS = fm fs crypto sysevent
COMMON_H = \
+ $(top_srcdir)/include/sys/abd.h \
$(top_srcdir)/include/sys/arc.h \
$(top_srcdir)/include/sys/arc_impl.h \
$(top_srcdir)/include/sys/avl.h \
@@ -710,6 +764,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/bplist.h \
$(top_srcdir)/include/sys/bpobj.h \
$(top_srcdir)/include/sys/bptree.h \
+ $(top_srcdir)/include/sys/bqueue.h \
$(top_srcdir)/include/sys/dbuf.h \
$(top_srcdir)/include/sys/ddt.h \
$(top_srcdir)/include/sys/dmu.h \
@@ -731,27 +786,36 @@ COMMON_H = \
$(top_srcdir)/include/sys/dsl_scan.h \
$(top_srcdir)/include/sys/dsl_synctask.h \
$(top_srcdir)/include/sys/dsl_userhold.h \
+ $(top_srcdir)/include/sys/edonr.h \
$(top_srcdir)/include/sys/efi_partition.h \
$(top_srcdir)/include/sys/metaslab.h \
$(top_srcdir)/include/sys/metaslab_impl.h \
+ $(top_srcdir)/include/sys/mmp.h \
$(top_srcdir)/include/sys/mntent.h \
$(top_srcdir)/include/sys/multilist.h \
$(top_srcdir)/include/sys/nvpair.h \
$(top_srcdir)/include/sys/nvpair_impl.h \
+ $(top_srcdir)/include/sys/pathname.h \
+ $(top_srcdir)/include/sys/policy.h \
$(top_srcdir)/include/sys/range_tree.h \
$(top_srcdir)/include/sys/refcount.h \
$(top_srcdir)/include/sys/rrwlock.h \
$(top_srcdir)/include/sys/sa.h \
$(top_srcdir)/include/sys/sa_impl.h \
$(top_srcdir)/include/sys/sdt.h \
+ $(top_srcdir)/include/sys/sha2.h \
+ $(top_srcdir)/include/sys/skein.h \
$(top_srcdir)/include/sys/spa_boot.h \
$(top_srcdir)/include/sys/space_map.h \
$(top_srcdir)/include/sys/space_reftree.h \
$(top_srcdir)/include/sys/spa.h \
$(top_srcdir)/include/sys/spa_impl.h \
+ $(top_srcdir)/include/sys/spa_checksum.h \
+ $(top_srcdir)/include/sys/sysevent.h \
$(top_srcdir)/include/sys/trace.h \
$(top_srcdir)/include/sys/trace_acl.h \
$(top_srcdir)/include/sys/trace_arc.h \
+ $(top_srcdir)/include/sys/trace_common.h \
$(top_srcdir)/include/sys/trace_dbgmsg.h \
$(top_srcdir)/include/sys/trace_dbuf.h \
$(top_srcdir)/include/sys/trace_dmu.h \
@@ -759,6 +823,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/trace_multilist.h \
$(top_srcdir)/include/sys/trace_txg.h \
$(top_srcdir)/include/sys/trace_zil.h \
+ $(top_srcdir)/include/sys/trace_zio.h \
$(top_srcdir)/include/sys/trace_zrlock.h \
$(top_srcdir)/include/sys/txg.h \
$(top_srcdir)/include/sys/txg_impl.h \
@@ -773,6 +838,8 @@ COMMON_H = \
$(top_srcdir)/include/sys/vdev_file.h \
$(top_srcdir)/include/sys/vdev.h \
$(top_srcdir)/include/sys/vdev_impl.h \
+ $(top_srcdir)/include/sys/vdev_raidz.h \
+ $(top_srcdir)/include/sys/vdev_raidz_impl.h \
$(top_srcdir)/include/sys/xvattr.h \
$(top_srcdir)/include/sys/zap.h \
$(top_srcdir)/include/sys/zap_impl.h \
@@ -785,6 +852,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/zfs_delay.h \
$(top_srcdir)/include/sys/zfs_dir.h \
$(top_srcdir)/include/sys/zfs_fuid.h \
+ $(top_srcdir)/include/sys/zfs_ratelimit.h \
$(top_srcdir)/include/sys/zfs_rlock.h \
$(top_srcdir)/include/sys/zfs_sa.h \
$(top_srcdir)/include/sys/zfs_stat.h \
@@ -797,6 +865,7 @@ COMMON_H = \
$(top_srcdir)/include/sys/zio_compress.h \
$(top_srcdir)/include/sys/zio.h \
$(top_srcdir)/include/sys/zio_impl.h \
+ $(top_srcdir)/include/sys/zio_priority.h \
$(top_srcdir)/include/sys/zrlock.h
KERNEL_H = \
diff --git a/zfs/include/sys/abd.h b/zfs/include/sys/abd.h
new file mode 100644
index 000000000000..cd7105013705
--- /dev/null
+++ b/zfs/include/sys/abd.h
@@ -0,0 +1,179 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ */
+
+#ifndef _ABD_H
+#define _ABD_H
+
+#include <sys/isa_defs.h>
+#include <sys/int_types.h>
+#include <sys/debug.h>
+#include <sys/refcount.h>
+#ifdef _KERNEL
+#include <linux/mm.h>
+#include <linux/bio.h>
+#include <sys/uio.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum abd_flags {
+ ABD_FLAG_LINEAR = 1 << 0, /* is buffer linear (or scattered)? */
+ ABD_FLAG_OWNER = 1 << 1, /* does it own its data buffers? */
+ ABD_FLAG_META = 1 << 2, /* does this represent FS metadata? */
+ ABD_FLAG_MULTI_ZONE = 1 << 3, /* pages split over memory zones */
+ ABD_FLAG_MULTI_CHUNK = 1 << 4 /* pages split over multiple chunks */
+} abd_flags_t;
+
+typedef struct abd {
+ abd_flags_t abd_flags;
+ uint_t abd_size; /* excludes scattered abd_offset */
+ struct abd *abd_parent;
+ refcount_t abd_children;
+ union {
+ struct abd_scatter {
+ uint_t abd_offset;
+ uint_t abd_nents;
+ struct scatterlist *abd_sgl;
+ } abd_scatter;
+ struct abd_linear {
+ void *abd_buf;
+ } abd_linear;
+ } abd_u;
+} abd_t;
+
+typedef int abd_iter_func_t(void *buf, size_t len, void *private);
+typedef int abd_iter_func2_t(void *bufa, void *bufb, size_t len, void *private);
+
+extern int zfs_abd_scatter_enabled;
+
+static inline boolean_t
+abd_is_linear(abd_t *abd)
+{
+ return ((abd->abd_flags & ABD_FLAG_LINEAR) != 0 ? B_TRUE : B_FALSE);
+}
+
+/*
+ * Allocations and deallocations
+ */
+
+abd_t *abd_alloc(size_t, boolean_t);
+abd_t *abd_alloc_linear(size_t, boolean_t);
+abd_t *abd_alloc_for_io(size_t, boolean_t);
+abd_t *abd_alloc_sametype(abd_t *, size_t);
+void abd_free(abd_t *);
+abd_t *abd_get_offset(abd_t *, size_t);
+abd_t *abd_get_offset_size(abd_t *, size_t, size_t);
+abd_t *abd_get_from_buf(void *, size_t);
+void abd_put(abd_t *);
+
+/*
+ * Conversion to and from a normal buffer
+ */
+
+void *abd_to_buf(abd_t *);
+void *abd_borrow_buf(abd_t *, size_t);
+void *abd_borrow_buf_copy(abd_t *, size_t);
+void abd_return_buf(abd_t *, void *, size_t);
+void abd_return_buf_copy(abd_t *, void *, size_t);
+void abd_take_ownership_of_buf(abd_t *, boolean_t);
+void abd_release_ownership_of_buf(abd_t *);
+
+/*
+ * ABD operations
+ */
+
+int abd_iterate_func(abd_t *, size_t, size_t, abd_iter_func_t *, void *);
+int abd_iterate_func2(abd_t *, abd_t *, size_t, size_t, size_t,
+ abd_iter_func2_t *, void *);
+void abd_copy_off(abd_t *, abd_t *, size_t, size_t, size_t);
+void abd_copy_from_buf_off(abd_t *, const void *, size_t, size_t);
+void abd_copy_to_buf_off(void *, abd_t *, size_t, size_t);
+int abd_cmp(abd_t *, abd_t *);
+int abd_cmp_buf_off(abd_t *, const void *, size_t, size_t);
+void abd_zero_off(abd_t *, size_t, size_t);
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+unsigned int abd_scatter_bio_map_off(struct bio *, abd_t *, unsigned int,
+ size_t);
+unsigned long abd_nr_pages_off(abd_t *, unsigned int, size_t);
+#endif
+
+void abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
+ ssize_t csize, ssize_t dsize, const unsigned parity,
+ void (*func_raidz_gen)(void **, const void *, size_t, size_t));
+void abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
+ ssize_t tsize, const unsigned parity,
+ void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
+ const unsigned *mul),
+ const unsigned *mul);
+
+/*
+ * Wrappers for calls with offsets of 0
+ */
+
+static inline void
+abd_copy(abd_t *dabd, abd_t *sabd, size_t size)
+{
+ abd_copy_off(dabd, sabd, 0, 0, size);
+}
+
+static inline void
+abd_copy_from_buf(abd_t *abd, const void *buf, size_t size)
+{
+ abd_copy_from_buf_off(abd, buf, 0, size);
+}
+
+static inline void
+abd_copy_to_buf(void *buf, abd_t *abd, size_t size)
+{
+ abd_copy_to_buf_off(buf, abd, 0, size);
+}
+
+static inline int
+abd_cmp_buf(abd_t *abd, const void *buf, size_t size)
+{
+ return (abd_cmp_buf_off(abd, buf, 0, size));
+}
+
+static inline void
+abd_zero(abd_t *abd, size_t size)
+{
+ abd_zero_off(abd, 0, size);
+}
+
+/*
+ * Module lifecycle
+ */
+
+void abd_init(void);
+void abd_fini(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ABD_H */
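
abd.h is new in this update: an ABD (ARC buffer data) may be one linear buffer or a set of scattered pages behind a common API, so the ARC no longer depends on large linear allocations. A minimal usage sketch under the declarations above; the size and fill pattern are arbitrary and error handling is omitted:

static void
abd_example(void)
{
        /* B_FALSE: this buffer holds data, not filesystem metadata. */
        abd_t *abd = abd_alloc(4096, B_FALSE);
        void *buf = abd_borrow_buf_copy(abd, 4096);

        memset(buf, 0xab, 4096);                /* work on a linear view */
        abd_return_buf_copy(abd, buf, 4096);    /* copy back and release */

        abd_zero(abd, 4096);                    /* offset-0 wrapper from above */
        abd_free(abd);
}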
diff --git a/zfs/include/sys/arc.h b/zfs/include/sys/arc.h
index db7a64aa2e22..66f37cf71025 100644
--- a/zfs/include/sys/arc.h
+++ b/zfs/include/sys/arc.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
*/
@@ -44,12 +44,24 @@ extern "C" {
*/
#define ARC_EVICT_ALL -1ULL
+#define HDR_SET_LSIZE(hdr, x) do { \
+ ASSERT(IS_P2ALIGNED(x, 1U << SPA_MINBLOCKSHIFT)); \
+ (hdr)->b_lsize = ((x) >> SPA_MINBLOCKSHIFT); \
+_NOTE(CONSTCOND) } while (0)
+
+#define HDR_SET_PSIZE(hdr, x) do { \
+ ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \
+ (hdr)->b_psize = ((x) >> SPA_MINBLOCKSHIFT); \
+_NOTE(CONSTCOND) } while (0)
+
+#define HDR_GET_LSIZE(hdr) ((hdr)->b_lsize << SPA_MINBLOCKSHIFT)
+#define HDR_GET_PSIZE(hdr) ((hdr)->b_psize << SPA_MINBLOCKSHIFT)
+
typedef struct arc_buf_hdr arc_buf_hdr_t;
typedef struct arc_buf arc_buf_t;
typedef struct arc_prune arc_prune_t;
typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *private);
typedef void arc_prune_func_t(int64_t bytes, void *private);
-typedef int arc_evict_func_t(void *private);
/* Shared module parameters */
extern int zfs_arc_average_blocksize;
@@ -77,46 +89,72 @@ typedef enum arc_flags
/*
* Public flags that can be passed into the ARC by external consumers.
*/
- ARC_FLAG_NONE = 1 << 0, /* No flags set */
- ARC_FLAG_WAIT = 1 << 1, /* perform sync I/O */
- ARC_FLAG_NOWAIT = 1 << 2, /* perform async I/O */
- ARC_FLAG_PREFETCH = 1 << 3, /* I/O is a prefetch */
- ARC_FLAG_CACHED = 1 << 4, /* I/O was in cache */
- ARC_FLAG_L2CACHE = 1 << 5, /* cache in L2ARC */
- ARC_FLAG_L2COMPRESS = 1 << 6, /* compress in L2ARC */
+ ARC_FLAG_WAIT = 1 << 0, /* perform sync I/O */
+ ARC_FLAG_NOWAIT = 1 << 1, /* perform async I/O */
+ ARC_FLAG_PREFETCH = 1 << 2, /* I/O is a prefetch */
+ ARC_FLAG_CACHED = 1 << 3, /* I/O was in cache */
+ ARC_FLAG_L2CACHE = 1 << 4, /* cache in L2ARC */
+ ARC_FLAG_PREDICTIVE_PREFETCH = 1 << 5, /* I/O from zfetch */
/*
* Private ARC flags. These flags are private ARC only flags that
* will show up in b_flags in the arc_hdr_buf_t. These flags should
* only be set by ARC code.
*/
- ARC_FLAG_IN_HASH_TABLE = 1 << 7, /* buffer is hashed */
- ARC_FLAG_IO_IN_PROGRESS = 1 << 8, /* I/O in progress */
- ARC_FLAG_IO_ERROR = 1 << 9, /* I/O failed for buf */
- ARC_FLAG_FREED_IN_READ = 1 << 10, /* freed during read */
- ARC_FLAG_BUF_AVAILABLE = 1 << 11, /* block not in use */
- ARC_FLAG_INDIRECT = 1 << 12, /* indirect block */
- ARC_FLAG_L2_WRITING = 1 << 13, /* write in progress */
- ARC_FLAG_L2_EVICTED = 1 << 14, /* evicted during I/O */
- ARC_FLAG_L2_WRITE_HEAD = 1 << 15, /* head of write list */
+ ARC_FLAG_IN_HASH_TABLE = 1 << 6, /* buffer is hashed */
+ ARC_FLAG_IO_IN_PROGRESS = 1 << 7, /* I/O in progress */
+ ARC_FLAG_IO_ERROR = 1 << 8, /* I/O failed for buf */
+ ARC_FLAG_INDIRECT = 1 << 9, /* indirect block */
+ /* Indicates that block was read with ASYNC priority. */
+ ARC_FLAG_PRIO_ASYNC_READ = 1 << 10,
+ ARC_FLAG_L2_WRITING = 1 << 11, /* write in progress */
+ ARC_FLAG_L2_EVICTED = 1 << 12, /* evicted during I/O */
+ ARC_FLAG_L2_WRITE_HEAD = 1 << 13, /* head of write list */
/* indicates that the buffer contains metadata (otherwise, data) */
- ARC_FLAG_BUFC_METADATA = 1 << 16,
+ ARC_FLAG_BUFC_METADATA = 1 << 14,
/* Flags specifying whether optional hdr struct fields are defined */
- ARC_FLAG_HAS_L1HDR = 1 << 17,
- ARC_FLAG_HAS_L2HDR = 1 << 18,
+ ARC_FLAG_HAS_L1HDR = 1 << 15,
+ ARC_FLAG_HAS_L2HDR = 1 << 16,
+
+ /*
+ * Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data.
+ * This allows the l2arc to use the blkptr's checksum to verify
+ * the data without having to store the checksum in the hdr.
+ */
+ ARC_FLAG_COMPRESSED_ARC = 1 << 17,
+ ARC_FLAG_SHARED_DATA = 1 << 18,
+
+ /*
+ * The arc buffer's compression mode is stored in the top 7 bits of the
+ * flags field, so these dummy flags are included so that MDB can
+ * interpret the enum properly.
+ */
+ ARC_FLAG_COMPRESS_0 = 1 << 24,
+ ARC_FLAG_COMPRESS_1 = 1 << 25,
+ ARC_FLAG_COMPRESS_2 = 1 << 26,
+ ARC_FLAG_COMPRESS_3 = 1 << 27,
+ ARC_FLAG_COMPRESS_4 = 1 << 28,
+ ARC_FLAG_COMPRESS_5 = 1 << 29,
+ ARC_FLAG_COMPRESS_6 = 1 << 30
+
} arc_flags_t;
+typedef enum arc_buf_flags {
+ ARC_BUF_FLAG_SHARED = 1 << 0,
+ ARC_BUF_FLAG_COMPRESSED = 1 << 1
+} arc_buf_flags_t;
+
struct arc_buf {
arc_buf_hdr_t *b_hdr;
arc_buf_t *b_next;
kmutex_t b_evict_lock;
void *b_data;
- arc_evict_func_t *b_efunc;
- void *b_private;
+ arc_buf_flags_t b_flags;
};
typedef enum arc_buf_contents {
+ ARC_BUFC_INVALID, /* invalid type */
ARC_BUFC_DATA, /* buffer contains data */
ARC_BUFC_METADATA, /* buffer contains metadata */
ARC_BUFC_NUMTYPES
@@ -130,7 +168,9 @@ typedef enum arc_space_type {
ARC_SPACE_META,
ARC_SPACE_HDRS,
ARC_SPACE_L2HDRS,
- ARC_SPACE_OTHER,
+ ARC_SPACE_DBUF,
+ ARC_SPACE_DNODE,
+ ARC_SPACE_BONUS,
ARC_SPACE_NUMTYPES
} arc_space_type_t;
@@ -148,7 +188,7 @@ typedef struct arc_buf_info {
arc_state_type_t abi_state_type;
arc_buf_contents_t abi_state_contents;
uint32_t abi_flags;
- uint32_t abi_datacnt;
+ uint32_t abi_bufcnt;
uint64_t abi_size;
uint64_t abi_spa;
uint64_t abi_access;
@@ -165,21 +205,27 @@ typedef struct arc_buf_info {
void arc_space_consume(uint64_t space, arc_space_type_t type);
void arc_space_return(uint64_t space, arc_space_type_t type);
-arc_buf_t *arc_buf_alloc(spa_t *spa, uint64_t size, void *tag,
- arc_buf_contents_t type);
-arc_buf_t *arc_loan_buf(spa_t *spa, uint64_t size);
+boolean_t arc_is_metadata(arc_buf_t *buf);
+enum zio_compress arc_get_compression(arc_buf_t *buf);
+int arc_decompress(arc_buf_t *buf);
+arc_buf_t *arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type,
+ int32_t size);
+arc_buf_t *arc_alloc_compressed_buf(spa_t *spa, void *tag,
+ uint64_t psize, uint64_t lsize, enum zio_compress compression_type);
+arc_buf_t *arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size);
+arc_buf_t *arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
+ enum zio_compress compression_type);
void arc_return_buf(arc_buf_t *buf, void *tag);
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
-void arc_buf_add_ref(arc_buf_t *buf, void *tag);
-boolean_t arc_buf_remove_ref(arc_buf_t *buf, void *tag);
+void arc_buf_destroy(arc_buf_t *buf, void *tag);
void arc_buf_info(arc_buf_t *buf, arc_buf_info_t *abi, int state_index);
uint64_t arc_buf_size(arc_buf_t *buf);
+uint64_t arc_buf_lsize(arc_buf_t *buf);
void arc_release(arc_buf_t *buf, void *tag);
int arc_released(arc_buf_t *buf);
void arc_buf_sigsegv(int sig, siginfo_t *si, void *unused);
void arc_buf_freeze(arc_buf_t *buf);
void arc_buf_thaw(arc_buf_t *buf);
-boolean_t arc_buf_eviction_needed(arc_buf_t *buf);
#ifdef ZFS_DEBUG
int arc_referenced(arc_buf_t *buf);
#endif
@@ -188,22 +234,21 @@ int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
arc_done_func_t *done, void *private, zio_priority_t priority, int flags,
arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
- blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, boolean_t l2arc_compress,
- const zio_prop_t *zp, arc_done_func_t *ready, arc_done_func_t *physdone,
- arc_done_func_t *done, void *private, zio_priority_t priority,
- int zio_flags, const zbookmark_phys_t *zb);
+ blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp,
+ arc_done_func_t *ready, arc_done_func_t *child_ready,
+ arc_done_func_t *physdone, arc_done_func_t *done,
+ void *private, zio_priority_t priority, int zio_flags,
+ const zbookmark_phys_t *zb);
arc_prune_t *arc_add_prune_callback(arc_prune_func_t *func, void *private);
void arc_remove_prune_callback(arc_prune_t *p);
void arc_freed(spa_t *spa, const blkptr_t *bp);
-void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private);
-boolean_t arc_clear_callback(arc_buf_t *buf);
-
void arc_flush(spa_t *spa, boolean_t retry);
void arc_tempreserve_clear(uint64_t reserve);
int arc_tempreserve_space(uint64_t reserve, uint64_t txg);
+uint64_t arc_target_bytes(void);
void arc_init(void);
void arc_fini(void);
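
The ARC_FLAG_COMPRESS_* dummy entries above reserve bits 24-30 of b_flags for the buffer's compression type. The real accessors in arc.c use bitfield macros; the arithmetic they perform is roughly this sketch (the EX_* names are illustrative, not from this patch):

#define EX_COMPRESS_SHIFT       24
#define EX_COMPRESS_MASK        (0x7fU << EX_COMPRESS_SHIFT)

static inline unsigned int
example_get_compress(uint32_t flags)
{
        return ((flags & EX_COMPRESS_MASK) >> EX_COMPRESS_SHIFT);
}

static inline uint32_t
example_set_compress(uint32_t flags, unsigned int comp)
{
        return ((flags & ~EX_COMPRESS_MASK) |
            ((comp << EX_COMPRESS_SHIFT) & EX_COMPRESS_MASK));
}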
diff --git a/zfs/include/sys/arc_impl.h b/zfs/include/sys/arc_impl.h
index a9dbfc8dd73e..c6363f2ab71a 100644
--- a/zfs/include/sys/arc_impl.h
+++ b/zfs/include/sys/arc_impl.h
@@ -54,7 +54,7 @@ extern "C" {
* a DVA. These are buffers that hold dirty block copies
* before they are written to stable storage. By definition,
* they are "ref'd" and are considered part of arc_mru
- * that cannot be freed. Generally, they will aquire a DVA
+ * that cannot be freed. Generally, they will acquire a DVA
* as they are written and migrate onto the arc_mru list.
*
* The ARC_l2c_only state is for buffers that are in the second
@@ -70,11 +70,11 @@ typedef struct arc_state {
/*
* list of evictable buffers
*/
- multilist_t arcs_list[ARC_BUFC_NUMTYPES];
+ multilist_t *arcs_list[ARC_BUFC_NUMTYPES];
/*
* total amount of evictable data in this state
*/
- uint64_t arcs_lsize[ARC_BUFC_NUMTYPES];
+ refcount_t arcs_esize[ARC_BUFC_NUMTYPES];
/*
* total amount of data in this state; this includes: evictable,
* non-evictable, ARC_BUFC_DATA, and ARC_BUFC_METADATA.
@@ -92,6 +92,7 @@ struct arc_callback {
void *acb_private;
arc_done_func_t *acb_done;
arc_buf_t *acb_buf;
+ boolean_t acb_compressed;
zio_t *acb_zio_dummy;
arc_callback_t *acb_next;
};
@@ -101,6 +102,7 @@ typedef struct arc_write_callback arc_write_callback_t;
struct arc_write_callback {
void *awcb_private;
arc_done_func_t *awcb_ready;
+ arc_done_func_t *awcb_children_ready;
arc_done_func_t *awcb_physdone;
arc_done_func_t *awcb_done;
arc_buf_t *awcb_buf;
@@ -139,11 +141,13 @@ struct arc_write_callback {
*/
typedef struct l1arc_buf_hdr {
kmutex_t b_freeze_lock;
+ zio_cksum_t *b_freeze_cksum;
arc_buf_t *b_buf;
- uint32_t b_datacnt;
+ uint32_t b_bufcnt;
/* for waiting on writes to complete */
kcondvar_t b_cv;
+ uint8_t b_byteswap;
/* protected by arc state mutex */
@@ -162,8 +166,7 @@ typedef struct l1arc_buf_hdr {
refcount_t b_refcnt;
arc_callback_t *b_acb;
- /* temporary buffer holder for in-flight compressed data */
- void *b_tmp_cdata;
+ abd_t *b_pabd;
} l1arc_buf_hdr_t;
typedef struct l2arc_dev {
@@ -184,10 +187,7 @@ typedef struct l2arc_buf_hdr {
/* protected by arc_buf_hdr mutex */
l2arc_dev_t *b_dev; /* L2ARC device */
uint64_t b_daddr; /* disk address, offset byte */
- /* real alloc'd buffer size depending on b_compress applied */
uint32_t b_hits;
- int32_t b_asize;
- uint8_t b_compress;
list_node_t b_l2node;
} l2arc_buf_hdr_t;
@@ -201,20 +201,37 @@ struct arc_buf_hdr {
/* protected by hash lock */
dva_t b_dva;
uint64_t b_birth;
- /*
- * Even though this checksum is only set/verified when a buffer is in
- * the L1 cache, it needs to be in the set of common fields because it
- * must be preserved from the time before a buffer is written out to
- * L2ARC until after it is read back in.
- */
- zio_cksum_t *b_freeze_cksum;
+ arc_buf_contents_t b_type;
arc_buf_hdr_t *b_hash_next;
arc_flags_t b_flags;
- /* immutable */
- int32_t b_size;
- uint64_t b_spa;
+ /*
+ * This field stores the size of the data buffer after
+ * compression, and is set in the arc's zio completion handlers.
+ * It is in units of SPA_MINBLOCKSIZE (e.g. 1 == 512 bytes).
+ *
+ * While the block pointers can store up to 32MB in their psize
+ * field, we can only store up to 32MB minus 512B. This is due
+ * to the bp using a bias of 1, whereas we use a bias of 0 (i.e.
+ * a field of zeros represents 512B in the bp). We can't use a
+ * bias of 1 since we need to reserve a psize of zero, here, to
+ * represent holes and embedded blocks.
+ *
+ * This isn't a problem in practice, since the maximum size of a
+ * buffer is limited to 16MB, so we never need to store 32MB in
+ * this field. Even in the upstream illumos code base, the
+ * maximum size of a buffer is limited to 16MB.
+ */
+ uint16_t b_psize;
+
+ /*
+ * This field stores the size of the data buffer before
+ * compression, and cannot change once set. It is in units
+ * of SPA_MINBLOCKSIZE (e.g. 2 == 1024 bytes)
+ */
+ uint16_t b_lsize; /* immutable */
+ uint64_t b_spa; /* immutable */
/* L2ARC fields. Undefined when not in L2ARC. */
l2arc_buf_hdr_t b_l2hdr;
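
The b_psize comment is easier to check with the numbers written out; assuming the standard SPA_MINBLOCKSHIFT of 9 (512-byte units):

/*
 * b_psize is a uint16_t counting 512-byte units with a bias of 0:
 *   b_psize == 0      -> 0 bytes (reserved for holes/embedded blocks)
 *   b_psize == 1      -> 512 bytes
 *   b_psize == 65535  -> 65535 * 512 = 33553920 bytes = 32MB - 512B
 * Block pointers bias by 1 (stored 0 -> 512B), so they reach a full
 * 32MB; that difference is the "minus 512B" gap described above, and
 * it is harmless because buffers are capped at 16MB anyway.
 */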
diff --git a/zfs/include/sys/avl.h b/zfs/include/sys/avl.h
index 10e0ddaeef88..ba51e2a790f4 100644
--- a/zfs/include/sys/avl.h
+++ b/zfs/include/sys/avl.h
@@ -105,6 +105,13 @@ extern "C" {
* as is needed for any linked list implementation.
*/
+/*
+ * AVL comparator helpers
+ */
+#define AVL_ISIGN(a) (((a) > 0) - ((a) < 0))
+#define AVL_CMP(a, b) (((a) > (b)) - ((a) < (b)))
+#define AVL_PCMP(a, b) \
+ (((uintptr_t)(a) > (uintptr_t)(b)) - ((uintptr_t)(a) < (uintptr_t)(b)))
/*
* Type used for the root of the AVL tree.
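
The new AVL_CMP/AVL_PCMP helpers return exactly the -1/0/+1 contract that avl_create() comparators must honor, without branching. A sketch of a comparator built on them; the node type is illustrative:

typedef struct example_node {
        uint64_t        en_key;
        avl_node_t      en_avl;
} example_node_t;

static int
example_compare(const void *a, const void *b)
{
        const example_node_t *na = a;
        const example_node_t *nb = b;

        return (AVL_CMP(na->en_key, nb->en_key));
}

avl_create(&tree, example_compare, sizeof (example_node_t), offsetof(example_node_t, en_avl)) would then attach the comparator to a tree.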
diff --git a/zfs/include/sys/blkptr.h b/zfs/include/sys/blkptr.h
index b720482a73fe..77b1b827ac37 100644
--- a/zfs/include/sys/blkptr.h
+++ b/zfs/include/sys/blkptr.h
@@ -30,6 +30,7 @@ extern "C" {
void encode_embedded_bp_compressed(blkptr_t *, void *,
enum zio_compress, int, int);
void decode_embedded_bp_compressed(const blkptr_t *, void *);
+int decode_embedded_bp(const blkptr_t *, void *, int);
#ifdef __cplusplus
}
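
decode_embedded_bp() complements the existing decode_embedded_bp_compressed(): it also decompresses the payload when needed. A hedged sketch of a call site; BP_IS_EMBEDDED() and BPE_GET_LSIZE() are existing block-pointer macros, and the wrapper itself is illustrative:

static int
read_embedded(const blkptr_t *bp, void *buf, int buflen)
{
        ASSERT(BP_IS_EMBEDDED(bp));
        ASSERT3S(BPE_GET_LSIZE(bp), <=, buflen);

        return (decode_embedded_bp(bp, buf, buflen));
}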
diff --git a/zfs/include/sys/bqueue.h b/zfs/include/sys/bqueue.h
new file mode 100644
index 000000000000..63722df1bbf3
--- /dev/null
+++ b/zfs/include/sys/bqueue.h
@@ -0,0 +1,54 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2014 by Delphix. All rights reserved.
+ */
+
+#ifndef _BQUEUE_H
+#define _BQUEUE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/zfs_context.h>
+
+typedef struct bqueue {
+ list_t bq_list;
+ kmutex_t bq_lock;
+ kcondvar_t bq_add_cv;
+ kcondvar_t bq_pop_cv;
+ uint64_t bq_size;
+ uint64_t bq_maxsize;
+ size_t bq_node_offset;
+} bqueue_t;
+
+typedef struct bqueue_node {
+ list_node_t bqn_node;
+ uint64_t bqn_size;
+} bqueue_node_t;
+
+
+int bqueue_init(bqueue_t *, uint64_t, size_t);
+void bqueue_destroy(bqueue_t *);
+void bqueue_enqueue(bqueue_t *, void *, uint64_t);
+void *bqueue_dequeue(bqueue_t *);
+boolean_t bqueue_empty(bqueue_t *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _BQUEUE_H */
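
bqueue is a new bounded blocking queue: bqueue_enqueue() sleeps once bq_maxsize is exceeded and bqueue_dequeue() sleeps while the queue is empty, so both ends must run in sleepable context. A usage sketch under the API above; the record type and queue bound are illustrative:

typedef struct send_record {
        bqueue_node_t   sr_node;        /* embedded link, no extra alloc */
        uint64_t        sr_payload;
} send_record_t;

static void
bqueue_example(void)
{
        bqueue_t q;
        send_record_t *rec;

        VERIFY0(bqueue_init(&q, 1024 * 1024,
            offsetof(send_record_t, sr_node)));

        rec = kmem_zalloc(sizeof (*rec), KM_SLEEP);
        bqueue_enqueue(&q, rec, sizeof (*rec));         /* may block if full */

        rec = bqueue_dequeue(&q);                       /* may block if empty */
        kmem_free(rec, sizeof (*rec));

        bqueue_destroy(&q);
}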
diff --git a/zfs/include/sys/crypto/Makefile.am b/zfs/include/sys/crypto/Makefile.am
new file mode 100644
index 000000000000..7f8156b8f4a7
--- /dev/null
+++ b/zfs/include/sys/crypto/Makefile.am
@@ -0,0 +1,20 @@
+COMMON_H = \
+ $(top_srcdir)/include/sys/crypto/api.h \
+ $(top_srcdir)/include/sys/crypto/common.h \
+ $(top_srcdir)/include/sys/crypto/icp.h
+
+KERNEL_H =
+
+USER_H =
+
+EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H)
+
+if CONFIG_USER
+libzfsdir = $(includedir)/libzfs/sys/crypto
+libzfs_HEADERS = $(COMMON_H) $(USER_H)
+endif
+
+if CONFIG_KERNEL
+kerneldir = @prefix@/src/zfs-$(VERSION)/include/sys/crypto
+kernel_HEADERS = $(COMMON_H) $(KERNEL_H)
+endif
diff --git a/zfs/include/sys/crypto/Makefile.in b/zfs/include/sys/crypto/Makefile.in
new file mode 100644
index 000000000000..ba51e5c537d1
--- /dev/null
+++ b/zfs/include/sys/crypto/Makefile.in
@@ -0,0 +1,791 @@
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+subdir = include/sys/crypto
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/config/always-arch.m4 \
+ $(top_srcdir)/config/always-no-bool-compare.m4 \
+ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \
+ $(top_srcdir)/config/dkms.m4 \
+ $(top_srcdir)/config/kernel-acl.m4 \
+ $(top_srcdir)/config/kernel-aio-fsync.m4 \
+ $(top_srcdir)/config/kernel-automount.m4 \
+ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \
+ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \
+ $(top_srcdir)/config/kernel-bdev-physical-size.m4 \
+ $(top_srcdir)/config/kernel-bdi.m4 \
+ $(top_srcdir)/config/kernel-bio-bvec-iter.m4 \
+ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \
+ $(top_srcdir)/config/kernel-bio-failfast.m4 \
+ $(top_srcdir)/config/kernel-bio-op.m4 \
+ $(top_srcdir)/config/kernel-bio-rw-barrier.m4 \
+ $(top_srcdir)/config/kernel-bio-rw-discard.m4 \
+ $(top_srcdir)/config/kernel-bio_set_dev.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-bdi.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-flush.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-unplug.m4 \
+ $(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \
+ $(top_srcdir)/config/kernel-blkdev-get.m4 \
+ $(top_srcdir)/config/kernel-block-device-operations-release-void.m4 \
+ $(top_srcdir)/config/kernel-clear-inode.m4 \
+ $(top_srcdir)/config/kernel-commit-metadata.m4 \
+ $(top_srcdir)/config/kernel-create-nameidata.m4 \
+ $(top_srcdir)/config/kernel-current-time.m4 \
+ $(top_srcdir)/config/kernel-current_bio_tail.m4 \
+ $(top_srcdir)/config/kernel-d-make-root.m4 \
+ $(top_srcdir)/config/kernel-d-obtain-alias.m4 \
+ $(top_srcdir)/config/kernel-d-prune-aliases.m4 \
+ $(top_srcdir)/config/kernel-declare-event-class.m4 \
+ $(top_srcdir)/config/kernel-dentry-operations.m4 \
+ $(top_srcdir)/config/kernel-dirty-inode.m4 \
+ $(top_srcdir)/config/kernel-discard-granularity.m4 \
+ $(top_srcdir)/config/kernel-elevator-change.m4 \
+ $(top_srcdir)/config/kernel-encode-fh-inode.m4 \
+ $(top_srcdir)/config/kernel-evict-inode.m4 \
+ $(top_srcdir)/config/kernel-fallocate.m4 \
+ $(top_srcdir)/config/kernel-file-dentry.m4 \
+ $(top_srcdir)/config/kernel-file-inode.m4 \
+ $(top_srcdir)/config/kernel-fmode-t.m4 \
+ $(top_srcdir)/config/kernel-follow-down-one.m4 \
+ $(top_srcdir)/config/kernel-fpu.m4 \
+ $(top_srcdir)/config/kernel-fsync.m4 \
+ $(top_srcdir)/config/kernel-generic_io_acct.m4 \
+ $(top_srcdir)/config/kernel-generic_readlink.m4 \
+ $(top_srcdir)/config/kernel-get-disk-ro.m4 \
+ $(top_srcdir)/config/kernel-get-gendisk.m4 \
+ $(top_srcdir)/config/kernel-get-link.m4 \
+ $(top_srcdir)/config/kernel-inode-getattr.m4 \
+ $(top_srcdir)/config/kernel-inode-set-flags.m4 \
+ $(top_srcdir)/config/kernel-insert-inode-locked.m4 \
+ $(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \
+ $(top_srcdir)/config/kernel-is_owner_or_cap.m4 \
+ $(top_srcdir)/config/kernel-kmap-atomic-args.m4 \
+ $(top_srcdir)/config/kernel-kuid-helpers.m4 \
+ $(top_srcdir)/config/kernel-lookup-bdev.m4 \
+ $(top_srcdir)/config/kernel-lookup-nameidata.m4 \
+ $(top_srcdir)/config/kernel-lseek-execute.m4 \
+ $(top_srcdir)/config/kernel-mk-request-fn.m4 \
+ $(top_srcdir)/config/kernel-mkdir-umode-t.m4 \
+ $(top_srcdir)/config/kernel-mod-param.m4 \
+ $(top_srcdir)/config/kernel-mount-nodev.m4 \
+ $(top_srcdir)/config/kernel-objtool.m4 \
+ $(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \
+ $(top_srcdir)/config/kernel-put-link.m4 \
+ $(top_srcdir)/config/kernel-rename.m4 \
+ $(top_srcdir)/config/kernel-security-inode-init.m4 \
+ $(top_srcdir)/config/kernel-set-nlink.m4 \
+ $(top_srcdir)/config/kernel-setattr-prepare.m4 \
+ $(top_srcdir)/config/kernel-sget-args.m4 \
+ $(top_srcdir)/config/kernel-show-options.m4 \
+ $(top_srcdir)/config/kernel-shrink.m4 \
+ $(top_srcdir)/config/kernel-submit_bio.m4 \
+ $(top_srcdir)/config/kernel-super-userns.m4 \
+ $(top_srcdir)/config/kernel-tmpfile.m4 \
+ $(top_srcdir)/config/kernel-truncate-range.m4 \
+ $(top_srcdir)/config/kernel-truncate-setsize.m4 \
+ $(top_srcdir)/config/kernel-vfs-iterate.m4 \
+ $(top_srcdir)/config/kernel-vfs-rw-iterate.m4 \
+ $(top_srcdir)/config/kernel-vm_node_stat.m4 \
+ $(top_srcdir)/config/kernel-xattr-handler.m4 \
+ $(top_srcdir)/config/kernel.m4 $(top_srcdir)/config/libtool.m4 \
+ $(top_srcdir)/config/ltoptions.m4 \
+ $(top_srcdir)/config/ltsugar.m4 \
+ $(top_srcdir)/config/ltversion.m4 \
+ $(top_srcdir)/config/lt~obsolete.m4 \
+ $(top_srcdir)/config/mount-helper.m4 \
+ $(top_srcdir)/config/toolchain-simd.m4 \
+ $(top_srcdir)/config/user-dracut.m4 \
+ $(top_srcdir)/config/user-frame-larger-than.m4 \
+ $(top_srcdir)/config/user-libattr.m4 \
+ $(top_srcdir)/config/user-libblkid.m4 \
+ $(top_srcdir)/config/user-libtirpc.m4 \
+ $(top_srcdir)/config/user-libudev.m4 \
+ $(top_srcdir)/config/user-libuuid.m4 \
+ $(top_srcdir)/config/user-makedev.m4 \
+ $(top_srcdir)/config/user-no-format-truncation.m4 \
+ $(top_srcdir)/config/user-runstatedir.m4 \
+ $(top_srcdir)/config/user-systemd.m4 \
+ $(top_srcdir)/config/user-sysvinit.m4 \
+ $(top_srcdir)/config/user-udev.m4 \
+ $(top_srcdir)/config/user-zlib.m4 $(top_srcdir)/config/user.m4 \
+ $(top_srcdir)/config/zfs-build.m4 \
+ $(top_srcdir)/config/zfs-meta.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__kernel_HEADERS_DIST) \
+ $(am__libzfs_HEADERS_DIST) $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/zfs_config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+SOURCES =
+DIST_SOURCES =
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__kernel_HEADERS_DIST = $(top_srcdir)/include/sys/crypto/api.h \
+ $(top_srcdir)/include/sys/crypto/common.h \
+ $(top_srcdir)/include/sys/crypto/icp.h
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+am__installdirs = "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)"
+am__libzfs_HEADERS_DIST = $(top_srcdir)/include/sys/crypto/api.h \
+ $(top_srcdir)/include/sys/crypto/common.h \
+ $(top_srcdir)/include/sys/crypto/icp.h
+HEADERS = $(kernel_HEADERS) $(libzfs_HEADERS)
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+ALIEN = @ALIEN@
+ALIEN_VERSION = @ALIEN_VERSION@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEBUG_CFLAGS = @DEBUG_CFLAGS@
+DEBUG_STACKFLAGS = @DEBUG_STACKFLAGS@
+DEBUG_ZFS = @DEBUG_ZFS@
+DEFAULT_INITCONF_DIR = @DEFAULT_INITCONF_DIR@
+DEFAULT_INIT_DIR = @DEFAULT_INIT_DIR@
+DEFAULT_INIT_SCRIPT = @DEFAULT_INIT_SCRIPT@
+DEFAULT_PACKAGE = @DEFAULT_PACKAGE@
+DEFINE_INITRAMFS = @DEFINE_INITRAMFS@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DPKG = @DPKG@
+DPKGBUILD = @DPKGBUILD@
+DPKGBUILD_VERSION = @DPKGBUILD_VERSION@
+DPKG_VERSION = @DPKG_VERSION@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+FRAME_LARGER_THAN = @FRAME_LARGER_THAN@
+GREP = @GREP@
+HAVE_ALIEN = @HAVE_ALIEN@
+HAVE_DPKG = @HAVE_DPKG@
+HAVE_DPKGBUILD = @HAVE_DPKGBUILD@
+HAVE_RPM = @HAVE_RPM@
+HAVE_RPMBUILD = @HAVE_RPMBUILD@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+KERNELCPPFLAGS = @KERNELCPPFLAGS@
+KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBATTR = @LIBATTR@
+LIBBLKID = @LIBBLKID@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTIRPC = @LIBTIRPC@
+LIBTIRPC_CFLAGS = @LIBTIRPC_CFLAGS@
+LIBTOOL = @LIBTOOL@
+LIBUDEV = @LIBUDEV@
+LIBUUID = @LIBUUID@
+LINUX = @LINUX@
+LINUX_OBJ = @LINUX_OBJ@
+LINUX_SYMBOLS = @LINUX_SYMBOLS@
+LINUX_VERSION = @LINUX_VERSION@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NO_BOOL_COMPARE = @NO_BOOL_COMPARE@
+NO_FORMAT_TRUNCATION = @NO_FORMAT_TRUNCATION@
+NO_UNUSED_BUT_SET_VARIABLE = @NO_UNUSED_BUT_SET_VARIABLE@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+QAT_OBJ = @QAT_OBJ@
+QAT_SRC = @QAT_SRC@
+QAT_SYMBOLS = @QAT_SYMBOLS@
+RANLIB = @RANLIB@
+RELEASE = @RELEASE@
+RM = @RM@
+RPM = @RPM@
+RPMBUILD = @RPMBUILD@
+RPMBUILD_VERSION = @RPMBUILD_VERSION@
+RPM_DEFINE_COMMON = @RPM_DEFINE_COMMON@
+RPM_DEFINE_DKMS = @RPM_DEFINE_DKMS@
+RPM_DEFINE_KMOD = @RPM_DEFINE_KMOD@
+RPM_DEFINE_UTIL = @RPM_DEFINE_UTIL@
+RPM_SPEC_DIR = @RPM_SPEC_DIR@
+RPM_VERSION = @RPM_VERSION@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPL = @SPL@
+SPL_OBJ = @SPL_OBJ@
+SPL_SYMBOLS = @SPL_SYMBOLS@
+SPL_VERSION = @SPL_VERSION@
+SRPM_DEFINE_COMMON = @SRPM_DEFINE_COMMON@
+SRPM_DEFINE_DKMS = @SRPM_DEFINE_DKMS@
+SRPM_DEFINE_KMOD = @SRPM_DEFINE_KMOD@
+SRPM_DEFINE_UTIL = @SRPM_DEFINE_UTIL@
+STRIP = @STRIP@
+TARGET_ASM_DIR = @TARGET_ASM_DIR@
+VENDOR = @VENDOR@
+VERSION = @VERSION@
+ZFS_CONFIG = @ZFS_CONFIG@
+ZFS_INIT_SYSTEMD = @ZFS_INIT_SYSTEMD@
+ZFS_INIT_SYSV = @ZFS_INIT_SYSV@
+ZFS_META_ALIAS = @ZFS_META_ALIAS@
+ZFS_META_AUTHOR = @ZFS_META_AUTHOR@
+ZFS_META_DATA = @ZFS_META_DATA@
+ZFS_META_LICENSE = @ZFS_META_LICENSE@
+ZFS_META_LT_AGE = @ZFS_META_LT_AGE@
+ZFS_META_LT_CURRENT = @ZFS_META_LT_CURRENT@
+ZFS_META_LT_REVISION = @ZFS_META_LT_REVISION@
+ZFS_META_NAME = @ZFS_META_NAME@
+ZFS_META_RELEASE = @ZFS_META_RELEASE@
+ZFS_META_VERSION = @ZFS_META_VERSION@
+ZFS_MODULE_LOAD = @ZFS_MODULE_LOAD@
+ZLIB = @ZLIB@
+ZONENAME = @ZONENAME@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dracutdir = @dracutdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+modulesloaddir = @modulesloaddir@
+mounthelperdir = @mounthelperdir@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+systemdpresetdir = @systemdpresetdir@
+systemdunitdir = @systemdunitdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+udevdir = @udevdir@
+udevruledir = @udevruledir@
+COMMON_H = \
+ $(top_srcdir)/include/sys/crypto/api.h \
+ $(top_srcdir)/include/sys/crypto/common.h \
+ $(top_srcdir)/include/sys/crypto/icp.h
+
+KERNEL_H =
+USER_H =
+EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H)
+@CONFIG_USER_TRUE@libzfsdir = $(includedir)/libzfs/sys/crypto
+@CONFIG_USER_TRUE@libzfs_HEADERS = $(COMMON_H) $(USER_H)
+@CONFIG_KERNEL_TRUE@kerneldir = @prefix@/src/zfs-$(VERSION)/include/sys/crypto
+@CONFIG_KERNEL_TRUE@kernel_HEADERS = $(COMMON_H) $(KERNEL_H)
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/sys/crypto/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu include/sys/crypto/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+install-kernelHEADERS: $(kernel_HEADERS)
+ @$(NORMAL_INSTALL)
+ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(kerneldir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(kerneldir)" || exit 1; \
+ fi; \
+ for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; \
+ done | $(am__base_list) | \
+ while read files; do \
+ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(kerneldir)'"; \
+ $(INSTALL_HEADER) $$files "$(DESTDIR)$(kerneldir)" || exit $$?; \
+ done
+
+uninstall-kernelHEADERS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \
+ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+ dir='$(DESTDIR)$(kerneldir)'; $(am__uninstall_files_from_dir)
+install-libzfsHEADERS: $(libzfs_HEADERS)
+ @$(NORMAL_INSTALL)
+ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(libzfsdir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(libzfsdir)" || exit 1; \
+ fi; \
+ for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; \
+ done | $(am__base_list) | \
+ while read files; do \
+ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(libzfsdir)'"; \
+ $(INSTALL_HEADER) $$files "$(DESTDIR)$(libzfsdir)" || exit $$?; \
+ done
+
+uninstall-libzfsHEADERS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \
+ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+ dir='$(DESTDIR)$(libzfsdir)'; $(am__uninstall_files_from_dir)
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(HEADERS)
+installdirs:
+ for dir in "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use;"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-kernelHEADERS install-libzfsHEADERS
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-kernelHEADERS uninstall-libzfsHEADERS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+ clean-libtool cscopelist-am ctags ctags-am distclean \
+ distclean-generic distclean-libtool distclean-tags distdir dvi \
+ dvi-am html html-am info info-am install install-am \
+ install-data install-data-am install-dvi install-dvi-am \
+ install-exec install-exec-am install-html install-html-am \
+ install-info install-info-am install-kernelHEADERS \
+ install-libzfsHEADERS install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-generic \
+ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \
+ uninstall-am uninstall-kernelHEADERS uninstall-libzfsHEADERS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/zfs/include/sys/crypto/api.h b/zfs/include/sys/crypto/api.h
new file mode 100644
index 000000000000..7c3c465513de
--- /dev/null
+++ b/zfs/include/sys/crypto/api.h
@@ -0,0 +1,425 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_CRYPTO_API_H
+#define _SYS_CRYPTO_API_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+
+typedef long crypto_req_id_t;
+typedef void *crypto_bc_t;
+typedef void *crypto_context_t;
+typedef void *crypto_ctx_template_t;
+
+typedef uint32_t crypto_call_flag_t;
+
+/* crypto_call_flag's values */
+#define CRYPTO_ALWAYS_QUEUE 0x00000001 /* ALWAYS queue the req. */
+#define CRYPTO_NOTIFY_OPDONE 0x00000002 /* Notify intermediate steps */
+#define CRYPTO_SKIP_REQID 0x00000004 /* Skip request ID generation */
+#define CRYPTO_RESTRICTED 0x00000008 /* cannot use restricted prov */
+
+typedef struct {
+ crypto_call_flag_t cr_flag;
+ void (*cr_callback_func)(void *, int);
+ void *cr_callback_arg;
+ crypto_req_id_t cr_reqid;
+} crypto_call_req_t;
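+
+/*
+ * A minimal submission sketch, assuming a hypothetical my_done()
+ * completion callback (its first argument receives cr_callback_arg,
+ * its second a CRYPTO_* status) and my_state consumer data; passing a
+ * NULL crypto_call_req_t to the consumer entry points below requests
+ * synchronous operation instead:
+ *
+ *	static void my_done(void *arg, int status) { ... }
+ *
+ *	crypto_call_req_t cr;
+ *
+ *	cr.cr_flag = CRYPTO_ALWAYS_QUEUE;
+ *	cr.cr_callback_func = my_done;
+ *	cr.cr_callback_arg = my_state;
+ */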
+
+/*
+ * Returns the mechanism type corresponding to a mechanism name.
+ */
+
+#define CRYPTO_MECH_INVALID ((uint64_t)-1)
+extern crypto_mech_type_t crypto_mech2id(crypto_mech_name_t name);
+
+/*
+ * Create and destroy context templates.
+ */
+extern int crypto_create_ctx_template(crypto_mechanism_t *mech,
+ crypto_key_t *key, crypto_ctx_template_t *tmpl, int kmflag);
+extern void crypto_destroy_ctx_template(crypto_ctx_template_t tmpl);
+
+/*
+ * Single and multi-part digest operations.
+ */
+extern int crypto_digest(crypto_mechanism_t *mech, crypto_data_t *data,
+ crypto_data_t *digest, crypto_call_req_t *cr);
+extern int crypto_digest_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_data_t *, crypto_data_t *,
+ crypto_call_req_t *);
+extern int crypto_digest_init(crypto_mechanism_t *mech, crypto_context_t *ctxp,
+ crypto_call_req_t *cr);
+extern int crypto_digest_init_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_context_t *, crypto_call_req_t *);
+extern int crypto_digest_update(crypto_context_t ctx, crypto_data_t *data,
+ crypto_call_req_t *cr);
+extern int crypto_digest_final(crypto_context_t ctx, crypto_data_t *digest,
+ crypto_call_req_t *cr);
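+
+/*
+ * A minimal multi-part digest sketch, assuming data and digest are
+ * initialized crypto_data_t buffers (see sys/crypto/common.h); plain
+ * hashes need no mechanism parameters, so mech is zeroed:
+ *
+ *	crypto_mechanism_t mech = { 0 };
+ *	crypto_context_t ctx;
+ *
+ *	mech.cm_type = crypto_mech2id(SUN_CKM_SHA256);
+ *	if (mech.cm_type != CRYPTO_MECH_INVALID &&
+ *	    crypto_digest_init(&mech, &ctx, NULL) == CRYPTO_SUCCESS) {
+ *		(void) crypto_digest_update(ctx, &data, NULL);
+ *		(void) crypto_digest_final(ctx, &digest, NULL);
+ *	}
+ */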
+
+/*
+ * Single and multi-part MAC operations.
+ */
+extern int crypto_mac(crypto_mechanism_t *mech, crypto_data_t *data,
+ crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *mac,
+ crypto_call_req_t *cr);
+extern int crypto_mac_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_data_t *, crypto_key_t *,
+ crypto_ctx_template_t, crypto_data_t *, crypto_call_req_t *);
+extern int crypto_mac_verify(crypto_mechanism_t *mech, crypto_data_t *data,
+ crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *mac,
+ crypto_call_req_t *cr);
+extern int crypto_mac_verify_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_data_t *, crypto_key_t *,
+ crypto_ctx_template_t, crypto_data_t *, crypto_call_req_t *);
+extern int crypto_mac_init(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp, crypto_call_req_t *cr);
+extern int crypto_mac_init_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_ctx_template_t,
+ crypto_context_t *, crypto_call_req_t *);
+extern int crypto_mac_update(crypto_context_t ctx, crypto_data_t *data,
+ crypto_call_req_t *cr);
+extern int crypto_mac_final(crypto_context_t ctx, crypto_data_t *data,
+ crypto_call_req_t *cr);
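+
+/*
+ * A single-part MAC sketch with a precomputed context template; mech,
+ * key, data and mac are assumed initialized, and KM_SLEEP is the
+ * usual SPL kmem allocation flag:
+ *
+ *	crypto_ctx_template_t tmpl = NULL;
+ *	int ret;
+ *
+ *	(void) crypto_create_ctx_template(&mech, &key, &tmpl, KM_SLEEP);
+ *	ret = crypto_mac(&mech, &data, &key, tmpl, &mac, NULL);
+ *	crypto_destroy_ctx_template(tmpl);
+ */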
+
+/*
+ * Single and multi-part sign with private key operations.
+ */
+extern int crypto_sign(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_data_t *data, crypto_ctx_template_t tmpl,
+ crypto_data_t *signature, crypto_call_req_t *cr);
+extern int crypto_sign_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
+ crypto_ctx_template_t, crypto_data_t *, crypto_call_req_t *);
+extern int crypto_sign_init(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp, crypto_call_req_t *cr);
+extern int crypto_sign_init_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_ctx_template_t,
+ crypto_context_t *, crypto_call_req_t *);
+extern int crypto_sign_update(crypto_context_t ctx, crypto_data_t *data,
+ crypto_call_req_t *cr);
+extern int crypto_sign_final(crypto_context_t ctx, crypto_data_t *signature,
+ crypto_call_req_t *cr);
+extern int crypto_sign_recover_init_prov(crypto_provider_t,
+ crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *,
+ crypto_ctx_template_t tmpl, crypto_context_t *, crypto_call_req_t *);
+extern int crypto_sign_recover(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_data_t *data, crypto_ctx_template_t tmpl, crypto_data_t *signature,
+ crypto_call_req_t *cr);
+extern int crypto_sign_recover_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
+ crypto_ctx_template_t, crypto_data_t *, crypto_call_req_t *);
+
+/*
+ * Single and multi-part verify with public key operations.
+ */
+extern int crypto_verify(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_data_t *data, crypto_ctx_template_t tmpl, crypto_data_t *signature,
+ crypto_call_req_t *cr);
+extern int crypto_verify_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
+ crypto_ctx_template_t, crypto_data_t *, crypto_call_req_t *);
+extern int crypto_verify_init(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp, crypto_call_req_t *cr);
+extern int crypto_verify_init_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_ctx_template_t,
+ crypto_context_t *, crypto_call_req_t *);
+extern int crypto_verify_update(crypto_context_t ctx, crypto_data_t *data,
+ crypto_call_req_t *cr);
+extern int crypto_verify_final(crypto_context_t ctx, crypto_data_t *signature,
+ crypto_call_req_t *cr);
+extern int crypto_verify_recover_init_prov(crypto_provider_t,
+ crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *,
+ crypto_ctx_template_t tmpl, crypto_context_t *, crypto_call_req_t *);
+extern int crypto_verify_recover(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_data_t *signature, crypto_ctx_template_t tmpl, crypto_data_t *data,
+ crypto_call_req_t *cr);
+extern int crypto_verify_recover_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
+ crypto_ctx_template_t, crypto_data_t *, crypto_call_req_t *);
+
+/*
+ * Single and multi-part encryption operations.
+ */
+extern int crypto_encrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext,
+ crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *ciphertext,
+ crypto_call_req_t *cr);
+extern int crypto_encrypt_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_data_t *, crypto_key_t *,
+ crypto_ctx_template_t, crypto_data_t *, crypto_call_req_t *);
+extern int crypto_encrypt_init(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp, crypto_call_req_t *cr);
+extern int crypto_encrypt_init_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_ctx_template_t,
+ crypto_context_t *, crypto_call_req_t *);
+extern int crypto_encrypt_update(crypto_context_t ctx,
+ crypto_data_t *plaintext, crypto_data_t *ciphertext,
+ crypto_call_req_t *cr);
+extern int crypto_encrypt_final(crypto_context_t ctx,
+ crypto_data_t *ciphertext, crypto_call_req_t *cr);
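+
+/*
+ * A single-part encryption sketch; mech carries any cipher parameters
+ * (for example a CK_AES_GCM_PARAMS from sys/crypto/common.h), no
+ * context template is used, and key, pt and ct are assumed
+ * initialized:
+ *
+ *	ret = crypto_encrypt(&mech, &pt, &key, NULL, &ct, NULL);
+ */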
+
+/*
+ * Single and multi-part decryption operations.
+ */
+extern int crypto_decrypt(crypto_mechanism_t *mech, crypto_data_t *ciphertext,
+ crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *plaintext,
+ crypto_call_req_t *cr);
+extern int crypto_decrypt_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_data_t *, crypto_key_t *,
+ crypto_ctx_template_t, crypto_data_t *, crypto_call_req_t *);
+extern int crypto_decrypt_init(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *cr);
+extern int crypto_decrypt_init_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_ctx_template_t,
+ crypto_context_t *, crypto_call_req_t *);
+extern int crypto_decrypt_update(crypto_context_t ctx,
+ crypto_data_t *ciphertext, crypto_data_t *plaintext,
+ crypto_call_req_t *cr);
+extern int crypto_decrypt_final(crypto_context_t ctx, crypto_data_t *plaintext,
+ crypto_call_req_t *cr);
+
+/*
+ * Single and multi-part encrypt/MAC dual operations.
+ */
+extern int crypto_encrypt_mac(crypto_mechanism_t *encr_mech,
+ crypto_mechanism_t *mac_mech, crypto_data_t *pt,
+ crypto_key_t *encr_key, crypto_key_t *mac_key,
+ crypto_ctx_template_t encr_tmpl, crypto_ctx_template_t mac_tmpl,
+ crypto_dual_data_t *ct, crypto_data_t *mac, crypto_call_req_t *cr);
+extern int crypto_encrypt_mac_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_mechanism_t *, crypto_data_t *,
+ crypto_key_t *, crypto_key_t *, crypto_ctx_template_t,
+ crypto_ctx_template_t, crypto_dual_data_t *, crypto_data_t *,
+ crypto_call_req_t *);
+extern int crypto_encrypt_mac_init(crypto_mechanism_t *encr_mech,
+ crypto_mechanism_t *mac_mech, crypto_key_t *encr_key,
+ crypto_key_t *mac_key, crypto_ctx_template_t encr_tmpl,
+ crypto_ctx_template_t mac_tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *cr);
+extern int crypto_encrypt_mac_init_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_mechanism_t *, crypto_key_t *, crypto_key_t *,
+ crypto_ctx_template_t, crypto_ctx_template_t, crypto_context_t *,
+ crypto_call_req_t *);
+extern int crypto_encrypt_mac_update(crypto_context_t ctx,
+ crypto_data_t *pt, crypto_dual_data_t *ct, crypto_call_req_t *cr);
+extern int crypto_encrypt_mac_final(crypto_context_t ctx,
+ crypto_dual_data_t *ct, crypto_data_t *mac, crypto_call_req_t *cr);
+
+/*
+ * Single and multi-part MAC/decrypt dual operations.
+ */
+extern int crypto_mac_decrypt(crypto_mechanism_t *mac_mech,
+ crypto_mechanism_t *decr_mech, crypto_dual_data_t *ct,
+ crypto_key_t *mac_key, crypto_key_t *decr_key,
+ crypto_ctx_template_t mac_tmpl, crypto_ctx_template_t decr_tmpl,
+ crypto_data_t *mac, crypto_data_t *pt, crypto_call_req_t *cr);
+extern int crypto_mac_decrypt_prov(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *mac_mech, crypto_mechanism_t *decr_mech,
+ crypto_dual_data_t *ct, crypto_key_t *mac_key, crypto_key_t *decr_key,
+ crypto_ctx_template_t mac_tmpl, crypto_ctx_template_t decr_tmpl,
+ crypto_data_t *mac, crypto_data_t *pt, crypto_call_req_t *cr);
+extern int crypto_mac_verify_decrypt(crypto_mechanism_t *mac_mech,
+ crypto_mechanism_t *decr_mech, crypto_dual_data_t *ct,
+ crypto_key_t *mac_key, crypto_key_t *decr_key,
+ crypto_ctx_template_t mac_tmpl, crypto_ctx_template_t decr_tmpl,
+ crypto_data_t *mac, crypto_data_t *pt, crypto_call_req_t *cr);
+extern int crypto_mac_verify_decrypt_prov(crypto_provider_t,
+ crypto_session_id_t, crypto_mechanism_t *mac_mech,
+ crypto_mechanism_t *decr_mech, crypto_dual_data_t *ct,
+ crypto_key_t *mac_key, crypto_key_t *decr_key,
+ crypto_ctx_template_t mac_tmpl, crypto_ctx_template_t decr_tmpl,
+ crypto_data_t *mac, crypto_data_t *pt, crypto_call_req_t *cr);
+extern int crypto_mac_decrypt_init(crypto_mechanism_t *mac_mech,
+ crypto_mechanism_t *decr_mech, crypto_key_t *mac_key,
+ crypto_key_t *decr_key, crypto_ctx_template_t mac_tmpl,
+ crypto_ctx_template_t decr_tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *cr);
+extern int crypto_mac_decrypt_init_prov(crypto_provider_t,
+ crypto_session_id_t, crypto_mechanism_t *mac_mech,
+ crypto_mechanism_t *decr_mech, crypto_key_t *mac_key,
+ crypto_key_t *decr_key, crypto_ctx_template_t mac_tmpl,
+ crypto_ctx_template_t decr_tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *cr);
+extern int crypto_mac_decrypt_update(crypto_context_t ctx,
+ crypto_dual_data_t *ct, crypto_data_t *pt, crypto_call_req_t *cr);
+extern int crypto_mac_decrypt_final(crypto_context_t ctx, crypto_data_t *mac,
+ crypto_data_t *pt, crypto_call_req_t *cr);
+
+/* Session Management */
+extern int crypto_session_open(crypto_provider_t, crypto_session_id_t *,
+ crypto_call_req_t *);
+extern int crypto_session_close(crypto_provider_t, crypto_session_id_t,
+ crypto_call_req_t *);
+extern int crypto_session_login(crypto_provider_t, crypto_session_id_t,
+ crypto_user_type_t, char *, size_t, crypto_call_req_t *);
+extern int crypto_session_logout(crypto_provider_t, crypto_session_id_t,
+ crypto_call_req_t *);
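+
+/*
+ * A synchronous session sketch, assuming a provider handle obtained
+ * from crypto_get_provider() (declared further below):
+ *
+ *	crypto_session_id_t sid;
+ *
+ *	if (crypto_session_open(provider, &sid, NULL) == CRYPTO_SUCCESS) {
+ *		...
+ *		(void) crypto_session_close(provider, sid, NULL);
+ *	}
+ */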
+
+/* Object Management */
+extern int crypto_object_copy(crypto_provider_t, crypto_session_id_t,
+ crypto_object_id_t, crypto_object_attribute_t *, uint_t,
+ crypto_object_id_t *, crypto_call_req_t *);
+extern int crypto_object_create(crypto_provider_t, crypto_session_id_t,
+ crypto_object_attribute_t *, uint_t, crypto_object_id_t *,
+ crypto_call_req_t *);
+extern int crypto_object_destroy(crypto_provider_t, crypto_session_id_t,
+ crypto_object_id_t, crypto_call_req_t *);
+extern int crypto_object_get_attribute_value(crypto_provider_t,
+ crypto_session_id_t, crypto_object_id_t, crypto_object_attribute_t *,
+ uint_t, crypto_call_req_t *);
+extern int crypto_object_get_size(crypto_provider_t, crypto_session_id_t,
+ crypto_object_id_t, size_t *, crypto_call_req_t *);
+extern int crypto_object_find_final(crypto_provider_t, void *,
+ crypto_call_req_t *);
+extern int crypto_object_find_init(crypto_provider_t, crypto_session_id_t,
+ crypto_object_attribute_t *, uint_t, void **, crypto_call_req_t *);
+extern int crypto_object_find(crypto_provider_t, void *, crypto_object_id_t *,
+ uint_t *, uint_t, crypto_call_req_t *);
+extern int crypto_object_set_attribute_value(crypto_provider_t,
+ crypto_session_id_t, crypto_object_id_t, crypto_object_attribute_t *,
+ uint_t, crypto_call_req_t *);
+
+/* Key Management */
+extern int crypto_key_derive(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_object_attribute_t *,
+ uint_t, crypto_object_id_t *, crypto_call_req_t *);
+extern int crypto_key_generate(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_object_attribute_t *, uint_t,
+ crypto_object_id_t *, crypto_call_req_t *);
+extern int crypto_key_generate_pair(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_object_attribute_t *, uint_t,
+ crypto_object_attribute_t *, uint_t, crypto_object_id_t *,
+ crypto_object_id_t *, crypto_call_req_t *);
+extern int crypto_key_unwrap(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, uchar_t *, size_t *,
+ crypto_object_attribute_t *, uint_t, crypto_object_id_t *,
+ crypto_call_req_t *);
+extern int crypto_key_wrap(crypto_provider_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_object_id_t *, uchar_t *,
+ size_t *, crypto_call_req_t *);
+extern int crypto_key_check_prov(crypto_provider_t, crypto_mechanism_t *mech,
+ crypto_key_t *key);
+extern int crypto_key_check(crypto_mechanism_t *mech, crypto_key_t *key);
+
+
+/*
+ * Routines to cancel a single asynchronous request or all asynchronous
+ * requests associated with a particular context.
+ */
+extern void crypto_cancel_req(crypto_req_id_t req);
+extern void crypto_cancel_ctx(crypto_context_t ctx);
+
+/*
+ * crypto_get_mech_list(9F) allocates and returns the list of currently
+ * supported cryptographic mechanisms.
+ */
+extern crypto_mech_name_t *crypto_get_mech_list(uint_t *count, int kmflag);
+extern void crypto_free_mech_list(crypto_mech_name_t *mech_names,
+ uint_t count);
+
+extern crypto_provider_t crypto_get_provider(char *, char *, char *);
+extern int crypto_get_provinfo(crypto_provider_t, crypto_provider_ext_info_t *);
+extern void crypto_release_provider(crypto_provider_t);
+
+/*
+ * A kernel consumer can request to be notified when some particular event
+ * occurs. The valid events, callback function type, and functions to
+ * be called to register or unregister for notification are defined below.
+ */
+
+#define CRYPTO_EVENT_MECHS_CHANGED 0x00000001
+#define CRYPTO_EVENT_PROVIDER_REGISTERED 0x00000002
+#define CRYPTO_EVENT_PROVIDER_UNREGISTERED 0x00000004
+
+typedef enum {
+ CRYPTO_MECH_ADDED = 1,
+ CRYPTO_MECH_REMOVED
+} crypto_event_change_t;
+
+/* The event_arg argument structure for the CRYPTO_EVENT_MECHS_CHANGED event */
+typedef struct crypto_notify_event_change {
+ crypto_mech_name_t ec_mech_name;
+ crypto_provider_type_t ec_provider_type;
+ crypto_event_change_t ec_change;
+} crypto_notify_event_change_t;
+
+typedef void *crypto_notify_handle_t;
+typedef void (*crypto_notify_callback_t)(uint32_t event_mask, void *event_arg);
+
+extern crypto_notify_handle_t crypto_notify_events(
+ crypto_notify_callback_t nf, uint32_t event_mask);
+extern void crypto_unnotify_events(crypto_notify_handle_t);
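+
+/*
+ * A registration sketch, assuming a hypothetical my_event_cb() that
+ * re-queries the mechanism list when it changes:
+ *
+ *	static void
+ *	my_event_cb(uint32_t mask, void *arg)
+ *	{
+ *		if (mask & CRYPTO_EVENT_MECHS_CHANGED)
+ *			...
+ *	}
+ *
+ *	crypto_notify_handle_t handle;
+ *
+ *	handle = crypto_notify_events(my_event_cb,
+ *	    CRYPTO_EVENT_MECHS_CHANGED);
+ *	...
+ *	crypto_unnotify_events(handle);
+ */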
+
+/*
+ * crypto_bufcall(9F) group of routines.
+ */
+extern crypto_bc_t crypto_bufcall_alloc(void);
+extern int crypto_bufcall_free(crypto_bc_t bc);
+extern int crypto_bufcall(crypto_bc_t bc, void (*func)(void *arg), void *arg);
+extern int crypto_unbufcall(crypto_bc_t bc);
+
+/*
+ * To obtain the list of key size ranges supported by a mechanism.
+ */
+
+#define CRYPTO_MECH_USAGE_ENCRYPT 0x00000001
+#define CRYPTO_MECH_USAGE_DECRYPT 0x00000002
+#define CRYPTO_MECH_USAGE_MAC 0x00000004
+
+typedef uint32_t crypto_mech_usage_t;
+
+typedef struct crypto_mechanism_info {
+ size_t mi_min_key_size;
+ size_t mi_max_key_size;
+ crypto_keysize_unit_t mi_keysize_unit; /* for mi_xxx_key_size */
+ crypto_mech_usage_t mi_usage;
+} crypto_mechanism_info_t;
+
+#ifdef _SYSCALL32
+
+typedef struct crypto_mechanism_info32 {
+ size32_t mi_min_key_size;
+ size32_t mi_max_key_size;
+ crypto_keysize_unit_t mi_keysize_unit; /* for mi_xxx_key_size */
+ crypto_mech_usage_t mi_usage;
+} crypto_mechanism_info32_t;
+
+#endif /* _SYSCALL32 */
+
+extern int crypto_get_all_mech_info(crypto_mech_type_t,
+ crypto_mechanism_info_t **, uint_t *, int);
+extern void crypto_free_all_mech_info(crypto_mechanism_info_t *, uint_t);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_CRYPTO_API_H */
diff --git a/zfs/include/sys/crypto/common.h b/zfs/include/sys/crypto/common.h
new file mode 100644
index 000000000000..a4f9d9848c23
--- /dev/null
+++ b/zfs/include/sys/crypto/common.h
@@ -0,0 +1,583 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
+
+#ifndef _SYS_CRYPTO_COMMON_H
+#define _SYS_CRYPTO_COMMON_H
+
+/*
+ * Header file for the common data structures of the cryptographic framework
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/zfs_context.h>
+
+/* Cryptographic Mechanisms */
+
+#define CRYPTO_MAX_MECH_NAME 32
+typedef char crypto_mech_name_t[CRYPTO_MAX_MECH_NAME];
+
+typedef uint64_t crypto_mech_type_t;
+
+typedef struct crypto_mechanism {
+ crypto_mech_type_t cm_type; /* mechanism type */
+ caddr_t cm_param; /* mech. parameter */
+ size_t cm_param_len; /* mech. parameter len */
+} crypto_mechanism_t;
+
+#ifdef _SYSCALL32
+
+typedef struct crypto_mechanism32 {
+ crypto_mech_type_t cm_type; /* mechanism type */
+ caddr32_t cm_param; /* mech. parameter */
+ size32_t cm_param_len; /* mech. parameter len */
+} crypto_mechanism32_t;
+
+#endif /* _SYSCALL32 */
+
+/* CK_AES_CTR_PARAMS provides parameters to the CKM_AES_CTR mechanism */
+typedef struct CK_AES_CTR_PARAMS {
+ ulong_t ulCounterBits;
+ uint8_t cb[16];
+} CK_AES_CTR_PARAMS;
+
+/* CK_AES_CCM_PARAMS provides parameters to the CKM_AES_CCM mechanism */
+typedef struct CK_AES_CCM_PARAMS {
+ ulong_t ulMACSize;
+ ulong_t ulNonceSize;
+ ulong_t ulAuthDataSize;
+ ulong_t ulDataSize; /* used for plaintext or ciphertext */
+ uchar_t *nonce;
+ uchar_t *authData;
+} CK_AES_CCM_PARAMS;
+
+/* CK_AES_GCM_PARAMS provides parameters to the CKM_AES_GCM mechanism */
+typedef struct CK_AES_GCM_PARAMS {
+ uchar_t *pIv;
+ ulong_t ulIvLen;
+ ulong_t ulIvBits;
+ uchar_t *pAAD;
+ ulong_t ulAADLen;
+ ulong_t ulTagBits;
+} CK_AES_GCM_PARAMS;
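+
+/*
+ * A parameter-setup sketch; iv, ivlen, aad and aadlen are assumed
+ * caller-supplied, a 128-bit tag is a typical choice, and
+ * CRYPTO_BYTES2BITS() is defined later in this header:
+ *
+ *	CK_AES_GCM_PARAMS gcmp;
+ *	crypto_mechanism_t mech;
+ *
+ *	gcmp.pIv = iv;
+ *	gcmp.ulIvLen = ivlen;
+ *	gcmp.ulIvBits = CRYPTO_BYTES2BITS(ivlen);
+ *	gcmp.pAAD = aad;
+ *	gcmp.ulAADLen = aadlen;
+ *	gcmp.ulTagBits = 128;
+ *
+ *	mech.cm_type = crypto_mech2id(SUN_CKM_AES_GCM);
+ *	mech.cm_param = (caddr_t)&gcmp;
+ *	mech.cm_param_len = sizeof (gcmp);
+ */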
+
+/* CK_AES_GMAC_PARAMS provides parameters to the CKM_AES_GMAC mechanism */
+typedef struct CK_AES_GMAC_PARAMS {
+ uchar_t *pIv;
+ uchar_t *pAAD;
+ ulong_t ulAADLen;
+} CK_AES_GMAC_PARAMS;
+
+/*
+ * CK_ECDH1_DERIVE_PARAMS provides the parameters to the
+ * CKM_ECDH1_KEY_DERIVE mechanism
+ */
+typedef struct CK_ECDH1_DERIVE_PARAMS {
+ ulong_t kdf;
+ ulong_t ulSharedDataLen;
+ uchar_t *pSharedData;
+ ulong_t ulPublicDataLen;
+ uchar_t *pPublicData;
+} CK_ECDH1_DERIVE_PARAMS;
+
+#ifdef _SYSCALL32
+
+/* needed for 32-bit applications running on 64-bit kernels */
+typedef struct CK_AES_CTR_PARAMS32 {
+ uint32_t ulCounterBits;
+ uint8_t cb[16];
+} CK_AES_CTR_PARAMS32;
+
+/* needed for 32-bit applications running on 64-bit kernels */
+typedef struct CK_AES_CCM_PARAMS32 {
+ uint32_t ulMACSize;
+ uint32_t ulNonceSize;
+ uint32_t ulAuthDataSize;
+ uint32_t ulDataSize;
+ caddr32_t nonce;
+ caddr32_t authData;
+} CK_AES_CCM_PARAMS32;
+
+/* needed for 32-bit applications running on 64-bit kernels */
+typedef struct CK_AES_GCM_PARAMS32 {
+ caddr32_t pIv;
+ uint32_t ulIvLen;
+ uint32_t ulIvBits;
+ caddr32_t pAAD;
+ uint32_t ulAADLen;
+ uint32_t ulTagBits;
+} CK_AES_GCM_PARAMS32;
+
+/* needed for 32-bit applications running on 64-bit kernels */
+typedef struct CK_AES_GMAC_PARAMS32 {
+ caddr32_t pIv;
+ caddr32_t pAAD;
+ uint32_t ulAADLen;
+} CK_AES_GMAC_PARAMS32;
+
+typedef struct CK_ECDH1_DERIVE_PARAMS32 {
+ uint32_t kdf;
+ uint32_t ulSharedDataLen;
+ caddr32_t pSharedData;
+ uint32_t ulPublicDataLen;
+ caddr32_t pPublicData;
+} CK_ECDH1_DERIVE_PARAMS32;
+
+#endif /* _SYSCALL32 */
+
+/*
+ * The measurement unit bit flag for a mechanism's minimum or maximum key size.
+ * The units are mechanism dependent. They can be in bits or in bytes.
+ */
+typedef uint32_t crypto_keysize_unit_t;
+
+/*
+ * The following bit flags are valid in cm_mech_flags field in
+ * the crypto_mech_info_t structure of the SPI.
+ *
+ * Only the first two bit flags are valid in mi_keysize_unit
+ * field in the crypto_mechanism_info_t structure of the API.
+ */
+#define CRYPTO_KEYSIZE_UNIT_IN_BITS 0x00000001
+#define CRYPTO_KEYSIZE_UNIT_IN_BYTES 0x00000002
+#define CRYPTO_CAN_SHARE_OPSTATE 0x00000004 /* supports sharing */
+
+
+/* Mechanisms supported out-of-the-box */
+#define SUN_CKM_MD4 "CKM_MD4"
+#define SUN_CKM_MD5 "CKM_MD5"
+#define SUN_CKM_MD5_HMAC "CKM_MD5_HMAC"
+#define SUN_CKM_MD5_HMAC_GENERAL "CKM_MD5_HMAC_GENERAL"
+#define SUN_CKM_SHA1 "CKM_SHA_1"
+#define SUN_CKM_SHA1_HMAC "CKM_SHA_1_HMAC"
+#define SUN_CKM_SHA1_HMAC_GENERAL "CKM_SHA_1_HMAC_GENERAL"
+#define SUN_CKM_SHA256 "CKM_SHA256"
+#define SUN_CKM_SHA256_HMAC "CKM_SHA256_HMAC"
+#define SUN_CKM_SHA256_HMAC_GENERAL "CKM_SHA256_HMAC_GENERAL"
+#define SUN_CKM_SHA384 "CKM_SHA384"
+#define SUN_CKM_SHA384_HMAC "CKM_SHA384_HMAC"
+#define SUN_CKM_SHA384_HMAC_GENERAL "CKM_SHA384_HMAC_GENERAL"
+#define SUN_CKM_SHA512 "CKM_SHA512"
+#define SUN_CKM_SHA512_HMAC "CKM_SHA512_HMAC"
+#define SUN_CKM_SHA512_HMAC_GENERAL "CKM_SHA512_HMAC_GENERAL"
+#define SUN_CKM_SHA512_224 "CKM_SHA512_224"
+#define SUN_CKM_SHA512_256 "CKM_SHA512_256"
+#define SUN_CKM_DES_CBC "CKM_DES_CBC"
+#define SUN_CKM_DES3_CBC "CKM_DES3_CBC"
+#define SUN_CKM_DES_ECB "CKM_DES_ECB"
+#define SUN_CKM_DES3_ECB "CKM_DES3_ECB"
+#define SUN_CKM_BLOWFISH_CBC "CKM_BLOWFISH_CBC"
+#define SUN_CKM_BLOWFISH_ECB "CKM_BLOWFISH_ECB"
+#define SUN_CKM_AES_CBC "CKM_AES_CBC"
+#define SUN_CKM_AES_ECB "CKM_AES_ECB"
+#define SUN_CKM_AES_CTR "CKM_AES_CTR"
+#define SUN_CKM_AES_CCM "CKM_AES_CCM"
+#define SUN_CKM_AES_GCM "CKM_AES_GCM"
+#define SUN_CKM_AES_GMAC "CKM_AES_GMAC"
+#define SUN_CKM_AES_CFB128 "CKM_AES_CFB128"
+#define SUN_CKM_RC4 "CKM_RC4"
+#define SUN_CKM_RSA_PKCS "CKM_RSA_PKCS"
+#define SUN_CKM_RSA_X_509 "CKM_RSA_X_509"
+#define SUN_CKM_MD5_RSA_PKCS "CKM_MD5_RSA_PKCS"
+#define SUN_CKM_SHA1_RSA_PKCS "CKM_SHA1_RSA_PKCS"
+#define SUN_CKM_SHA256_RSA_PKCS "CKM_SHA256_RSA_PKCS"
+#define SUN_CKM_SHA384_RSA_PKCS "CKM_SHA384_RSA_PKCS"
+#define SUN_CKM_SHA512_RSA_PKCS "CKM_SHA512_RSA_PKCS"
+#define SUN_CKM_EC_KEY_PAIR_GEN "CKM_EC_KEY_PAIR_GEN"
+#define SUN_CKM_ECDH1_DERIVE "CKM_ECDH1_DERIVE"
+#define SUN_CKM_ECDSA_SHA1 "CKM_ECDSA_SHA1"
+#define SUN_CKM_ECDSA "CKM_ECDSA"
+
+/* Shared operation context format for CKM_RC4 */
+typedef struct {
+#if defined(__amd64)
+ uint32_t i, j;
+ uint32_t arr[256];
+ uint32_t flag;
+#else
+ uchar_t arr[256];
+ uchar_t i, j;
+#endif /* __amd64 */
+ uint64_t pad; /* For 64-bit alignment */
+} arcfour_state_t;
+
+/* Data arguments of cryptographic operations */
+
+typedef enum crypto_data_format {
+ CRYPTO_DATA_RAW = 1,
+ CRYPTO_DATA_UIO,
+} crypto_data_format_t;
+
+typedef struct crypto_data {
+ crypto_data_format_t cd_format; /* Format identifier */
+ off_t cd_offset; /* Offset from the beginning */
+ size_t cd_length; /* # of bytes in use */
+ caddr_t cd_miscdata; /* ancillary data */
+ union {
+ /* Raw format */
+ iovec_t cdu_raw; /* Pointer and length */
+
+ /* uio scatter-gather format */
+ uio_t *cdu_uio;
+
+ } cdu; /* Crypto Data Union */
+} crypto_data_t;
+
+#define cd_raw cdu.cdu_raw
+#define cd_uio cdu.cdu_uio
+#define cd_mp cdu.cdu_mp
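+
+/*
+ * A sketch of wrapping a flat kernel buffer in a raw crypto_data_t
+ * (buf and len are assumed caller-supplied):
+ *
+ *	crypto_data_t cd;
+ *
+ *	cd.cd_format = CRYPTO_DATA_RAW;
+ *	cd.cd_offset = 0;
+ *	cd.cd_length = len;
+ *	cd.cd_miscdata = NULL;
+ *	cd.cd_raw.iov_base = (caddr_t)buf;
+ *	cd.cd_raw.iov_len = len;
+ */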
+
+typedef struct crypto_dual_data {
+ crypto_data_t dd_data; /* The data */
+ off_t dd_offset2; /* Used by dual operation */
+ size_t dd_len2; /* # of bytes to take */
+} crypto_dual_data_t;
+
+#define dd_format dd_data.cd_format
+#define dd_offset1 dd_data.cd_offset
+#define dd_len1 dd_data.cd_length
+#define dd_miscdata dd_data.cd_miscdata
+#define dd_raw dd_data.cd_raw
+#define dd_uio dd_data.cd_uio
+#define dd_mp dd_data.cd_mp
+
+/* The keys, and their contents */
+
+typedef enum {
+ CRYPTO_KEY_RAW = 1, /* ck_data is a cleartext key */
+ CRYPTO_KEY_REFERENCE, /* ck_obj_id is an opaque reference */
+ CRYPTO_KEY_ATTR_LIST /* ck_attrs is a list of object attributes */
+} crypto_key_format_t;
+
+typedef uint64_t crypto_attr_type_t;
+
+/* Attribute types to use for passing a RSA public key or a private key. */
+#define SUN_CKA_MODULUS 0x00000120
+#define SUN_CKA_MODULUS_BITS 0x00000121
+#define SUN_CKA_PUBLIC_EXPONENT 0x00000122
+#define SUN_CKA_PRIVATE_EXPONENT 0x00000123
+#define SUN_CKA_PRIME_1 0x00000124
+#define SUN_CKA_PRIME_2 0x00000125
+#define SUN_CKA_EXPONENT_1 0x00000126
+#define SUN_CKA_EXPONENT_2 0x00000127
+#define SUN_CKA_COEFFICIENT 0x00000128
+#define SUN_CKA_PRIME 0x00000130
+#define SUN_CKA_SUBPRIME 0x00000131
+#define SUN_CKA_BASE 0x00000132
+
+#define CKK_EC 0x00000003
+#define CKK_GENERIC_SECRET 0x00000010
+#define CKK_RC4 0x00000012
+#define CKK_AES 0x0000001F
+#define CKK_DES 0x00000013
+#define CKK_DES2 0x00000014
+#define CKK_DES3 0x00000015
+
+#define CKO_PUBLIC_KEY 0x00000002
+#define CKO_PRIVATE_KEY 0x00000003
+#define CKA_CLASS 0x00000000
+#define CKA_VALUE 0x00000011
+#define CKA_KEY_TYPE 0x00000100
+#define CKA_VALUE_LEN 0x00000161
+#define CKA_EC_PARAMS 0x00000180
+#define CKA_EC_POINT 0x00000181
+
+typedef uint32_t crypto_object_id_t;
+
+typedef struct crypto_object_attribute {
+ crypto_attr_type_t oa_type; /* attribute type */
+ caddr_t oa_value; /* attribute value */
+ ssize_t oa_value_len; /* length of attribute value */
+} crypto_object_attribute_t;
+
+typedef struct crypto_key {
+ crypto_key_format_t ck_format; /* format identifier */
+ union {
+ /* for CRYPTO_KEY_RAW ck_format */
+ struct {
+ uint_t cku_v_length; /* # of bits in ck_data */
+ void *cku_v_data; /* ptr to key value */
+ } cku_key_value;
+
+ /* for CRYPTO_KEY_REFERENCE ck_format */
+ crypto_object_id_t cku_key_id; /* reference to object key */
+
+ /* for CRYPTO_KEY_ATTR_LIST ck_format */
+ struct {
+ uint_t cku_a_count; /* number of attributes */
+ crypto_object_attribute_t *cku_a_oattr;
+ } cku_key_attrs;
+ } cku_data; /* Crypto Key union */
+} crypto_key_t;
+
+#ifdef _SYSCALL32
+
+typedef struct crypto_object_attribute32 {
+ uint64_t oa_type; /* attribute type */
+ caddr32_t oa_value; /* attribute value */
+ ssize32_t oa_value_len; /* length of attribute value */
+} crypto_object_attribute32_t;
+
+typedef struct crypto_key32 {
+ crypto_key_format_t ck_format; /* format identifier */
+ union {
+ /* for CRYPTO_KEY_RAW ck_format */
+ struct {
+			uint32_t cku_v_length;	/* # of bits in ck_data */
+ caddr32_t cku_v_data; /* ptr to key value */
+ } cku_key_value;
+
+ /* for CRYPTO_KEY_REFERENCE ck_format */
+ crypto_object_id_t cku_key_id; /* reference to object key */
+
+ /* for CRYPTO_KEY_ATTR_LIST ck_format */
+ struct {
+ uint32_t cku_a_count; /* number of attributes */
+ caddr32_t cku_a_oattr;
+ } cku_key_attrs;
+ } cku_data; /* Crypto Key union */
+} crypto_key32_t;
+
+#endif /* _SYSCALL32 */
+
+#define ck_data cku_data.cku_key_value.cku_v_data
+#define ck_length cku_data.cku_key_value.cku_v_length
+#define ck_obj_id cku_data.cku_key_id
+#define ck_count cku_data.cku_key_attrs.cku_a_count
+#define ck_attrs cku_data.cku_key_attrs.cku_a_oattr
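+
+/*
+ * A sketch of describing a raw (cleartext) key; ck_length is a bit
+ * count, so a byte count is converted with CRYPTO_BYTES2BITS()
+ * (defined below):
+ *
+ *	crypto_key_t key;
+ *
+ *	key.ck_format = CRYPTO_KEY_RAW;
+ *	key.ck_data = keybuf;
+ *	key.ck_length = CRYPTO_BYTES2BITS(keylen);
+ */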
+
+/*
+ * Raw key lengths are expressed in number of bits.
+ * The following macro returns the minimum number of
+ * bytes that can contain the specified number of bits.
+ * Round up without overflowing the integer type.
+ */
+#define CRYPTO_BITS2BYTES(n) ((n) == 0 ? 0 : (((n) - 1) >> 3) + 1)
+#define CRYPTO_BYTES2BITS(n) ((n) << 3)
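+
+/*
+ * For example, CRYPTO_BITS2BYTES(0) is 0, CRYPTO_BITS2BYTES(1) through
+ * CRYPTO_BITS2BYTES(8) are all 1, and CRYPTO_BITS2BYTES(9) is 2; the
+ * subtraction before the shift is what avoids overflow for values near
+ * the top of the integer type.
+ */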
+
+/* Providers */
+
+typedef enum {
+ CRYPTO_HW_PROVIDER = 0,
+ CRYPTO_SW_PROVIDER,
+ CRYPTO_LOGICAL_PROVIDER
+} crypto_provider_type_t;
+
+typedef uint32_t crypto_provider_id_t;
+#define KCF_PROVID_INVALID ((uint32_t)-1)
+
+typedef struct crypto_provider_entry {
+ crypto_provider_id_t pe_provider_id;
+ uint_t pe_mechanism_count;
+} crypto_provider_entry_t;
+
+typedef struct crypto_dev_list_entry {
+ char le_dev_name[MAXNAMELEN];
+ uint_t le_dev_instance;
+ uint_t le_mechanism_count;
+} crypto_dev_list_entry_t;
+
+/* User type for authentication ioctls and SPI entry points */
+
+typedef enum crypto_user_type {
+ CRYPTO_SO = 0,
+ CRYPTO_USER
+} crypto_user_type_t;
+
+/* Version for provider management ioctls and SPI entry points */
+
+typedef struct crypto_version {
+ uchar_t cv_major;
+ uchar_t cv_minor;
+} crypto_version_t;
+
+/* session data structure opaque to the consumer */
+typedef void *crypto_session_t;
+
+/* provider data structure opaque to the consumer */
+typedef void *crypto_provider_t;
+
+/* Limits used by both consumers and providers */
+#define CRYPTO_EXT_SIZE_LABEL 32
+#define CRYPTO_EXT_SIZE_MANUF 32
+#define CRYPTO_EXT_SIZE_MODEL 16
+#define CRYPTO_EXT_SIZE_SERIAL 16
+#define CRYPTO_EXT_SIZE_TIME 16
+
+typedef struct crypto_provider_ext_info {
+ uchar_t ei_label[CRYPTO_EXT_SIZE_LABEL];
+ uchar_t ei_manufacturerID[CRYPTO_EXT_SIZE_MANUF];
+ uchar_t ei_model[CRYPTO_EXT_SIZE_MODEL];
+ uchar_t ei_serial_number[CRYPTO_EXT_SIZE_SERIAL];
+ ulong_t ei_flags;
+ ulong_t ei_max_session_count;
+ ulong_t ei_max_pin_len;
+ ulong_t ei_min_pin_len;
+ ulong_t ei_total_public_memory;
+ ulong_t ei_free_public_memory;
+ ulong_t ei_total_private_memory;
+ ulong_t ei_free_private_memory;
+ crypto_version_t ei_hardware_version;
+ crypto_version_t ei_firmware_version;
+ uchar_t ei_time[CRYPTO_EXT_SIZE_TIME];
+ int ei_hash_max_input_len;
+ int ei_hmac_max_input_len;
+} crypto_provider_ext_info_t;
+
+typedef uint_t crypto_session_id_t;
+
+typedef enum cmd_type {
+ COPY_FROM_DATA,
+ COPY_TO_DATA,
+ COMPARE_TO_DATA,
+ MD5_DIGEST_DATA,
+ SHA1_DIGEST_DATA,
+ SHA2_DIGEST_DATA,
+ GHASH_DATA
+} cmd_type_t;
+
+#define CRYPTO_DO_UPDATE 0x01
+#define CRYPTO_DO_FINAL 0x02
+#define CRYPTO_DO_MD5 0x04
+#define CRYPTO_DO_SHA1 0x08
+#define CRYPTO_DO_SIGN 0x10
+#define CRYPTO_DO_VERIFY 0x20
+#define CRYPTO_DO_SHA2 0x40
+
+#define PROVIDER_OWNS_KEY_SCHEDULE 0x00000001
+
+/*
+ * Common cryptographic status and error codes.
+ */
+#define CRYPTO_SUCCESS 0x00000000
+#define CRYPTO_CANCEL 0x00000001
+#define CRYPTO_HOST_MEMORY 0x00000002
+#define CRYPTO_GENERAL_ERROR 0x00000003
+#define CRYPTO_FAILED 0x00000004
+#define CRYPTO_ARGUMENTS_BAD 0x00000005
+#define CRYPTO_ATTRIBUTE_READ_ONLY 0x00000006
+#define CRYPTO_ATTRIBUTE_SENSITIVE 0x00000007
+#define CRYPTO_ATTRIBUTE_TYPE_INVALID 0x00000008
+#define CRYPTO_ATTRIBUTE_VALUE_INVALID 0x00000009
+#define CRYPTO_CANCELED 0x0000000A
+#define CRYPTO_DATA_INVALID 0x0000000B
+#define CRYPTO_DATA_LEN_RANGE 0x0000000C
+#define CRYPTO_DEVICE_ERROR 0x0000000D
+#define CRYPTO_DEVICE_MEMORY 0x0000000E
+#define CRYPTO_DEVICE_REMOVED 0x0000000F
+#define CRYPTO_ENCRYPTED_DATA_INVALID 0x00000010
+#define CRYPTO_ENCRYPTED_DATA_LEN_RANGE 0x00000011
+#define CRYPTO_KEY_HANDLE_INVALID 0x00000012
+#define CRYPTO_KEY_SIZE_RANGE 0x00000013
+#define CRYPTO_KEY_TYPE_INCONSISTENT 0x00000014
+#define CRYPTO_KEY_NOT_NEEDED 0x00000015
+#define CRYPTO_KEY_CHANGED 0x00000016
+#define CRYPTO_KEY_NEEDED 0x00000017
+#define CRYPTO_KEY_INDIGESTIBLE 0x00000018
+#define CRYPTO_KEY_FUNCTION_NOT_PERMITTED 0x00000019
+#define CRYPTO_KEY_NOT_WRAPPABLE 0x0000001A
+#define CRYPTO_KEY_UNEXTRACTABLE 0x0000001B
+#define CRYPTO_MECHANISM_INVALID 0x0000001C
+#define CRYPTO_MECHANISM_PARAM_INVALID 0x0000001D
+#define CRYPTO_OBJECT_HANDLE_INVALID 0x0000001E
+#define CRYPTO_OPERATION_IS_ACTIVE 0x0000001F
+#define CRYPTO_OPERATION_NOT_INITIALIZED 0x00000020
+#define CRYPTO_PIN_INCORRECT 0x00000021
+#define CRYPTO_PIN_INVALID 0x00000022
+#define CRYPTO_PIN_LEN_RANGE 0x00000023
+#define CRYPTO_PIN_EXPIRED 0x00000024
+#define CRYPTO_PIN_LOCKED 0x00000025
+#define CRYPTO_SESSION_CLOSED 0x00000026
+#define CRYPTO_SESSION_COUNT 0x00000027
+#define CRYPTO_SESSION_HANDLE_INVALID 0x00000028
+#define CRYPTO_SESSION_READ_ONLY 0x00000029
+#define CRYPTO_SESSION_EXISTS 0x0000002A
+#define CRYPTO_SESSION_READ_ONLY_EXISTS 0x0000002B
+#define CRYPTO_SESSION_READ_WRITE_SO_EXISTS 0x0000002C
+#define CRYPTO_SIGNATURE_INVALID 0x0000002D
+#define CRYPTO_SIGNATURE_LEN_RANGE 0x0000002E
+#define CRYPTO_TEMPLATE_INCOMPLETE 0x0000002F
+#define CRYPTO_TEMPLATE_INCONSISTENT 0x00000030
+#define CRYPTO_UNWRAPPING_KEY_HANDLE_INVALID 0x00000031
+#define CRYPTO_UNWRAPPING_KEY_SIZE_RANGE 0x00000032
+#define CRYPTO_UNWRAPPING_KEY_TYPE_INCONSISTENT 0x00000033
+#define CRYPTO_USER_ALREADY_LOGGED_IN 0x00000034
+#define CRYPTO_USER_NOT_LOGGED_IN 0x00000035
+#define CRYPTO_USER_PIN_NOT_INITIALIZED 0x00000036
+#define CRYPTO_USER_TYPE_INVALID 0x00000037
+#define CRYPTO_USER_ANOTHER_ALREADY_LOGGED_IN 0x00000038
+#define CRYPTO_USER_TOO_MANY_TYPES 0x00000039
+#define CRYPTO_WRAPPED_KEY_INVALID 0x0000003A
+#define CRYPTO_WRAPPED_KEY_LEN_RANGE 0x0000003B
+#define CRYPTO_WRAPPING_KEY_HANDLE_INVALID 0x0000003C
+#define CRYPTO_WRAPPING_KEY_SIZE_RANGE 0x0000003D
+#define CRYPTO_WRAPPING_KEY_TYPE_INCONSISTENT 0x0000003E
+#define CRYPTO_RANDOM_SEED_NOT_SUPPORTED 0x0000003F
+#define CRYPTO_RANDOM_NO_RNG 0x00000040
+#define CRYPTO_DOMAIN_PARAMS_INVALID 0x00000041
+#define CRYPTO_BUFFER_TOO_SMALL 0x00000042
+#define CRYPTO_INFORMATION_SENSITIVE 0x00000043
+#define CRYPTO_NOT_SUPPORTED 0x00000044
+
+#define CRYPTO_QUEUED 0x00000045
+#define CRYPTO_BUFFER_TOO_BIG 0x00000046
+#define CRYPTO_INVALID_CONTEXT 0x00000047
+#define CRYPTO_INVALID_MAC 0x00000048
+#define CRYPTO_MECH_NOT_SUPPORTED 0x00000049
+#define CRYPTO_INCONSISTENT_ATTRIBUTE 0x0000004A
+#define CRYPTO_NO_PERMISSION 0x0000004B
+#define CRYPTO_INVALID_PROVIDER_ID 0x0000004C
+#define CRYPTO_VERSION_MISMATCH 0x0000004D
+#define CRYPTO_BUSY 0x0000004E
+#define CRYPTO_UNKNOWN_PROVIDER 0x0000004F
+#define CRYPTO_MODVERIFICATION_FAILED 0x00000050
+#define CRYPTO_OLD_CTX_TEMPLATE 0x00000051
+#define CRYPTO_WEAK_KEY 0x00000052
+#define CRYPTO_FIPS140_ERROR 0x00000053
+/*
+ * Don't forget to update CRYPTO_LAST_ERROR and the error_number_table[]
+ * in kernelUtil.c when a new error code is added.
+ */
+#define CRYPTO_LAST_ERROR 0x00000053
+
+/*
+ * Special values that can be used to indicate that information is unavailable
+ * or that there is no practical limit. These values can be used
+ * by fields of the SPI crypto_provider_ext_info(9S) structure.
+ * The value of CRYPTO_UNAVAILABLE_INFO should be the same as
+ * CK_UNAVAILABLE_INFO in the PKCS#11 spec.
+ */
+#define CRYPTO_UNAVAILABLE_INFO ((ulong_t)(-1))
+#define CRYPTO_EFFECTIVELY_INFINITE 0x0
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_CRYPTO_COMMON_H */
diff --git a/zfs/include/sys/crypto/icp.h b/zfs/include/sys/crypto/icp.h
new file mode 100644
index 000000000000..d8948e022a23
--- /dev/null
+++ b/zfs/include/sys/crypto/icp.h
@@ -0,0 +1,47 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2016, Datto, Inc. All rights reserved.
+ */
+
+#ifndef _SYS_CRYPTO_ALGS_H
+#define _SYS_CRYPTO_ALGS_H
+
+int aes_mod_init(void);
+int aes_mod_fini(void);
+
+int edonr_mod_init(void);
+int edonr_mod_fini(void);
+
+int sha1_mod_init(void);
+int sha1_mod_fini(void);
+
+int sha2_mod_init(void);
+int sha2_mod_fini(void);
+
+int skein_mod_init(void);
+int skein_mod_fini(void);
+
+int icp_init(void);
+void icp_fini(void);
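+
+/*
+ * A minimal lifecycle sketch, assuming a module-init context:
+ *
+ *	int error = icp_init();
+ *	if (error != 0)
+ *		return (error);
+ *	...use the sys/crypto/api.h consumer interfaces...
+ *	icp_fini();
+ */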
+
+#endif /* _SYS_CRYPTO_ALGS_H */
diff --git a/zfs/include/sys/dbuf.h b/zfs/include/sys/dbuf.h
index 0d262e87b5bc..6262f012e7ab 100644
--- a/zfs/include/sys/dbuf.h
+++ b/zfs/include/sys/dbuf.h
@@ -36,6 +36,7 @@
#include <sys/zfs_context.h>
#include <sys/refcount.h>
#include <sys/zrlock.h>
+#include <sys/multilist.h>
#ifdef __cplusplus
extern "C" {
@@ -121,6 +122,9 @@ typedef struct dbuf_dirty_record {
/* How much space was changed to dsl_pool_dirty_space() for this? */
unsigned int dr_accounted;
+ /* A copy of the bp that points to us */
+ blkptr_t dr_bp_copy;
+
union dirty_types {
struct dirty_indirect {
@@ -225,6 +229,11 @@ typedef struct dmu_buf_impl {
*/
avl_node_t db_link;
+ /*
+ * Link in dbuf_cache.
+ */
+ multilist_node_t db_cache_link;
+
/* Data which is unique to data (leaf) blocks: */
/* User callback information. */
@@ -261,8 +270,8 @@ typedef struct dbuf_hash_table {
kmutex_t hash_mutexes[DBUF_MUTEXES];
} dbuf_hash_table_t;
-
-uint64_t dbuf_whichblock(struct dnode *di, uint64_t offset);
+uint64_t dbuf_whichblock(const struct dnode *di, const int64_t level,
+ const uint64_t offset);
void dbuf_create_bonus(struct dnode *dn);
int dbuf_spill_set_blksz(dmu_buf_t *db, uint64_t blksz, dmu_tx_t *tx);
@@ -272,10 +281,12 @@ void dbuf_rm_spill(struct dnode *dn, dmu_tx_t *tx);
dmu_buf_impl_t *dbuf_hold(struct dnode *dn, uint64_t blkid, void *tag);
dmu_buf_impl_t *dbuf_hold_level(struct dnode *dn, int level, uint64_t blkid,
void *tag);
-int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid, int create,
+int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid,
+ boolean_t fail_sparse, boolean_t fail_uncached,
void *tag, dmu_buf_impl_t **dbp);
-void dbuf_prefetch(struct dnode *dn, uint64_t blkid, zio_priority_t prio);
+void dbuf_prefetch(struct dnode *dn, int64_t level, uint64_t blkid,
+ zio_priority_t prio, arc_flags_t aflags);
void dbuf_add_ref(dmu_buf_impl_t *db, void *tag);
boolean_t dbuf_try_add_ref(dmu_buf_t *db, objset_t *os, uint64_t obj,
@@ -299,8 +310,7 @@ void dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
bp_embedded_type_t etype, enum zio_compress comp,
int uncompressed_size, int compressed_size, int byteorder, dmu_tx_t *tx);
-void dbuf_clear(dmu_buf_impl_t *db);
-void dbuf_evict(dmu_buf_impl_t *db);
+void dbuf_destroy(dmu_buf_impl_t *db);
void dbuf_unoverride(dbuf_dirty_record_t *dr);
void dbuf_sync_list(list_t *list, int level, dmu_tx_t *tx);
@@ -338,10 +348,6 @@ boolean_t dbuf_is_metadata(dmu_buf_impl_t *db);
(dbuf_is_metadata(_db) && \
((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
-#define DBUF_IS_L2COMPRESSIBLE(_db) \
- ((_db)->db_objset->os_compress != ZIO_COMPRESS_OFF || \
- (dbuf_is_metadata(_db) && zfs_mdcomp_disable == B_FALSE))
-
#ifdef ZFS_DEBUG
/*
diff --git a/zfs/include/sys/ddt.h b/zfs/include/sys/ddt.h
index 3befcb84427c..667795f967f7 100644
--- a/zfs/include/sys/ddt.h
+++ b/zfs/include/sys/ddt.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2016 by Delphix. All rights reserved.
*/
#ifndef _SYS_DDT_H
@@ -35,6 +36,8 @@
extern "C" {
#endif
+struct abd;
+
/*
* On-disk DDT formats, in the desired search order (newest version first).
*/
@@ -108,7 +111,7 @@ struct ddt_entry {
ddt_key_t dde_key;
ddt_phys_t dde_phys[DDT_PHYS_TYPES];
zio_t *dde_lead_zio[DDT_PHYS_TYPES];
- void *dde_repair_data;
+ struct abd *dde_repair_abd;
enum ddt_type dde_type;
enum ddt_class dde_class;
uint8_t dde_loading;
diff --git a/zfs/include/sys/dmu.h b/zfs/include/sys/dmu.h
index d9434db46383..d24615262737 100644
--- a/zfs/include/sys/dmu.h
+++ b/zfs/include/sys/dmu.h
@@ -20,11 +20,12 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright 2014 HybridCluster. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -44,6 +45,8 @@
#include <sys/inttypes.h>
#include <sys/cred.h>
#include <sys/fs/zfs.h>
+#include <sys/zio_compress.h>
+#include <sys/zio_priority.h>
#include <sys/uio.h>
#ifdef __cplusplus
@@ -72,6 +75,7 @@ struct sa_handle;
typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;
typedef struct dsl_dir dsl_dir_t;
+typedef struct dnode dnode_t;
typedef enum dmu_object_byteswap {
DMU_BSWAP_UINT8,
@@ -252,6 +256,12 @@ void zfs_znode_byteswap(void *buf, size_t size);
#define DMU_USERUSED_OBJECT (-1ULL)
#define DMU_GROUPUSED_OBJECT (-2ULL)
+/*
+ * Zap prefix for object accounting in DMU_{USER,GROUP}USED_OBJECT.
+ */
+#define DMU_OBJACCT_PREFIX "obj-"
+#define DMU_OBJACCT_PREFIX_LEN 4
+
/*
* artificial blkids for bonus buffer and spill blocks
*/
@@ -314,6 +324,8 @@ typedef struct dmu_buf {
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
#define DMU_POOL_BPTREE_OBJ "bptree_obj"
#define DMU_POOL_EMPTY_BPOBJ "empty_bpobj"
+#define DMU_POOL_CHECKSUM_SALT "org.illumos:checksum_salt"
+#define DMU_POOL_VDEV_ZAP_MAP "com.delphix:vdev_zap_map"
/*
* Allocate an object from this objset. The range of object numbers
@@ -332,10 +344,19 @@ typedef struct dmu_buf {
*/
uint64_t dmu_object_alloc(objset_t *os, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
+uint64_t dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot,
+ int blocksize, dmu_object_type_t bonus_type, int bonus_len,
+ int dnodesize, dmu_tx_t *tx);
int dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonus_type, int bonus_len, dmu_tx_t *tx);
+int dmu_object_claim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
+ int blocksize, dmu_object_type_t bonus_type, int bonus_len,
+ int dnodesize, dmu_tx_t *tx);
int dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *txp);
+int dmu_object_reclaim_dnsize(objset_t *os, uint64_t object,
+ dmu_object_type_t ot, int blocksize, dmu_object_type_t bonustype,
+ int bonuslen, int dnodesize, dmu_tx_t *txp);
/*
* Free an object from this objset.
@@ -409,7 +430,7 @@ dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
#define WP_DMU_SYNC 0x2
#define WP_SPILL 0x4
-void dmu_write_policy(objset_t *os, struct dnode *dn, int level, int wp,
+void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
struct zio_prop *zp);
/*
* The bonus data is accessed more or less like a regular buffer.
@@ -435,7 +456,7 @@ int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *);
*/
int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
-int dmu_spill_hold_by_dnode(struct dnode *dn, uint32_t flags,
+int dmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags,
void *tag, dmu_buf_t **dbp);
int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
@@ -455,6 +476,8 @@ int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
*/
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
void *tag, dmu_buf_t **, int flags);
+int dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
+ void *tag, dmu_buf_t **dbp, int flags);
/*
* Add a reference to a dmu buffer that has already been held via
@@ -486,7 +509,8 @@ uint64_t dmu_buf_refcount(dmu_buf_t *db);
* individually with dmu_buf_rele.
*/
int dmu_buf_hold_array_by_bonus(dmu_buf_t *db, uint64_t offset,
- uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp);
+ uint64_t length, boolean_t read, void *tag,
+ int *numbufsp, dmu_buf_t ***dbpp);
void dmu_buf_rele_array(dmu_buf_t **, int numbufs, void *tag);
typedef void dmu_buf_evict_func_t(void *user_ptr);
@@ -525,8 +549,14 @@ typedef struct dmu_buf_user {
*/
taskq_ent_t dbu_tqent;
- /* This instance's eviction function pointer. */
- dmu_buf_evict_func_t *dbu_evict_func;
+ /*
+ * This instance's eviction function pointers.
+ *
+ * dbu_evict_func_sync is called synchronously and then
+ * dbu_evict_func_async is executed asynchronously on a taskq.
+ */
+ dmu_buf_evict_func_t *dbu_evict_func_sync;
+ dmu_buf_evict_func_t *dbu_evict_func_async;
#ifdef ZFS_DEBUG
/*
* Pointer to user's dbuf pointer. NULL for clients that do
@@ -546,25 +576,23 @@ typedef struct dmu_buf_user {
* NOTE: This function should only be called once on a given dmu_buf_user_t.
* To allow enforcement of this, dbu must already be zeroed on entry.
*/
-#ifdef __lint
-/* Very ugly, but it beats issuing suppression directives in many Makefiles. */
-extern void
-dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func,
- dmu_buf_t **clear_on_evict_dbufp);
-#else /* __lint */
+/*ARGSUSED*/
static inline void
-dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func,
- dmu_buf_t **clear_on_evict_dbufp)
+dmu_buf_init_user(dmu_buf_user_t *dbu, dmu_buf_evict_func_t *evict_func_sync,
+ dmu_buf_evict_func_t *evict_func_async, dmu_buf_t **clear_on_evict_dbufp)
{
- ASSERT(dbu->dbu_evict_func == NULL);
- ASSERT(evict_func != NULL);
- dbu->dbu_evict_func = evict_func;
+ ASSERT(dbu->dbu_evict_func_sync == NULL);
+ ASSERT(dbu->dbu_evict_func_async == NULL);
+
+ /* must have at least one evict func */
+ IMPLY(evict_func_sync == NULL, evict_func_async != NULL);
+ dbu->dbu_evict_func_sync = evict_func_sync;
+ dbu->dbu_evict_func_async = evict_func_async;
taskq_init_ent(&dbu->dbu_tqent);
#ifdef ZFS_DEBUG
dbu->dbu_clear_on_evict_dbufp = clear_on_evict_dbufp;
#endif
}
-#endif /* __lint */
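
A sketch of a dbuf user under the split eviction API (hypothetical client;
my_user_t and both callback names are invented). The synchronous callback
runs in the eviction path itself, suited to clearing back-pointers, while
the asynchronous one runs later from the taskq, where it is safe to free
memory or take other locks:

    typedef struct my_user {
        dmu_buf_user_t mu_dbu;
        dmu_buf_t *mu_db;       /* cleared on evict under ZFS_DEBUG */
    } my_user_t;

    static void
    my_evict_prep(void *arg)    /* called synchronously */
    {
        /* detach this user from structures the evictor may race with */
    }

    static void
    my_evict_free(void *arg)    /* called later on the eviction taskq */
    {
        kmem_free(arg, sizeof (my_user_t));
    }

    ...
    dmu_buf_init_user(&mu->mu_dbu, my_evict_prep, my_evict_free,
        &mu->mu_db);

Passing NULL for one of the two callbacks is allowed as long as the other
is set, per the IMPLY() assertion above.
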
/*
* Attach user data to a dbuf and mark it for normal (when the dbuf's
@@ -607,6 +635,10 @@ void *dmu_buf_remove_user(dmu_buf_t *db, dmu_buf_user_t *user);
*/
void *dmu_buf_get_user(dmu_buf_t *db);
+objset_t *dmu_buf_get_objset(dmu_buf_t *db);
+dnode_t *dmu_buf_dnode_enter(dmu_buf_t *db);
+void dmu_buf_dnode_exit(dmu_buf_t *db);
+
/* Block until any in-progress dmu buf user evictions complete. */
void dmu_buf_user_evict_wait(void);
@@ -624,11 +656,6 @@ struct blkptr *dmu_buf_get_blkptr(dmu_buf_t *db);
*/
void dmu_buf_will_dirty(dmu_buf_t *db, dmu_tx_t *tx);
-/*
- * Tells if the given dbuf is freeable.
- */
-boolean_t dmu_buf_freeable(dmu_buf_t *);
-
/*
* You must create a transaction, then hold the objects which you will
* (or might) modify as part of this transaction. Then you must assign
@@ -652,10 +679,17 @@ boolean_t dmu_buf_freeable(dmu_buf_t *);
dmu_tx_t *dmu_tx_create(objset_t *os);
void dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len);
+void dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
+ int len);
void dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off,
uint64_t len);
+void dmu_tx_hold_free_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off,
+ uint64_t len);
void dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name);
+void dmu_tx_hold_zap_by_dnode(dmu_tx_t *tx, dnode_t *dn, int add,
+ const char *name);
void dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object);
+void dmu_tx_hold_bonus_by_dnode(dmu_tx_t *tx, dnode_t *dn);
void dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_sa(dmu_tx_t *tx, struct sa_handle *hdl, boolean_t may_grow);
void dmu_tx_hold_sa_create(dmu_tx_t *tx, int total_size);
@@ -663,6 +697,7 @@ void dmu_tx_abort(dmu_tx_t *tx);
int dmu_tx_assign(dmu_tx_t *tx, enum txg_how txg_how);
void dmu_tx_wait(dmu_tx_t *tx);
void dmu_tx_commit(dmu_tx_t *tx);
+void dmu_tx_mark_netfree(dmu_tx_t *tx);
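
dmu_tx_mark_netfree() pairs with the new tx_netfree flag in dmu_tx.h below:
a transaction whose net effect releases space can be allowed to proceed even
when the pool is nearly full. A minimal sketch, assuming an unlink-style
caller:

    dmu_tx_t *tx = dmu_tx_create(os);
    dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
    dmu_tx_mark_netfree(tx);        /* net effect is freeing space */
    error = dmu_tx_assign(tx, TXG_WAIT);
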
/*
* To register a commit callback, dmu_tx_callback_register() must be called.
@@ -704,26 +739,31 @@ int dmu_free_long_object(objset_t *os, uint64_t object);
#define DMU_READ_NO_PREFETCH 1 /* don't prefetch */
int dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
void *buf, uint32_t flags);
+int dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf,
+ uint32_t flags);
void dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
const void *buf, dmu_tx_t *tx);
+void dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size,
+ const void *buf, dmu_tx_t *tx);
void dmu_prealloc(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
dmu_tx_t *tx);
#ifdef _KERNEL
#include <linux/blkdev_compat.h>
-int dmu_read_bio(objset_t *os, uint64_t object, struct bio *bio);
-int dmu_write_bio(objset_t *os, uint64_t object, struct bio *bio,
- dmu_tx_t *tx);
int dmu_read_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size);
int dmu_read_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size);
+int dmu_read_uio_dnode(dnode_t *dn, struct uio *uio, uint64_t size);
int dmu_write_uio(objset_t *os, uint64_t object, struct uio *uio, uint64_t size,
dmu_tx_t *tx);
int dmu_write_uio_dbuf(dmu_buf_t *zdb, struct uio *uio, uint64_t size,
dmu_tx_t *tx);
+int dmu_write_uio_dnode(dnode_t *dn, struct uio *uio, uint64_t size,
+ dmu_tx_t *tx);
#endif
struct arc_buf *dmu_request_arcbuf(dmu_buf_t *handle, int size);
void dmu_return_arcbuf(struct arc_buf *buf);
void dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, struct arc_buf *buf,
dmu_tx_t *tx);
+#ifdef HAVE_UIO_ZEROCOPY
int dmu_xuio_init(struct xuio *uio, int niov);
void dmu_xuio_fini(struct xuio *uio);
int dmu_xuio_add(struct xuio *uio, struct arc_buf *abuf, offset_t off,
@@ -731,6 +771,7 @@ int dmu_xuio_add(struct xuio *uio, struct arc_buf *abuf, offset_t off,
int dmu_xuio_cnt(struct xuio *uio);
struct arc_buf *dmu_xuio_arcbuf(struct xuio *uio, int i);
void dmu_xuio_clear(struct xuio *uio, int i);
+#endif /* HAVE_UIO_ZEROCOPY */
void xuio_stat_wbuf_copied(void);
void xuio_stat_wbuf_nocopy(void);
@@ -740,8 +781,8 @@ extern int zfs_max_recordsize;
/*
* Asynchronously try to read in the data.
*/
-void dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset,
- uint64_t len);
+void dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
+ uint64_t len, enum zio_priority pri);
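
The prefetch entry point now takes the indirection level to prefetch at and
a zio priority, so callers can read ahead indirect blocks (level > 0) and
deprioritize speculative I/O. An illustrative call for the common case,
prefetching level-0 data at background priority:

    dmu_prefetch(os, object, 0, offset, len, ZIO_PRIORITY_ASYNC_READ);
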
typedef struct dmu_object_info {
/* All sizes are in bytes unless otherwise indicated. */
@@ -755,6 +796,7 @@ typedef struct dmu_object_info {
uint8_t doi_compress;
uint8_t doi_nblkptr;
uint8_t doi_pad[4];
+ uint64_t doi_dnodesize;
uint64_t doi_physical_blocks_512; /* data + metadata, 512b blks */
uint64_t doi_max_offset;
uint64_t doi_fill_count; /* number of non-empty blocks */
@@ -786,7 +828,7 @@ extern const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS];
int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi);
void __dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
/* Like dmu_object_info, but faster if you have a held dnode in hand. */
-void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
+void dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi);
/* Like dmu_object_info, but faster if you have a held dbuf in hand. */
void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi);
/*
@@ -796,6 +838,8 @@ void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi);
void dmu_object_size_from_db(dmu_buf_t *db, uint32_t *blksize,
u_longlong_t *nblk512);
+void dmu_object_dnsize_from_db(dmu_buf_t *db, int *dnsize);
+
typedef struct dmu_objset_stats {
uint64_t dds_num_clones; /* number of clones of this */
uint64_t dds_creation_txg;
@@ -803,7 +847,7 @@ typedef struct dmu_objset_stats {
dmu_objset_type_t dds_type;
uint8_t dds_is_snapshot;
uint8_t dds_inconsistent;
- char dds_origin[MAXNAMELEN];
+ char dds_origin[ZFS_MAX_DATASET_NAME_LEN];
} dmu_objset_stats_t;
/*
@@ -853,6 +897,7 @@ extern struct dsl_dataset *dmu_objset_ds(objset_t *os);
extern void dmu_objset_name(objset_t *os, char *buf);
extern dmu_objset_type_t dmu_objset_type(objset_t *os);
extern uint64_t dmu_objset_id(objset_t *os);
+extern uint64_t dmu_objset_dnodesize(objset_t *os);
extern zfs_sync_type_t dmu_objset_syncprop(objset_t *os);
extern zfs_logbias_op_t dmu_objset_logbias(objset_t *os);
extern int dmu_snapshot_list_next(objset_t *os, int namelen, char *name,
diff --git a/zfs/include/sys/dmu_impl.h b/zfs/include/sys/dmu_impl.h
index 75d094f0812e..65e417e3f665 100644
--- a/zfs/include/sys/dmu_impl.h
+++ b/zfs/include/sys/dmu_impl.h
@@ -24,7 +24,7 @@
*/
/*
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
*/
#ifndef _SYS_DMU_IMPL_H
@@ -86,7 +86,6 @@ extern "C" {
* held from:
* callers of dbuf_read_impl, dbuf_hold[_impl], dbuf_prefetch
* dmu_object_info_from_dnode: dn_dirty_mtx (dn_datablksz)
- * dmu_tx_count_free:
* dbuf_read_impl: db_mtx, dmu_zfetch()
* dmu_zfetch: zf_rwlock/r, zst_lock, dbuf_prefetch()
* dbuf_new_size: db_mtx
@@ -197,7 +196,6 @@ extern "C" {
* dsl_prop_changed_notify: none (dd_prop_cbs)
* dsl_prop_register: none (dd_prop_cbs)
* dsl_prop_unregister: none (dd_prop_cbs)
- * dsl_dataset_block_freeable: none (dd_sync_*)
*
* os_lock (leaf)
* protects:
@@ -268,10 +266,13 @@ typedef struct dmu_sendarg {
uint64_t dsa_toguid;
int dsa_err;
dmu_pendop_t dsa_pending_op;
- boolean_t dsa_incremental;
uint64_t dsa_featureflags;
uint64_t dsa_last_data_object;
uint64_t dsa_last_data_offset;
+ uint64_t dsa_resume_object;
+ uint64_t dsa_resume_offset;
+ boolean_t dsa_sent_begin;
+ boolean_t dsa_sent_end;
} dmu_sendarg_t;
void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
diff --git a/zfs/include/sys/dmu_objset.h b/zfs/include/sys/dmu_objset.h
index 837a0d5107b7..a836e03722c3 100644
--- a/zfs/include/sys/dmu_objset.h
+++ b/zfs/include/sys/dmu_objset.h
@@ -20,8 +20,8 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -56,6 +56,7 @@ struct dmu_tx;
(arc_buf_size(buf) > OBJSET_OLD_PHYS_SIZE)
#define OBJSET_FLAG_USERACCOUNTING_COMPLETE (1ULL<<0)
+#define OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE (1ULL<<1)
typedef struct objset_phys {
dnode_phys_t os_meta_dnode;
@@ -68,6 +69,8 @@ typedef struct objset_phys {
dnode_phys_t os_groupused_dnode;
} objset_phys_t;
+typedef int (*dmu_objset_upgrade_cb_t)(objset_t *);
+
struct objset {
/* Immutable: */
struct dsl_dataset *os_dsl_dataset;
@@ -88,6 +91,7 @@ struct objset {
list_node_t os_evicting_node;
/* can change, under dsl_dir's locks: */
+ uint64_t os_dnodesize; /* default dnode size for new objects */
enum zio_checksum os_checksum;
enum zio_compress os_compress;
uint8_t os_copies;
@@ -100,28 +104,48 @@ struct objset {
zfs_redundant_metadata_type_t os_redundant_metadata;
int os_recordsize;
+ /*
+ * Pointer is constant; the blkptr it points to is protected by
+ * os_dsl_dataset->ds_bp_rwlock
+ */
+ blkptr_t *os_rootbp;
+
/* no lock needed: */
struct dmu_tx *os_synctx; /* XXX sketchy */
- blkptr_t *os_rootbp;
zil_header_t os_zil_header;
- list_t os_synced_dnodes;
+ multilist_t *os_synced_dnodes;
uint64_t os_flags;
+ uint64_t os_freed_dnodes;
+ boolean_t os_rescan_dnodes;
/* Protected by os_obj_lock */
kmutex_t os_obj_lock;
- uint64_t os_obj_next;
+ uint64_t os_obj_next_chunk;
+
+ /* Per-CPU next object to allocate, protected by atomic ops. */
+ uint64_t *os_obj_next_percpu;
+ int os_obj_next_percpu_len;
/* Protected by os_lock */
kmutex_t os_lock;
- list_t os_dirty_dnodes[TXG_SIZE];
- list_t os_free_dnodes[TXG_SIZE];
+ multilist_t *os_dirty_dnodes[TXG_SIZE];
list_t os_dnodes;
list_t os_downgraded_dbufs;
+ /* Protects changes to DMU_{USER,GROUP}USED_OBJECT */
+ kmutex_t os_userused_lock;
+
/* stuff we store for the user */
kmutex_t os_user_ptr_lock;
void *os_user_ptr;
sa_os_t *os_sa;
+
+ /* kernel thread to upgrade this dataset */
+ kmutex_t os_upgrade_lock;
+ taskqid_t os_upgrade_id;
+ dmu_objset_upgrade_cb_t os_upgrade_cb;
+ boolean_t os_upgrade_exit;
+ int os_upgrade_status;
};
#define DMU_META_OBJSET 0
@@ -135,8 +159,6 @@ struct objset {
((os)->os_secondary_cache == ZFS_CACHE_ALL || \
(os)->os_secondary_cache == ZFS_CACHE_METADATA)
-#define DMU_OS_IS_L2COMPRESSIBLE(os) (zfs_mdcomp_disable == B_FALSE)
-
/* called from zpl */
int dmu_objset_hold(const char *name, void *tag, objset_t **osp);
int dmu_objset_own(const char *name, dmu_objset_type_t type,
@@ -172,9 +194,15 @@ void dmu_objset_userquota_get_ids(dnode_t *dn, boolean_t before, dmu_tx_t *tx);
boolean_t dmu_objset_userused_enabled(objset_t *os);
int dmu_objset_userspace_upgrade(objset_t *os);
boolean_t dmu_objset_userspace_present(objset_t *os);
+boolean_t dmu_objset_userobjused_enabled(objset_t *os);
+boolean_t dmu_objset_userobjspace_upgradable(objset_t *os);
+void dmu_objset_userobjspace_upgrade(objset_t *os);
+boolean_t dmu_objset_userobjspace_present(objset_t *os);
+
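
A sketch of how these four predicates presumably compose at a call site
(illustration only; the real upgrade policy lives elsewhere in the patch):

    /* lazily start the object-accounting upgrade on an old dataset */
    if (dmu_objset_userobjused_enabled(os) &&
        !dmu_objset_userobjspace_present(os) &&
        dmu_objset_userobjspace_upgradable(os))
        dmu_objset_userobjspace_upgrade(os); /* async, via os_upgrade_id */
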
int dmu_fsname(const char *snapname, char *buf);
void dmu_objset_evict_done(objset_t *os);
+void dmu_objset_willuse_space(objset_t *os, int64_t space, dmu_tx_t *tx);
void dmu_objset_init(void);
void dmu_objset_fini(void);
diff --git a/zfs/include/sys/dmu_send.h b/zfs/include/sys/dmu_send.h
index 2442a1f8aab1..5cf67a6ab580 100644
--- a/zfs/include/sys/dmu_send.h
+++ b/zfs/include/sys/dmu_send.h
@@ -36,26 +36,32 @@ struct vnode;
struct dsl_dataset;
struct drr_begin;
struct avl_tree;
+struct dmu_replay_record;
-int dmu_send(const char *tosnap, const char *fromsnap,
- boolean_t embedok, boolean_t large_block_ok,
- int outfd, struct vnode *vp, offset_t *off);
+extern const char *recv_clone_name;
+
+int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
+ boolean_t large_block_ok, boolean_t compressok, int outfd,
+ uint64_t resumeobj, uint64_t resumeoff, struct vnode *vp, offset_t *off);
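
For a fresh (non-resumed) send the two resume coordinates are presumably
zero; a resumed send would pass the object/offset recorded by the receiver
in the DS_FIELD_RESUME_* ZAP entries (see dsl_dataset.h below). Illustrative
call:

    error = dmu_send(tosnap, fromsnap, B_FALSE /* embedok */,
        B_FALSE /* large_block_ok */, B_FALSE /* compressok */,
        outfd, 0 /* resumeobj */, 0 /* resumeoff */, vp, &off);
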
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
- uint64_t *sizep);
+ boolean_t stream_compressed, uint64_t *sizep);
int dmu_send_estimate_from_txg(struct dsl_dataset *ds, uint64_t fromtxg,
- uint64_t *sizep);
+ boolean_t stream_compressed, uint64_t *sizep);
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
- boolean_t embedok, boolean_t large_block_ok,
+ boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
int outfd, struct vnode *vp, offset_t *off);
typedef struct dmu_recv_cookie {
struct dsl_dataset *drc_ds;
+ struct dmu_replay_record *drc_drr_begin;
struct drr_begin *drc_drrb;
const char *drc_tofs;
const char *drc_tosnap;
boolean_t drc_newfs;
boolean_t drc_byteswap;
boolean_t drc_force;
+ boolean_t drc_resumable;
+ boolean_t drc_clone;
struct avl_tree *drc_guid_to_ds_map;
zio_cksum_t drc_cksum;
uint64_t drc_newsnapobj;
@@ -63,8 +69,9 @@ typedef struct dmu_recv_cookie {
cred_t *drc_cred;
} dmu_recv_cookie_t;
-int dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
- boolean_t force, char *origin, dmu_recv_cookie_t *drc);
+int dmu_recv_begin(char *tofs, char *tosnap,
+ struct dmu_replay_record *drr_begin,
+ boolean_t force, boolean_t resumable, char *origin, dmu_recv_cookie_t *drc);
int dmu_recv_stream(dmu_recv_cookie_t *drc, struct vnode *vp, offset_t *voffp,
int cleanup_fd, uint64_t *action_handlep);
int dmu_recv_end(dmu_recv_cookie_t *drc, void *owner);
diff --git a/zfs/include/sys/dmu_traverse.h b/zfs/include/sys/dmu_traverse.h
index 544b721e4612..c010edd440d9 100644
--- a/zfs/include/sys/dmu_traverse.h
+++ b/zfs/include/sys/dmu_traverse.h
@@ -54,6 +54,8 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
int traverse_dataset(struct dsl_dataset *ds,
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
+int traverse_dataset_resume(struct dsl_dataset *ds, uint64_t txg_start,
+ zbookmark_phys_t *resume, int flags, blkptr_cb_t func, void *arg);
int traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
uint64_t txg_start, zbookmark_phys_t *resume, int flags,
blkptr_cb_t func, void *arg);
diff --git a/zfs/include/sys/dmu_tx.h b/zfs/include/sys/dmu_tx.h
index c70c97da03f2..f16e1e858041 100644
--- a/zfs/include/sys/dmu_tx.h
+++ b/zfs/include/sys/dmu_tx.h
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
*/
#ifndef _SYS_DMU_TX_H
@@ -70,6 +70,9 @@ struct dmu_tx {
/* has this transaction already been delayed? */
boolean_t tx_waited;
+ /* transaction is marked as being a "net free" of space */
+ boolean_t tx_netfree;
+
/* time this transaction was created */
hrtime_t tx_start;
@@ -77,14 +80,6 @@ struct dmu_tx {
boolean_t tx_wait_dirty;
int tx_err;
-#ifdef DEBUG_DMU_TX
- uint64_t tx_space_towrite;
- uint64_t tx_space_tofree;
- uint64_t tx_space_tooverwrite;
- uint64_t tx_space_tounref;
- refcount_t tx_space_written;
- refcount_t tx_space_freed;
-#endif
};
enum dmu_tx_hold_type {
@@ -102,17 +97,11 @@ typedef struct dmu_tx_hold {
dmu_tx_t *txh_tx;
list_node_t txh_node;
struct dnode *txh_dnode;
- uint64_t txh_space_towrite;
- uint64_t txh_space_tofree;
- uint64_t txh_space_tooverwrite;
- uint64_t txh_space_tounref;
- uint64_t txh_memory_tohold;
- uint64_t txh_fudge;
-#ifdef DEBUG_DMU_TX
+ refcount_t txh_space_towrite;
+ refcount_t txh_memory_tohold;
enum dmu_tx_hold_type txh_type;
uint64_t txh_arg1;
uint64_t txh_arg2;
-#endif
} dmu_tx_hold_t;
typedef struct dmu_tx_callback {
@@ -171,13 +160,11 @@ extern dmu_tx_t *dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg);
dmu_tx_t *dmu_tx_create_dd(dsl_dir_t *dd);
int dmu_tx_is_syncing(dmu_tx_t *tx);
int dmu_tx_private_ok(dmu_tx_t *tx);
-void dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object);
-void dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta);
+void dmu_tx_add_new_object(dmu_tx_t *tx, dnode_t *dn);
void dmu_tx_dirty_buf(dmu_tx_t *tx, struct dmu_buf_impl *db);
-int dmu_tx_holds(dmu_tx_t *tx, uint64_t object);
void dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space);
-#ifdef DEBUG_DMU_TX
+#ifdef ZFS_DEBUG
#define DMU_TX_DIRTY_BUF(tx, db) dmu_tx_dirty_buf(tx, db)
#else
#define DMU_TX_DIRTY_BUF(tx, db)
diff --git a/zfs/include/sys/dmu_zfetch.h b/zfs/include/sys/dmu_zfetch.h
index 38ed1d872129..8125d0706239 100644
--- a/zfs/include/sys/dmu_zfetch.h
+++ b/zfs/include/sys/dmu_zfetch.h
@@ -23,8 +23,12 @@
* Use is subject to license terms.
*/
-#ifndef _DFETCH_H
-#define _DFETCH_H
+/*
+ * Copyright (c) 2014 by Delphix. All rights reserved.
+ */
+
+#ifndef _DMU_ZFETCH_H
+#define _DMU_ZFETCH_H
#include <sys/zfs_context.h>
@@ -36,41 +40,37 @@ extern unsigned long zfetch_array_rd_sz;
struct dnode; /* so we can reference dnode */
-typedef enum zfetch_dirn {
- ZFETCH_FORWARD = 1, /* prefetch increasing block numbers */
- ZFETCH_BACKWARD = -1 /* prefetch decreasing block numbers */
-} zfetch_dirn_t;
-
typedef struct zstream {
- uint64_t zst_offset; /* offset of starting block in range */
- uint64_t zst_len; /* length of range, in blocks */
- zfetch_dirn_t zst_direction; /* direction of prefetch */
- uint64_t zst_stride; /* length of stride, in blocks */
- uint64_t zst_ph_offset; /* prefetch offset, in blocks */
- uint64_t zst_cap; /* prefetch limit (cap), in blocks */
- kmutex_t zst_lock; /* protects stream */
- clock_t zst_last; /* lbolt of last prefetch */
- list_node_t zst_node; /* next zstream here */
+ uint64_t zs_blkid; /* expect next access at this blkid */
+ uint64_t zs_pf_blkid; /* next block to prefetch */
+
+ /*
+ * We will next prefetch the L1 indirect block of this level-0
+ * block id.
+ */
+ uint64_t zs_ipf_blkid;
+
+ kmutex_t zs_lock; /* protects stream */
+ hrtime_t zs_atime; /* time last prefetch issued */
+ list_node_t zs_node; /* link for zf_stream */
} zstream_t;
typedef struct zfetch {
krwlock_t zf_rwlock; /* protects zfetch structure */
- list_t zf_stream; /* AVL tree of zstream_t's */
+ list_t zf_stream; /* list of zstream_t's */
struct dnode *zf_dnode; /* dnode that owns this zfetch */
- uint32_t zf_stream_cnt; /* # of active streams */
- uint64_t zf_alloc_fail; /* # of failed attempts to alloc strm */
} zfetch_t;
void zfetch_init(void);
void zfetch_fini(void);
void dmu_zfetch_init(zfetch_t *, struct dnode *);
-void dmu_zfetch_rele(zfetch_t *);
-void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, int);
+void dmu_zfetch_fini(zfetch_t *);
+void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t);
#ifdef __cplusplus
}
#endif
-#endif /* _DFETCH_H */
+#endif /* _DMU_ZFETCH_H */
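
The rewritten prefetcher replaces the old stride detector with simple
forward streams: zs_blkid is the next access the stream expects,
zs_pf_blkid the data prefetch frontier, and zs_ipf_blkid the indirect-block
frontier. A hypothetical call site in a read path might look like:

    /* report an access of nblks blocks starting at blkid; B_TRUE
     * presumably requests the data blocks themselves, not just the
     * indirect blocks needed to address them */
    dmu_zfetch(&dn->dn_zfetch, blkid, nblks, B_TRUE);
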
diff --git a/zfs/include/sys/dnode.h b/zfs/include/sys/dnode.h
index 50e01155903a..c7efe5593566 100644
--- a/zfs/include/sys/dnode.h
+++ b/zfs/include/sys/dnode.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -35,6 +35,7 @@
#include <sys/refcount.h>
#include <sys/dmu_zfetch.h>
#include <sys/zrlock.h>
+#include <sys/multilist.h>
#ifdef __cplusplus
extern "C" {
@@ -57,8 +58,14 @@ extern "C" {
* Fixed constants.
*/
#define DNODE_SHIFT 9 /* 512 bytes */
-#define DN_MIN_INDBLKSHIFT 10 /* 1k */
-#define DN_MAX_INDBLKSHIFT 14 /* 16k */
+#define DN_MIN_INDBLKSHIFT 12 /* 4k */
+/*
+ * If we ever increase this value beyond 20, we need to revisit all logic that
+ * does x << (level * epbs) to handle overflow. With a 1M indirect block
+ * size, 4 levels of indirect blocks would not be able to guarantee
+ * addressing an entire object, so 5 levels will be used, but
+ * 5 * (20 - 7) = 65 shift bits would overflow a 64-bit value.
+ */
+#define DN_MAX_INDBLKSHIFT 17 /* 128k */
#define DNODE_BLOCK_SHIFT 14 /* 16k */
#define DNODE_CORE_SIZE 64 /* 64 bytes for dnode sans blkptrs */
#define DN_MAX_OBJECT_SHIFT 48 /* 256 trillion (zfs_fid_t limit) */
@@ -79,21 +86,39 @@ extern "C" {
/*
* Derived constants.
*/
-#define DNODE_SIZE (1 << DNODE_SHIFT)
-#define DN_MAX_NBLKPTR ((DNODE_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT)
-#define DN_MAX_BONUSLEN (DNODE_SIZE - DNODE_CORE_SIZE - (1 << SPA_BLKPTRSHIFT))
+#define DNODE_MIN_SIZE (1 << DNODE_SHIFT)
+#define DNODE_MAX_SIZE (1 << DNODE_BLOCK_SHIFT)
+#define DNODE_BLOCK_SIZE (1 << DNODE_BLOCK_SHIFT)
+#define DNODE_MIN_SLOTS (DNODE_MIN_SIZE >> DNODE_SHIFT)
+#define DNODE_MAX_SLOTS (DNODE_MAX_SIZE >> DNODE_SHIFT)
+#define DN_BONUS_SIZE(dnsize) ((dnsize) - DNODE_CORE_SIZE - \
+ (1 << SPA_BLKPTRSHIFT))
+#define DN_SLOTS_TO_BONUSLEN(slots) DN_BONUS_SIZE((slots) << DNODE_SHIFT)
+#define DN_OLD_MAX_BONUSLEN (DN_BONUS_SIZE(DNODE_MIN_SIZE))
+#define DN_MAX_NBLKPTR ((DNODE_MIN_SIZE - DNODE_CORE_SIZE) >> SPA_BLKPTRSHIFT)
#define DN_MAX_OBJECT (1ULL << DN_MAX_OBJECT_SHIFT)
-#define DN_ZERO_BONUSLEN (DN_MAX_BONUSLEN + 1)
+#define DN_ZERO_BONUSLEN (DN_BONUS_SIZE(DNODE_MAX_SIZE) + 1)
#define DN_KILL_SPILLBLK (1)
+#define DN_SLOT_UNINIT ((void *)NULL) /* Uninitialized */
+#define DN_SLOT_FREE ((void *)1UL) /* Free slot */
+#define DN_SLOT_ALLOCATED ((void *)2UL) /* Allocated slot */
+#define DN_SLOT_INTERIOR ((void *)3UL) /* Interior allocated slot */
+#define DN_SLOT_IS_PTR(dn) ((void *)dn > DN_SLOT_INTERIOR)
+#define DN_SLOT_IS_VALID(dn) ((void *)dn != NULL)
+
#define DNODES_PER_BLOCK_SHIFT (DNODE_BLOCK_SHIFT - DNODE_SHIFT)
#define DNODES_PER_BLOCK (1ULL << DNODES_PER_BLOCK_SHIFT)
+
+/*
+ * This is inaccurate if the indblkshift of the particular object is not the
+ * max. But it's only used by userland to calculate the zvol reservation.
+ */
#define DNODES_PER_LEVEL_SHIFT (DN_MAX_INDBLKSHIFT - SPA_BLKPTRSHIFT)
#define DNODES_PER_LEVEL (1ULL << DNODES_PER_LEVEL_SHIFT)
-/* The +2 here is a cheesy way to round up */
-#define DN_MAX_LEVELS (2 + ((DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT) / \
- (DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT)))
+#define DN_MAX_LEVELS (DIV_ROUND_UP(DN_MAX_OFFSET_SHIFT - SPA_MINBLOCKSHIFT, \
+ DN_MIN_INDBLKSHIFT - SPA_BLKPTRSHIFT) + 1)
#define DN_BONUS(dnp) ((void*)((dnp)->dn_bonus + \
(((dnp)->dn_nblkptr - 1) * sizeof (blkptr_t))))
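
Worked numbers for the new DN_MAX_LEVELS, assuming DN_MAX_OFFSET_SHIFT = 64,
SPA_MINBLOCKSHIFT = 9 and SPA_BLKPTRSHIFT = 7 per definitions elsewhere in
these headers:

    DIV_ROUND_UP(64 - 9, 12 - 7) + 1 = DIV_ROUND_UP(55, 5) + 1 = 11 + 1 = 12

The old "+2" rounding would yield 13 here, because 55 / 5 divides evenly and
the extra +1 built into the +2 was only needed to round up inexact divisions.
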
@@ -114,11 +139,14 @@ enum dnode_dirtycontext {
};
/* Is dn_used in bytes? if not, it's in multiples of SPA_MINBLOCKSIZE */
-#define DNODE_FLAG_USED_BYTES (1<<0)
-#define DNODE_FLAG_USERUSED_ACCOUNTED (1<<1)
+#define DNODE_FLAG_USED_BYTES (1 << 0)
+#define DNODE_FLAG_USERUSED_ACCOUNTED (1 << 1)
/* Does dnode have a SA spill blkptr in bonus? */
-#define DNODE_FLAG_SPILL_BLKPTR (1<<2)
+#define DNODE_FLAG_SPILL_BLKPTR (1 << 2)
+
+/* User/Group dnode accounting */
+#define DNODE_FLAG_USEROBJUSED_ACCOUNTED (1 << 3)
typedef struct dnode_phys {
uint8_t dn_type; /* dmu_object_type_t */
@@ -131,7 +159,8 @@ typedef struct dnode_phys {
uint8_t dn_flags; /* DNODE_FLAG_* */
uint16_t dn_datablkszsec; /* data block size in 512b sectors */
uint16_t dn_bonuslen; /* length of dn_bonus */
- uint8_t dn_pad2[4];
+ uint8_t dn_extra_slots; /* # of subsequent slots consumed */
+ uint8_t dn_pad2[3];
/* accounting is protected by dn_dirty_mtx */
uint64_t dn_maxblkid; /* largest allocated block ID */
@@ -140,8 +169,11 @@ typedef struct dnode_phys {
uint64_t dn_pad3[4];
/*
- * The tail region is 448 bytes, and there are three ways to
- * look at it.
+ * The tail region is 448 bytes for a 512 byte dnode, and
+ * correspondingly larger for larger dnode sizes. The spill
+ * block pointer, when present, is always at the end of the tail
+ * region. There are three ways this space may be used, using
+ * a 512 byte dnode for this diagram:
*
* 0 64 128 192 256 320 384 448 (offset)
* +---------------+---------------+---------------+-------+
@@ -149,24 +181,28 @@ typedef struct dnode_phys {
* +---------------+---------------+---------------+-------+
* | dn_blkptr[0] | dn_bonus[0..319] |
* +---------------+-----------------------+---------------+
- * | dn_blkptr[0] | / | dn_spill |
+ * | dn_blkptr[0] | dn_bonus[0..191] | dn_spill |
* +---------------+-----------------------+---------------+
*/
union {
- blkptr_t dn_blkptr[1+DN_MAX_BONUSLEN/sizeof (blkptr_t)];
+ blkptr_t dn_blkptr[1+DN_OLD_MAX_BONUSLEN/sizeof (blkptr_t)];
struct {
blkptr_t __dn_ignore1;
- uint8_t dn_bonus[DN_MAX_BONUSLEN];
+ uint8_t dn_bonus[DN_OLD_MAX_BONUSLEN];
};
struct {
blkptr_t __dn_ignore2;
- uint8_t __dn_ignore3[DN_MAX_BONUSLEN-sizeof (blkptr_t)];
+ uint8_t __dn_ignore3[DN_OLD_MAX_BONUSLEN -
+ sizeof (blkptr_t)];
blkptr_t dn_spill;
};
};
} dnode_phys_t;
-typedef struct dnode {
+#define DN_SPILL_BLKPTR(dnp) (blkptr_t *)((char *)(dnp) + \
+ (((dnp)->dn_extra_slots + 1) << DNODE_SHIFT) - (1 << SPA_BLKPTRSHIFT))
+
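
Worked sizes for the variable-size dnode macros, using DNODE_CORE_SIZE = 64
and a 128-byte block pointer (1 << SPA_BLKPTRSHIFT) per this header:

    DN_BONUS_SIZE(512)  = 512  - 64 - 128 = 320  /* matches dn_bonus[0..319] */
    DN_BONUS_SIZE(1024) = 1024 - 64 - 128 = 832

    /* DN_SPILL_BLKPTR on a 1K dnode (dn_extra_slots == 1):
     * (2 << 9) - (1 << 7) = 1024 - 128 = 896, i.e. the spill block
     * pointer always occupies the final 128 bytes of the dnode. */
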
+struct dnode {
/*
* Protects the structure of the dnode, including the number of levels
* of indirection (dn_nlevels), dn_maxblkid, and dn_next_*
@@ -202,6 +238,7 @@ typedef struct dnode {
uint32_t dn_datablksz; /* in bytes */
uint64_t dn_maxblkid;
uint8_t dn_next_type[TXG_SIZE];
+ uint8_t dn_num_slots; /* metadnode slots consumed on disk */
uint8_t dn_next_nblkptr[TXG_SIZE];
uint8_t dn_next_nlevels[TXG_SIZE];
uint8_t dn_next_indblkshift[TXG_SIZE];
@@ -212,11 +249,9 @@ typedef struct dnode {
/* protected by dn_dbufs_mtx; declared here to fill 32-bit hole */
uint32_t dn_dbufs_count; /* count of dn_dbufs */
- /* There are no level-0 blocks of this blkid or higher in dn_dbufs */
- uint64_t dn_unlisted_l0_blkid;
/* protected by os_lock: */
- list_node_t dn_dirty_link[TXG_SIZE]; /* next on dataset's dirty */
+ multilist_node_t dn_dirty_link[TXG_SIZE]; /* next on dataset's dirty */
/* protected by dn_mtx: */
kmutex_t dn_mtx;
@@ -242,7 +277,7 @@ typedef struct dnode {
* duplicate entries, we order the dbufs by an arbitrary value -
* their address in memory. This means that dn_dbufs cannot be used to
* directly look up a dbuf. Instead, callers must use avl_walk, have
- * a reference to the dbuf, or look up a non-existant node with
+ * a reference to the dbuf, or look up a non-existent node with
* db_state = DB_SEARCH (see dbuf_free_range for an example).
*/
avl_tree_t dn_dbufs;
@@ -264,7 +299,7 @@ typedef struct dnode {
/* holds prefetch structure */
struct zfetch dn_zfetch;
-} dnode_t;
+};
/*
* Adds a level of indirection between the dbuf and the dnode to avoid
@@ -299,7 +334,7 @@ void dnode_rm_spill(dnode_t *dn, dmu_tx_t *tx);
int dnode_hold(struct objset *dd, uint64_t object,
void *ref, dnode_t **dnp);
-int dnode_hold_impl(struct objset *dd, uint64_t object, int flag,
+int dnode_hold_impl(struct objset *dd, uint64_t object, int flag, int dn_slots,
void *ref, dnode_t **dnp);
boolean_t dnode_add_ref(dnode_t *dn, void *ref);
void dnode_rele(dnode_t *dn, void *ref);
@@ -307,9 +342,9 @@ void dnode_rele_and_unlock(dnode_t *dn, void *tag);
void dnode_setdirty(dnode_t *dn, dmu_tx_t *tx);
void dnode_sync(dnode_t *dn, dmu_tx_t *tx);
void dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
- dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+ dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx);
void dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
- dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+ dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx);
void dnode_free(dnode_t *dn, dmu_tx_t *tx);
void dnode_byteswap(dnode_phys_t *dnp);
void dnode_buf_byteswap(void *buf, size_t size);
@@ -317,7 +352,6 @@ void dnode_verify(dnode_t *dn);
int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx);
void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx);
void dnode_diduse_space(dnode_t *dn, int64_t space);
-void dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx);
void dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t);
uint64_t dnode_block_freed(dnode_t *dn, uint64_t blkid);
void dnode_init(void);
@@ -327,6 +361,144 @@ int dnode_next_offset(dnode_t *dn, int flags, uint64_t *off,
void dnode_evict_dbufs(dnode_t *dn);
void dnode_evict_bonus(dnode_t *dn);
+#define DNODE_IS_CACHEABLE(_dn) \
+ ((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \
+ (DMU_OT_IS_METADATA((_dn)->dn_type) && \
+ (_dn)->dn_objset->os_primary_cache == ZFS_CACHE_METADATA))
+
+#define DNODE_META_IS_CACHEABLE(_dn) \
+ ((_dn)->dn_objset->os_primary_cache == ZFS_CACHE_ALL || \
+ (_dn)->dn_objset->os_primary_cache == ZFS_CACHE_METADATA)
+
+/*
+ * Used for dnodestats kstat.
+ */
+typedef struct dnode_stats {
+ /*
+ * Number of failed attempts to hold a meta dnode dbuf.
+ */
+ kstat_named_t dnode_hold_dbuf_hold;
+ /*
+ * Number of failed attempts to read a meta dnode dbuf.
+ */
+ kstat_named_t dnode_hold_dbuf_read;
+ /*
+ * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) was able
+ * to hold the requested object number which was allocated. This is
+ * the common case when looking up any allocated object number.
+ */
+ kstat_named_t dnode_hold_alloc_hits;
+ /*
+ * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) was not
+ * able to hold the requested object number because it was not allocated.
+ */
+ kstat_named_t dnode_hold_alloc_misses;
+ /*
+ * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) was not
+ * able to hold the requested object number because the object number
+ * refers to an interior large dnode slot.
+ */
+ kstat_named_t dnode_hold_alloc_interior;
+ /*
+ * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) needed
+ * to retry acquiring slot zrl locks due to contention.
+ */
+ kstat_named_t dnode_hold_alloc_lock_retry;
+ /*
+ * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) did not
+ * need to create the dnode because another thread did so after
+ * dropping the read lock but before acquiring the write lock.
+ */
+ kstat_named_t dnode_hold_alloc_lock_misses;
+ /*
+ * Number of times dnode_hold(..., DNODE_MUST_BE_ALLOCATED) found
+ * a free dnode instantiated by dnode_create() but not yet allocated
+ * by dnode_allocate().
+ */
+ kstat_named_t dnode_hold_alloc_type_none;
+ /*
+ * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) was able
+ * to hold the requested range of free dnode slots.
+ */
+ kstat_named_t dnode_hold_free_hits;
+ /*
+ * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) was not
+ * able to hold the requested range of free dnode slots because
+ * at least one slot was allocated.
+ */
+ kstat_named_t dnode_hold_free_misses;
+ /*
+ * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) was not
+ * able to hold the requested range of free dnode slots because
+ * after acquiring the zrl lock at least one slot was allocated.
+ */
+ kstat_named_t dnode_hold_free_lock_misses;
+ /*
+ * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) needed
+ * to retry acquiring slot zrl locks due to contention.
+ */
+ kstat_named_t dnode_hold_free_lock_retry;
+ /*
+ * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) requested
+ * a range of dnode slots which were held by another thread.
+ */
+ kstat_named_t dnode_hold_free_refcount;
+ /*
+ * Number of times dnode_hold(..., DNODE_MUST_BE_FREE) requested
+ * a range of dnode slots which would overflow the dnode_phys_t.
+ */
+ kstat_named_t dnode_hold_free_overflow;
+ /*
+ * Number of times a dnode_hold(...) was attempted on a dnode
+ * which had already been unlinked in an earlier txg.
+ */
+ kstat_named_t dnode_hold_free_txg;
+ /*
+ * Number of new dnodes allocated by dnode_allocate().
+ */
+ kstat_named_t dnode_allocate;
+ /*
+ * Number of dnodes re-allocated by dnode_reallocate().
+ */
+ kstat_named_t dnode_reallocate;
+ /*
+ * Number of meta dnode dbufs evicted.
+ */
+ kstat_named_t dnode_buf_evict;
+ /*
+ * Number of times dmu_object_alloc*() reached the end of the existing
+ * object ID chunk and advanced to a new one.
+ */
+ kstat_named_t dnode_alloc_next_chunk;
+ /*
+ * Number of times multiple threads attempted to allocate a dnode
+ * from the same block of free dnodes.
+ */
+ kstat_named_t dnode_alloc_race;
+ /*
+ * Number of times dmu_object_alloc*() was forced to advance to the
+ * next meta dnode dbuf due to an error from dmu_object_next().
+ */
+ kstat_named_t dnode_alloc_next_block;
+ /*
+ * Statistics for tracking dnodes which have been moved.
+ */
+ kstat_named_t dnode_move_invalid;
+ kstat_named_t dnode_move_recheck1;
+ kstat_named_t dnode_move_recheck2;
+ kstat_named_t dnode_move_special;
+ kstat_named_t dnode_move_handle;
+ kstat_named_t dnode_move_rwlock;
+ kstat_named_t dnode_move_active;
+} dnode_stats_t;
+
+extern dnode_stats_t dnode_stats;
+
+#define DNODE_STAT_INCR(stat, val) \
+ atomic_add_64(&dnode_stats.stat.value.ui64, (val));
+#define DNODE_STAT_BUMP(stat) \
+ DNODE_STAT_INCR(stat, 1);
+
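
Hypothetical call sites for the counters (the real ones are in dnode.c,
elsewhere in this patch):

    DNODE_STAT_BUMP(dnode_hold_alloc_hits);        /* fast path taken */
    DNODE_STAT_INCR(dnode_hold_free_refcount, 1);  /* slots held elsewhere */

Note that both #defines already end in a semicolon, a quirk worth knowing if
they are ever used in an if/else without braces.
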
#ifdef ZFS_DEBUG
/*
diff --git a/zfs/include/sys/dsl_dataset.h b/zfs/include/sys/dsl_dataset.h
index da6f21c2eeff..1281674bbec2 100644
--- a/zfs/include/sys/dsl_dataset.h
+++ b/zfs/include/sys/dsl_dataset.h
@@ -20,8 +20,8 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -38,6 +38,8 @@
#include <sys/zfs_context.h>
#include <sys/dsl_deadlist.h>
#include <sys/refcount.h>
+#include <sys/rrwlock.h>
+#include <zfeature_common.h>
#ifdef __cplusplus
extern "C" {
@@ -85,10 +87,24 @@ struct dsl_pool;
/*
* This field is present (with value=0) if this dataset may contain large
- * blocks (>128KB). If it is present, then this dataset
- * is counted in the refcount of the SPA_FEATURE_LARGE_BLOCKS feature.
+ * dnodes (>512B). If it is present, then this dataset is counted in the
+ * refcount of the SPA_FEATURE_LARGE_DNODE feature.
*/
-#define DS_FIELD_LARGE_BLOCKS "org.open-zfs:large_blocks"
+#define DS_FIELD_LARGE_DNODE "org.zfsonlinux:large_dnode"
+
+/*
+ * These fields are set on datasets that are in the middle of a resumable
+ * receive, and allow the sender to resume the send if it is interrupted.
+ */
+#define DS_FIELD_RESUME_FROMGUID "com.delphix:resume_fromguid"
+#define DS_FIELD_RESUME_TONAME "com.delphix:resume_toname"
+#define DS_FIELD_RESUME_TOGUID "com.delphix:resume_toguid"
+#define DS_FIELD_RESUME_OBJECT "com.delphix:resume_object"
+#define DS_FIELD_RESUME_OFFSET "com.delphix:resume_offset"
+#define DS_FIELD_RESUME_BYTES "com.delphix:resume_bytes"
+#define DS_FIELD_RESUME_LARGEBLOCK "com.delphix:resume_largeblockok"
+#define DS_FIELD_RESUME_EMBEDOK "com.delphix:resume_embedok"
+#define DS_FIELD_RESUME_COMPRESSOK "com.delphix:resume_compressok"
/*
* DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
@@ -134,6 +150,7 @@ typedef struct dsl_dataset_phys {
typedef struct dsl_dataset {
dmu_buf_user_t ds_dbu;
+ rrwlock_t ds_bp_rwlock; /* Protects ds_phys->ds_bp */
/* Immutable: */
struct dsl_dir *ds_dir;
@@ -145,8 +162,6 @@ typedef struct dsl_dataset {
/* only used in syncing context, only valid for non-snapshots: */
struct dsl_dataset *ds_prev;
uint64_t ds_bookmarks; /* DMU_OTN_ZAP_METADATA */
- boolean_t ds_large_blocks;
- boolean_t ds_need_large_blocks;
/* has internal locking: */
dsl_deadlist_t ds_deadlist;
@@ -185,8 +200,31 @@ typedef struct dsl_dataset {
kmutex_t ds_sendstream_lock;
list_t ds_sendstreams;
+ /*
+ * When in the middle of a resumable receive, tracks how much
+ * progress we have made.
+ */
+ uint64_t ds_resume_object[TXG_SIZE];
+ uint64_t ds_resume_offset[TXG_SIZE];
+ uint64_t ds_resume_bytes[TXG_SIZE];
+
+ /* Protected by our dsl_dir's dd_lock */
+ list_t ds_prop_cbs;
+
+ /*
+ * For ZFEATURE_FLAG_PER_DATASET features, set if this dataset
+ * uses this feature.
+ */
+ uint8_t ds_feature_inuse[SPA_FEATURES];
+
+ /*
+ * Set if we need to activate the feature on this dataset this txg
+ * (used only in syncing context).
+ */
+ uint8_t ds_feature_activation_needed[SPA_FEATURES];
+
/* Protected by ds_lock; keep at end of struct for better locality */
- char ds_snapname[MAXNAMELEN];
+ char ds_snapname[ZFS_MAX_DATASET_NAME_LEN];
} dsl_dataset_t;
static inline dsl_dataset_phys_t *
@@ -222,6 +260,7 @@ void dsl_dataset_disown(dsl_dataset_t *ds, void *tag);
void dsl_dataset_name(dsl_dataset_t *ds, char *name);
boolean_t dsl_dataset_tryown(dsl_dataset_t *ds, void *tag);
int dsl_dataset_namelen(dsl_dataset_t *ds);
+boolean_t dsl_dataset_has_owner(dsl_dataset_t *ds);
uint64_t dsl_dataset_create_sync(dsl_dir_t *pds, const char *lastname,
dsl_dataset_t *origin, uint64_t flags, cred_t *, dmu_tx_t *);
uint64_t dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
@@ -234,27 +273,24 @@ int dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
minor_t cleanup_minor, const char *htag);
blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
-void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
spa_t *dsl_dataset_get_spa(dsl_dataset_t *ds);
boolean_t dsl_dataset_modified_since_snap(dsl_dataset_t *ds,
dsl_dataset_t *snap);
-void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx);
+void dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx);
+void dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx);
void dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp,
dmu_tx_t *tx);
int dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp,
dmu_tx_t *tx, boolean_t async);
-boolean_t dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
- uint64_t blk_birth);
-uint64_t dsl_dataset_prev_snap_txg(dsl_dataset_t *ds);
int dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name,
uint64_t *value);
void dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx);
-void dsl_dataset_stats(dsl_dataset_t *os, nvlist_t *nv);
+void dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv);
void dsl_dataset_fast_stat(dsl_dataset_t *ds, dmu_objset_stats_t *stat);
void dsl_dataset_space(dsl_dataset_t *ds,
uint64_t *refdbytesp, uint64_t *availbytesp,
@@ -265,8 +301,6 @@ int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
boolean_t dsl_dataset_is_dirty(dsl_dataset_t *ds);
-int dsl_dataset_activate_large_blocks(const char *dsname);
-void dsl_dataset_activate_large_blocks_sync_impl(uint64_t dsobj, dmu_tx_t *tx);
int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
@@ -304,15 +338,21 @@ int dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx,
void dsl_dataset_set_refreservation_sync_impl(dsl_dataset_t *ds,
zprop_source_t source, uint64_t value, dmu_tx_t *tx);
void dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx);
-int dsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result);
+boolean_t dsl_dataset_is_zapified(dsl_dataset_t *ds);
+boolean_t dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds);
+int dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner,
+ nvlist_t *result);
+
+void dsl_dataset_deactivate_feature(uint64_t dsobj,
+ spa_feature_t f, dmu_tx_t *tx);
#ifdef ZFS_DEBUG
#define dprintf_ds(ds, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
- char *__ds_name = kmem_alloc(MAXNAMELEN, KM_SLEEP); \
+ char *__ds_name = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); \
dsl_dataset_name(ds, __ds_name); \
dprintf("ds=%s " fmt, __ds_name, __VA_ARGS__); \
- kmem_free(__ds_name, MAXNAMELEN); \
+ kmem_free(__ds_name, ZFS_MAX_DATASET_NAME_LEN); \
} \
_NOTE(CONSTCOND) } while (0)
#else
diff --git a/zfs/include/sys/dsl_deleg.h b/zfs/include/sys/dsl_deleg.h
index 59e8e055551a..d399d1da973b 100644
--- a/zfs/include/sys/dsl_deleg.h
+++ b/zfs/include/sys/dsl_deleg.h
@@ -51,8 +51,12 @@ extern "C" {
#define ZFS_DELEG_PERM_VSCAN "vscan"
#define ZFS_DELEG_PERM_USERQUOTA "userquota"
#define ZFS_DELEG_PERM_GROUPQUOTA "groupquota"
+#define ZFS_DELEG_PERM_USEROBJQUOTA "userobjquota"
+#define ZFS_DELEG_PERM_GROUPOBJQUOTA "groupobjquota"
#define ZFS_DELEG_PERM_USERUSED "userused"
#define ZFS_DELEG_PERM_GROUPUSED "groupused"
+#define ZFS_DELEG_PERM_USEROBJUSED "userobjused"
+#define ZFS_DELEG_PERM_GROUPOBJUSED "groupobjused"
#define ZFS_DELEG_PERM_HOLD "hold"
#define ZFS_DELEG_PERM_RELEASE "release"
#define ZFS_DELEG_PERM_DIFF "diff"
diff --git a/zfs/include/sys/dsl_dir.h b/zfs/include/sys/dsl_dir.h
index 55f3a8e5baa9..69b0b6a53559 100644
--- a/zfs/include/sys/dsl_dir.h
+++ b/zfs/include/sys/dsl_dir.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -102,7 +102,7 @@ struct dsl_dir {
/* Protected by dd_lock */
kmutex_t dd_lock;
- list_t dd_prop_cbs; /* list of dsl_prop_cb_record_t's */
+ list_t dd_props; /* list of dsl_prop_record_t's */
timestruc_t dd_snap_cmtime; /* last time snapshot namespace changed */
uint64_t dd_origin_txg;
@@ -112,7 +112,7 @@ struct dsl_dir {
int64_t dd_space_towrite[TXG_SIZE];
/* protected by dd_lock; keep at end of struct for better locality */
- char dd_myname[MAXNAMELEN];
+ char dd_myname[ZFS_MAX_DATASET_NAME_LEN];
};
static inline dsl_dir_phys_t *
@@ -137,8 +137,7 @@ uint64_t dsl_dir_space_available(dsl_dir_t *dd,
void dsl_dir_dirty(dsl_dir_t *dd, dmu_tx_t *tx);
void dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx);
int dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t mem,
- uint64_t asize, uint64_t fsize, uint64_t usize, void **tr_cookiep,
- dmu_tx_t *tx);
+ uint64_t asize, boolean_t netfree, void **tr_cookiep, dmu_tx_t *tx);
void dsl_dir_tempreserve_clear(void *tr_cookie, dmu_tx_t *tx);
void dsl_dir_willuse_space(dsl_dir_t *dd, int64_t space, dmu_tx_t *tx);
void dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
@@ -176,11 +175,10 @@ boolean_t dsl_dir_is_zapified(dsl_dir_t *dd);
#ifdef ZFS_DEBUG
#define dprintf_dd(dd, fmt, ...) do { \
if (zfs_flags & ZFS_DEBUG_DPRINTF) { \
- char *__ds_name = kmem_alloc(MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, \
- KM_SLEEP); \
+ char *__ds_name = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); \
dsl_dir_name(dd, __ds_name); \
dprintf("dd=%s " fmt, __ds_name, __VA_ARGS__); \
- kmem_free(__ds_name, MAXNAMELEN + strlen(MOS_DIR_NAME) + 1); \
+ kmem_free(__ds_name, ZFS_MAX_DATASET_NAME_LEN); \
} \
_NOTE(CONSTCOND) } while (0)
#else
diff --git a/zfs/include/sys/dsl_pool.h b/zfs/include/sys/dsl_pool.h
index 48b12e8eb134..d2dabda6df19 100644
--- a/zfs/include/sys/dsl_pool.h
+++ b/zfs/include/sys/dsl_pool.h
@@ -20,7 +20,8 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _SYS_DSL_POOL_H
@@ -37,6 +38,7 @@
#include <sys/bpobj.h>
#include <sys/bptree.h>
#include <sys/rrwlock.h>
+#include <sys/mmp.h>
#ifdef __cplusplus
extern "C" {
@@ -106,6 +108,7 @@ typedef struct dsl_pool {
kcondvar_t dp_spaceavail_cv;
uint64_t dp_dirty_pertxg[TXG_SIZE];
uint64_t dp_dirty_total;
+ uint64_t dp_long_free_dirty_pertxg[TXG_SIZE];
uint64_t dp_mos_used_delta;
uint64_t dp_mos_compressed_delta;
uint64_t dp_mos_uncompressed_delta;
@@ -122,6 +125,7 @@ typedef struct dsl_pool {
txg_list_t dp_dirty_zilogs;
txg_list_t dp_dirty_dirs;
txg_list_t dp_sync_tasks;
+ taskq_t *dp_sync_taskq;
/*
* Protects administrative changes (properties, namespace)
diff --git a/zfs/include/sys/dsl_prop.h b/zfs/include/sys/dsl_prop.h
index 5fe18d6a7c55..62ef0ba67a6c 100644
--- a/zfs/include/sys/dsl_prop.h
+++ b/zfs/include/sys/dsl_prop.h
@@ -41,10 +41,17 @@ struct dsl_dir;
/* The callback func may not call into the DMU or DSL! */
typedef void (dsl_prop_changed_cb_t)(void *arg, uint64_t newval);
+typedef struct dsl_prop_record {
+ list_node_t pr_node; /* link on dd_props */
+ const char *pr_propname;
+ list_t pr_cbs;
+} dsl_prop_record_t;
+
typedef struct dsl_prop_cb_record {
- list_node_t cbr_node; /* link on dd_prop_cbs */
+ list_node_t cbr_pr_node; /* link on pr_cbs */
+ list_node_t cbr_ds_node; /* link on ds_prop_cbs */
+ dsl_prop_record_t *cbr_pr;
struct dsl_dataset *cbr_ds;
- const char *cbr_propname;
dsl_prop_changed_cb_t *cbr_func;
void *cbr_arg;
} dsl_prop_cb_record_t;
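
Design note: callbacks were previously kept on one flat dd_prop_cbs list
keyed by property name; hanging them off a per-property dsl_prop_record_t
means notifying the watchers of one property no longer walks every
registration on the dir. A hedged sketch of the lookup this enables (the
real code is in dsl_prop.c, not shown in this hunk):

    dsl_prop_record_t *pr;
    for (pr = list_head(&dd->dd_props); pr != NULL;
        pr = list_next(&dd->dd_props, pr)) {
        if (strcmp(pr->pr_propname, propname) == 0)
            break;      /* then iterate only pr->pr_cbs */
    }
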
@@ -54,10 +61,13 @@ typedef struct dsl_props_arg {
zprop_source_t pa_source;
} dsl_props_arg_t;
+void dsl_prop_init(dsl_dir_t *dd);
+void dsl_prop_fini(dsl_dir_t *dd);
int dsl_prop_register(struct dsl_dataset *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg);
int dsl_prop_unregister(struct dsl_dataset *ds, const char *propname,
dsl_prop_changed_cb_t *callback, void *cbarg);
+void dsl_prop_unregister_all(struct dsl_dataset *ds, void *cbarg);
void dsl_prop_notify_all(struct dsl_dir *dd);
boolean_t dsl_prop_hascb(struct dsl_dataset *ds);
diff --git a/zfs/include/sys/dsl_scan.h b/zfs/include/sys/dsl_scan.h
index 44a11ba57207..5303d9a699bb 100644
--- a/zfs/include/sys/dsl_scan.h
+++ b/zfs/include/sys/dsl_scan.h
@@ -21,6 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2017 Datto Inc.
*/
#ifndef _SYS_DSL_SCAN_H
@@ -70,6 +71,7 @@ typedef struct dsl_scan_phys {
typedef enum dsl_scan_flags {
DSF_VISIT_DS_AGAIN = 1<<0,
+ DSF_SCRUB_PAUSED = 1<<1,
} dsl_scan_flags_t;
#define DSL_SCAN_FLAGS_MASK (DSF_VISIT_DS_AGAIN)
@@ -84,8 +86,8 @@ typedef enum dsl_scan_flags {
*
* The following members of this structure direct the behavior of the scan:
*
- * scn_pausing - a scan that cannot be completed in a single txg or
- * has exceeded its allotted time will need to pause.
+ * scn_suspending - a scan that cannot be completed in a single txg or
+ * has exceeded its allotted time will need to suspend.
* When this flag is set the scanner will stop traversing
* the pool and write out the current state to disk.
*
@@ -107,7 +109,7 @@ typedef enum dsl_scan_flags {
typedef struct dsl_scan {
struct dsl_pool *scn_dp;
- boolean_t scn_pausing;
+ boolean_t scn_suspending;
uint64_t scn_restart_txg;
uint64_t scn_done_txg;
uint64_t scn_sync_start_time;
@@ -117,8 +119,6 @@ typedef struct dsl_scan {
boolean_t scn_is_bptree;
boolean_t scn_async_destroying;
boolean_t scn_async_stalled;
-
- /* for debugging / information */
uint64_t scn_visited_this_txg;
dsl_scan_phys_t scn_phys;
@@ -129,6 +129,8 @@ void dsl_scan_fini(struct dsl_pool *dp);
void dsl_scan_sync(struct dsl_pool *, dmu_tx_t *);
int dsl_scan_cancel(struct dsl_pool *);
int dsl_scan(struct dsl_pool *, pool_scan_func_t);
+boolean_t dsl_scan_scrubbing(const struct dsl_pool *dp);
+int dsl_scrub_set_pause_resume(const struct dsl_pool *dp, pool_scrub_cmd_t cmd);
void dsl_resilver_restart(struct dsl_pool *, uint64_t txg);
boolean_t dsl_scan_resilvering(struct dsl_pool *dp);
boolean_t dsl_dataset_unstable(struct dsl_dataset *ds);
@@ -139,6 +141,7 @@ void dsl_scan_ds_snapshotted(struct dsl_dataset *ds, struct dmu_tx *tx);
void dsl_scan_ds_clone_swapped(struct dsl_dataset *ds1, struct dsl_dataset *ds2,
struct dmu_tx *tx);
boolean_t dsl_scan_active(dsl_scan_t *scn);
+boolean_t dsl_scan_is_paused_scrub(const dsl_scan_t *scn);
#ifdef __cplusplus
}
diff --git a/zfs/include/sys/edonr.h b/zfs/include/sys/edonr.h
new file mode 100644
index 000000000000..79b7cd8c75b8
--- /dev/null
+++ b/zfs/include/sys/edonr.h
@@ -0,0 +1,98 @@
+/*
+ * IDI,NTNU
+ *
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Copyright (C) 2009, 2010, Jorn Amundsen <jorn.amundsen at ntnu.no>
+ *
+ * Tweaked Edon-R implementation for SUPERCOP, based on NIST API.
+ *
+ * $Id: edonr.h 517 2013-02-17 20:34:39Z joern $
+ */
+/*
+ * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved
+ */
+
+#ifndef _SYS_EDONR_H_
+#define _SYS_EDONR_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _KERNEL
+#include <sys/types.h>
+#else
+#include <stdint.h> /* uint32_t... */
+#include <stdlib.h> /* size_t ... */
+#endif
+
+/*
+ * EdonR allows EdonRUpdate() to be called consecutively only if the total
+ * length of stored unprocessed data and the newly supplied data is less
+ * than or equal to the BLOCK_SIZE on which the compression function
+ * operates. Otherwise an assertion failure is triggered.
+ */
+
+/* Specific algorithm definitions */
+#define EdonR224_DIGEST_SIZE 28
+#define EdonR224_BLOCK_SIZE 64
+#define EdonR256_DIGEST_SIZE 32
+#define EdonR256_BLOCK_SIZE 64
+#define EdonR384_DIGEST_SIZE 48
+#define EdonR384_BLOCK_SIZE 128
+#define EdonR512_DIGEST_SIZE 64
+#define EdonR512_BLOCK_SIZE 128
+
+#define EdonR256_BLOCK_BITSIZE 512
+#define EdonR512_BLOCK_BITSIZE 1024
+
+typedef struct {
+ uint32_t DoublePipe[16];
+ uint8_t LastPart[EdonR256_BLOCK_SIZE * 2];
+} EdonRData256;
+typedef struct {
+ uint64_t DoublePipe[16];
+ uint8_t LastPart[EdonR512_BLOCK_SIZE * 2];
+} EdonRData512;
+
+typedef struct {
+ size_t hashbitlen;
+
+ /* + algorithm specific parameters */
+ int unprocessed_bits;
+ uint64_t bits_processed;
+ union {
+ EdonRData256 p256[1];
+ EdonRData512 p512[1];
+ } pipe[1];
+} EdonRState;
+
+void EdonRInit(EdonRState *state, size_t hashbitlen);
+void EdonRUpdate(EdonRState *state, const uint8_t *data, size_t databitlen);
+void EdonRFinal(EdonRState *state, uint8_t *hashval);
+void EdonRHash(size_t hashbitlen, const uint8_t *data, size_t databitlen,
+ uint8_t *hashval);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_EDONR_H_ */
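
A usage sketch for the API this header declares (illustration only; buf and
buflen are hypothetical). Note that, per the databitlen parameter names of
this NIST-style API, the length arguments are in bits, not bytes:

    uint8_t digest[EdonR512_DIGEST_SIZE];
    EdonRState ctx;

    EdonRInit(&ctx, 512);
    EdonRUpdate(&ctx, buf, buflen * 8);
    EdonRFinal(&ctx, digest);

    /* or equivalently in one shot: */
    EdonRHash(512, buf, buflen * 8, digest);
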
diff --git a/zfs/include/sys/efi_partition.h b/zfs/include/sys/efi_partition.h
index ee367a574e46..684b3e588a16 100644
--- a/zfs/include/sys/efi_partition.h
+++ b/zfs/include/sys/efi_partition.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _SYS_EFI_PARTITION_H
@@ -86,56 +87,192 @@ typedef struct efi_gpe_Attrs {
* 6a945a3b-1dd2-11b2-99a6-080020736631 V_CACHE
*/
-#define EFI_UNUSED { 0x00000000, 0x0000, 0x0000, 0x00, 0x00, \
- { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }
-#define EFI_RESV1 { 0x6a96237f, 0x1dd2, 0x11b2, 0x99, 0xa6, \
- { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
-#define EFI_BOOT { 0x6a82cb45, 0x1dd2, 0x11b2, 0x99, 0xa6, \
- { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
-#define EFI_ROOT { 0x6a85cf4d, 0x1dd2, 0x11b2, 0x99, 0xa6, \
- { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
-#define EFI_SWAP { 0x6a87c46f, 0x1dd2, 0x11b2, 0x99, 0xa6, \
- { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
-#define EFI_USR { 0x6a898cc3, 0x1dd2, 0x11b2, 0x99, 0xa6, \
- { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
-#define EFI_BACKUP { 0x6a8b642b, 0x1dd2, 0x11b2, 0x99, 0xa6, \
- { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
-#define EFI_RESV2 { 0x6a8d2ac7, 0x1dd2, 0x11b2, 0x99, 0xa6, \
- { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
-#define EFI_VAR { 0x6a8ef2e9, 0x1dd2, 0x11b2, 0x99, 0xa6, \
- { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
-#define EFI_HOME { 0x6a90ba39, 0x1dd2, 0x11b2, 0x99, 0xa6, \
- { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
-#define EFI_ALTSCTR { 0x6a9283a5, 0x1dd2, 0x11b2, 0x99, 0xa6, \
- { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
-#define EFI_RESERVED { 0x6a945a3b, 0x1dd2, 0x11b2, 0x99, 0xa6, \
- { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
-#define EFI_SYSTEM { 0xC12A7328, 0xF81F, 0x11d2, 0xBA, 0x4B, \
- { 0x00, 0xA0, 0xC9, 0x3E, 0xC9, 0x3B } }
-#define EFI_LEGACY_MBR { 0x024DEE41, 0x33E7, 0x11d3, 0x9D, 0x69, \
- { 0x00, 0x08, 0xC7, 0x81, 0xF3, 0x9F } }
-#define EFI_SYMC_PUB { 0x6a9630d1, 0x1dd2, 0x11b2, 0x99, 0xa6, \
- { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
-#define EFI_SYMC_CDS { 0x6a980767, 0x1dd2, 0x11b2, 0x99, 0xa6, \
- { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
-#define EFI_MSFT_RESV { 0xE3C9E316, 0x0B5C, 0x4DB8, 0x81, 0x7D, \
- { 0xF9, 0x2D, 0xF0, 0x02, 0x15, 0xAE } }
-#define EFI_DELL_BASIC { 0xebd0a0a2, 0xb9e5, 0x4433, 0x87, 0xc0, \
- { 0x68, 0xb6, 0xb7, 0x26, 0x99, 0xc7 } }
-#define EFI_DELL_RAID { 0xa19d880f, 0x05fc, 0x4d3b, 0xa0, 0x06, \
- { 0x74, 0x3f, 0x0f, 0x84, 0x91, 0x1e } }
-#define EFI_DELL_SWAP { 0x0657fd6d, 0xa4ab, 0x43c4, 0x84, 0xe5, \
- { 0x09, 0x33, 0xc8, 0x4b, 0x4f, 0x4f } }
-#define EFI_DELL_LVM { 0xe6d6d379, 0xf507, 0x44c2, 0xa2, 0x3c, \
- { 0x23, 0x8f, 0x2a, 0x3d, 0xf9, 0x28 } }
-#define EFI_DELL_RESV { 0x8da63339, 0x0007, 0x60c0, 0xc4, 0x36, \
- { 0x08, 0x3a, 0xc8, 0x23, 0x09, 0x08 } }
-#define EFI_AAPL_HFS { 0x48465300, 0x0000, 0x11aa, 0xaa, 0x11, \
- { 0x00, 0x30, 0x65, 0x43, 0xec, 0xac } }
-#define EFI_AAPL_UFS { 0x55465300, 0x0000, 0x11aa, 0xaa, 0x11, \
- { 0x00, 0x30, 0x65, 0x43, 0xec, 0xac } }
-
-/* minimum # of bytes for partition table entires, per EFI spec */
+#define EFI_UNUSED { 0x00000000, 0x0000, 0x0000, 0x00, 0x00, \
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } }
+#define EFI_RESV1 { 0x6a96237f, 0x1dd2, 0x11b2, 0x99, 0xa6, \
+ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
+#define EFI_BOOT { 0x6a82cb45, 0x1dd2, 0x11b2, 0x99, 0xa6, \
+ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
+#define EFI_ROOT { 0x6a85cf4d, 0x1dd2, 0x11b2, 0x99, 0xa6, \
+ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
+#define EFI_SWAP { 0x6a87c46f, 0x1dd2, 0x11b2, 0x99, 0xa6, \
+ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
+#define EFI_USR { 0x6a898cc3, 0x1dd2, 0x11b2, 0x99, 0xa6, \
+ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
+#define EFI_BACKUP { 0x6a8b642b, 0x1dd2, 0x11b2, 0x99, 0xa6, \
+ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
+#define EFI_RESV2 { 0x6a8d2ac7, 0x1dd2, 0x11b2, 0x99, 0xa6, \
+ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
+#define EFI_VAR { 0x6a8ef2e9, 0x1dd2, 0x11b2, 0x99, 0xa6, \
+ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
+#define EFI_HOME { 0x6a90ba39, 0x1dd2, 0x11b2, 0x99, 0xa6, \
+ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
+#define EFI_ALTSCTR { 0x6a9283a5, 0x1dd2, 0x11b2, 0x99, 0xa6, \
+ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
+#define EFI_RESERVED { 0x6a945a3b, 0x1dd2, 0x11b2, 0x99, 0xa6, \
+ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
+#define EFI_SYSTEM { 0xC12A7328, 0xF81F, 0x11d2, 0xBA, 0x4B, \
+ { 0x00, 0xA0, 0xC9, 0x3E, 0xC9, 0x3B } }
+#define EFI_LEGACY_MBR { 0x024DEE41, 0x33E7, 0x11d3, 0x9D, 0x69, \
+ { 0x00, 0x08, 0xC7, 0x81, 0xF3, 0x9F } }
+#define EFI_SYMC_PUB { 0x6a9630d1, 0x1dd2, 0x11b2, 0x99, 0xa6, \
+ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
+#define EFI_SYMC_CDS { 0x6a980767, 0x1dd2, 0x11b2, 0x99, 0xa6, \
+ { 0x08, 0x00, 0x20, 0x73, 0x66, 0x31 } }
+#define EFI_MSFT_RESV { 0xE3C9E316, 0x0B5C, 0x4DB8, 0x81, 0x7D, \
+ { 0xF9, 0x2D, 0xF0, 0x02, 0x15, 0xAE } }
+#define EFI_DELL_BASIC { 0xebd0a0a2, 0xb9e5, 0x4433, 0x87, 0xc0, \
+ { 0x68, 0xb6, 0xb7, 0x26, 0x99, 0xc7 } }
+#define EFI_DELL_RAID { 0xa19d880f, 0x05fc, 0x4d3b, 0xa0, 0x06, \
+ { 0x74, 0x3f, 0x0f, 0x84, 0x91, 0x1e } }
+#define EFI_DELL_SWAP { 0x0657fd6d, 0xa4ab, 0x43c4, 0x84, 0xe5, \
+ { 0x09, 0x33, 0xc8, 0x4b, 0x4f, 0x4f } }
+#define EFI_DELL_LVM { 0xe6d6d379, 0xf507, 0x44c2, 0xa2, 0x3c, \
+ { 0x23, 0x8f, 0x2a, 0x3d, 0xf9, 0x28 } }
+#define EFI_DELL_RESV { 0x8da63339, 0x0007, 0x60c0, 0xc4, 0x36, \
+ { 0x08, 0x3a, 0xc8, 0x23, 0x09, 0x08 } }
+#define EFI_AAPL_HFS { 0x48465300, 0x0000, 0x11aa, 0xaa, 0x11, \
+ { 0x00, 0x30, 0x65, 0x43, 0xec, 0xac } }
+#define EFI_AAPL_UFS { 0x55465300, 0x0000, 0x11aa, 0xaa, 0x11, \
+ { 0x00, 0x30, 0x65, 0x43, 0xec, 0xac } }
+#define EFI_FREEBSD_BOOT { 0x83bd6b9d, 0x7f41, 0x11dc, 0xbe, 0x0b, \
+ { 0x00, 0x15, 0x60, 0xb8, 0x4f, 0x0f } }
+#define EFI_FREEBSD_SWAP { 0x516e7cb5, 0x6ecf, 0x11d6, 0x8f, 0xf8, \
+ { 0x00, 0x02, 0x2d, 0x09, 0x71, 0x2b } }
+#define EFI_FREEBSD_UFS { 0x516e7cb6, 0x6ecf, 0x11d6, 0x8f, 0xf8, \
+ { 0x00, 0x02, 0x2d, 0x09, 0x71, 0x2b } }
+#define EFI_FREEBSD_VINUM { 0x516e7cb8, 0x6ecf, 0x11d6, 0x8f, 0xf8, \
+ { 0x00, 0x02, 0x2d, 0x09, 0x71, 0x2b } }
+#define EFI_FREEBSD_ZFS { 0x516e7cba, 0x6ecf, 0x11d6, 0x8f, 0xf8, \
+ { 0x00, 0x02, 0x2d, 0x09, 0x71, 0x2b } }
+
+/* From Wikipedia */
+
+#define EFI_BIOS_BOOT { 0x21686148, 0x6449, 0x6e6f, 0x74, 0x4e, \
+ { 0x65, 0x65, 0x64, 0x45, 0x46, 0x49 } }
+#define EFI_INTC_RS { 0xd3bfe2de, 0x3daf, 0x11df, 0xba, 0x40, \
+ { 0xe3, 0xa5, 0x56, 0xd8, 0x95, 0x93 } }
+#define EFI_SNE_BOOT { 0xf4019732, 0x066e, 0x4e12, 0x82, 0x73, \
+ { 0x34, 0x6c, 0x56, 0x41, 0x49, 0x4f } }
+#define EFI_LENOVO_BOOT { 0xbfbfafe7, 0xa34f, 0x448a, 0x9a, 0x5b, \
+ { 0x62, 0x13, 0xeb, 0x73, 0x6c, 0x22 } }
+#define EFI_MSFT_LDMM { 0x5808c8aa, 0x7e8f, 0x42e0, 0x85, 0xd2, \
+ { 0xe1, 0xe9, 0x04, 0x34, 0xcf, 0xb3 } }
+#define EFI_MSFT_LDMD { 0xaf9b60a0, 0x1431, 0x4f62, 0xbc, 0x68, \
+ { 0x33, 0x11, 0x71, 0x4a, 0x69, 0xad } }
+#define EFI_MSFT_RE { 0xde94bba4, 0x06d1, 0x4d40, 0xa1, 0x6a, \
+ { 0xbf, 0xd5, 0x01, 0x79, 0xd6, 0xac } }
+#define EFI_IBM_GPFS { 0x37affc90, 0xef7d, 0x4e96, 0x91, 0xc3, \
+ { 0x2d, 0x7a, 0xe0, 0x55, 0xb1, 0x74 } }
+#define EFI_MSFT_STORAGESPACES { 0xe75caf8f, 0xf680, 0x4cee, 0xaf, 0xa3, \
+ { 0xb0, 0x01, 0xe5, 0x6e, 0xfc, 0x2d } }
+#define EFI_HPQ_DATA { 0x75894c1e, 0x3aeb, 0x11d3, 0xb7, 0xc1, \
+ { 0x7b, 0x03, 0xa0, 0x00, 0x00, 0x00 } }
+#define EFI_HPQ_SVC { 0xe2a1e728, 0x32e3, 0x11d6, 0xa6, 0x82, \
+ { 0x7b, 0x03, 0xa0, 0x00, 0x00, 0x00 } }
+#define EFI_RHT_DATA { 0x0fc63daf, 0x8483, 0x4772, 0x8e, 0x79, \
+ { 0x3d, 0x69, 0xd8, 0x47, 0x7d, 0xe4 } }
+#define EFI_RHT_HOME { 0x933ac7e1, 0x2eb4, 0x4f13, 0xb8, 0x44, \
+ { 0x0e, 0x14, 0xe2, 0xae, 0xf9, 0x15 } }
+#define EFI_RHT_SRV { 0x3b8f8425, 0x20e0, 0x4f3b, 0x90, 0x7f, \
+ { 0x1a, 0x25, 0xa7, 0x6f, 0x98, 0xe8 } }
+#define EFI_RHT_DMCRYPT { 0x7ffec5c9, 0x2d00, 0x49b7, 0x89, 0x41, \
+ { 0x3e, 0xa1, 0x0a, 0x55, 0x86, 0xb7 } }
+#define EFI_RHT_LUKS { 0xca7d7ccb, 0x63ed, 0x4c53, 0x86, 0x1c, \
+ { 0x17, 0x42, 0x53, 0x60, 0x59, 0xcc } }
+#define EFI_FREEBSD_DISKLABEL { 0x516e7cb4, 0x6ecf, 0x11d6, 0x8f, 0xf8, \
+ { 0x00, 0x02, 0x2d, 0x09, 0x71, 0x2b } }
+#define EFI_AAPL_RAID { 0x52414944, 0x0000, 0x11aa, 0xaa, 0x11, \
+ { 0x00, 0x30, 0x65, 0x43, 0xec, 0xac } }
+#define EFI_AAPL_RAIDOFFLINE { 0x52414944, 0x5f4f, 0x11aa, 0xaa, 0x11, \
+ { 0x00, 0x30, 0x65, 0x43, 0xec, 0xac } }
+#define EFI_AAPL_BOOT { 0x426f6f74, 0x0000, 0x11aa, 0xaa, 0x11, \
+ { 0x00, 0x30, 0x65, 0x43, 0xec, 0xac } }
+#define EFI_AAPL_LABEL { 0x4c616265, 0x6c00, 0x11aa, 0xaa, 0x11, \
+ { 0x00, 0x30, 0x65, 0x43, 0xec, 0xac } }
+#define EFI_AAPL_TVRECOVERY { 0x5265636f, 0x7665, 0x11aa, 0xaa, 0x11, \
+ { 0x00, 0x30, 0x65, 0x43, 0xec, 0xac } }
+#define EFI_AAPL_CORESTORAGE { 0x53746f72, 0x6167, 0x11aa, 0xaa, 0x11, \
+ { 0x00, 0x30, 0x65, 0x43, 0xec, 0xac } }
+#define EFI_NETBSD_SWAP { 0x49f48d32, 0xb10e, 0x11dc, 0xb9, 0x9b, \
+ { 0x00, 0x19, 0xd1, 0x87, 0x96, 0x48 } }
+#define EFI_NETBSD_FFS { 0x49f48d5a, 0xb10e, 0x11dc, 0xb9, 0x9b, \
+ { 0x00, 0x19, 0xd1, 0x87, 0x96, 0x48 } }
+#define EFI_NETBSD_LFS { 0x49f48d82, 0xb10e, 0x11dc, 0xb9, 0x9b, \
+ { 0x00, 0x19, 0xd1, 0x87, 0x96, 0x48 } }
+#define EFI_NETBSD_RAID { 0x49f48daa, 0xb10e, 0x11dc, 0xb9, 0x9b, \
+ { 0x00, 0x19, 0xd1, 0x87, 0x96, 0x48 } }
+#define EFI_NETBSD_CAT { 0x2db519c4, 0xb10f, 0x11dc, 0xb9, 0x9b, \
+ { 0x00, 0x19, 0xd1, 0x87, 0x96, 0x48 } }
+#define EFI_NETBSD_CRYPT { 0x2db519ec, 0xb10f, 0x11dc, 0xb9, 0x9b, \
+ { 0x00, 0x19, 0xd1, 0x87, 0x96, 0x48 } }
+#define EFI_GOOG_KERN { 0xfe3a2a5d, 0x4f32, 0x41a7, 0xb7, 0x25, \
+ { 0xac, 0xcc, 0x32, 0x85, 0xa3, 0x09 } }
+#define EFI_GOOG_ROOT { 0x3cb8e202, 0x3b7e, 0x47dd, 0x8a, 0x3c, \
+ { 0x7f, 0xf2, 0xa1, 0x3c, 0xfc, 0xec } }
+#define EFI_GOOG_RESV { 0x2e0a753d, 0x9e48, 0x43b0, 0x83, 0x37, \
+ { 0xb1, 0x51, 0x92, 0xcb, 0x1b, 0x5e } }
+#define EFI_HAIKU_BFS { 0x42465331, 0x3ba3, 0x10f1, 0x80, 0x2a, \
+ { 0x48, 0x61, 0x69, 0x6b, 0x75, 0x21 } }
+#define EFI_MIDNIGHTBSD_BOOT { 0x85d5e45e, 0x237c, 0x11e1, 0xb4, 0xb3, \
+ { 0xe8, 0x9a, 0x8f, 0x7f, 0xc3, 0xa7 } }
+#define EFI_MIDNIGHTBSD_DATA { 0x85d5e45a, 0x237c, 0x11e1, 0xb4, 0xb3, \
+ { 0xe8, 0x9a, 0x8f, 0x7f, 0xc3, 0xa7 } }
+#define EFI_MIDNIGHTBSD_SWAP { 0x85d5e45b, 0x237c, 0x11e1, 0xb4, 0xb3, \
+ { 0xe8, 0x9a, 0x8f, 0x7f, 0xc3, 0xa7 } }
+#define EFI_MIDNIGHTBSD_UFS { 0x0394ef8b, 0x237e, 0x11e1, 0xb4, 0xb3, \
+ { 0xe8, 0x9a, 0x8f, 0x7f, 0xc3, 0xa7 } }
+#define EFI_MIDNIGHTBSD_VINUM { 0x85d5e45c, 0x237c, 0x11e1, 0xb4, 0xb3, \
+ { 0xe8, 0x9a, 0x8f, 0x7f, 0xc3, 0xa7 } }
+#define EFI_MIDNIGHTBSD_ZFS { 0x85d5e45d, 0x237c, 0x11e1, 0xb4, 0xb3, \
+ { 0xe8, 0x9a, 0x8f, 0x7f, 0xc3, 0xa7 } }
+#define EFI_CEPH_JOURNAL { 0x45b0969e, 0x9b03, 0x4f30, 0xb4, 0xc6, \
+ { 0xb4, 0xb8, 0x0c, 0xef, 0xf1, 0x06 } }
+#define EFI_CEPH_DMCRYPTJOURNAL { 0x45b0969e, 0x9b03, 0x4f30, 0xb4, 0xc6, \
+ { 0x5e, 0xc0, 0x0c, 0xef, 0xf1, 0x06 } }
+#define EFI_CEPH_OSD { 0x4fbd7e29, 0x9d25, 0x41b8, 0xaf, 0xd0, \
+ { 0x06, 0x2c, 0x0c, 0xef, 0xf0, 0x5d } }
+#define EFI_CEPH_DMCRYPTOSD { 0x4fbd7e29, 0x9d25, 0x41b8, 0xaf, 0xd0, \
+ { 0x5e, 0xc0, 0x0c, 0xef, 0xf0, 0x5d } }
+#define EFI_CEPH_CREATE { 0x89c57f98, 0x2fe5, 0x4dc0, 0x89, 0xc1, \
+ { 0xf3, 0xad, 0x0c, 0xef, 0xf2, 0xbe } }
+#define EFI_CEPH_DMCRYPTCREATE { 0x89c57f98, 0x2fe5, 0x4dc0, 0x89, 0xc1, \
+ { 0x5e, 0xc0, 0x0c, 0xef, 0xf2, 0xbe } }
+#define EFI_OPENBSD_DISKLABEL { 0x824cc7a0, 0x36a8, 0x11e3, 0x89, 0x0a, \
+ { 0x95, 0x25, 0x19, 0xad, 0x3f, 0x61 } }
+#define EFI_BBRY_QNX { 0xcef5a9ad, 0x73bc, 0x4601, 0x89, 0xf3, \
+ { 0xcd, 0xee, 0xee, 0xe3, 0x21, 0xa1 } }
+#define EFI_BELL_PLAN9 { 0xc91818f9, 0x8025, 0x47af, 0x89, 0xd2, \
+ { 0xf0, 0x30, 0xd7, 0x00, 0x0c, 0x2c } }
+#define EFI_VMW_KCORE { 0x9d275380, 0x40ad, 0x11db, 0xbf, 0x97, \
+ { 0x00, 0x0c, 0x29, 0x11, 0xd1, 0xb8 } }
+#define EFI_VMW_VMFS { 0xaa31e02a, 0x400f, 0x11db, 0x95, 0x90, \
+ { 0x00, 0x0c, 0x29, 0x11, 0xd1, 0xb8 } }
+#define EFI_VMW_RESV { 0x9198effc, 0x31c0, 0x11db, 0x8f, 0x78, \
+ { 0x00, 0x0c, 0x29, 0x11, 0xd1, 0xb8 } }
+
+/* From GPT fdisk */
+
+#define EFI_RHT_ROOTX86 { 0x44479540, 0xf297, 0x41b2, 0x9a, 0xf7, \
+ { 0xd1, 0x31, 0xd5, 0xf0, 0x45, 0x8a } }
+#define EFI_RHT_ROOTAMD64 { 0x4f68bce3, 0xe8cd, 0x4db1, 0x96, 0xe7, \
+ { 0xfb, 0xca, 0xf9, 0x84, 0xb7, 0x09 } }
+#define EFI_RHT_ROOTARM { 0x69dad710, 0x2ce4, 0x4e3c, 0xb1, 0x6c, \
+ { 0x21, 0xa1, 0xd4, 0x9a, 0xbe, 0xd3 } }
+#define EFI_RHT_ROOTARM64 { 0xb921b045, 0x1df0, 0x41c3, 0xaf, 0x44, \
+ { 0x4c, 0x6f, 0x28, 0x0d, 0x3f, 0xae } }
+#define EFI_ACRONIS_SECUREZONE { 0x0311fc50, 0x01ca, 0x4725, 0xad, 0x77, \
+ { 0x9a, 0xdb, 0xb2, 0x0a, 0xce, 0x98 } }
+#define EFI_ONIE_BOOT { 0x7412f7d5, 0xa156, 0x4b13, 0x81, 0xdc, \
+ { 0x86, 0x71, 0x74, 0x92, 0x93, 0x25 } }
+#define EFI_ONIE_CONFIG { 0xd4e6e2cd, 0x4469, 0x46f3, 0xb5, 0xcb, \
+ { 0x1b, 0xff, 0x57, 0xaf, 0xc1, 0x49 } }
+#define EFI_IBM_PPRPBOOT { 0x9e1a2d38, 0xc612, 0x4316, 0xaa, 0x26, \
+ { 0x8b, 0x49, 0x52, 0x1e, 0x5a, 0x8b } }
+#define EFI_FREEDESKTOP_BOOT { 0xbc13c2ff, 0x59e6, 0x4262, 0xa3, 0x52, \
+ { 0xb2, 0x75, 0xfd, 0x6f, 0x71, 0x72 } }
+
+/* minimum # of bytes for partition table entries, per EFI spec */
#define EFI_MIN_ARRAY_SIZE (16 * 1024)
#define EFI_PART_NAME_LEN 36
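
For readers unfamiliar with the initializer layout, the five fields of each GUID macro map onto an illumos-style struct uuid (time_low, time_mid, time_hi_and_version, the two clock_seq bytes, and a six-byte node). A minimal userland sketch of how such a macro pairs with that layout; the struct definition here is an assumption for illustration (in the tree it comes from <sys/uuid.h>):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Assumed illumos-style GUID layout; illustrative only. */
struct efi_guid {
	uint32_t	time_low;
	uint16_t	time_mid;
	uint16_t	time_hi_and_version;
	uint8_t		clock_seq_hi_and_reserved;
	uint8_t		clock_seq_low;
	uint8_t		node_addr[6];
};

#define	EFI_FREEBSD_ZFS	{ 0x516e7cba, 0x6ecf, 0x11d6, 0x8f, 0xf8, \
			    { 0x00, 0x02, 0x2d, 0x09, 0x71, 0x2b } }

int
main(void)
{
	struct efi_guid zfs_type = EFI_FREEBSD_ZFS;
	struct efi_guid probe = EFI_FREEBSD_ZFS;

	/* Partition-type checks reduce to a byte-wise compare. */
	if (memcmp(&probe, &zfs_type, sizeof (probe)) == 0)
		(void) printf("FreeBSD ZFS partition type\n");
	return (0);
}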
diff --git a/zfs/include/sys/fm/Makefile.in b/zfs/include/sys/fm/Makefile.in
index aaa8bf9eed13..dc3a4d19693e 100644
--- a/zfs/include/sys/fm/Makefile.in
+++ b/zfs/include/sys/fm/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -91,7 +91,8 @@ host_triplet = @host@
target_triplet = @target@
subdir = include/sys/fm
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/config/always-arch.m4 \
+ $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/always-no-unused-but-set-variable.m4 \
$(top_srcdir)/config/dkms.m4 \
$(top_srcdir)/config/kernel-acl.m4 \
@@ -107,6 +108,8 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-bio-op.m4 \
$(top_srcdir)/config/kernel-bio-rw-barrier.m4 \
$(top_srcdir)/config/kernel-bio-rw-discard.m4 \
+ $(top_srcdir)/config/kernel-bio_set_dev.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-bdi.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
@@ -114,7 +117,6 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \
$(top_srcdir)/config/kernel-blkdev-get.m4 \
$(top_srcdir)/config/kernel-block-device-operations-release-void.m4 \
- $(top_srcdir)/config/kernel-check-disk-size-change.m4 \
$(top_srcdir)/config/kernel-clear-inode.m4 \
$(top_srcdir)/config/kernel-commit-metadata.m4 \
$(top_srcdir)/config/kernel-create-nameidata.m4 \
@@ -131,9 +133,11 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-encode-fh-inode.m4 \
$(top_srcdir)/config/kernel-evict-inode.m4 \
$(top_srcdir)/config/kernel-fallocate.m4 \
+ $(top_srcdir)/config/kernel-file-dentry.m4 \
$(top_srcdir)/config/kernel-file-inode.m4 \
$(top_srcdir)/config/kernel-fmode-t.m4 \
$(top_srcdir)/config/kernel-follow-down-one.m4 \
+ $(top_srcdir)/config/kernel-fpu.m4 \
$(top_srcdir)/config/kernel-fsync.m4 \
$(top_srcdir)/config/kernel-generic_io_acct.m4 \
$(top_srcdir)/config/kernel-generic_readlink.m4 \
@@ -141,17 +145,20 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-get-gendisk.m4 \
$(top_srcdir)/config/kernel-get-link.m4 \
$(top_srcdir)/config/kernel-inode-getattr.m4 \
+ $(top_srcdir)/config/kernel-inode-set-flags.m4 \
$(top_srcdir)/config/kernel-insert-inode-locked.m4 \
$(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \
$(top_srcdir)/config/kernel-is_owner_or_cap.m4 \
$(top_srcdir)/config/kernel-kmap-atomic-args.m4 \
- $(top_srcdir)/config/kernel-kobj-name-len.m4 \
+ $(top_srcdir)/config/kernel-kuid-helpers.m4 \
$(top_srcdir)/config/kernel-lookup-bdev.m4 \
$(top_srcdir)/config/kernel-lookup-nameidata.m4 \
$(top_srcdir)/config/kernel-lseek-execute.m4 \
$(top_srcdir)/config/kernel-mk-request-fn.m4 \
$(top_srcdir)/config/kernel-mkdir-umode-t.m4 \
+ $(top_srcdir)/config/kernel-mod-param.m4 \
$(top_srcdir)/config/kernel-mount-nodev.m4 \
+ $(top_srcdir)/config/kernel-objtool.m4 \
$(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \
$(top_srcdir)/config/kernel-put-link.m4 \
$(top_srcdir)/config/kernel-rename.m4 \
@@ -162,10 +169,13 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-show-options.m4 \
$(top_srcdir)/config/kernel-shrink.m4 \
$(top_srcdir)/config/kernel-submit_bio.m4 \
+ $(top_srcdir)/config/kernel-super-userns.m4 \
+ $(top_srcdir)/config/kernel-tmpfile.m4 \
$(top_srcdir)/config/kernel-truncate-range.m4 \
$(top_srcdir)/config/kernel-truncate-setsize.m4 \
$(top_srcdir)/config/kernel-vfs-iterate.m4 \
$(top_srcdir)/config/kernel-vfs-rw-iterate.m4 \
+ $(top_srcdir)/config/kernel-vm_node_stat.m4 \
$(top_srcdir)/config/kernel-xattr-handler.m4 \
$(top_srcdir)/config/kernel.m4 $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/ltoptions.m4 \
@@ -173,10 +183,13 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/ltversion.m4 \
$(top_srcdir)/config/lt~obsolete.m4 \
$(top_srcdir)/config/mount-helper.m4 \
- $(top_srcdir)/config/user-arch.m4 \
+ $(top_srcdir)/config/toolchain-simd.m4 \
$(top_srcdir)/config/user-dracut.m4 \
$(top_srcdir)/config/user-frame-larger-than.m4 \
+ $(top_srcdir)/config/user-libattr.m4 \
$(top_srcdir)/config/user-libblkid.m4 \
+ $(top_srcdir)/config/user-libtirpc.m4 \
+ $(top_srcdir)/config/user-libudev.m4 \
$(top_srcdir)/config/user-libuuid.m4 \
$(top_srcdir)/config/user-makedev.m4 \
$(top_srcdir)/config/user-no-format-truncation.m4 \
@@ -330,7 +343,6 @@ CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEBUG_DMU_TX = @DEBUG_DMU_TX@
DEBUG_STACKFLAGS = @DEBUG_STACKFLAGS@
DEBUG_ZFS = @DEBUG_ZFS@
DEFAULT_INITCONF_DIR = @DEFAULT_INITCONF_DIR@
@@ -369,10 +381,14 @@ KERNELCPPFLAGS = @KERNELCPPFLAGS@
KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@
LD = @LD@
LDFLAGS = @LDFLAGS@
+LIBATTR = @LIBATTR@
LIBBLKID = @LIBBLKID@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
+LIBTIRPC = @LIBTIRPC@
+LIBTIRPC_CFLAGS = @LIBTIRPC_CFLAGS@
LIBTOOL = @LIBTOOL@
+LIBUDEV = @LIBUDEV@
LIBUUID = @LIBUUID@
LINUX = @LINUX@
LINUX_OBJ = @LINUX_OBJ@
@@ -403,8 +419,12 @@ PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
+QAT_OBJ = @QAT_OBJ@
+QAT_SRC = @QAT_SRC@
+QAT_SYMBOLS = @QAT_SYMBOLS@
RANLIB = @RANLIB@
RELEASE = @RELEASE@
+RM = @RM@
RPM = @RPM@
RPMBUILD = @RPMBUILD@
RPMBUILD_VERSION = @RPMBUILD_VERSION@
@@ -444,6 +464,7 @@ ZFS_META_RELEASE = @ZFS_META_RELEASE@
ZFS_META_VERSION = @ZFS_META_VERSION@
ZFS_MODULE_LOAD = @ZFS_MODULE_LOAD@
ZLIB = @ZLIB@
+ZONENAME = @ZONENAME@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
diff --git a/zfs/include/sys/fm/fs/Makefile.in b/zfs/include/sys/fm/fs/Makefile.in
index a0a9a7e3d350..81089cfdecf6 100644
--- a/zfs/include/sys/fm/fs/Makefile.in
+++ b/zfs/include/sys/fm/fs/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -91,7 +91,8 @@ host_triplet = @host@
target_triplet = @target@
subdir = include/sys/fm/fs
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/config/always-arch.m4 \
+ $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/always-no-unused-but-set-variable.m4 \
$(top_srcdir)/config/dkms.m4 \
$(top_srcdir)/config/kernel-acl.m4 \
@@ -107,6 +108,8 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-bio-op.m4 \
$(top_srcdir)/config/kernel-bio-rw-barrier.m4 \
$(top_srcdir)/config/kernel-bio-rw-discard.m4 \
+ $(top_srcdir)/config/kernel-bio_set_dev.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-bdi.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
@@ -114,7 +117,6 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \
$(top_srcdir)/config/kernel-blkdev-get.m4 \
$(top_srcdir)/config/kernel-block-device-operations-release-void.m4 \
- $(top_srcdir)/config/kernel-check-disk-size-change.m4 \
$(top_srcdir)/config/kernel-clear-inode.m4 \
$(top_srcdir)/config/kernel-commit-metadata.m4 \
$(top_srcdir)/config/kernel-create-nameidata.m4 \
@@ -131,9 +133,11 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-encode-fh-inode.m4 \
$(top_srcdir)/config/kernel-evict-inode.m4 \
$(top_srcdir)/config/kernel-fallocate.m4 \
+ $(top_srcdir)/config/kernel-file-dentry.m4 \
$(top_srcdir)/config/kernel-file-inode.m4 \
$(top_srcdir)/config/kernel-fmode-t.m4 \
$(top_srcdir)/config/kernel-follow-down-one.m4 \
+ $(top_srcdir)/config/kernel-fpu.m4 \
$(top_srcdir)/config/kernel-fsync.m4 \
$(top_srcdir)/config/kernel-generic_io_acct.m4 \
$(top_srcdir)/config/kernel-generic_readlink.m4 \
@@ -141,17 +145,20 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-get-gendisk.m4 \
$(top_srcdir)/config/kernel-get-link.m4 \
$(top_srcdir)/config/kernel-inode-getattr.m4 \
+ $(top_srcdir)/config/kernel-inode-set-flags.m4 \
$(top_srcdir)/config/kernel-insert-inode-locked.m4 \
$(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \
$(top_srcdir)/config/kernel-is_owner_or_cap.m4 \
$(top_srcdir)/config/kernel-kmap-atomic-args.m4 \
- $(top_srcdir)/config/kernel-kobj-name-len.m4 \
+ $(top_srcdir)/config/kernel-kuid-helpers.m4 \
$(top_srcdir)/config/kernel-lookup-bdev.m4 \
$(top_srcdir)/config/kernel-lookup-nameidata.m4 \
$(top_srcdir)/config/kernel-lseek-execute.m4 \
$(top_srcdir)/config/kernel-mk-request-fn.m4 \
$(top_srcdir)/config/kernel-mkdir-umode-t.m4 \
+ $(top_srcdir)/config/kernel-mod-param.m4 \
$(top_srcdir)/config/kernel-mount-nodev.m4 \
+ $(top_srcdir)/config/kernel-objtool.m4 \
$(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \
$(top_srcdir)/config/kernel-put-link.m4 \
$(top_srcdir)/config/kernel-rename.m4 \
@@ -162,10 +169,13 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-show-options.m4 \
$(top_srcdir)/config/kernel-shrink.m4 \
$(top_srcdir)/config/kernel-submit_bio.m4 \
+ $(top_srcdir)/config/kernel-super-userns.m4 \
+ $(top_srcdir)/config/kernel-tmpfile.m4 \
$(top_srcdir)/config/kernel-truncate-range.m4 \
$(top_srcdir)/config/kernel-truncate-setsize.m4 \
$(top_srcdir)/config/kernel-vfs-iterate.m4 \
$(top_srcdir)/config/kernel-vfs-rw-iterate.m4 \
+ $(top_srcdir)/config/kernel-vm_node_stat.m4 \
$(top_srcdir)/config/kernel-xattr-handler.m4 \
$(top_srcdir)/config/kernel.m4 $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/ltoptions.m4 \
@@ -173,10 +183,13 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/ltversion.m4 \
$(top_srcdir)/config/lt~obsolete.m4 \
$(top_srcdir)/config/mount-helper.m4 \
- $(top_srcdir)/config/user-arch.m4 \
+ $(top_srcdir)/config/toolchain-simd.m4 \
$(top_srcdir)/config/user-dracut.m4 \
$(top_srcdir)/config/user-frame-larger-than.m4 \
+ $(top_srcdir)/config/user-libattr.m4 \
$(top_srcdir)/config/user-libblkid.m4 \
+ $(top_srcdir)/config/user-libtirpc.m4 \
+ $(top_srcdir)/config/user-libudev.m4 \
$(top_srcdir)/config/user-libuuid.m4 \
$(top_srcdir)/config/user-makedev.m4 \
$(top_srcdir)/config/user-no-format-truncation.m4 \
@@ -286,7 +299,6 @@ CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEBUG_DMU_TX = @DEBUG_DMU_TX@
DEBUG_STACKFLAGS = @DEBUG_STACKFLAGS@
DEBUG_ZFS = @DEBUG_ZFS@
DEFAULT_INITCONF_DIR = @DEFAULT_INITCONF_DIR@
@@ -325,10 +337,14 @@ KERNELCPPFLAGS = @KERNELCPPFLAGS@
KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@
LD = @LD@
LDFLAGS = @LDFLAGS@
+LIBATTR = @LIBATTR@
LIBBLKID = @LIBBLKID@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
+LIBTIRPC = @LIBTIRPC@
+LIBTIRPC_CFLAGS = @LIBTIRPC_CFLAGS@
LIBTOOL = @LIBTOOL@
+LIBUDEV = @LIBUDEV@
LIBUUID = @LIBUUID@
LINUX = @LINUX@
LINUX_OBJ = @LINUX_OBJ@
@@ -359,8 +375,12 @@ PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
+QAT_OBJ = @QAT_OBJ@
+QAT_SRC = @QAT_SRC@
+QAT_SYMBOLS = @QAT_SYMBOLS@
RANLIB = @RANLIB@
RELEASE = @RELEASE@
+RM = @RM@
RPM = @RPM@
RPMBUILD = @RPMBUILD@
RPMBUILD_VERSION = @RPMBUILD_VERSION@
@@ -400,6 +420,7 @@ ZFS_META_RELEASE = @ZFS_META_RELEASE@
ZFS_META_VERSION = @ZFS_META_VERSION@
ZFS_MODULE_LOAD = @ZFS_MODULE_LOAD@
ZLIB = @ZLIB@
+ZONENAME = @ZONENAME@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
diff --git a/zfs/include/sys/fm/fs/zfs.h b/zfs/include/sys/fm/fs/zfs.h
index 0d7eadd4f445..6bef8b4eeddd 100644
--- a/zfs/include/sys/fm/fs/zfs.h
+++ b/zfs/include/sys/fm/fs/zfs.h
@@ -36,10 +36,7 @@ extern "C" {
#define FM_EREPORT_ZFS_IO "io"
#define FM_EREPORT_ZFS_DATA "data"
#define FM_EREPORT_ZFS_DELAY "delay"
-#define FM_EREPORT_ZFS_CONFIG_SYNC "config.sync"
#define FM_EREPORT_ZFS_POOL "zpool"
-#define FM_EREPORT_ZFS_POOL_DESTROY "zpool.destroy"
-#define FM_EREPORT_ZFS_POOL_REGUID "zpool.reguid"
#define FM_EREPORT_ZFS_DEVICE_UNKNOWN "vdev.unknown"
#define FM_EREPORT_ZFS_DEVICE_OPEN_FAILED "vdev.open_failed"
#define FM_EREPORT_ZFS_DEVICE_CORRUPT_DATA "vdev.corrupt_data"
@@ -48,30 +45,25 @@ extern "C" {
#define FM_EREPORT_ZFS_DEVICE_TOO_SMALL "vdev.too_small"
#define FM_EREPORT_ZFS_DEVICE_BAD_LABEL "vdev.bad_label"
#define FM_EREPORT_ZFS_DEVICE_BAD_ASHIFT "vdev.bad_ashift"
-#define FM_EREPORT_ZFS_DEVICE_REMOVE "vdev.remove"
-#define FM_EREPORT_ZFS_DEVICE_CLEAR "vdev.clear"
-#define FM_EREPORT_ZFS_DEVICE_CHECK "vdev.check"
-#define FM_EREPORT_ZFS_DEVICE_SPARE "vdev.spare"
-#define FM_EREPORT_ZFS_DEVICE_AUTOEXPAND "vdev.autoexpand"
#define FM_EREPORT_ZFS_IO_FAILURE "io_failure"
#define FM_EREPORT_ZFS_PROBE_FAILURE "probe_failure"
#define FM_EREPORT_ZFS_LOG_REPLAY "log_replay"
-#define FM_EREPORT_ZFS_RESILVER_START "resilver.start"
-#define FM_EREPORT_ZFS_RESILVER_FINISH "resilver.finish"
-#define FM_EREPORT_ZFS_SCRUB_START "scrub.start"
-#define FM_EREPORT_ZFS_SCRUB_FINISH "scrub.finish"
-#define FM_EREPORT_ZFS_BOOTFS_VDEV_ATTACH "bootfs.vdev.attach"
+#define FM_EREPORT_ZFS_CONFIG_CACHE_WRITE "config_cache_write"
#define FM_EREPORT_PAYLOAD_ZFS_POOL "pool"
#define FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE "pool_failmode"
#define FM_EREPORT_PAYLOAD_ZFS_POOL_GUID "pool_guid"
#define FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT "pool_context"
+#define FM_EREPORT_PAYLOAD_ZFS_POOL_STATE "pool_state"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID "vdev_guid"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE "vdev_type"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH "vdev_path"
+#define FM_EREPORT_PAYLOAD_ZFS_VDEV_PHYSPATH "vdev_physpath"
+#define FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH "vdev_enc_sysfs_path"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID "vdev_devid"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU "vdev_fru"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE "vdev_state"
+#define FM_EREPORT_PAYLOAD_ZFS_VDEV_LASTSTATE "vdev_laststate"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_ASHIFT "vdev_ashift"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_COMP_TS "vdev_complete_ts"
#define FM_EREPORT_PAYLOAD_ZFS_VDEV_DELTA_TS "vdev_delta_ts"
@@ -115,9 +107,9 @@ extern "C" {
#define FM_EREPORT_FAILMODE_CONTINUE "continue"
#define FM_EREPORT_FAILMODE_PANIC "panic"
-#define FM_EREPORT_RESOURCE_REMOVED "removed"
-#define FM_EREPORT_RESOURCE_AUTOREPLACE "autoreplace"
-#define FM_EREPORT_RESOURCE_STATECHANGE "statechange"
+#define FM_RESOURCE_REMOVED "removed"
+#define FM_RESOURCE_AUTOREPLACE "autoreplace"
+#define FM_RESOURCE_STATECHANGE "statechange"
#ifdef __cplusplus
}
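
The subclass strings above are only fragments; the full ereport class is assembled with the "ereport" and "fs.zfs" prefixes when an event is posted. A small sketch of that composition, assuming the FM_EREPORT_CLASS and ZFS_ERROR_CLASS definitions that live elsewhere in these headers:

#include <stdio.h>

#define	FM_EREPORT_CLASS	"ereport"	/* assumed from fm/protocol.h */
#define	ZFS_ERROR_CLASS		"fs.zfs"	/* assumed from this header */
#define	FM_EREPORT_ZFS_IO	"io"

int
main(void)
{
	char buf[64];

	/* yields "ereport.fs.zfs.io", the class name consumers match on */
	(void) snprintf(buf, sizeof (buf), "%s.%s.%s",
	    FM_EREPORT_CLASS, ZFS_ERROR_CLASS, FM_EREPORT_ZFS_IO);
	(void) printf("%s\n", buf);
	return (0);
}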
diff --git a/zfs/include/sys/fm/protocol.h b/zfs/include/sys/fm/protocol.h
index de05bb296741..74aef3a92270 100644
--- a/zfs/include/sys/fm/protocol.h
+++ b/zfs/include/sys/fm/protocol.h
@@ -50,6 +50,7 @@ extern "C" {
#define FM_RSRC_CLASS "resource"
#define FM_LIST_EVENT "list"
#define FM_IREPORT_CLASS "ireport"
+#define FM_SYSEVENT_CLASS "sysevent"
/* FM list.* event class values */
#define FM_LIST_SUSPECT_CLASS FM_LIST_EVENT ".suspect"
@@ -360,6 +361,7 @@ extern uint64_t fm_ena_generation_get(uint64_t);
extern uchar_t fm_ena_format_get(uint64_t);
extern uint64_t fm_ena_id_get(uint64_t);
extern uint64_t fm_ena_time_get(uint64_t);
+extern void fm_erpt_dropped_increment(void);
#ifdef __cplusplus
}
diff --git a/zfs/include/sys/fm/util.h b/zfs/include/sys/fm/util.h
index 6ee31764bfac..ff54b05bb6af 100644
--- a/zfs/include/sys/fm/util.h
+++ b/zfs/include/sys/fm/util.h
@@ -93,6 +93,7 @@ typedef struct zfs_zevent {
extern void fm_init(void);
extern void fm_fini(void);
extern void fm_nvprint(nvlist_t *);
+extern void zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector);
extern int zfs_zevent_post(nvlist_t *, nvlist_t *, zevent_cb_t *);
extern void zfs_zevent_drain_all(int *);
extern int zfs_zevent_fd_hold(int, minor_t *, zfs_zevent_t **);
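
The newly exported zfs_zevent_post_cb() is shaped to serve as the zevent_cb_t cleanup callback taken by zfs_zevent_post(). A hedged kernel-side sketch of that pairing; the nvlist construction and the exact consumption semantics are assumptions based only on the prototypes above:

/* Sketch only: assumes the kernel <sys/nvpair.h> nvlist interfaces. */
static void
post_example_event(void)
{
	nvlist_t *event = NULL;
	nvlist_t *detector = NULL;

	if (nvlist_alloc(&event, NV_UNIQUE_NAME, KM_SLEEP) != 0)
		return;
	if (nvlist_alloc(&detector, NV_UNIQUE_NAME, KM_SLEEP) != 0) {
		nvlist_free(event);
		return;
	}
	(void) nvlist_add_string(event, "class", "sysevent.fs.zfs.example");

	/*
	 * The callback runs once the event is queued; from its name it
	 * is assumed to release both lists on the caller's behalf.
	 */
	(void) zfs_zevent_post(event, detector, zfs_zevent_post_cb);
}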
diff --git a/zfs/include/sys/fs/Makefile.in b/zfs/include/sys/fs/Makefile.in
index c556d057ceb5..1113fe703ec5 100644
--- a/zfs/include/sys/fs/Makefile.in
+++ b/zfs/include/sys/fs/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.15 from Makefile.am.
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2014 Free Software Foundation, Inc.
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -91,7 +91,8 @@ host_triplet = @host@
target_triplet = @target@
subdir = include/sys/fs
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
-am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
+am__aclocal_m4_deps = $(top_srcdir)/config/always-arch.m4 \
+ $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/always-no-unused-but-set-variable.m4 \
$(top_srcdir)/config/dkms.m4 \
$(top_srcdir)/config/kernel-acl.m4 \
@@ -107,6 +108,8 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-bio-op.m4 \
$(top_srcdir)/config/kernel-bio-rw-barrier.m4 \
$(top_srcdir)/config/kernel-bio-rw-discard.m4 \
+ $(top_srcdir)/config/kernel-bio_set_dev.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-bdi.m4 \
$(top_srcdir)/config/kernel-blk-queue-flush.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
$(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
@@ -114,7 +117,6 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \
$(top_srcdir)/config/kernel-blkdev-get.m4 \
$(top_srcdir)/config/kernel-block-device-operations-release-void.m4 \
- $(top_srcdir)/config/kernel-check-disk-size-change.m4 \
$(top_srcdir)/config/kernel-clear-inode.m4 \
$(top_srcdir)/config/kernel-commit-metadata.m4 \
$(top_srcdir)/config/kernel-create-nameidata.m4 \
@@ -131,9 +133,11 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-encode-fh-inode.m4 \
$(top_srcdir)/config/kernel-evict-inode.m4 \
$(top_srcdir)/config/kernel-fallocate.m4 \
+ $(top_srcdir)/config/kernel-file-dentry.m4 \
$(top_srcdir)/config/kernel-file-inode.m4 \
$(top_srcdir)/config/kernel-fmode-t.m4 \
$(top_srcdir)/config/kernel-follow-down-one.m4 \
+ $(top_srcdir)/config/kernel-fpu.m4 \
$(top_srcdir)/config/kernel-fsync.m4 \
$(top_srcdir)/config/kernel-generic_io_acct.m4 \
$(top_srcdir)/config/kernel-generic_readlink.m4 \
@@ -141,17 +145,20 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-get-gendisk.m4 \
$(top_srcdir)/config/kernel-get-link.m4 \
$(top_srcdir)/config/kernel-inode-getattr.m4 \
+ $(top_srcdir)/config/kernel-inode-set-flags.m4 \
$(top_srcdir)/config/kernel-insert-inode-locked.m4 \
$(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \
$(top_srcdir)/config/kernel-is_owner_or_cap.m4 \
$(top_srcdir)/config/kernel-kmap-atomic-args.m4 \
- $(top_srcdir)/config/kernel-kobj-name-len.m4 \
+ $(top_srcdir)/config/kernel-kuid-helpers.m4 \
$(top_srcdir)/config/kernel-lookup-bdev.m4 \
$(top_srcdir)/config/kernel-lookup-nameidata.m4 \
$(top_srcdir)/config/kernel-lseek-execute.m4 \
$(top_srcdir)/config/kernel-mk-request-fn.m4 \
$(top_srcdir)/config/kernel-mkdir-umode-t.m4 \
+ $(top_srcdir)/config/kernel-mod-param.m4 \
$(top_srcdir)/config/kernel-mount-nodev.m4 \
+ $(top_srcdir)/config/kernel-objtool.m4 \
$(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \
$(top_srcdir)/config/kernel-put-link.m4 \
$(top_srcdir)/config/kernel-rename.m4 \
@@ -162,10 +169,13 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/kernel-show-options.m4 \
$(top_srcdir)/config/kernel-shrink.m4 \
$(top_srcdir)/config/kernel-submit_bio.m4 \
+ $(top_srcdir)/config/kernel-super-userns.m4 \
+ $(top_srcdir)/config/kernel-tmpfile.m4 \
$(top_srcdir)/config/kernel-truncate-range.m4 \
$(top_srcdir)/config/kernel-truncate-setsize.m4 \
$(top_srcdir)/config/kernel-vfs-iterate.m4 \
$(top_srcdir)/config/kernel-vfs-rw-iterate.m4 \
+ $(top_srcdir)/config/kernel-vm_node_stat.m4 \
$(top_srcdir)/config/kernel-xattr-handler.m4 \
$(top_srcdir)/config/kernel.m4 $(top_srcdir)/config/libtool.m4 \
$(top_srcdir)/config/ltoptions.m4 \
@@ -173,10 +183,13 @@ am__aclocal_m4_deps = $(top_srcdir)/config/always-no-bool-compare.m4 \
$(top_srcdir)/config/ltversion.m4 \
$(top_srcdir)/config/lt~obsolete.m4 \
$(top_srcdir)/config/mount-helper.m4 \
- $(top_srcdir)/config/user-arch.m4 \
+ $(top_srcdir)/config/toolchain-simd.m4 \
$(top_srcdir)/config/user-dracut.m4 \
$(top_srcdir)/config/user-frame-larger-than.m4 \
+ $(top_srcdir)/config/user-libattr.m4 \
$(top_srcdir)/config/user-libblkid.m4 \
+ $(top_srcdir)/config/user-libtirpc.m4 \
+ $(top_srcdir)/config/user-libudev.m4 \
$(top_srcdir)/config/user-libuuid.m4 \
$(top_srcdir)/config/user-makedev.m4 \
$(top_srcdir)/config/user-no-format-truncation.m4 \
@@ -286,7 +299,6 @@ CPP = @CPP@
CPPFLAGS = @CPPFLAGS@
CYGPATH_W = @CYGPATH_W@
DEBUG_CFLAGS = @DEBUG_CFLAGS@
-DEBUG_DMU_TX = @DEBUG_DMU_TX@
DEBUG_STACKFLAGS = @DEBUG_STACKFLAGS@
DEBUG_ZFS = @DEBUG_ZFS@
DEFAULT_INITCONF_DIR = @DEFAULT_INITCONF_DIR@
@@ -325,10 +337,14 @@ KERNELCPPFLAGS = @KERNELCPPFLAGS@
KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@
LD = @LD@
LDFLAGS = @LDFLAGS@
+LIBATTR = @LIBATTR@
LIBBLKID = @LIBBLKID@
LIBOBJS = @LIBOBJS@
LIBS = @LIBS@
+LIBTIRPC = @LIBTIRPC@
+LIBTIRPC_CFLAGS = @LIBTIRPC_CFLAGS@
LIBTOOL = @LIBTOOL@
+LIBUDEV = @LIBUDEV@
LIBUUID = @LIBUUID@
LINUX = @LINUX@
LINUX_OBJ = @LINUX_OBJ@
@@ -359,8 +375,12 @@ PACKAGE_TARNAME = @PACKAGE_TARNAME@
PACKAGE_URL = @PACKAGE_URL@
PACKAGE_VERSION = @PACKAGE_VERSION@
PATH_SEPARATOR = @PATH_SEPARATOR@
+QAT_OBJ = @QAT_OBJ@
+QAT_SRC = @QAT_SRC@
+QAT_SYMBOLS = @QAT_SYMBOLS@
RANLIB = @RANLIB@
RELEASE = @RELEASE@
+RM = @RM@
RPM = @RPM@
RPMBUILD = @RPMBUILD@
RPMBUILD_VERSION = @RPMBUILD_VERSION@
@@ -400,6 +420,7 @@ ZFS_META_RELEASE = @ZFS_META_RELEASE@
ZFS_META_VERSION = @ZFS_META_VERSION@
ZFS_MODULE_LOAD = @ZFS_MODULE_LOAD@
ZLIB = @ZLIB@
+ZONENAME = @ZONENAME@
abs_builddir = @abs_builddir@
abs_srcdir = @abs_srcdir@
abs_top_builddir = @abs_top_builddir@
diff --git a/zfs/include/sys/fs/zfs.h b/zfs/include/sys/fs/zfs.h
index 57bf55f9365f..13b25a695639 100644
--- a/zfs/include/sys/fs/zfs.h
+++ b/zfs/include/sys/fs/zfs.h
@@ -23,7 +23,9 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2013, 2017 Joyent, Inc. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright (c) 2017 Datto Inc.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -32,6 +34,7 @@
#define _SYS_FS_ZFS_H
#include <sys/time.h>
+#include <sys/zio_priority.h>
#ifdef __cplusplus
extern "C" {
@@ -53,6 +56,10 @@ typedef enum {
ZFS_TYPE_BOOKMARK = (1 << 4)
} zfs_type_t;
+/*
+ * NB: lzc_dataset_type should be updated whenever a new objset type is added,
+ * if it represents a real type of a dataset that can be created from userland.
+ */
typedef enum dmu_objset_type {
DMU_OST_NONE,
DMU_OST_META,
@@ -66,6 +73,9 @@ typedef enum dmu_objset_type {
#define ZFS_TYPE_DATASET \
(ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME | ZFS_TYPE_SNAPSHOT)
+/*
+ * All of these include the terminating NUL byte.
+ */
#define ZAP_MAXNAMELEN 256
#define ZAP_MAXVALUELEN (1024 * 8)
#define ZAP_OLDMAXVALUELEN 1024
@@ -78,7 +88,8 @@ typedef enum dmu_objset_type {
* the property table in module/zcommon/zfs_prop.c.
*/
typedef enum {
- ZFS_PROP_TYPE,
+ ZFS_PROP_BAD = -1,
+ ZFS_PROP_TYPE = 0,
ZFS_PROP_CREATION,
ZFS_PROP_USED,
ZFS_PROP_AVAILABLE,
@@ -104,7 +115,7 @@ typedef enum {
ZFS_PROP_SNAPDIR,
ZFS_PROP_PRIVATE, /* not exposed to user, temporary */
ZFS_PROP_ACLINHERIT,
- ZFS_PROP_CREATETXG, /* not exposed to the user */
+ ZFS_PROP_CREATETXG,
ZFS_PROP_NAME, /* not exposed to the user */
ZFS_PROP_CANMOUNT,
ZFS_PROP_ISCSIOPTIONS, /* not exposed to the user */
@@ -137,12 +148,14 @@ typedef enum {
ZFS_PROP_DEDUP,
ZFS_PROP_MLSLABEL,
ZFS_PROP_SYNC,
+ ZFS_PROP_DNODESIZE,
ZFS_PROP_REFRATIO,
ZFS_PROP_WRITTEN,
ZFS_PROP_CLONES,
ZFS_PROP_LOGICALUSED,
ZFS_PROP_LOGICALREFERENCED,
ZFS_PROP_INCONSISTENT, /* not exposed to the user */
+ ZFS_PROP_VOLMODE,
ZFS_PROP_FILESYSTEM_LIMIT,
ZFS_PROP_SNAPSHOT_LIMIT,
ZFS_PROP_FILESYSTEM_COUNT,
@@ -156,6 +169,8 @@ typedef enum {
ZFS_PROP_RELATIME,
ZFS_PROP_REDUNDANT_METADATA,
ZFS_PROP_OVERLAY,
+ ZFS_PROP_PREV_SNAP,
+ ZFS_PROP_RECEIVE_RESUME_TOKEN,
ZFS_NUM_PROPS
} zfs_prop_t;
@@ -164,6 +179,10 @@ typedef enum {
ZFS_PROP_USERQUOTA,
ZFS_PROP_GROUPUSED,
ZFS_PROP_GROUPQUOTA,
+ ZFS_PROP_USEROBJUSED,
+ ZFS_PROP_USEROBJQUOTA,
+ ZFS_PROP_GROUPOBJUSED,
+ ZFS_PROP_GROUPOBJQUOTA,
ZFS_NUM_USERQUOTA_PROPS
} zfs_userquota_prop_t;
@@ -203,6 +222,8 @@ typedef enum {
ZPOOL_PROP_LEAKED,
ZPOOL_PROP_MAXBLOCKSIZE,
ZPOOL_PROP_TNAME,
+ ZPOOL_PROP_MAXDNODESIZE,
+ ZPOOL_PROP_MULTIHOST,
ZPOOL_NUM_PROPS
} zpool_prop_t;
@@ -228,6 +249,7 @@ typedef enum {
#define ZPROP_SOURCE_VAL_RECVD "$recvd"
#define ZPROP_N_MORE_ERRORS "N_MORE_ERRORS"
+
/*
* Dataset flag implemented as a special entry in the props zap object
* indicating that the dataset has received properties on or after
@@ -360,11 +382,28 @@ typedef enum {
ZFS_XATTR_SA = 2
} zfs_xattr_type_t;
+typedef enum {
+ ZFS_DNSIZE_LEGACY = 0,
+ ZFS_DNSIZE_AUTO = 1,
+ ZFS_DNSIZE_1K = 1024,
+ ZFS_DNSIZE_2K = 2048,
+ ZFS_DNSIZE_4K = 4096,
+ ZFS_DNSIZE_8K = 8192,
+ ZFS_DNSIZE_16K = 16384
+} zfs_dnsize_type_t;
+
typedef enum {
ZFS_REDUNDANT_METADATA_ALL,
ZFS_REDUNDANT_METADATA_MOST
} zfs_redundant_metadata_type_t;
+typedef enum {
+ ZFS_VOLMODE_DEFAULT = 0,
+ ZFS_VOLMODE_GEOM = 1,
+ ZFS_VOLMODE_DEV = 2,
+ ZFS_VOLMODE_NONE = 3
+} zfs_volmode_t;
+
/*
* On-disk version number.
*/
@@ -528,6 +567,50 @@ typedef struct zpool_rewind_policy {
#define ZPOOL_CONFIG_DTL "DTL"
#define ZPOOL_CONFIG_SCAN_STATS "scan_stats" /* not stored on disk */
#define ZPOOL_CONFIG_VDEV_STATS "vdev_stats" /* not stored on disk */
+
+/* container nvlist of extended stats */
+#define ZPOOL_CONFIG_VDEV_STATS_EX "vdev_stats_ex"
+
+/* Active queue read/write stats */
+#define ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE "vdev_sync_r_active_queue"
+#define ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE "vdev_sync_w_active_queue"
+#define ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE "vdev_async_r_active_queue"
+#define ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE "vdev_async_w_active_queue"
+#define ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE "vdev_async_scrub_active_queue"
+
+/* Queue sizes */
+#define ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE "vdev_sync_r_pend_queue"
+#define ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE "vdev_sync_w_pend_queue"
+#define ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE "vdev_async_r_pend_queue"
+#define ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE "vdev_async_w_pend_queue"
+#define ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE "vdev_async_scrub_pend_queue"
+
+/* Latency read/write histogram stats */
+#define ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO "vdev_tot_r_lat_histo"
+#define ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO "vdev_tot_w_lat_histo"
+#define ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO "vdev_disk_r_lat_histo"
+#define ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO "vdev_disk_w_lat_histo"
+#define ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO "vdev_sync_r_lat_histo"
+#define ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO "vdev_sync_w_lat_histo"
+#define ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO "vdev_async_r_lat_histo"
+#define ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO "vdev_async_w_lat_histo"
+#define ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO "vdev_scrub_histo"
+
+/* Request size histograms */
+#define ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO "vdev_sync_ind_r_histo"
+#define ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO "vdev_sync_ind_w_histo"
+#define ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO "vdev_async_ind_r_histo"
+#define ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO "vdev_async_ind_w_histo"
+#define ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO "vdev_ind_scrub_histo"
+#define ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO "vdev_sync_agg_r_histo"
+#define ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO "vdev_sync_agg_w_histo"
+#define ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO "vdev_async_agg_r_histo"
+#define ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO "vdev_async_agg_w_histo"
+#define ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO "vdev_agg_scrub_histo"
+
+/* vdev enclosure sysfs path */
+#define ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH "vdev_enc_sysfs_path"
+
#define ZPOOL_CONFIG_WHOLE_DISK "whole_disk"
#define ZPOOL_CONFIG_ERRCOUNT "error_count"
#define ZPOOL_CONFIG_NOT_PRESENT "not_present"
@@ -566,6 +649,14 @@ typedef struct zpool_rewind_policy {
#define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read"
#define ZPOOL_CONFIG_FEATURE_STATS "feature_stats" /* not stored on disk */
#define ZPOOL_CONFIG_ERRATA "errata" /* not stored on disk */
+#define ZPOOL_CONFIG_VDEV_TOP_ZAP "com.delphix:vdev_zap_top"
+#define ZPOOL_CONFIG_VDEV_LEAF_ZAP "com.delphix:vdev_zap_leaf"
+#define ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS "com.delphix:has_per_vdev_zaps"
+#define ZPOOL_CONFIG_MMP_STATE "mmp_state" /* not stored on disk */
+#define ZPOOL_CONFIG_MMP_TXG "mmp_txg" /* not stored on disk */
+#define ZPOOL_CONFIG_MMP_HOSTNAME "mmp_hostname" /* not stored on disk */
+#define ZPOOL_CONFIG_MMP_HOSTID "mmp_hostid" /* not stored on disk */
+
/*
* The persistent vdev state is stored as separate values rather than a single
* 'vdev_state' entry. This is because a device can be in multiple states, such
@@ -656,8 +747,11 @@ typedef enum vdev_aux {
VDEV_AUX_ERR_EXCEEDED, /* too many errors */
VDEV_AUX_IO_FAILURE, /* experienced I/O failure */
VDEV_AUX_BAD_LOG, /* cannot read log chain(s) */
- VDEV_AUX_EXTERNAL, /* external diagnosis */
- VDEV_AUX_SPLIT_POOL /* vdev was split off into another pool */
+ VDEV_AUX_EXTERNAL, /* external diagnosis or forced fault */
+ VDEV_AUX_SPLIT_POOL, /* vdev was split off into another pool */
+ VDEV_AUX_BAD_ASHIFT, /* vdev ashift is invalid */
+ VDEV_AUX_EXTERNAL_PERSIST, /* persistent forced fault */
+ VDEV_AUX_ACTIVE, /* vdev active on a different host */
} vdev_aux_t;
/*
@@ -677,6 +771,16 @@ typedef enum pool_state {
POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */
} pool_state_t;
+/*
+ * MMP state. The following states provide additional detail describing

+ * why a pool couldn't be safely imported.
+ */
+typedef enum mmp_state {
+ MMP_STATE_ACTIVE = 0, /* In active use */
+ MMP_STATE_INACTIVE, /* Inactive and safe to import */
+ MMP_STATE_NO_HOSTID /* System hostid is not set */
+} mmp_state_t;
+
/*
* Scan Functions.
*/
@@ -687,6 +791,16 @@ typedef enum pool_scan_func {
POOL_SCAN_FUNCS
} pool_scan_func_t;
+/*
+ * Used to control scrub pause and resume.
+ */
+typedef enum pool_scrub_cmd {
+ POOL_SCRUB_NORMAL = 0,
+ POOL_SCRUB_PAUSE,
+ POOL_SCRUB_FLAGS_END
+} pool_scrub_cmd_t;
+
+
/*
* ZIO types. Needed to interpret vdev statistics below.
*/
@@ -719,6 +833,9 @@ typedef struct pool_scan_stat {
/* values not stored on disk */
uint64_t pss_pass_exam; /* examined bytes per scan pass */
uint64_t pss_pass_start; /* start time of a scan pass */
+ uint64_t pss_pass_scrub_pause; /* pause time of a scrub pass */
+ /* cumulative time scrub spent paused, needed for rate calculation */
+ uint64_t pss_pass_scrub_spent_paused;
} pool_scan_stat_t;
typedef enum dsl_scan_state {
@@ -763,14 +880,68 @@ typedef struct vdev_stat {
uint64_t vs_scan_removing; /* removing? */
uint64_t vs_scan_processed; /* scan processed bytes */
uint64_t vs_fragmentation; /* device fragmentation */
+
} vdev_stat_t;
+/*
+ * Extended stats
+ *
+ * These are stats which aren't included in the original iostat output. For
+ * convenience, they are grouped together in vdev_stat_ex, although each stat
+ * is individually exported as an nvlist.
+ */
+typedef struct vdev_stat_ex {
+ /* Number of ZIOs issued to disk and waiting to finish */
+ uint64_t vsx_active_queue[ZIO_PRIORITY_NUM_QUEUEABLE];
+
+ /* Number of ZIOs pending to be issued to disk */
+ uint64_t vsx_pend_queue[ZIO_PRIORITY_NUM_QUEUEABLE];
+
+ /*
+ * Below are the histograms for various latencies. Buckets are in
+ * units of nanoseconds.
+ */
+
+ /*
+ * 2^37 nanoseconds = ~137s. Timeouts will probably start kicking in
+ * before this.
+ */
+#define VDEV_L_HISTO_BUCKETS 37 /* Latency histo buckets */
+#define VDEV_RQ_HISTO_BUCKETS 25 /* Request size histo buckets */
+
+
+ /* Amount of time in ZIO queue (ns) */
+ uint64_t vsx_queue_histo[ZIO_PRIORITY_NUM_QUEUEABLE]
+ [VDEV_L_HISTO_BUCKETS];
+
+ /* Total ZIO latency (ns). Includes queuing and disk access time */
+ uint64_t vsx_total_histo[ZIO_TYPES][VDEV_L_HISTO_BUCKETS];
+
+ /* Amount of time to read/write the disk (ns) */
+ uint64_t vsx_disk_histo[ZIO_TYPES][VDEV_L_HISTO_BUCKETS];
+
+ /* "lookup the bucket for a value" histogram macros */
+#define HISTO(val, buckets) (val != 0 ? MIN(highbit64(val) - 1, \
+ buckets - 1) : 0)
+#define L_HISTO(a) HISTO(a, VDEV_L_HISTO_BUCKETS)
+#define RQ_HISTO(a) HISTO(a, VDEV_RQ_HISTO_BUCKETS)
+
+ /* Physical IO histogram */
+ uint64_t vsx_ind_histo[ZIO_PRIORITY_NUM_QUEUEABLE]
+ [VDEV_RQ_HISTO_BUCKETS];
+
+ /* Delegated (aggregated) physical IO histogram */
+ uint64_t vsx_agg_histo[ZIO_PRIORITY_NUM_QUEUEABLE]
+ [VDEV_RQ_HISTO_BUCKETS];
+
+} vdev_stat_ex_t;
+
/*
* DDT statistics. Note: all fields should be 64-bit because this
* is passed between kernel and userland as an nvlist uint64 array.
*/
typedef struct ddt_object {
- uint64_t ddo_count; /* number of elments in ddt */
+ uint64_t ddo_count; /* number of elements in ddt */
uint64_t ddo_dspace; /* size of ddt on disk */
uint64_t ddo_mspace; /* size of ddt in-core */
} ddt_object_t;
@@ -793,6 +964,9 @@ typedef struct ddt_histogram {
#define ZVOL_DRIVER "zvol"
#define ZFS_DRIVER "zfs"
#define ZFS_DEV "/dev/zfs"
+#define ZFS_SHARETAB "/etc/dfs/sharetab"
+
+#define ZFS_SUPER_MAGIC 0x2fc12fc1
/* general zvol path */
#define ZVOL_DIR "/dev"
@@ -811,7 +985,7 @@ typedef struct ddt_histogram {
*/
typedef enum zfs_ioc {
/*
- * Illumos - 70/128 numbers reserved.
+ * Illumos - 71/128 numbers reserved.
*/
ZFS_IOC_FIRST = ('Z' << 8),
ZFS_IOC = ZFS_IOC_FIRST,
@@ -885,6 +1059,8 @@ typedef enum zfs_ioc {
ZFS_IOC_BOOKMARK,
ZFS_IOC_GET_BOOKMARKS,
ZFS_IOC_DESTROY_BOOKMARKS,
+ ZFS_IOC_RECV_NEW,
+ ZFS_IOC_POOL_SYNC,
/*
* Linux - 3/64 numbers reserved.
@@ -905,7 +1081,7 @@ typedef enum zfs_ioc {
/*
* zvol ioctl to get dataset name
*/
-#define BLKZNAME _IOR(0x12, 125, char[ZFS_MAXNAMELEN])
+#define BLKZNAME _IOR(0x12, 125, char[ZFS_MAX_DATASET_NAME_LEN])
/*
* Internal SPA load state. Used by FMA diagnosis engine.
@@ -916,7 +1092,8 @@ typedef enum {
SPA_LOAD_IMPORT, /* import in progress */
SPA_LOAD_TRYIMPORT, /* tryimport in progress */
SPA_LOAD_RECOVER, /* recovery requested */
- SPA_LOAD_ERROR /* load failed */
+ SPA_LOAD_ERROR, /* load failed */
+ SPA_LOAD_CREATE /* creation in progress */
} spa_load_state_t;
/*
@@ -966,6 +1143,7 @@ typedef enum {
#define ZFS_IMPORT_MISSING_LOG 0x4
#define ZFS_IMPORT_ONLY 0x8
#define ZFS_IMPORT_TEMP_NAME 0x10
+#define ZFS_IMPORT_SKIP_MMP 0x20
/*
* Sysevent payload members. ZFS will generate the following sysevents with the
@@ -987,11 +1165,45 @@ typedef enum {
* ZFS_EV_POOL_GUID DATA_TYPE_UINT64
* ZFS_EV_VDEV_PATH DATA_TYPE_STRING (optional)
* ZFS_EV_VDEV_GUID DATA_TYPE_UINT64
+ *
+ * ESC_ZFS_HISTORY_EVENT
+ *
+ * ZFS_EV_POOL_NAME DATA_TYPE_STRING
+ * ZFS_EV_POOL_GUID DATA_TYPE_UINT64
+ * ZFS_EV_HIST_TIME DATA_TYPE_UINT64 (optional)
+ * ZFS_EV_HIST_CMD DATA_TYPE_STRING (optional)
+ * ZFS_EV_HIST_WHO DATA_TYPE_UINT64 (optional)
+ * ZFS_EV_HIST_ZONE DATA_TYPE_STRING (optional)
+ * ZFS_EV_HIST_HOST DATA_TYPE_STRING (optional)
+ * ZFS_EV_HIST_TXG DATA_TYPE_UINT64 (optional)
+ * ZFS_EV_HIST_INT_EVENT DATA_TYPE_UINT64 (optional)
+ * ZFS_EV_HIST_INT_STR DATA_TYPE_STRING (optional)
+ * ZFS_EV_HIST_INT_NAME DATA_TYPE_STRING (optional)
+ * ZFS_EV_HIST_IOCTL DATA_TYPE_STRING (optional)
+ * ZFS_EV_HIST_DSNAME DATA_TYPE_STRING (optional)
+ * ZFS_EV_HIST_DSID DATA_TYPE_UINT64 (optional)
+ *
+ * The ZFS_EV_HIST_* members will correspond to the ZPOOL_HIST_* members in the
+ * history log nvlist. The keynames will be free of any spaces or other
+ * characters that could be potentially unexpected to consumers of the
+ * sysevents.
*/
#define ZFS_EV_POOL_NAME "pool_name"
#define ZFS_EV_POOL_GUID "pool_guid"
#define ZFS_EV_VDEV_PATH "vdev_path"
#define ZFS_EV_VDEV_GUID "vdev_guid"
+#define ZFS_EV_HIST_TIME "history_time"
+#define ZFS_EV_HIST_CMD "history_command"
+#define ZFS_EV_HIST_WHO "history_who"
+#define ZFS_EV_HIST_ZONE "history_zone"
+#define ZFS_EV_HIST_HOST "history_hostname"
+#define ZFS_EV_HIST_TXG "history_txg"
+#define ZFS_EV_HIST_INT_EVENT "history_internal_event"
+#define ZFS_EV_HIST_INT_STR "history_internal_str"
+#define ZFS_EV_HIST_INT_NAME "history_internal_name"
+#define ZFS_EV_HIST_IOCTL "history_ioctl"
+#define ZFS_EV_HIST_DSNAME "history_dsname"
+#define ZFS_EV_HIST_DSID "history_dsid"
#ifdef __cplusplus
}
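
The HISTO() lookup in vdev_stat_ex above is a clamped floor(log2): a value lands in the bucket of its highest set bit, capped at the last bucket. A self-contained worked example; highbit64() is reimplemented here for a userland build (in the tree it comes from the SPL headers):

#include <stdint.h>
#include <stdio.h>

#define	MIN(a, b)		((a) < (b) ? (a) : (b))
#define	VDEV_L_HISTO_BUCKETS	37

static int
highbit64(uint64_t i)		/* 1-based index of the highest set bit */
{
	int h = 0;

	while (i != 0) {
		h++;
		i >>= 1;
	}
	return (h);
}

#define	HISTO(val, buckets)	(val != 0 ? MIN(highbit64(val) - 1, \
				    buckets - 1) : 0)
#define	L_HISTO(a)		HISTO(a, VDEV_L_HISTO_BUCKETS)

int
main(void)
{
	/* 4096 ns lands in bucket 12, since 2^12 <= 4096 < 2^13 */
	(void) printf("bucket(4096 ns) = %d\n", L_HISTO(4096ULL));
	/* anything >= 2^37 ns clamps to the last bucket, 36 */
	(void) printf("bucket(1 hour)  = %d\n",
	    L_HISTO(3600ULL * 1000000000ULL));
	return (0);
}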
diff --git a/zfs/include/sys/metaslab.h b/zfs/include/sys/metaslab.h
index 5f831a1f5604..be271c7020d5 100644
--- a/zfs/include/sys/metaslab.h
+++ b/zfs/include/sys/metaslab.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
*/
#ifndef _SYS_METASLAB_H
@@ -36,10 +36,12 @@
extern "C" {
#endif
+
typedef struct metaslab_ops {
- uint64_t (*msop_alloc)(metaslab_t *msp, uint64_t size);
+ uint64_t (*msop_alloc)(metaslab_t *, uint64_t);
} metaslab_ops_t;
+
extern metaslab_ops_t *zfs_metaslab_ops;
int metaslab_init(metaslab_group_t *, uint64_t, uint64_t, uint64_t,
@@ -55,27 +57,36 @@ void metaslab_sync_done(metaslab_t *, uint64_t);
void metaslab_sync_reassess(metaslab_group_t *);
uint64_t metaslab_block_maxsize(metaslab_t *);
-#define METASLAB_HINTBP_FAVOR 0x0
-#define METASLAB_HINTBP_AVOID 0x1
-#define METASLAB_GANG_HEADER 0x2
-#define METASLAB_GANG_CHILD 0x4
-#define METASLAB_GANG_AVOID 0x8
-#define METASLAB_FASTWRITE 0x10
+#define METASLAB_HINTBP_FAVOR 0x0
+#define METASLAB_HINTBP_AVOID 0x1
+#define METASLAB_GANG_HEADER 0x2
+#define METASLAB_GANG_CHILD 0x4
+#define METASLAB_ASYNC_ALLOC 0x8
+#define METASLAB_DONT_THROTTLE 0x10
+#define METASLAB_FASTWRITE 0x20
int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t,
- blkptr_t *, int, uint64_t, blkptr_t *, int);
+ blkptr_t *, int, uint64_t, blkptr_t *, int, zio_alloc_list_t *, zio_t *);
void metaslab_free(spa_t *, const blkptr_t *, uint64_t, boolean_t);
int metaslab_claim(spa_t *, const blkptr_t *, uint64_t);
void metaslab_check_free(spa_t *, const blkptr_t *);
void metaslab_fastwrite_mark(spa_t *, const blkptr_t *);
void metaslab_fastwrite_unmark(spa_t *, const blkptr_t *);
+void metaslab_alloc_trace_init(void);
+void metaslab_alloc_trace_fini(void);
+void metaslab_trace_init(zio_alloc_list_t *);
+void metaslab_trace_fini(zio_alloc_list_t *);
+
metaslab_class_t *metaslab_class_create(spa_t *, metaslab_ops_t *);
void metaslab_class_destroy(metaslab_class_t *);
int metaslab_class_validate(metaslab_class_t *);
void metaslab_class_histogram_verify(metaslab_class_t *);
uint64_t metaslab_class_fragmentation(metaslab_class_t *);
uint64_t metaslab_class_expandable_space(metaslab_class_t *);
+boolean_t metaslab_class_throttle_reserve(metaslab_class_t *, int,
+ zio_t *, int);
+void metaslab_class_throttle_unreserve(metaslab_class_t *, int, zio_t *);
void metaslab_class_space_update(metaslab_class_t *, int64_t, int64_t,
int64_t, int64_t);
@@ -88,10 +99,13 @@ metaslab_group_t *metaslab_group_create(metaslab_class_t *, vdev_t *);
void metaslab_group_destroy(metaslab_group_t *);
void metaslab_group_activate(metaslab_group_t *);
void metaslab_group_passivate(metaslab_group_t *);
+boolean_t metaslab_group_initialized(metaslab_group_t *);
uint64_t metaslab_group_get_space(metaslab_group_t *);
void metaslab_group_histogram_verify(metaslab_group_t *);
uint64_t metaslab_group_fragmentation(metaslab_group_t *);
void metaslab_group_histogram_remove(metaslab_group_t *, metaslab_t *);
+void metaslab_group_alloc_decrement(spa_t *, uint64_t, void *, int);
+void metaslab_group_alloc_verify(spa_t *, const blkptr_t *, void *);
#ifdef __cplusplus
}
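
The new throttle entry points imply a reserve/allocate/unreserve discipline around metaslab_alloc(), with metaslab_trace_init()/metaslab_trace_fini() bracketing the allocation trace list. A hedged sketch of that pairing; the flag values and error handling are illustrative assumptions from the prototypes above, not a verbatim caller:

/* Sketch only: reserve slots, allocate, release the reservation on failure. */
static int
alloc_throttled(spa_t *spa, metaslab_class_t *mc, zio_t *zio,
    uint64_t psize, blkptr_t *bp, int ndvas, uint64_t txg)
{
	zio_alloc_list_t zal;
	int error;

	/* one slot per DVA we intend to allocate; flags assumed 0 */
	if (!metaslab_class_throttle_reserve(mc, ndvas, zio, 0))
		return (EAGAIN);	/* throttled; caller retries */

	metaslab_trace_init(&zal);
	error = metaslab_alloc(spa, mc, psize, bp, ndvas, txg,
	    NULL, 0, &zal, zio);
	metaslab_trace_fini(&zal);

	if (error != 0)
		metaslab_class_throttle_unreserve(mc, ndvas, zio);
	return (error);
}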
diff --git a/zfs/include/sys/metaslab_impl.h b/zfs/include/sys/metaslab_impl.h
index 88bda071fa73..f8a713a4f1ff 100644
--- a/zfs/include/sys/metaslab_impl.h
+++ b/zfs/include/sys/metaslab_impl.h
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
*/
#ifndef _SYS_METASLAB_IMPL_H
@@ -41,6 +41,94 @@
extern "C" {
#endif
+/*
+ * Metaslab allocation tracing record.
+ */
+typedef struct metaslab_alloc_trace {
+ list_node_t mat_list_node;
+ metaslab_group_t *mat_mg;
+ metaslab_t *mat_msp;
+ uint64_t mat_size;
+ uint64_t mat_weight;
+ uint32_t mat_dva_id;
+ uint64_t mat_offset;
+} metaslab_alloc_trace_t;
+
+/*
+ * Used by the metaslab allocation tracing facility to indicate
+ * error conditions. These errors are stored to the offset member
+ * of the metaslab_alloc_trace_t record and displayed by mdb.
+ */
+typedef enum trace_alloc_type {
+ TRACE_ALLOC_FAILURE = -1ULL,
+ TRACE_TOO_SMALL = -2ULL,
+ TRACE_FORCE_GANG = -3ULL,
+ TRACE_NOT_ALLOCATABLE = -4ULL,
+ TRACE_GROUP_FAILURE = -5ULL,
+ TRACE_ENOSPC = -6ULL,
+ TRACE_CONDENSING = -7ULL,
+ TRACE_VDEV_ERROR = -8ULL
+} trace_alloc_type_t;
+
+#define METASLAB_WEIGHT_PRIMARY (1ULL << 63)
+#define METASLAB_WEIGHT_SECONDARY (1ULL << 62)
+#define METASLAB_WEIGHT_TYPE (1ULL << 61)
+#define METASLAB_ACTIVE_MASK \
+ (METASLAB_WEIGHT_PRIMARY | METASLAB_WEIGHT_SECONDARY)
+
+/*
+ * The metaslab weight is used to encode the amount of free space in a
+ * metaslab, such that the "best" metaslab appears first when sorting the
+ * metaslabs by weight. The weight (and therefore the "best" metaslab) can
+ * be determined in two different ways: by computing a weighted sum of all
+ * the free space in the metaslab (a space based weight) or by counting only
+ * the free segments of the largest size (a segment based weight). We prefer
+ * the segment based weight because it reflects how the free space is
+ * comprised, but we cannot always use it -- legacy pools do not have the
+ * space map histogram information necessary to determine the largest
+ * contiguous regions. Pools that have the space map histogram determine
+ * the segment weight by looking at each bucket in the histogram and
+ * determining the free space whose size in bytes is in the range:
+ * [2^i, 2^(i+1))
+ * We then encode the largest index, i, that contains regions into the
+ * segment-weighted value.
+ *
+ * Space-based weight:
+ *
+ * 64 56 48 40 32 24 16 8 0
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * |PS1| weighted-free space |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ *
+ * PS - indicates primary and secondary activation
+ * space - the fragmentation-weighted space
+ *
+ * Segment-based weight:
+ *
+ * 64 56 48 40 32 24 16 8 0
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ * |PS0| idx| count of segments in region |
+ * +-------+-------+-------+-------+-------+-------+-------+-------+
+ *
+ * PS - indicates primary and secondary activation
+ * idx - index for the highest bucket in the histogram
+ * count - number of segments in the specified bucket
+ */
+#define WEIGHT_GET_ACTIVE(weight) BF64_GET((weight), 62, 2)
+#define WEIGHT_SET_ACTIVE(weight, x) BF64_SET((weight), 62, 2, x)
+
+#define WEIGHT_IS_SPACEBASED(weight) \
+ ((weight) == 0 || BF64_GET((weight), 61, 1))
+#define WEIGHT_SET_SPACEBASED(weight) BF64_SET((weight), 61, 1, 1)
+
+/*
+ * These macros are only applicable to segment-based weighting.
+ */
+#define WEIGHT_GET_INDEX(weight) BF64_GET((weight), 55, 6)
+#define WEIGHT_SET_INDEX(weight, x) BF64_SET((weight), 55, 6, x)
+#define WEIGHT_GET_COUNT(weight) BF64_GET((weight), 0, 55)
+#define WEIGHT_SET_COUNT(weight, x) BF64_SET((weight), 0, 55, x)
+
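
As a worked example of the segment-based layout described in the comment above, the following self-contained userland program encodes and decodes a weight. The BF64 helpers are simplified equivalents of the <sys/sysmacros.h> bitfield macros:

#include <stdint.h>
#include <stdio.h>

/* Simplified equivalents of the <sys/sysmacros.h> bitfield helpers. */
#define	P2PHASE(x, align)		((x) & ((align) - 1))
#define	BF64_DECODE(x, low, len)	P2PHASE((x) >> (low), 1ULL << (len))
#define	BF64_ENCODE(x, low, len)	(P2PHASE((x), 1ULL << (len)) << (low))
#define	BF64_GET(x, low, len)		BF64_DECODE(x, low, len)
#define	BF64_SET(x, low, len, val) \
	((x) ^= BF64_ENCODE(((x) >> (low)) ^ (val), low, len))

#define	WEIGHT_SET_INDEX(weight, x)	BF64_SET((weight), 55, 6, x)
#define	WEIGHT_SET_COUNT(weight, x)	BF64_SET((weight), 0, 55, x)
#define	WEIGHT_GET_INDEX(weight)	BF64_GET((weight), 55, 6)
#define	WEIGHT_GET_COUNT(weight)	BF64_GET((weight), 0, 55)

int
main(void)
{
	uint64_t weight = 0;

	/* 100 free segments whose size class is [2^16, 2^17) */
	WEIGHT_SET_INDEX(weight, 16);
	WEIGHT_SET_COUNT(weight, 100);

	/* decodes back to idx=16 count=100 */
	(void) printf("idx=%llu count=%llu\n",
	    (unsigned long long)WEIGHT_GET_INDEX(weight),
	    (unsigned long long)WEIGHT_GET_COUNT(weight));
	return (0);
}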
/*
* A metaslab class encompasses a category of allocatable top-level vdevs.
* Each top-level vdev is associated with a metaslab group which defines
@@ -59,22 +147,52 @@ extern "C" {
* to use a block allocator that best suits that class.
*/
struct metaslab_class {
+ kmutex_t mc_lock;
spa_t *mc_spa;
metaslab_group_t *mc_rotor;
metaslab_ops_t *mc_ops;
uint64_t mc_aliquot;
+
+ /*
+ * Track the number of metaslab groups that have been initialized
+ * and can accept allocations. An initialized metaslab group is
+ * one that has been completely added to the config (i.e. we have
+ * updated the MOS config and the space has been added to the pool).
+ */
+ uint64_t mc_groups;
+
+ /*
+ * Toggle to enable/disable the allocation throttle.
+ */
+ boolean_t mc_alloc_throttle_enabled;
+
+ /*
+ * The allocation throttle works on a reservation system. Whenever
+ * an asynchronous zio wants to perform an allocation it must
+ * first reserve the number of blocks that it wants to allocate.
+ * If there aren't sufficient slots available for the pending zio
+ * then that I/O is throttled until more slots free up. The current
+ * number of reserved allocations is maintained by the mc_alloc_slots
+ * refcount. The mc_alloc_max_slots value determines the maximum
+ * number of allocations that the system allows. Gang blocks are
+ * allowed to reserve slots even if we've reached the maximum
+ * number of allocations allowed.
+ */
+ uint64_t mc_alloc_max_slots;
+ refcount_t mc_alloc_slots;
+
uint64_t mc_alloc_groups; /* # of allocatable groups */
+
uint64_t mc_alloc; /* total allocated space */
uint64_t mc_deferred; /* total deferred frees */
uint64_t mc_space; /* total space (alloc + free) */
uint64_t mc_dspace; /* total deflated space */
uint64_t mc_histogram[RANGE_TREE_HISTOGRAM_SIZE];
- kmutex_t mc_fastwrite_lock;
};
/*
* Metaslab groups encapsulate all the allocatable regions (i.e. metaslabs)
- * of a top-level vdev. They are linked togther to form a circular linked
+ * of a top-level vdev. They are linked together to form a circular linked
* list and can belong to only one metaslab class. Metaslab groups may become
* ineligible for allocations for a number of reasons such as limited free
* space, fragmentation, or going offline. When this happens the allocator will
@@ -86,6 +204,15 @@ struct metaslab_group {
avl_tree_t mg_metaslab_tree;
uint64_t mg_aliquot;
boolean_t mg_allocatable; /* can we allocate? */
+
+ /*
+ * A metaslab group is considered to be initialized only after
+ * we have updated the MOS config and added the space to the pool.
+ * We only allow allocation attempts to a metaslab group if it
+ * has been initialized.
+ */
+ boolean_t mg_initialized;
+
uint64_t mg_free_capacity; /* percentage free */
int64_t mg_bias;
int64_t mg_activation_count;
@@ -94,6 +221,27 @@ struct metaslab_group {
taskq_t *mg_taskq;
metaslab_group_t *mg_prev;
metaslab_group_t *mg_next;
+
+ /*
+ * Each metaslab group can handle mg_max_alloc_queue_depth allocations
+ * which are tracked by mg_alloc_queue_depth. It's possible for a
+ * metaslab group to handle more allocations than its max. This
+ * can occur when gang blocks are required or when other groups
+ * are unable to handle their share of allocations.
+ */
+ uint64_t mg_max_alloc_queue_depth;
+ refcount_t mg_alloc_queue_depth;
+
+ /*
+ * A metaslab group that can no longer allocate the minimum block
+ * size will set mg_no_free_space. Once a metaslab group is out
+ * of space then its share of work must be distributed to other
+ * groups.
+ */
+ boolean_t mg_no_free_space;
+
+ uint64_t mg_allocations;
+ uint64_t mg_failed_allocations;
uint64_t mg_fragmentation;
uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE];
};
@@ -106,21 +254,24 @@ struct metaslab_group {
#define MAX_LBAS 64
/*
- * Each metaslab maintains a set of in-core trees to track metaslab operations.
- * The in-core free tree (ms_tree) contains the current list of free segments.
- * As blocks are allocated, the allocated segment are removed from the ms_tree
- * and added to a per txg allocation tree (ms_alloctree). As blocks are freed,
- * they are added to the per txg free tree (ms_freetree). These per txg
- * trees allow us to process all allocations and frees in syncing context
- * where it is safe to update the on-disk space maps. One additional in-core
- * tree is maintained to track deferred frees (ms_defertree). Once a block
- * is freed it will move from the ms_freetree to the ms_defertree. A deferred
- * free means that a block has been freed but cannot be used by the pool
- * until TXG_DEFER_SIZE transactions groups later. For example, a block
- * that is freed in txg 50 will not be available for reallocation until
- * txg 52 (50 + TXG_DEFER_SIZE). This provides a safety net for uberblock
- * rollback. A pool could be safely rolled back TXG_DEFERS_SIZE
- * transactions groups and ensure that no block has been reallocated.
+ * Each metaslab maintains a set of in-core trees to track metaslab
+ * operations. The in-core free tree (ms_tree) contains the list of
+ * free segments which are eligible for allocation. As blocks are
+ * allocated, the allocated segments are removed from the ms_tree and
+ * added to a per txg allocation tree (ms_alloctree). This allows us to
+ * process all allocations in syncing context where it is safe to update
+ * the on-disk space maps. Frees are also processed in syncing context.
+ * Most frees are generated from syncing context, and those that are not
+ * are held in the spa_free_bplist for processing in syncing context.
+ * An additional set of in-core trees is maintained to track deferred
+ * frees (ms_defertree). Once a block is freed it will move from the
+ * ms_freedtree to the ms_defertree. A deferred free means that a block
+ * has been freed but cannot be used by the pool until TXG_DEFER_SIZE
+ * transaction groups later. For example, a block that is freed in txg
+ * 50 will not be available for reallocation until txg 52 (50 +
+ * TXG_DEFER_SIZE). This provides a safety net for uberblock rollback.
+ * A pool could be safely rolled back TXG_DEFER_SIZE transaction
+ * groups while ensuring that no block has been reallocated.
*
* The simplified transition diagram looks like this:
*
@@ -128,57 +279,79 @@ struct metaslab_group {
* ALLOCATE
* |
* V
- * free segment (ms_tree) --------> ms_alloctree ----> (write to space map)
+ * free segment (ms_tree) -----> ms_alloctree[4] ----> (write to space map)
* ^
- * |
- * | ms_freetree <--- FREE
- * | |
+ * | ms_freeingtree <--- FREE
* | |
+ * | v
+ * | ms_freedtree
* | |
- * +----------- ms_defertree <-------+---------> (write to space map)
+ * +-------- ms_defertree[2] <-------+---------> (write to space map)
*
*
* Each metaslab's space is tracked in a single space map in the MOS,
- * which is only updated in syncing context. Each time we sync a txg,
- * we append the allocs and frees from that txg to the space map.
- * The pool space is only updated once all metaslabs have finished syncing.
+ * which is only updated in syncing context. Each time we sync a txg,
+ * we append the allocs and frees from that txg to the space map. The
+ * pool space is only updated once all metaslabs have finished syncing.
*
- * To load the in-core free tree we read the space map from disk.
- * This object contains a series of alloc and free records that are
- * combined to make up the list of all free segments in this metaslab. These
+ * To load the in-core free tree we read the space map from disk. This
+ * object contains a series of alloc and free records that are combined
+ * to make up the list of all free segments in this metaslab. These
* segments are represented in-core by the ms_tree and are stored in an
* AVL tree.
*
* As the space map grows (as a result of the appends) it will
- * eventually become space-inefficient. When the metaslab's in-core free tree
- * is zfs_condense_pct/100 times the size of the minimal on-disk
- * representation, we rewrite it in its minimized form. If a metaslab
- * needs to condense then we must set the ms_condensing flag to ensure
- * that allocations are not performed on the metaslab that is being written.
+ * eventually become space-inefficient. When the metaslab's in-core
+ * free tree is zfs_condense_pct/100 times the size of the minimal
+ * on-disk representation, we rewrite it in its minimized form. If a
+ * metaslab needs to condense then we must set the ms_condensing flag to
+ * ensure that allocations are not performed on the metaslab that is
+ * being written.
*/
struct metaslab {
kmutex_t ms_lock;
kcondvar_t ms_load_cv;
space_map_t *ms_sm;
- metaslab_ops_t *ms_ops;
uint64_t ms_id;
uint64_t ms_start;
uint64_t ms_size;
uint64_t ms_fragmentation;
range_tree_t *ms_alloctree[TXG_SIZE];
- range_tree_t *ms_freetree[TXG_SIZE];
- range_tree_t *ms_defertree[TXG_DEFER_SIZE];
range_tree_t *ms_tree;
+ /*
+ * The following range trees are accessed only from syncing context.
+ * ms_free*tree only have entries while syncing, and are empty
+ * between syncs.
+ */
+ range_tree_t *ms_freeingtree; /* to free this syncing txg */
+ range_tree_t *ms_freedtree; /* already freed this syncing txg */
+ range_tree_t *ms_defertree[TXG_DEFER_SIZE];
+
boolean_t ms_condensing; /* condensing? */
boolean_t ms_condense_wanted;
+
+ /*
+ * We must hold both ms_lock and ms_group->mg_lock in order to
+ * modify ms_loaded.
+ */
boolean_t ms_loaded;
boolean_t ms_loading;
int64_t ms_deferspace; /* sum of ms_defermap[] space */
uint64_t ms_weight; /* weight vs. others in group */
- uint64_t ms_access_txg;
+ uint64_t ms_activation_weight; /* activation weight */
+
+ /*
+ * Tracks whenever a metaslab is selected for loading or allocation.
+ * We use this value to determine how long the metaslab should
+ * stay cached.
+ */
+ uint64_t ms_selected_txg;
+
+ uint64_t ms_alloc_txg; /* last successful alloc (debug only) */
+ uint64_t ms_max_size; /* maximum allocatable size */
/*
* The metaslab block allocators can optionally use a size-ordered
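
For reference, a minimal userland sketch of the segment-based weight
layout documented at the top of this hunk (the bf64_get/bf64_set
helpers model the kernel's BF64_GET/BF64_SET macros and the field
positions match the WEIGHT_* macros above; illustrative only, not part
of the patch):

    #include <stdio.h>
    #include <stdint.h>

    /* Extract a len-bit field starting at bit 'low' (models BF64_GET). */
    static uint64_t bf64_get(uint64_t x, int low, int len)
    {
        return ((x >> low) & ((1ULL << len) - 1));
    }

    /* Store a len-bit field starting at bit 'low' (models BF64_SET). */
    static void bf64_set(uint64_t *x, int low, int len, uint64_t val)
    {
        uint64_t mask = ((1ULL << len) - 1) << low;
        *x = (*x & ~mask) | ((val << low) & mask);
    }

    int main(void)
    {
        uint64_t weight = 0;

        bf64_set(&weight, 62, 2, 1);    /* WEIGHT_SET_ACTIVE: primary */
        bf64_set(&weight, 55, 6, 37);   /* WEIGHT_SET_INDEX: 2^37 bucket */
        bf64_set(&weight, 0, 55, 124);  /* WEIGHT_SET_COUNT: 124 segments */

        /* Bit 61 clear marks the weight as segment-based. */
        printf("active=%llu idx=%llu count=%llu spacebased=%llu\n",
            (unsigned long long)bf64_get(weight, 62, 2),
            (unsigned long long)bf64_get(weight, 55, 6),
            (unsigned long long)bf64_get(weight, 0, 55),
            (unsigned long long)bf64_get(weight, 61, 1));
        return (0);
    }
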
diff --git a/zfs/include/sys/mmp.h b/zfs/include/sys/mmp.h
new file mode 100644
index 000000000000..5b2fea1a66b1
--- /dev/null
+++ b/zfs/include/sys/mmp.h
@@ -0,0 +1,64 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2017 by Lawrence Livermore National Security, LLC.
+ */
+
+#ifndef _SYS_MMP_H
+#define _SYS_MMP_H
+
+#include <sys/spa.h>
+#include <sys/zfs_context.h>
+#include <sys/uberblock_impl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define MMP_MIN_INTERVAL 100 /* ms */
+#define MMP_DEFAULT_INTERVAL 1000 /* ms */
+#define MMP_DEFAULT_IMPORT_INTERVALS 10
+#define MMP_DEFAULT_FAIL_INTERVALS 5
+
+typedef struct mmp_thread {
+ kmutex_t mmp_thread_lock; /* protect thread mgmt fields */
+ kcondvar_t mmp_thread_cv;
+ kthread_t *mmp_thread;
+ uint8_t mmp_thread_exiting;
+ kmutex_t mmp_io_lock; /* protect below */
+ hrtime_t mmp_last_write; /* last successful MMP write */
+ uint64_t mmp_delay; /* decaying avg ns between MMP writes */
+ uberblock_t mmp_ub; /* last ub written by sync */
+ zio_t *mmp_zio_root; /* root of mmp write zios */
+} mmp_thread_t;
+
+
+extern void mmp_init(struct spa *spa);
+extern void mmp_fini(struct spa *spa);
+extern void mmp_thread_start(struct spa *spa);
+extern void mmp_thread_stop(struct spa *spa);
+extern void mmp_update_uberblock(struct spa *spa, struct uberblock *ub);
+extern void mmp_signal_all_threads(void);
+
+/* Global tuning */
+extern ulong_t zfs_multihost_interval;
+extern uint_t zfs_multihost_fail_intervals;
+extern uint_t zfs_multihost_import_intervals;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_MMP_H */
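
Back-of-the-envelope arithmetic for the tunables above: an import of a
potentially active pool must watch for activity across roughly
zfs_multihost_import_intervals check periods, and the pool suspends
after zfs_multihost_fail_intervals intervals pass without a successful
MMP write. A sketch with the default values (illustrative only; the
kernel also folds the observed mmp_delay into these windows):

    #include <stdio.h>

    #define MMP_DEFAULT_INTERVAL            1000    /* ms */
    #define MMP_DEFAULT_IMPORT_INTERVALS    10
    #define MMP_DEFAULT_FAIL_INTERVALS      5

    int main(void)
    {
        /* Minimum activity-check window during import. */
        unsigned long import_wait_ms =
            (unsigned long)MMP_DEFAULT_INTERVAL *
            MMP_DEFAULT_IMPORT_INTERVALS;

        /* Window without a successful write before suspension. */
        unsigned long suspend_after_ms =
            (unsigned long)MMP_DEFAULT_INTERVAL *
            MMP_DEFAULT_FAIL_INTERVALS;

        printf("import activity check: ~%lu ms\n", import_wait_ms);
        printf("suspend after: ~%lu ms\n", suspend_after_ms);
        return (0);
    }
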
diff --git a/zfs/include/sys/multilist.h b/zfs/include/sys/multilist.h
index 98d707dd71ef..439540685971 100644
--- a/zfs/include/sys/multilist.h
+++ b/zfs/include/sys/multilist.h
@@ -13,7 +13,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
*/
#ifndef _SYS_MULTILIST_H
@@ -72,8 +72,7 @@ struct multilist {
};
void multilist_destroy(multilist_t *);
-void multilist_create(multilist_t *, size_t, size_t, unsigned int,
- multilist_sublist_index_func_t *);
+multilist_t *multilist_create(size_t, size_t, multilist_sublist_index_func_t *);
void multilist_insert(multilist_t *, void *);
void multilist_remove(multilist_t *, void *);
@@ -83,6 +82,7 @@ unsigned int multilist_get_num_sublists(multilist_t *);
unsigned int multilist_get_random_index(multilist_t *);
multilist_sublist_t *multilist_sublist_lock(multilist_t *, unsigned int);
+multilist_sublist_t *multilist_sublist_lock_obj(multilist_t *, void *);
void multilist_sublist_unlock(multilist_sublist_t *);
void multilist_sublist_insert_head(multilist_sublist_t *, void *);
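
With this change multilist_create() allocates and returns the list
itself (callers previously supplied the multilist_t and a sublist
count). A hypothetical caller, sketched against this header (my_obj_t
and my_obj_index_func are made-up names; the two size_t arguments are
assumed to be the object size and the offset of its multilist_node_t,
matching the existing callers):

    typedef struct my_obj {
        multilist_node_t mo_node;   /* linkage into the multilist */
        uint64_t mo_key;
    } my_obj_t;

    /* Assumed index-func contract: map an object to a sublist. */
    static unsigned int
    my_obj_index_func(multilist_t *ml, void *obj)
    {
        return (((my_obj_t *)obj)->mo_key %
            multilist_get_num_sublists(ml));
    }

    static multilist_t *
    my_obj_multilist_create(void)
    {
        /* No caller-allocated multilist_t, no explicit sublist count. */
        return (multilist_create(sizeof (my_obj_t),
            offsetof(my_obj_t, mo_node), my_obj_index_func));
    }
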
diff --git a/zfs/include/sys/pathname.h b/zfs/include/sys/pathname.h
new file mode 100644
index 000000000000..5db69b1784c9
--- /dev/null
+++ b/zfs/include/sys/pathname.h
@@ -0,0 +1,70 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
+/* All Rights Reserved */
+
+/*
+ * Portions of this source code were derived from Berkeley 4.3 BSD
+ * under license from the Regents of the University of California.
+ */
+
+#ifndef _SYS_PATHNAME_H
+#define _SYS_PATHNAME_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Pathname structure.
+ * System calls that operate on path names gather the path name
+ * from the system call into this structure and reduce it by
+ * peeling off translated components. If a symbolic link is
+ * encountered the new path name to be translated is also
+ * assembled in this structure.
+ *
+ * By convention pn_buf is not changed once it's been set to point
+ * to the underlying storage; routines which manipulate the pathname
+ * do so by changing pn_path and pn_pathlen. pn_pathlen is redundant
+ * since the path name is null-terminated, but is provided to make
+ * some computations faster.
+ */
+typedef struct pathname {
+ char *pn_buf; /* underlying storage */
+ char *pn_path; /* remaining pathname */
+ size_t pn_pathlen; /* remaining length */
+ size_t pn_bufsize; /* total size of pn_buf */
+} pathname_t;
+
+extern void pn_alloc(struct pathname *);
+extern void pn_alloc_sz(struct pathname *, size_t);
+extern void pn_free(struct pathname *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_PATHNAME_H */
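
A small userland model of the convention described in the comment:
pn_buf stays fixed while lookup advances pn_path/pn_pathlen past each
translated component (illustrative only, not part of the patch):

    #include <stdio.h>
    #include <string.h>

    typedef struct pathname {
        char *pn_buf;           /* underlying storage */
        char *pn_path;          /* remaining pathname */
        size_t pn_pathlen;      /* remaining length */
        size_t pn_bufsize;      /* total size of pn_buf */
    } pathname_t;

    int main(void)
    {
        static char buf[] = "usr/lib/zfs";
        pathname_t pn = { buf, buf, strlen(buf), sizeof (buf) };

        /* Peel the first component ("usr") without touching pn_buf. */
        char *slash = strchr(pn.pn_path, '/');
        if (slash != NULL) {
            size_t consumed = (size_t)(slash - pn.pn_path) + 1;
            pn.pn_path += consumed;
            pn.pn_pathlen -= consumed;
        }
        printf("remaining: %s (%zu bytes)\n", pn.pn_path, pn.pn_pathlen);
        return (0);
    }
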
diff --git a/zfs/include/sys/policy.h b/zfs/include/sys/policy.h
new file mode 100644
index 000000000000..23d7d4db77f2
--- /dev/null
+++ b/zfs/include/sys/policy.h
@@ -0,0 +1,60 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2016, Lawrence Livermore National Security, LLC.
+ */
+
+#ifndef _SYS_POLICY_H
+#define _SYS_POLICY_H
+
+#ifdef _KERNEL
+
+#include <sys/cred.h>
+#include <sys/types.h>
+#include <sys/xvattr.h>
+#include <sys/zpl.h>
+
+int secpolicy_nfs(const cred_t *);
+int secpolicy_sys_config(const cred_t *, boolean_t);
+int secpolicy_vnode_access2(const cred_t *, struct inode *,
+ uid_t, mode_t, mode_t);
+int secpolicy_vnode_any_access(const cred_t *, struct inode *, uid_t);
+int secpolicy_vnode_chown(const cred_t *, uid_t);
+int secpolicy_vnode_create_gid(const cred_t *);
+int secpolicy_vnode_remove(const cred_t *);
+int secpolicy_vnode_setdac(const cred_t *, uid_t);
+int secpolicy_vnode_setid_retain(const cred_t *, boolean_t);
+int secpolicy_vnode_setids_setgids(const cred_t *, gid_t);
+int secpolicy_zinject(const cred_t *);
+int secpolicy_zfs(const cred_t *);
+void secpolicy_setid_clear(vattr_t *, cred_t *);
+int secpolicy_setid_setsticky_clear(struct inode *, vattr_t *,
+ const vattr_t *, cred_t *);
+int secpolicy_xvattr(xvattr_t *, uid_t, cred_t *, vtype_t);
+int secpolicy_vnode_setattr(cred_t *, struct inode *, struct vattr *,
+ const struct vattr *, int, int (void *, int, cred_t *), void *);
+int secpolicy_basic_link(const cred_t *);
+
+#endif /* _KERNEL */
+#endif /* _SYS_POLICY_H */
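
These hooks follow the usual kernel convention of returning 0 when the
credential permits the action and an errno otherwise; a hypothetical
caller might gate a privileged operation like this (sketch only, not
part of the patch):

    static int
    example_privileged_op(const cred_t *cr)
    {
        int error;

        /* 0 means the credential carries the required privilege. */
        if ((error = secpolicy_zfs(cr)) != 0)
            return (error);     /* typically EPERM */

        /* ... perform the privileged operation ... */
        return (0);
    }
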
diff --git a/zfs/include/sys/refcount.h b/zfs/include/sys/refcount.h
index 1d74187d4039..a96220b29ad0 100644
--- a/zfs/include/sys/refcount.h
+++ b/zfs/include/sys/refcount.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
*/
#ifndef _SYS_REFCOUNT_H
@@ -64,14 +65,15 @@ typedef struct refcount {
boolean_t rc_tracked;
list_t rc_list;
list_t rc_removed;
- int64_t rc_count;
- int64_t rc_removed_count;
+ uint64_t rc_count;
+ uint64_t rc_removed_count;
} zfs_refcount_t;
/* Note: refcount_t must be initialized with refcount_create[_untracked]() */
void refcount_create(refcount_t *rc);
void refcount_create_untracked(refcount_t *rc);
+void refcount_create_tracked(refcount_t *rc);
void refcount_destroy(refcount_t *rc);
void refcount_destroy_many(refcount_t *rc, uint64_t number);
int refcount_is_zero(refcount_t *rc);
@@ -81,6 +83,9 @@ int64_t refcount_remove(refcount_t *rc, void *holder_tag);
int64_t refcount_add_many(refcount_t *rc, uint64_t number, void *holder_tag);
int64_t refcount_remove_many(refcount_t *rc, uint64_t number, void *holder_tag);
void refcount_transfer(refcount_t *dst, refcount_t *src);
+void refcount_transfer_ownership(refcount_t *, void *, void *);
+boolean_t refcount_held(refcount_t *, void *);
+boolean_t refcount_not_held(refcount_t *, void *);
void refcount_init(void);
void refcount_fini(void);
@@ -93,12 +98,13 @@ typedef struct refcount {
#define refcount_create(rc) ((rc)->rc_count = 0)
#define refcount_create_untracked(rc) ((rc)->rc_count = 0)
+#define refcount_create_tracked(rc) ((rc)->rc_count = 0)
#define refcount_destroy(rc) ((rc)->rc_count = 0)
#define refcount_destroy_many(rc, number) ((rc)->rc_count = 0)
#define refcount_is_zero(rc) ((rc)->rc_count == 0)
#define refcount_count(rc) ((rc)->rc_count)
-#define zfs_refcount_add(rc, holder) atomic_add_64_nv(&(rc)->rc_count, 1)
-#define refcount_remove(rc, holder) atomic_add_64_nv(&(rc)->rc_count, -1)
+#define zfs_refcount_add(rc, holder) atomic_inc_64_nv(&(rc)->rc_count)
+#define refcount_remove(rc, holder) atomic_dec_64_nv(&(rc)->rc_count)
#define refcount_add_many(rc, number, holder) \
atomic_add_64_nv(&(rc)->rc_count, number)
#define refcount_remove_many(rc, number, holder) \
@@ -108,6 +114,9 @@ typedef struct refcount {
atomic_add_64(&(src)->rc_count, -__tmp); \
atomic_add_64(&(dst)->rc_count, __tmp); \
}
+#define refcount_transfer_ownership(rc, current_holder, new_holder) (void)0
+#define refcount_held(rc, holder) ((rc)->rc_count > 0)
+#define refcount_not_held(rc, holder) (B_TRUE)
#define refcount_init()
#define refcount_fini()
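
In this non-debug build the holder tags are ignored and every operation
collapses to a plain 64-bit atomic, which is why refcount_not_held()
degrades to B_TRUE: with no tracking there is nothing to check against.
A userland model of that fast path (sketch only; C11 atomics stand in
for the kernel's atomic_*_64_nv primitives):

    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdint.h>

    typedef struct refcount { _Atomic uint64_t rc_count; } refcount_t;

    #define refcount_create(rc)     atomic_store(&(rc)->rc_count, 0)
    /* *_nv semantics: return the new value after the update. */
    #define zfs_refcount_add(rc, holder) \
        (atomic_fetch_add(&(rc)->rc_count, 1) + 1)
    #define refcount_remove(rc, holder) \
        (atomic_fetch_sub(&(rc)->rc_count, 1) - 1)

    int main(void)
    {
        refcount_t rc;

        refcount_create(&rc);
        zfs_refcount_add(&rc, NULL);    /* holder tag is unused */
        printf("count=%llu\n",
            (unsigned long long)atomic_load(&rc.rc_count));
        refcount_remove(&rc, NULL);
        return (0);
    }
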
diff --git a/zfs/include/sys/sa.h b/zfs/include/sys/sa.h
index 01d24662a0e0..b7ed9fe38cbd 100644
--- a/zfs/include/sys/sa.h
+++ b/zfs/include/sys/sa.h
@@ -134,8 +134,6 @@ int sa_bulk_lookup(sa_handle_t *, sa_bulk_attr_t *, int count);
int sa_bulk_lookup_locked(sa_handle_t *, sa_bulk_attr_t *, int count);
int sa_bulk_update(sa_handle_t *, sa_bulk_attr_t *, int count, dmu_tx_t *);
int sa_size(sa_handle_t *, sa_attr_type_t, int *);
-int sa_update_from_cb(sa_handle_t *, sa_attr_type_t,
- uint32_t buflen, sa_data_locator_t *, void *userdata, dmu_tx_t *);
void sa_object_info(sa_handle_t *, dmu_object_info_t *);
void sa_object_size(sa_handle_t *, uint32_t *, u_longlong_t *);
void *sa_get_userdata(sa_handle_t *);
diff --git a/zfs/include/sys/sa_impl.h b/zfs/include/sys/sa_impl.h
index 6f2f1db6dcf9..b68b7610b25e 100644
--- a/zfs/include/sys/sa_impl.h
+++ b/zfs/include/sys/sa_impl.h
@@ -235,7 +235,7 @@ struct sa_handle {
#define SA_BONUSTYPE_FROM_DB(db) \
(dmu_get_bonustype((dmu_buf_t *)db))
-#define SA_BLKPTR_SPACE (DN_MAX_BONUSLEN - sizeof (blkptr_t))
+#define SA_BLKPTR_SPACE (DN_OLD_MAX_BONUSLEN - sizeof (blkptr_t))
#define SA_LAYOUT_NUM(x, type) \
((!IS_SA_BONUSTYPE(type) ? 0 : (((IS_SA_BONUSTYPE(type)) && \
diff --git a/zfs/include/sys/sdt.h b/zfs/include/sys/sdt.h
index 56efa1b3998b..9704072cb747 100644
--- a/zfs/include/sys/sdt.h
+++ b/zfs/include/sys/sdt.h
@@ -34,13 +34,8 @@
#define ZFS_PROBE2(a, c, e) ((void) 0)
#define ZFS_PROBE3(a, c, e, g) ((void) 0)
#define ZFS_PROBE4(a, c, e, g, i) ((void) 0)
-#define ZFS_SET_ERROR(err) ((void) 0)
-#else
-
-#if defined(HAVE_DECLARE_EVENT_CLASS)
-
-#include <sys/trace.h>
+#endif /* _KERNEL */
/*
* The set-error SDT probe is extra static, in that we declare its fake
@@ -55,16 +50,9 @@
* twice, so it should not have side effects (e.g. something like:
* "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice).
*/
-#define SET_ERROR(err) \
- (trace_zfs_set__error(__FILE__, __func__, __LINE__, err), err)
-
-#else
-
+extern void __set_error(const char *file, const char *func, int line, int err);
#undef SET_ERROR
-#define SET_ERROR(err) (err)
-
-#endif /* HAVE_DECLARE_EVENT_CLASS */
-
-#endif /* _KERNEL */
+#define SET_ERROR(err) \
+ (__set_error(__FILE__, __func__, __LINE__, err), err)
#endif /* _SYS_SDT_H */
diff --git a/zfs/include/sys/sha2.h b/zfs/include/sys/sha2.h
new file mode 100644
index 000000000000..9039835f18ff
--- /dev/null
+++ b/zfs/include/sys/sha2.h
@@ -0,0 +1,155 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/* Copyright 2013 Saso Kiselkov. All rights reserved. */
+
+#ifndef _SYS_SHA2_H
+#define _SYS_SHA2_H
+
+#ifdef _KERNEL
+#include <sys/types.h> /* for uint_* */
+#else
+#include <stdint.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SHA2_HMAC_MIN_KEY_LEN 1 /* SHA2-HMAC min key length in bytes */
+#define SHA2_HMAC_MAX_KEY_LEN INT_MAX /* SHA2-HMAC max key length in bytes */
+
+#define SHA256_DIGEST_LENGTH 32 /* SHA256 digest length in bytes */
+#define SHA384_DIGEST_LENGTH 48 /* SHA384 digest length in bytes */
+#define SHA512_DIGEST_LENGTH 64 /* SHA512 digest length in bytes */
+
+/* Truncated versions of SHA-512 according to FIPS-180-4, section 5.3.6 */
+#define SHA512_224_DIGEST_LENGTH 28 /* SHA512/224 digest length */
+#define SHA512_256_DIGEST_LENGTH 32 /* SHA512/256 digest length */
+
+#define SHA256_HMAC_BLOCK_SIZE 64 /* SHA256-HMAC block size */
+#define SHA512_HMAC_BLOCK_SIZE 128 /* SHA512-HMAC block size */
+
+#define SHA256 0
+#define SHA256_HMAC 1
+#define SHA256_HMAC_GEN 2
+#define SHA384 3
+#define SHA384_HMAC 4
+#define SHA384_HMAC_GEN 5
+#define SHA512 6
+#define SHA512_HMAC 7
+#define SHA512_HMAC_GEN 8
+#define SHA512_224 9
+#define SHA512_256 10
+
+/*
+ * SHA2 context.
+ * The contents of this structure are a private interface between the
+ * Init/Update/Final calls of the functions defined below.
+ * Callers must never attempt to read or write any of the fields
+ * in this structure directly.
+ */
+typedef struct {
+ uint32_t algotype; /* Algorithm Type */
+
+ /* state (ABCDEFGH) */
+ union {
+ uint32_t s32[8]; /* for SHA256 */
+ uint64_t s64[8]; /* for SHA384/512 */
+ } state;
+ /* number of bits */
+ union {
+ uint32_t c32[2]; /* for SHA256, modulo 2^64 */
+ uint64_t c64[2]; /* for SHA384/512, modulo 2^128 */
+ } count;
+ union {
+ uint8_t buf8[128]; /* undigested input */
+ uint32_t buf32[32]; /* realigned input */
+ uint64_t buf64[16]; /* realigned input */
+ } buf_un;
+} SHA2_CTX;
+
+typedef SHA2_CTX SHA256_CTX;
+typedef SHA2_CTX SHA384_CTX;
+typedef SHA2_CTX SHA512_CTX;
+
+extern void SHA2Init(uint64_t mech, SHA2_CTX *);
+
+extern void SHA2Update(SHA2_CTX *, const void *, size_t);
+
+extern void SHA2Final(void *, SHA2_CTX *);
+
+extern void SHA256Init(SHA256_CTX *);
+
+extern void SHA256Update(SHA256_CTX *, const void *, size_t);
+
+extern void SHA256Final(void *, SHA256_CTX *);
+
+extern void SHA384Init(SHA384_CTX *);
+
+extern void SHA384Update(SHA384_CTX *, const void *, size_t);
+
+extern void SHA384Final(void *, SHA384_CTX *);
+
+extern void SHA512Init(SHA512_CTX *);
+
+extern void SHA512Update(SHA512_CTX *, const void *, size_t);
+
+extern void SHA512Final(void *, SHA512_CTX *);
+
+#ifdef _SHA2_IMPL
+/*
+ * The following types/functions are all private to the implementation
+ * of the SHA2 functions and must not be used by consumers of the interface
+ */
+
+/*
+ * List of supported mechanisms in this module.
+ *
+ * It is important to note that in the module, division or modulus calculations
+ * are used on the enumerated type to determine which mechanism is being used;
+ * therefore, changing the order or adding mechanisms should be done
+ * carefully.
+ */
+typedef enum sha2_mech_type {
+ SHA256_MECH_INFO_TYPE, /* SUN_CKM_SHA256 */
+ SHA256_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC */
+ SHA256_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA256_HMAC_GENERAL */
+ SHA384_MECH_INFO_TYPE, /* SUN_CKM_SHA384 */
+ SHA384_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC */
+ SHA384_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA384_HMAC_GENERAL */
+ SHA512_MECH_INFO_TYPE, /* SUN_CKM_SHA512 */
+ SHA512_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC */
+ SHA512_HMAC_GEN_MECH_INFO_TYPE, /* SUN_CKM_SHA512_HMAC_GENERAL */
+ SHA512_224_MECH_INFO_TYPE, /* SUN_CKM_SHA512_224 */
+ SHA512_256_MECH_INFO_TYPE /* SUN_CKM_SHA512_256 */
+} sha2_mech_type_t;
+
+#endif /* _SHA2_IMPL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SHA2_H */
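
A usage sketch for the incremental interface above: SHA2Init() selects
the algorithm via the mechanism constants, so the one SHA2_CTX type
serves every digest size (assumes linking against this module's
implementation):

    static void
    example_sha256(const void *data, size_t len,
        uint8_t digest[SHA256_DIGEST_LENGTH])
    {
        SHA2_CTX ctx;

        SHA2Init(SHA256, &ctx);         /* mechanism selects SHA-256 */
        SHA2Update(&ctx, data, len);    /* may be called repeatedly */
        SHA2Final(digest, &ctx);        /* writes 32 bytes */
    }
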
diff --git a/zfs/include/sys/skein.h b/zfs/include/sys/skein.h
new file mode 100644
index 000000000000..2f649d6b269a
--- /dev/null
+++ b/zfs/include/sys/skein.h
@@ -0,0 +1,183 @@
+/*
+ * Interface declarations for Skein hashing.
+ * Source code author: Doug Whiting, 2008.
+ * This algorithm and source code is released to the public domain.
+ *
+ * The following compile-time switches may be defined to control some
+ * tradeoffs between speed, code size, error checking, and security.
+ *
+ * The "default" note explains what happens when the switch is not defined.
+ *
+ * SKEIN_DEBUG -- make callouts from inside Skein code
+ * to examine/display intermediate values.
+ * [default: no callouts (no overhead)]
+ *
+ * SKEIN_ERR_CHECK -- how error checking is handled inside Skein
+ * code. If not defined, most error checking
+ * is disabled (for performance). Otherwise,
+ * the switch value is interpreted as:
+ * 0: use assert() to flag errors
+ * 1: return SKEIN_FAIL to flag errors
+ */
+/* Copyright 2013 Doug Whiting. This code is released to the public domain. */
+#ifndef _SYS_SKEIN_H_
+#define _SYS_SKEIN_H_
+
+#ifdef _KERNEL
+#include <sys/types.h> /* get size_t definition */
+#else
+#include <stdint.h>
+#include <stdlib.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+ SKEIN_SUCCESS = 0, /* return codes from Skein calls */
+ SKEIN_FAIL = 1,
+ SKEIN_BAD_HASHLEN = 2
+};
+
+#define SKEIN_MODIFIER_WORDS (2) /* number of modifier (tweak) words */
+
+#define SKEIN_256_STATE_WORDS (4)
+#define SKEIN_512_STATE_WORDS (8)
+#define SKEIN1024_STATE_WORDS (16)
+#define SKEIN_MAX_STATE_WORDS (16)
+
+#define SKEIN_256_STATE_BYTES (8 * SKEIN_256_STATE_WORDS)
+#define SKEIN_512_STATE_BYTES (8 * SKEIN_512_STATE_WORDS)
+#define SKEIN1024_STATE_BYTES (8 * SKEIN1024_STATE_WORDS)
+
+#define SKEIN_256_STATE_BITS (64 * SKEIN_256_STATE_WORDS)
+#define SKEIN_512_STATE_BITS (64 * SKEIN_512_STATE_WORDS)
+#define SKEIN1024_STATE_BITS (64 * SKEIN1024_STATE_WORDS)
+
+#define SKEIN_256_BLOCK_BYTES (8 * SKEIN_256_STATE_WORDS)
+#define SKEIN_512_BLOCK_BYTES (8 * SKEIN_512_STATE_WORDS)
+#define SKEIN1024_BLOCK_BYTES (8 * SKEIN1024_STATE_WORDS)
+
+typedef struct {
+ size_t hashBitLen; /* size of hash result, in bits */
+ size_t bCnt; /* current byte count in buffer b[] */
+ /* tweak words: T[0]=byte cnt, T[1]=flags */
+ uint64_t T[SKEIN_MODIFIER_WORDS];
+} Skein_Ctxt_Hdr_t;
+
+typedef struct { /* 256-bit Skein hash context structure */
+ Skein_Ctxt_Hdr_t h; /* common header context variables */
+ uint64_t X[SKEIN_256_STATE_WORDS]; /* chaining variables */
+ /* partial block buffer (8-byte aligned) */
+ uint8_t b[SKEIN_256_BLOCK_BYTES];
+} Skein_256_Ctxt_t;
+
+typedef struct { /* 512-bit Skein hash context structure */
+ Skein_Ctxt_Hdr_t h; /* common header context variables */
+ uint64_t X[SKEIN_512_STATE_WORDS]; /* chaining variables */
+ /* partial block buffer (8-byte aligned) */
+ uint8_t b[SKEIN_512_BLOCK_BYTES];
+} Skein_512_Ctxt_t;
+
+typedef struct { /* 1024-bit Skein hash context structure */
+ Skein_Ctxt_Hdr_t h; /* common header context variables */
+ uint64_t X[SKEIN1024_STATE_WORDS]; /* chaining variables */
+ /* partial block buffer (8-byte aligned) */
+ uint8_t b[SKEIN1024_BLOCK_BYTES];
+} Skein1024_Ctxt_t;
+
+/* Skein APIs for (incremental) "straight hashing" */
+int Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen);
+int Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen);
+int Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen);
+
+int Skein_256_Update(Skein_256_Ctxt_t *ctx, const uint8_t *msg,
+ size_t msgByteCnt);
+int Skein_512_Update(Skein_512_Ctxt_t *ctx, const uint8_t *msg,
+ size_t msgByteCnt);
+int Skein1024_Update(Skein1024_Ctxt_t *ctx, const uint8_t *msg,
+ size_t msgByteCnt);
+
+int Skein_256_Final(Skein_256_Ctxt_t *ctx, uint8_t *hashVal);
+int Skein_512_Final(Skein_512_Ctxt_t *ctx, uint8_t *hashVal);
+int Skein1024_Final(Skein1024_Ctxt_t *ctx, uint8_t *hashVal);
+
+/*
+ * Skein APIs for "extended" initialization: MAC keys, tree hashing.
+ * After an InitExt() call, just use Update/Final calls as with Init().
+ *
+ * Notes: Same parameters as _Init() calls, plus treeInfo/key/keyBytes.
+ * When keyBytes == 0 and treeInfo == SKEIN_SEQUENTIAL,
+ * the results of InitExt() are identical to calling Init().
+ * The function Init() may be called once to "precompute" the IV for
+ * a given hashBitLen value, then by saving a copy of the context
+ * the IV computation may be avoided in later calls.
+ * Similarly, the function InitExt() may be called once per MAC key
+ * to precompute the MAC IV, then a copy of the context saved and
+ * reused for each new MAC computation.
+ */
+int Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen,
+ uint64_t treeInfo, const uint8_t *key, size_t keyBytes);
+int Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen,
+ uint64_t treeInfo, const uint8_t *key, size_t keyBytes);
+int Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen,
+ uint64_t treeInfo, const uint8_t *key, size_t keyBytes);
+
+/*
+ * Skein APIs for MAC and tree hash:
+ * Final_Pad: pad, do final block, but no OUTPUT type
+ * Output: do just the output stage
+ */
+int Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, uint8_t *hashVal);
+int Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, uint8_t *hashVal);
+int Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, uint8_t *hashVal);
+
+#ifndef SKEIN_TREE_HASH
+#define SKEIN_TREE_HASH (1)
+#endif
+#if SKEIN_TREE_HASH
+int Skein_256_Output(Skein_256_Ctxt_t *ctx, uint8_t *hashVal);
+int Skein_512_Output(Skein_512_Ctxt_t *ctx, uint8_t *hashVal);
+int Skein1024_Output(Skein1024_Ctxt_t *ctx, uint8_t *hashVal);
+#endif
+
+/*
+ * When you initialize a Skein KCF hashing method you can pass this param
+ * structure in cm_param to fine-tune the algorithm's defaults.
+ */
+typedef struct skein_param {
+ size_t sp_digest_bitlen; /* length of digest in bits */
+} skein_param_t;
+
+/* Module definitions */
+#ifdef SKEIN_MODULE_IMPL
+#define CKM_SKEIN_256 "CKM_SKEIN_256"
+#define CKM_SKEIN_512 "CKM_SKEIN_512"
+#define CKM_SKEIN1024 "CKM_SKEIN1024"
+#define CKM_SKEIN_256_MAC "CKM_SKEIN_256_MAC"
+#define CKM_SKEIN_512_MAC "CKM_SKEIN_512_MAC"
+#define CKM_SKEIN1024_MAC "CKM_SKEIN1024_MAC"
+
+typedef enum skein_mech_type {
+ SKEIN_256_MECH_INFO_TYPE,
+ SKEIN_512_MECH_INFO_TYPE,
+ SKEIN1024_MECH_INFO_TYPE,
+ SKEIN_256_MAC_MECH_INFO_TYPE,
+ SKEIN_512_MAC_MECH_INFO_TYPE,
+ SKEIN1024_MAC_MECH_INFO_TYPE
+} skein_mech_type_t;
+
+#define VALID_SKEIN_DIGEST_MECH(__mech) \
+ ((int)(__mech) >= SKEIN_256_MECH_INFO_TYPE && \
+ (__mech) <= SKEIN1024_MECH_INFO_TYPE)
+#define VALID_SKEIN_MAC_MECH(__mech) \
+ ((int)(__mech) >= SKEIN_256_MAC_MECH_INFO_TYPE && \
+ (__mech) <= SKEIN1024_MAC_MECH_INFO_TYPE)
+#endif /* SKEIN_MODULE_IMPL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SKEIN_H_ */
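
A usage sketch for the incremental Skein interface above; the output
length is a parameter, so a 512-bit-state context can produce, for
example, a 256-bit digest (assumes linking against this module's
implementation):

    static int
    example_skein512_256(const uint8_t *msg, size_t len, uint8_t digest[32])
    {
        Skein_512_Ctxt_t ctx;

        /* 256-bit output from the 512-bit-state variant. */
        if (Skein_512_Init(&ctx, 256) != SKEIN_SUCCESS)
            return (SKEIN_FAIL);
        if (Skein_512_Update(&ctx, msg, len) != SKEIN_SUCCESS)
            return (SKEIN_FAIL);
        return (Skein_512_Final(&ctx, digest));
    }
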
diff --git a/zfs/include/sys/spa.h b/zfs/include/sys/spa.h
index bfd2e7902ea3..de942ad2b5a4 100644
--- a/zfs/include/sys/spa.h
+++ b/zfs/include/sys/spa.h
@@ -23,6 +23,10 @@
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2017 Joyent, Inc.
+ * Copyright (c) 2017 Datto Inc.
*/
#ifndef _SYS_SPA_H
@@ -35,6 +39,8 @@
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/fs/zfs.h>
+#include <sys/spa_checksum.h>
+#include <sys/dmu.h>
#ifdef __cplusplus
extern "C" {
@@ -118,6 +124,17 @@ _NOTE(CONSTCOND) } while (0)
#define SPA_OLD_MAXBLOCKSIZE (1ULL << SPA_OLD_MAXBLOCKSHIFT)
#define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT)
+/*
+ * Alignment Shift (ashift) is an immutable, internal top-level vdev property
+ * which can only be set at vdev creation time. Physical writes are always done
+ * according to it, which makes 2^ashift the smallest possible IO on a vdev.
+ *
+ * We currently allow values ranging from 512 bytes (2^9 = 512) to 64 KiB
+ * (2^16 = 65,536).
+ */
+#define ASHIFT_MIN 9
+#define ASHIFT_MAX 16
+
/*
* Size of block to hold the configuration data (a packed nvlist)
*/
@@ -134,6 +151,8 @@ _NOTE(CONSTCOND) } while (0)
#define SPA_PSIZEBITS 16 /* PSIZE up to 32M (2^16 * 512) */
#define SPA_ASIZEBITS 24 /* ASIZE up to 64 times larger */
+#define SPA_COMPRESSBITS 7
+
/*
* All SPA data is represented by 128-bit data virtual addresses (DVAs).
* The members of the dva_t should be considered opaque outside the SPA.
@@ -142,12 +161,14 @@ typedef struct dva {
uint64_t dva_word[2];
} dva_t;
+
/*
- * Each block has a 256-bit checksum -- strong enough for cryptographic hashes.
+ * Some checksums/hashes need a 256-bit initialization salt. This salt is kept
+ * secret and is suitable for use in MAC algorithms as the key.
*/
-typedef struct zio_cksum {
- uint64_t zc_word[4];
-} zio_cksum_t;
+typedef struct zio_cksum_salt {
+ uint8_t zcs_bytes[32];
+} zio_cksum_salt_t;
/*
* Each block is described by its DVAs, time of birth, checksum, etc.
@@ -368,8 +389,10 @@ _NOTE(CONSTCOND) } while (0)
16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x); \
_NOTE(CONSTCOND) } while (0)
-#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 7)
-#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 7, x)
+#define BP_GET_COMPRESS(bp) \
+ BF64_GET((bp)->blk_prop, 32, SPA_COMPRESSBITS)
+#define BP_SET_COMPRESS(bp, x) \
+ BF64_SET((bp)->blk_prop, 32, SPA_COMPRESSBITS, x)
#define BP_IS_EMBEDDED(bp) BF64_GET((bp)->blk_prop, 39, 1)
#define BP_SET_EMBEDDED(bp, x) BF64_SET((bp)->blk_prop, 39, 1, x)
@@ -407,15 +430,17 @@ _NOTE(CONSTCOND) } while (0)
#define BP_GET_FILL(bp) (BP_IS_EMBEDDED(bp) ? 1 : (bp)->blk_fill)
+#define BP_IS_METADATA(bp) \
+ (BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
+
#define BP_GET_ASIZE(bp) \
(BP_IS_EMBEDDED(bp) ? 0 : \
DVA_GET_ASIZE(&(bp)->blk_dva[0]) + \
DVA_GET_ASIZE(&(bp)->blk_dva[1]) + \
DVA_GET_ASIZE(&(bp)->blk_dva[2]))
-#define BP_GET_UCSIZE(bp) \
- ((BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) ? \
- BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
+#define BP_GET_UCSIZE(bp) \
+ (BP_IS_METADATA(bp) ? BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
#define BP_GET_NDVAS(bp) \
(BP_IS_EMBEDDED(bp) ? 0 : \
@@ -440,26 +465,9 @@ _NOTE(CONSTCOND) } while (0)
DVA_EQUAL(&(bp1)->blk_dva[1], &(bp2)->blk_dva[1]) && \
DVA_EQUAL(&(bp1)->blk_dva[2], &(bp2)->blk_dva[2]))
-#define ZIO_CHECKSUM_EQUAL(zc1, zc2) \
- (0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \
- ((zc1).zc_word[1] - (zc2).zc_word[1]) | \
- ((zc1).zc_word[2] - (zc2).zc_word[2]) | \
- ((zc1).zc_word[3] - (zc2).zc_word[3])))
-
-#define ZIO_CHECKSUM_IS_ZERO(zc) \
- (0 == ((zc)->zc_word[0] | (zc)->zc_word[1] | \
- (zc)->zc_word[2] | (zc)->zc_word[3]))
#define DVA_IS_VALID(dva) (DVA_GET_ASIZE(dva) != 0)
-#define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \
-{ \
- (zcp)->zc_word[0] = w0; \
- (zcp)->zc_word[1] = w1; \
- (zcp)->zc_word[2] = w2; \
- (zcp)->zc_word[3] = w3; \
-}
-
#define BP_IDENTITY(bp) (ASSERT(!BP_IS_EMBEDDED(bp)), &(bp)->blk_dva[0])
#define BP_IS_GANG(bp) \
(BP_IS_EMBEDDED(bp) ? B_FALSE : DVA_GET_GANG(BP_IDENTITY(bp)))
@@ -576,11 +584,8 @@ _NOTE(CONSTCOND) } while (0)
ASSERT(len < size); \
}
-#include <sys/dmu.h>
-
#define BP_GET_BUFC_TYPE(bp) \
- (((BP_GET_LEVEL(bp) > 0) || (DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))) ? \
- ARC_BUFC_METADATA : ARC_BUFC_DATA)
+ (BP_IS_METADATA(bp) ? ARC_BUFC_METADATA : ARC_BUFC_DATA)
typedef enum spa_import_type {
SPA_IMPORT_EXISTING,
@@ -595,7 +600,6 @@ extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
size_t buflen);
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
nvlist_t *zplprops);
-extern int spa_import_rootpool(char *devpath, char *devid);
extern int spa_import(char *pool, nvlist_t *config, nvlist_t *props,
uint64_t flags);
extern nvlist_t *spa_tryimport(nvlist_t *tryconfig);
@@ -656,6 +660,7 @@ extern void spa_l2cache_drop(spa_t *spa);
/* scanning */
extern int spa_scan(spa_t *spa, pool_scan_func_t func);
extern int spa_scan_stop(spa_t *spa);
+extern int spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t flag);
/* spa syncing */
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
@@ -724,6 +729,7 @@ typedef struct spa_stats {
spa_stats_history_t txg_history;
spa_stats_history_t tx_assign_histogram;
spa_stats_history_t io_history;
+ spa_stats_history_t mmp_history;
} spa_stats_t;
typedef enum txg_state {
@@ -735,6 +741,13 @@ typedef enum txg_state {
TXG_STATE_COMMITTED = 5,
} txg_state_t;
+typedef struct txg_stat {
+ vdev_stat_t vs1;
+ vdev_stat_t vs2;
+ uint64_t txg;
+ uint64_t ndirty;
+} txg_stat_t;
+
extern void spa_stats_init(spa_t *spa);
extern void spa_stats_destroy(spa_t *spa);
extern void spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb,
@@ -742,9 +755,12 @@ extern void spa_read_history_add(spa_t *spa, const zbookmark_phys_t *zb,
extern void spa_txg_history_add(spa_t *spa, uint64_t txg, hrtime_t birth_time);
extern int spa_txg_history_set(spa_t *spa, uint64_t txg,
txg_state_t completed_state, hrtime_t completed_time);
-extern int spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
- uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty);
+extern txg_stat_t *spa_txg_history_init_io(spa_t *, uint64_t,
+ struct dsl_pool *);
+extern void spa_txg_history_fini_io(spa_t *, txg_stat_t *);
extern void spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs);
+extern void spa_mmp_history_add(uint64_t txg, uint64_t timestamp,
+ uint64_t mmp_delay, vdev_t *vd, int label);
/* Pool configuration locks */
extern int spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw);
@@ -793,11 +809,12 @@ extern uint64_t spa_load_guid(spa_t *spa);
extern uint64_t spa_last_synced_txg(spa_t *spa);
extern uint64_t spa_first_txg(spa_t *spa);
extern uint64_t spa_syncing_txg(spa_t *spa);
+extern uint64_t spa_final_dirty_txg(spa_t *spa);
extern uint64_t spa_version(spa_t *spa);
extern pool_state_t spa_state(spa_t *spa);
extern spa_load_state_t spa_load_state(spa_t *spa);
extern uint64_t spa_freeze_txg(spa_t *spa);
-extern uint64_t spa_get_asize(spa_t *spa, uint64_t lsize);
+extern uint64_t spa_get_worst_case_asize(spa_t *spa, uint64_t lsize);
extern uint64_t spa_get_dspace(spa_t *spa);
extern uint64_t spa_get_slop_space(spa_t *spa);
extern void spa_update_dspace(spa_t *spa);
@@ -844,10 +861,13 @@ extern boolean_t spa_is_root(spa_t *spa);
extern boolean_t spa_writeable(spa_t *spa);
extern boolean_t spa_has_pending_synctask(spa_t *spa);
extern int spa_maxblocksize(spa_t *spa);
+extern int spa_maxdnodesize(spa_t *spa);
extern void zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp);
+extern boolean_t spa_multihost(spa_t *spa);
+extern unsigned long spa_get_hostid(void);
extern int spa_mode(spa_t *spa);
-extern uint64_t strtonum(const char *str, char **nptr);
+extern uint64_t zfs_strtonum(const char *str, char **nptr);
extern char *spa_his_ievent_table[];
@@ -869,8 +889,10 @@ struct zbookmark_phys;
extern void spa_log_error(spa_t *spa, zio_t *zio);
extern void zfs_ereport_post(const char *class, spa_t *spa, vdev_t *vd,
zio_t *zio, uint64_t stateoroffset, uint64_t length);
+extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type,
+ const char *name, nvlist_t *aux);
extern void zfs_post_remove(spa_t *spa, vdev_t *vd);
-extern void zfs_post_state_change(spa_t *spa, vdev_t *vd);
+extern void zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate);
extern void zfs_post_autoreplace(spa_t *spa, vdev_t *vd);
extern uint64_t spa_get_errlog_size(spa_t *spa);
extern int spa_get_errlog(spa_t *spa, void *uaddr, size_t *count);
@@ -895,7 +917,8 @@ extern void spa_prop_clear_bootfs(spa_t *spa, uint64_t obj, dmu_tx_t *tx);
extern void spa_configfile_set(spa_t *, nvlist_t *, boolean_t);
/* asynchronous event notification */
-extern void spa_event_notify(spa_t *spa, vdev_t *vdev, const char *name);
+extern void spa_event_notify(spa_t *spa, vdev_t *vdev, nvlist_t *hist_nvl,
+ const char *name);
#ifdef ZFS_DEBUG
#define dprintf_bp(bp, fmt, ...) do { \
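
To make the new ASHIFT_MIN/ASHIFT_MAX bounds from this header concrete:
2^ashift is the smallest I/O a vdev will issue, so the allowed range
spans 512-byte through 64 KiB sectors. A throwaway userland loop
(illustrative only):

    #include <stdio.h>
    #include <stdint.h>

    #define ASHIFT_MIN 9
    #define ASHIFT_MAX 16

    int main(void)
    {
        for (int ashift = ASHIFT_MIN; ashift <= ASHIFT_MAX; ashift++)
            printf("ashift=%d -> %llu-byte minimum I/O\n", ashift,
                (unsigned long long)(1ULL << ashift));
        return (0);
    }
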
diff --git a/zfs/include/sys/spa_checksum.h b/zfs/include/sys/spa_checksum.h
new file mode 100644
index 000000000000..b87990105a71
--- /dev/null
+++ b/zfs/include/sys/spa_checksum.h
@@ -0,0 +1,72 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SPA_CHECKSUM_H
+#define _SPA_CHECKSUM_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Each block has a 256-bit checksum -- strong enough for cryptographic hashes.
+ */
+typedef struct zio_cksum {
+ uint64_t zc_word[4];
+} zio_cksum_t;
+
+#define ZIO_SET_CHECKSUM(zcp, w0, w1, w2, w3) \
+{ \
+ (zcp)->zc_word[0] = w0; \
+ (zcp)->zc_word[1] = w1; \
+ (zcp)->zc_word[2] = w2; \
+ (zcp)->zc_word[3] = w3; \
+}
+
+#define ZIO_CHECKSUM_EQUAL(zc1, zc2) \
+ (0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \
+ ((zc1).zc_word[1] - (zc2).zc_word[1]) | \
+ ((zc1).zc_word[2] - (zc2).zc_word[2]) | \
+ ((zc1).zc_word[3] - (zc2).zc_word[3])))
+
+#define ZIO_CHECKSUM_IS_ZERO(zc) \
+ (0 == ((zc)->zc_word[0] | (zc)->zc_word[1] | \
+ (zc)->zc_word[2] | (zc)->zc_word[3]))
+
+#define ZIO_CHECKSUM_BSWAP(zcp) \
+{ \
+ (zcp)->zc_word[0] = BSWAP_64((zcp)->zc_word[0]); \
+ (zcp)->zc_word[1] = BSWAP_64((zcp)->zc_word[1]); \
+ (zcp)->zc_word[2] = BSWAP_64((zcp)->zc_word[2]); \
+ (zcp)->zc_word[3] = BSWAP_64((zcp)->zc_word[3]); \
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
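
ZIO_CHECKSUM_EQUAL above compares branch-free: it ORs the word-wise
(wrapping) differences, and the result is zero iff every word matches.
A quick userland check of that identity (illustrative only):

    #include <stdio.h>
    #include <stdint.h>

    typedef struct zio_cksum { uint64_t zc_word[4]; } zio_cksum_t;

    #define ZIO_CHECKSUM_EQUAL(zc1, zc2) \
        (0 == (((zc1).zc_word[0] - (zc2).zc_word[0]) | \
        ((zc1).zc_word[1] - (zc2).zc_word[1]) | \
        ((zc1).zc_word[2] - (zc2).zc_word[2]) | \
        ((zc1).zc_word[3] - (zc2).zc_word[3])))

    int main(void)
    {
        zio_cksum_t a = { { 1, 2, 3, 4 } };
        zio_cksum_t b = { { 1, 2, 3, 4 } };
        zio_cksum_t c = { { 1, 2, 3, 5 } };

        /* Unsigned wraparound: x - y == 0 iff x == y, per word. */
        printf("a==b: %d, a==c: %d\n",
            ZIO_CHECKSUM_EQUAL(a, b), ZIO_CHECKSUM_EQUAL(a, c));
        return (0);
    }
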
diff --git a/zfs/include/sys/spa_impl.h b/zfs/include/sys/spa_impl.h
index 5176eb84842c..06de244218f3 100644
--- a/zfs/include/sys/spa_impl.h
+++ b/zfs/include/sys/spa_impl.h
@@ -23,7 +23,9 @@
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
+ * Copyright (c) 2017 Datto Inc.
*/
#ifndef _SYS_SPA_IMPL_H
@@ -116,11 +118,18 @@ typedef struct spa_taskqs {
taskq_t **stqs_taskq;
} spa_taskqs_t;
+typedef enum spa_all_vdev_zap_action {
+ AVZ_ACTION_NONE = 0,
+ AVZ_ACTION_DESTROY, /* Destroy all per-vdev ZAPs and the AVZ. */
+ AVZ_ACTION_REBUILD, /* Populate the new AVZ, see spa_avz_rebuild */
+ AVZ_ACTION_INITIALIZE
+} spa_avz_action_t;
+
struct spa {
/*
* Fields protected by spa_namespace_lock.
*/
- char spa_name[MAXNAMELEN]; /* pool name */
+ char spa_name[ZFS_MAX_DATASET_NAME_LEN]; /* pool name */
char *spa_comment; /* comment */
avl_node_t spa_avl; /* node in spa_namespace_avl */
nvlist_t *spa_config; /* last synced config */
@@ -158,6 +167,8 @@ struct spa {
uint64_t spa_last_synced_guid; /* last synced guid */
list_t spa_config_dirty_list; /* vdevs with dirty config */
list_t spa_state_dirty_list; /* vdevs with dirty state */
+ kmutex_t spa_alloc_lock;
+ avl_tree_t spa_alloc_tree;
spa_aux_vdev_t spa_spares; /* hot spares */
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
nvlist_t *spa_label_features; /* Features for reading MOS */
@@ -166,6 +177,10 @@ struct spa {
uint64_t spa_syncing_txg; /* txg currently syncing */
bpobj_t spa_deferred_bpobj; /* deferred-free bplist */
bplist_t spa_free_bplist[TXG_SIZE]; /* bplist of stuff to free */
+ zio_cksum_salt_t spa_cksum_salt; /* secret salt for cksum */
+ /* checksum context templates */
+ kmutex_t spa_cksum_tmpls_lock;
+ void *spa_cksum_tmpls[ZIO_CHECKSUM_FUNCTIONS];
uberblock_t spa_ubsync; /* last synced uberblock */
uberblock_t spa_uberblock; /* current uberblock */
boolean_t spa_extreme_rewind; /* rewind past deferred frees */
@@ -179,6 +194,8 @@ struct spa {
uint8_t spa_scrub_started; /* started since last boot */
uint8_t spa_scrub_reopen; /* scrub doing vdev_reopen */
uint64_t spa_scan_pass_start; /* start time per pass/reboot */
+ uint64_t spa_scan_pass_scrub_pause; /* scrub pause time */
+ uint64_t spa_scan_pass_scrub_spent_paused; /* total paused */
uint64_t spa_scan_pass_exam; /* examined bytes per pass */
kmutex_t spa_async_lock; /* protect async state */
kthread_t *spa_async_thread; /* thread doing async task */
@@ -226,6 +243,7 @@ struct spa {
uint64_t spa_autoexpand; /* lun expansion on/off */
ddt_t *spa_ddt[ZIO_CHECKSUM_FUNCTIONS]; /* in-core DDTs */
uint64_t spa_ddt_stat_object; /* DDT statistics */
+ uint64_t spa_dedup_dspace; /* Cache get_dedup_dspace() */
uint64_t spa_dedup_ditto; /* dedup ditto threshold */
uint64_t spa_dedup_checksum; /* default dedup checksum */
uint64_t spa_dspace; /* dspace in normal class */
@@ -251,9 +269,14 @@ struct spa {
uint64_t spa_deadman_calls; /* number of deadman calls */
hrtime_t spa_sync_starttime; /* starting time of spa_sync */
uint64_t spa_deadman_synctime; /* deadman expiration timer */
+ uint64_t spa_all_vdev_zaps; /* ZAP of per-vd ZAP obj #s */
+ spa_avz_action_t spa_avz_action; /* destroy/rebuild AVZ? */
uint64_t spa_errata; /* errata issues detected */
spa_stats_t spa_stats; /* assorted spa statistics */
- taskq_t *spa_zvol_taskq; /* Taskq for minor managment */
+ hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
+ taskq_t *spa_zvol_taskq; /* Taskq for minor management */
+ uint64_t spa_multihost; /* multihost aware (mmp) */
+ mmp_thread_t spa_mmp; /* multihost mmp thread */
/*
* spa_refcount & spa_config_lock must be the last elements
@@ -263,6 +286,8 @@ struct spa {
*/
spa_config_lock_t spa_config_lock[SCL_LOCKS]; /* config changes */
refcount_t spa_refcount; /* number of opens */
+
+ taskq_t *spa_upgrade_taskq; /* taskq for upgrade jobs */
};
extern char *spa_config_path;
diff --git a/zfs/include/sys/sysevent.h b/zfs/include/sys/sysevent.h
new file mode 100644
index 000000000000..6510297d601f
--- /dev/null
+++ b/zfs/include/sys/sysevent.h
@@ -0,0 +1,36 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_SYSEVENT_H
+#define _SYS_SYSEVENT_H
+
+#include <sys/nvpair.h>
+
+typedef struct sysevent {
+ nvlist_t *resource;
+} sysevent_t;
+
+#endif
diff --git a/zfs/include/sys/sysevent/Makefile.am b/zfs/include/sys/sysevent/Makefile.am
new file mode 100644
index 000000000000..e9af2684f1b9
--- /dev/null
+++ b/zfs/include/sys/sysevent/Makefile.am
@@ -0,0 +1,19 @@
+COMMON_H = \
+ $(top_srcdir)/include/sys/sysevent/eventdefs.h \
+ $(top_srcdir)/include/sys/sysevent/dev.h
+
+KERNEL_H =
+
+USER_H =
+
+EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H)
+
+if CONFIG_USER
+libzfsdir = $(includedir)/libzfs/sys/sysevent
+libzfs_HEADERS = $(COMMON_H) $(USER_H)
+endif
+
+if CONFIG_KERNEL
+kerneldir = @prefix@/src/zfs-$(VERSION)/include/sys/sysevent
+kernel_HEADERS = $(COMMON_H) $(KERNEL_H)
+endif
diff --git a/zfs/include/sys/sysevent/Makefile.in b/zfs/include/sys/sysevent/Makefile.in
new file mode 100644
index 000000000000..2a1986241ef4
--- /dev/null
+++ b/zfs/include/sys/sysevent/Makefile.in
@@ -0,0 +1,790 @@
+# Makefile.in generated by automake 1.15.1 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2017 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+target_triplet = @target@
+subdir = include/sys/sysevent
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/config/always-arch.m4 \
+ $(top_srcdir)/config/always-no-bool-compare.m4 \
+ $(top_srcdir)/config/always-no-unused-but-set-variable.m4 \
+ $(top_srcdir)/config/dkms.m4 \
+ $(top_srcdir)/config/kernel-acl.m4 \
+ $(top_srcdir)/config/kernel-aio-fsync.m4 \
+ $(top_srcdir)/config/kernel-automount.m4 \
+ $(top_srcdir)/config/kernel-bdev-block-device-operations.m4 \
+ $(top_srcdir)/config/kernel-bdev-logical-size.m4 \
+ $(top_srcdir)/config/kernel-bdev-physical-size.m4 \
+ $(top_srcdir)/config/kernel-bdi.m4 \
+ $(top_srcdir)/config/kernel-bio-bvec-iter.m4 \
+ $(top_srcdir)/config/kernel-bio-end-io-t-args.m4 \
+ $(top_srcdir)/config/kernel-bio-failfast.m4 \
+ $(top_srcdir)/config/kernel-bio-op.m4 \
+ $(top_srcdir)/config/kernel-bio-rw-barrier.m4 \
+ $(top_srcdir)/config/kernel-bio-rw-discard.m4 \
+ $(top_srcdir)/config/kernel-bio_set_dev.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-bdi.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-flush.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-max-hw-sectors.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-max-segments.m4 \
+ $(top_srcdir)/config/kernel-blk-queue-unplug.m4 \
+ $(top_srcdir)/config/kernel-blkdev-get-by-path.m4 \
+ $(top_srcdir)/config/kernel-blkdev-get.m4 \
+ $(top_srcdir)/config/kernel-block-device-operations-release-void.m4 \
+ $(top_srcdir)/config/kernel-clear-inode.m4 \
+ $(top_srcdir)/config/kernel-commit-metadata.m4 \
+ $(top_srcdir)/config/kernel-create-nameidata.m4 \
+ $(top_srcdir)/config/kernel-current-time.m4 \
+ $(top_srcdir)/config/kernel-current_bio_tail.m4 \
+ $(top_srcdir)/config/kernel-d-make-root.m4 \
+ $(top_srcdir)/config/kernel-d-obtain-alias.m4 \
+ $(top_srcdir)/config/kernel-d-prune-aliases.m4 \
+ $(top_srcdir)/config/kernel-declare-event-class.m4 \
+ $(top_srcdir)/config/kernel-dentry-operations.m4 \
+ $(top_srcdir)/config/kernel-dirty-inode.m4 \
+ $(top_srcdir)/config/kernel-discard-granularity.m4 \
+ $(top_srcdir)/config/kernel-elevator-change.m4 \
+ $(top_srcdir)/config/kernel-encode-fh-inode.m4 \
+ $(top_srcdir)/config/kernel-evict-inode.m4 \
+ $(top_srcdir)/config/kernel-fallocate.m4 \
+ $(top_srcdir)/config/kernel-file-dentry.m4 \
+ $(top_srcdir)/config/kernel-file-inode.m4 \
+ $(top_srcdir)/config/kernel-fmode-t.m4 \
+ $(top_srcdir)/config/kernel-follow-down-one.m4 \
+ $(top_srcdir)/config/kernel-fpu.m4 \
+ $(top_srcdir)/config/kernel-fsync.m4 \
+ $(top_srcdir)/config/kernel-generic_io_acct.m4 \
+ $(top_srcdir)/config/kernel-generic_readlink.m4 \
+ $(top_srcdir)/config/kernel-get-disk-ro.m4 \
+ $(top_srcdir)/config/kernel-get-gendisk.m4 \
+ $(top_srcdir)/config/kernel-get-link.m4 \
+ $(top_srcdir)/config/kernel-inode-getattr.m4 \
+ $(top_srcdir)/config/kernel-inode-set-flags.m4 \
+ $(top_srcdir)/config/kernel-insert-inode-locked.m4 \
+ $(top_srcdir)/config/kernel-invalidate-bdev-args.m4 \
+ $(top_srcdir)/config/kernel-is_owner_or_cap.m4 \
+ $(top_srcdir)/config/kernel-kmap-atomic-args.m4 \
+ $(top_srcdir)/config/kernel-kuid-helpers.m4 \
+ $(top_srcdir)/config/kernel-lookup-bdev.m4 \
+ $(top_srcdir)/config/kernel-lookup-nameidata.m4 \
+ $(top_srcdir)/config/kernel-lseek-execute.m4 \
+ $(top_srcdir)/config/kernel-mk-request-fn.m4 \
+ $(top_srcdir)/config/kernel-mkdir-umode-t.m4 \
+ $(top_srcdir)/config/kernel-mod-param.m4 \
+ $(top_srcdir)/config/kernel-mount-nodev.m4 \
+ $(top_srcdir)/config/kernel-objtool.m4 \
+ $(top_srcdir)/config/kernel-open-bdev-exclusive.m4 \
+ $(top_srcdir)/config/kernel-put-link.m4 \
+ $(top_srcdir)/config/kernel-rename.m4 \
+ $(top_srcdir)/config/kernel-security-inode-init.m4 \
+ $(top_srcdir)/config/kernel-set-nlink.m4 \
+ $(top_srcdir)/config/kernel-setattr-prepare.m4 \
+ $(top_srcdir)/config/kernel-sget-args.m4 \
+ $(top_srcdir)/config/kernel-show-options.m4 \
+ $(top_srcdir)/config/kernel-shrink.m4 \
+ $(top_srcdir)/config/kernel-submit_bio.m4 \
+ $(top_srcdir)/config/kernel-super-userns.m4 \
+ $(top_srcdir)/config/kernel-tmpfile.m4 \
+ $(top_srcdir)/config/kernel-truncate-range.m4 \
+ $(top_srcdir)/config/kernel-truncate-setsize.m4 \
+ $(top_srcdir)/config/kernel-vfs-iterate.m4 \
+ $(top_srcdir)/config/kernel-vfs-rw-iterate.m4 \
+ $(top_srcdir)/config/kernel-vm_node_stat.m4 \
+ $(top_srcdir)/config/kernel-xattr-handler.m4 \
+ $(top_srcdir)/config/kernel.m4 $(top_srcdir)/config/libtool.m4 \
+ $(top_srcdir)/config/ltoptions.m4 \
+ $(top_srcdir)/config/ltsugar.m4 \
+ $(top_srcdir)/config/ltversion.m4 \
+ $(top_srcdir)/config/lt~obsolete.m4 \
+ $(top_srcdir)/config/mount-helper.m4 \
+ $(top_srcdir)/config/toolchain-simd.m4 \
+ $(top_srcdir)/config/user-dracut.m4 \
+ $(top_srcdir)/config/user-frame-larger-than.m4 \
+ $(top_srcdir)/config/user-libattr.m4 \
+ $(top_srcdir)/config/user-libblkid.m4 \
+ $(top_srcdir)/config/user-libtirpc.m4 \
+ $(top_srcdir)/config/user-libudev.m4 \
+ $(top_srcdir)/config/user-libuuid.m4 \
+ $(top_srcdir)/config/user-makedev.m4 \
+ $(top_srcdir)/config/user-no-format-truncation.m4 \
+ $(top_srcdir)/config/user-runstatedir.m4 \
+ $(top_srcdir)/config/user-systemd.m4 \
+ $(top_srcdir)/config/user-sysvinit.m4 \
+ $(top_srcdir)/config/user-udev.m4 \
+ $(top_srcdir)/config/user-zlib.m4 $(top_srcdir)/config/user.m4 \
+ $(top_srcdir)/config/zfs-build.m4 \
+ $(top_srcdir)/config/zfs-meta.m4 $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__kernel_HEADERS_DIST) \
+ $(am__libzfs_HEADERS_DIST) $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/zfs_config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+SOURCES =
+DIST_SOURCES =
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__kernel_HEADERS_DIST = \
+ $(top_srcdir)/include/sys/sysevent/eventdefs.h \
+ $(top_srcdir)/include/sys/sysevent/dev.h
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+am__installdirs = "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)"
+am__libzfs_HEADERS_DIST = \
+ $(top_srcdir)/include/sys/sysevent/eventdefs.h \
+ $(top_srcdir)/include/sys/sysevent/dev.h
+HEADERS = $(kernel_HEADERS) $(libzfs_HEADERS)
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+ETAGS = etags
+CTAGS = ctags
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+ALIEN = @ALIEN@
+ALIEN_VERSION = @ALIEN_VERSION@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+CC = @CC@
+CCAS = @CCAS@
+CCASDEPMODE = @CCASDEPMODE@
+CCASFLAGS = @CCASFLAGS@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEBUG_CFLAGS = @DEBUG_CFLAGS@
+DEBUG_STACKFLAGS = @DEBUG_STACKFLAGS@
+DEBUG_ZFS = @DEBUG_ZFS@
+DEFAULT_INITCONF_DIR = @DEFAULT_INITCONF_DIR@
+DEFAULT_INIT_DIR = @DEFAULT_INIT_DIR@
+DEFAULT_INIT_SCRIPT = @DEFAULT_INIT_SCRIPT@
+DEFAULT_PACKAGE = @DEFAULT_PACKAGE@
+DEFINE_INITRAMFS = @DEFINE_INITRAMFS@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+DPKG = @DPKG@
+DPKGBUILD = @DPKGBUILD@
+DPKGBUILD_VERSION = @DPKGBUILD_VERSION@
+DPKG_VERSION = @DPKG_VERSION@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+FRAME_LARGER_THAN = @FRAME_LARGER_THAN@
+GREP = @GREP@
+HAVE_ALIEN = @HAVE_ALIEN@
+HAVE_DPKG = @HAVE_DPKG@
+HAVE_DPKGBUILD = @HAVE_DPKGBUILD@
+HAVE_RPM = @HAVE_RPM@
+HAVE_RPMBUILD = @HAVE_RPMBUILD@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+KERNELCPPFLAGS = @KERNELCPPFLAGS@
+KERNELMAKE_PARAMS = @KERNELMAKE_PARAMS@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBATTR = @LIBATTR@
+LIBBLKID = @LIBBLKID@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTIRPC = @LIBTIRPC@
+LIBTIRPC_CFLAGS = @LIBTIRPC_CFLAGS@
+LIBTOOL = @LIBTOOL@
+LIBUDEV = @LIBUDEV@
+LIBUUID = @LIBUUID@
+LINUX = @LINUX@
+LINUX_OBJ = @LINUX_OBJ@
+LINUX_SYMBOLS = @LINUX_SYMBOLS@
+LINUX_VERSION = @LINUX_VERSION@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAINT = @MAINT@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+NO_BOOL_COMPARE = @NO_BOOL_COMPARE@
+NO_FORMAT_TRUNCATION = @NO_FORMAT_TRUNCATION@
+NO_UNUSED_BUT_SET_VARIABLE = @NO_UNUSED_BUT_SET_VARIABLE@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+QAT_OBJ = @QAT_OBJ@
+QAT_SRC = @QAT_SRC@
+QAT_SYMBOLS = @QAT_SYMBOLS@
+RANLIB = @RANLIB@
+RELEASE = @RELEASE@
+RM = @RM@
+RPM = @RPM@
+RPMBUILD = @RPMBUILD@
+RPMBUILD_VERSION = @RPMBUILD_VERSION@
+RPM_DEFINE_COMMON = @RPM_DEFINE_COMMON@
+RPM_DEFINE_DKMS = @RPM_DEFINE_DKMS@
+RPM_DEFINE_KMOD = @RPM_DEFINE_KMOD@
+RPM_DEFINE_UTIL = @RPM_DEFINE_UTIL@
+RPM_SPEC_DIR = @RPM_SPEC_DIR@
+RPM_VERSION = @RPM_VERSION@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+SPL = @SPL@
+SPL_OBJ = @SPL_OBJ@
+SPL_SYMBOLS = @SPL_SYMBOLS@
+SPL_VERSION = @SPL_VERSION@
+SRPM_DEFINE_COMMON = @SRPM_DEFINE_COMMON@
+SRPM_DEFINE_DKMS = @SRPM_DEFINE_DKMS@
+SRPM_DEFINE_KMOD = @SRPM_DEFINE_KMOD@
+SRPM_DEFINE_UTIL = @SRPM_DEFINE_UTIL@
+STRIP = @STRIP@
+TARGET_ASM_DIR = @TARGET_ASM_DIR@
+VENDOR = @VENDOR@
+VERSION = @VERSION@
+ZFS_CONFIG = @ZFS_CONFIG@
+ZFS_INIT_SYSTEMD = @ZFS_INIT_SYSTEMD@
+ZFS_INIT_SYSV = @ZFS_INIT_SYSV@
+ZFS_META_ALIAS = @ZFS_META_ALIAS@
+ZFS_META_AUTHOR = @ZFS_META_AUTHOR@
+ZFS_META_DATA = @ZFS_META_DATA@
+ZFS_META_LICENSE = @ZFS_META_LICENSE@
+ZFS_META_LT_AGE = @ZFS_META_LT_AGE@
+ZFS_META_LT_CURRENT = @ZFS_META_LT_CURRENT@
+ZFS_META_LT_REVISION = @ZFS_META_LT_REVISION@
+ZFS_META_NAME = @ZFS_META_NAME@
+ZFS_META_RELEASE = @ZFS_META_RELEASE@
+ZFS_META_VERSION = @ZFS_META_VERSION@
+ZFS_MODULE_LOAD = @ZFS_MODULE_LOAD@
+ZLIB = @ZLIB@
+ZONENAME = @ZONENAME@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dracutdir = @dracutdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+modulesloaddir = @modulesloaddir@
+mounthelperdir = @mounthelperdir@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+systemdpresetdir = @systemdpresetdir@
+systemdunitdir = @systemdunitdir@
+target = @target@
+target_alias = @target_alias@
+target_cpu = @target_cpu@
+target_os = @target_os@
+target_vendor = @target_vendor@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+udevdir = @udevdir@
+udevruledir = @udevruledir@
+COMMON_H = \
+ $(top_srcdir)/include/sys/sysevent/eventdefs.h \
+ $(top_srcdir)/include/sys/sysevent/dev.h
+
+KERNEL_H =
+USER_H =
+EXTRA_DIST = $(COMMON_H) $(KERNEL_H) $(USER_H)
+@CONFIG_USER_TRUE@libzfsdir = $(includedir)/libzfs/sys/sysevent
+@CONFIG_USER_TRUE@libzfs_HEADERS = $(COMMON_H) $(USER_H)
+@CONFIG_KERNEL_TRUE@kerneldir = @prefix@/src/zfs-$(VERSION)/include/sys/sysevent
+@CONFIG_KERNEL_TRUE@kernel_HEADERS = $(COMMON_H) $(KERNEL_H)
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu include/sys/sysevent/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --gnu include/sys/sysevent/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+install-kernelHEADERS: $(kernel_HEADERS)
+ @$(NORMAL_INSTALL)
+ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(kerneldir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(kerneldir)" || exit 1; \
+ fi; \
+ for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; \
+ done | $(am__base_list) | \
+ while read files; do \
+ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(kerneldir)'"; \
+ $(INSTALL_HEADER) $$files "$(DESTDIR)$(kerneldir)" || exit $$?; \
+ done
+
+uninstall-kernelHEADERS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(kernel_HEADERS)'; test -n "$(kerneldir)" || list=; \
+ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+ dir='$(DESTDIR)$(kerneldir)'; $(am__uninstall_files_from_dir)
+install-libzfsHEADERS: $(libzfs_HEADERS)
+ @$(NORMAL_INSTALL)
+ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(libzfsdir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(libzfsdir)" || exit 1; \
+ fi; \
+ for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; \
+ done | $(am__base_list) | \
+ while read files; do \
+ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(libzfsdir)'"; \
+ $(INSTALL_HEADER) $$files "$(DESTDIR)$(libzfsdir)" || exit $$?; \
+ done
+
+uninstall-libzfsHEADERS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(libzfs_HEADERS)'; test -n "$(libzfsdir)" || list=; \
+ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+ dir='$(DESTDIR)$(libzfsdir)'; $(am__uninstall_files_from_dir)
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+
+distdir: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(HEADERS)
+installdirs:
+ for dir in "$(DESTDIR)$(kerneldir)" "$(DESTDIR)$(libzfsdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-kernelHEADERS install-libzfsHEADERS
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-kernelHEADERS uninstall-libzfsHEADERS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am clean clean-generic \
+ clean-libtool cscopelist-am ctags ctags-am distclean \
+ distclean-generic distclean-libtool distclean-tags distdir dvi \
+ dvi-am html html-am info info-am install install-am \
+ install-data install-data-am install-dvi install-dvi-am \
+ install-exec install-exec-am install-html install-html-am \
+ install-info install-info-am install-kernelHEADERS \
+ install-libzfsHEADERS install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-generic \
+ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \
+ uninstall-am uninstall-kernelHEADERS uninstall-libzfsHEADERS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/zfs/include/sys/sysevent/dev.h b/zfs/include/sys/sysevent/dev.h
new file mode 100644
index 000000000000..1117538d822d
--- /dev/null
+++ b/zfs/include/sys/sysevent/dev.h
@@ -0,0 +1,261 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_SYSEVENT_DEV_H
+#define _SYS_SYSEVENT_DEV_H
+
+#include <sys/sysevent/eventdefs.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Event schema for EC_DEV_ADD/ESC_DISK
+ *
+ * Event Class - EC_DEV_ADD
+ * Event Sub-Class - ESC_DISK
+ *
+ * Attribute Name - EV_VERSION
+ * Attribute Type - DATA_TYPE_INT32
+ * Attribute Value - event version number
+ *
+ * Attribute Name - DEV_NAME
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - /dev name to the raw device.
+ * The name does not include the slice number component.
+ *
+ * Attribute Name - DEV_PHYS_PATH
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - physical path of the device without the "/devices"
+ * prefix.
+ *
+ * Attribute Name - DEV_DRIVER_NAME
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - driver name
+ *
+ * Attribute Name - DEV_INSTANCE
+ * Attribute Type - DATA_TYPE_INT32
+ * Attribute Value - driver instance number
+ *
+ * Attribute Name - DEV_PROP_PREFIX<devinfo_node_property>
+ * Attribute Type - data type of the devinfo_node_property
+ * Attribute Value - value of the devinfo_node_property
+ *
+ *
+ * Event schema for EC_DEV_ADD/ESC_NETWORK
+ *
+ * Event Class - EC_DEV_ADD
+ * Event Sub-Class - ESC_NETWORK
+ *
+ * Attribute Name - EV_VERSION
+ * Attribute Type - DATA_TYPE_INT32
+ * Attribute Value - event version number
+ *
+ * Attribute Name - DEV_NAME
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - /dev name associated with the device if it exists.
+ * /dev name associated with the driver for DLPI
+ * Style-2 only drivers.
+ *
+ * Attribute Name - DEV_PHYS_PATH
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - physical path of the device without the "/devices"
+ * prefix.
+ *
+ * Attribute Name - DEV_DRIVER_NAME
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - driver name
+ *
+ * Attribute Name - DEV_INSTANCE
+ * Attribute Type - DATA_TYPE_INT32
+ * Attribute Value - driver instance number
+ *
+ * Attribute Name - DEV_PROP_PREFIX<devinfo_node_property>
+ * Attribute Type - data type of the devinfo_node_property
+ * Attribute Value - value of the devinfo_node_property
+ *
+ *
+ * Event schema for EC_DEV_ADD/ESC_PRINTER
+ *
+ * Event Class - EC_DEV_ADD
+ * Event Sub-Class - ESC_PRINTER
+ *
+ * Attribute Name - EV_VERSION
+ * Attribute Type - DATA_TYPE_INT32
+ * Attribute Value - event version number
+ *
+ * Attribute Name - DEV_NAME
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - /dev/printers name associated with the device
+ * if it exists.
+ * /dev name associated with the device if it exists.
+ *
+ * Attribute Name - DEV_PHYS_PATH
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - physical path of the device without the "/devices"
+ * prefix.
+ *
+ * Attribute Name - DEV_DRIVER_NAME
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - driver name
+ *
+ * Attribute Name - DEV_INSTANCE
+ * Attribute Type - DATA_TYPE_INT32
+ * Attribute Value - driver instance number
+ *
+ * Attribute Name - DEV_PROP_PREFIX<devinfo_node_property>
+ * Attribute Type - data type of the devinfo_node_property
+ * Attribute Value - value of the devinfo_node_property
+ *
+ *
+ * Event schema for EC_DEV_REMOVE/ESC_DISK
+ *
+ * Event Class - EC_DEV_REMOVE
+ * Event Sub-Class - ESC_DISK
+ *
+ * Attribute Name - EV_VERSION
+ * Attribute Type - DATA_TYPE_INT32
+ * Attribute Value - event version number
+ *
+ * Attribute Name - DEV_NAME
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - /dev name to the raw device.
+ * The name does not include the slice number component.
+ *
+ * Attribute Name - DEV_PHYS_PATH
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - physical path of the device without the "/devices"
+ * prefix.
+ *
+ * Attribute Name - DEV_DRIVER_NAME
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - driver name
+ *
+ * Attribute Name - DEV_INSTANCE
+ * Attribute Type - DATA_TYPE_INT32
+ * Attribute Value - driver instance number
+ *
+ *
+ * Event schema for EC_DEV_REMOVE/ESC_NETWORK
+ *
+ * Event Class - EC_DEV_REMOVE
+ * Event Sub-Class - ESC_NETWORK
+ *
+ * Attribute Name - EV_VERSION
+ * Attribute Type - DATA_TYPE_INT32
+ * Attribute Value - event version number
+ *
+ * Attribute Name - DEV_NAME
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - /dev name associated with the device if it exists.
+ * /dev name associated with the driver for DLPI
+ * Style-2 only drivers.
+ *
+ * Attribute Name - DEV_PHYS_PATH
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - physical path of the device without the "/devices"
+ * prefix.
+ *
+ * Attribute Name - DEV_DRIVER_NAME
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - driver name
+ *
+ * Attribute Name - DEV_INSTANCE
+ * Attribute Type - DATA_TYPE_INT32
+ * Attribute Value - driver instance number
+ *
+ *
+ * Event schema for EC_DEV_REMOVE/ESC_PRINTER
+ *
+ * Event Class - EC_DEV_REMOVE
+ * Event Sub-Class - ESC_PRINTER
+ *
+ * Attribute Name - EV_VERSION
+ * Attribute Type - DATA_TYPE_INT32
+ * Attribute Value - event version number
+ *
+ * Attribute Name - DEV_NAME
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - /dev/printers name associated with the device
+ * if it exists.
+ * /dev name associated with the device if it exists.
+ *
+ * Attribute Name - DEV_PHYS_PATH
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - physical path of the device without the "/devices"
+ * prefix.
+ *
+ * Attribute Name - DEV_DRIVER_NAME
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - driver name
+ *
+ * Attribute Name - DEV_INSTANCE
+ * Attribute Type - DATA_TYPE_INT32
+ * Attribute Value - driver instance number
+ *
+ *
+ * Event schema for EC_DEV_BRANCH/ESC_DEV_BRANCH_ADD or ESC_DEV_BRANCH_REMOVE
+ *
+ * Event Class - EC_DEV_BRANCH
+ * Event Sub-Class - ESC_DEV_BRANCH_ADD or ESC_DEV_BRANCH_REMOVE
+ *
+ * Attribute Name - EV_VERSION
+ * Attribute Type - DATA_TYPE_INT32
+ * Attribute Value - event version number
+ *
+ * Attribute Name - DEV_PHYS_PATH
+ * Attribute Type - DATA_TYPE_STRING
+ * Attribute Value - physical path to the root node of the device subtree
+ * without the "/devices" prefix.
+ */
+
+#define EV_VERSION "version"
+#define DEV_PHYS_PATH "phys_path"
+#define DEV_NAME "dev_name"
+#define DEV_DRIVER_NAME "driver_name"
+#define DEV_INSTANCE "instance"
+#define DEV_PROP_PREFIX "prop-"
+
+#ifdef __linux__
+#define DEV_IDENTIFIER "devid"
+#define DEV_PATH "path"
+#define DEV_IS_PART "is_slice"
+#define DEV_SIZE "dev_size"
+#endif /* __linux__ */
+
+#define EV_V1 1
+
+/* maximum number of devinfo node properties added to the event */
+#define MAX_PROP_COUNT 100
+
+/* only properties with size less than PROP_LEN_LIMIT are added to the event */
+#define PROP_LEN_LIMIT 1024
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SYSEVENT_DEV_H */
diff --git a/zfs/include/sys/sysevent/eventdefs.h b/zfs/include/sys/sysevent/eventdefs.h
new file mode 100644
index 000000000000..fc2687842ccd
--- /dev/null
+++ b/zfs/include/sys/sysevent/eventdefs.h
@@ -0,0 +1,127 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2017 Joyent, Inc.
+ */
+
+#ifndef _SYS_SYSEVENT_EVENTDEFS_H
+#define _SYS_SYSEVENT_EVENTDEFS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * eventdefs.h contains public definitions for sysevent types (classes
+ * and subclasses). All additions/removals/changes are subject
+ * to PSARC approval.
+ */
+
+/* Sysevent Class definitions */
+#define EC_NONE "EC_none"
+#define EC_PRIV "EC_priv"
+#define EC_PLATFORM "EC_platform" /* events private to platform */
+#define EC_DR "EC_dr" /* Dynamic reconfiguration event class */
+#define EC_ENV "EC_env" /* Environmental monitor event class */
+#define EC_DOMAIN "EC_domain" /* Domain event class */
+#define EC_AP_DRIVER "EC_ap_driver" /* Alternate Pathing event class */
+#define EC_IPMP "EC_ipmp" /* IP Multipathing event class */
+#define EC_DEV_ADD "EC_dev_add" /* device add event class */
+#define EC_DEV_REMOVE "EC_dev_remove" /* device remove event class */
+#define EC_DEV_BRANCH "EC_dev_branch" /* device tree branch event class */
+#define EC_DEV_STATUS "EC_dev_status" /* device status event class */
+#define EC_FM "EC_fm" /* FMA error report event */
+#define EC_ZFS "EC_zfs" /* ZFS event */
+#define EC_DATALINK "EC_datalink" /* datalink event */
+#define EC_VRRP "EC_vrrp" /* VRRP event */
+
+/*
+ * EC_DEV_ADD and EC_DEV_REMOVE subclass definitions - supporting attributes
+ * (name/value pairs) are found in sys/sysevent/dev.h
+ */
+#define ESC_DISK "disk" /* disk device */
+#define ESC_NETWORK "network" /* network interface */
+#define ESC_PRINTER "printer" /* printer device */
+#define ESC_LOFI "lofi" /* lofi device */
+
+/*
+ * EC_DEV_BRANCH subclass definitions - supporting attributes (name/value pairs)
+ * are found in sys/sysevent/dev.h
+ */
+
+/* device tree branch added */
+#define ESC_DEV_BRANCH_ADD "dev_branch_add"
+
+/* device tree branch removed */
+#define ESC_DEV_BRANCH_REMOVE "dev_branch_remove"
+
+/*
+ * EC_DEV_STATUS subclass definitions
+ *
+ * device capacity dynamically changed
+ */
+#define ESC_DEV_DLE "dev_dle"
+
+/* LUN has received an eject request from the user */
+#define ESC_DEV_EJECT_REQUEST "dev_eject_request"
+
+/* FMA Fault and Error event protocol subclass */
+#define ESC_FM_ERROR "error"
+#define ESC_FM_ERROR_REPLAY "error_replay"
+
+/*
+ * ZFS subclass definitions. Supporting attributes (name/value pairs) are found
+ * in sys/fs/zfs.h
+ */
+#define ESC_ZFS_RESILVER_START "resilver_start"
+#define ESC_ZFS_RESILVER_FINISH "resilver_finish"
+#define ESC_ZFS_VDEV_REMOVE "vdev_remove"
+#define ESC_ZFS_VDEV_REMOVE_AUX "vdev_remove_aux"
+#define ESC_ZFS_VDEV_REMOVE_DEV "vdev_remove_dev"
+#define ESC_ZFS_POOL_CREATE "pool_create"
+#define ESC_ZFS_POOL_DESTROY "pool_destroy"
+#define ESC_ZFS_POOL_IMPORT "pool_import"
+#define ESC_ZFS_VDEV_ADD "vdev_add"
+#define ESC_ZFS_VDEV_ATTACH "vdev_attach"
+#define ESC_ZFS_VDEV_CLEAR "vdev_clear"
+#define ESC_ZFS_VDEV_CHECK "vdev_check"
+#define ESC_ZFS_VDEV_ONLINE "vdev_online"
+#define ESC_ZFS_CONFIG_SYNC "config_sync"
+#define ESC_ZFS_SCRUB_START "scrub_start"
+#define ESC_ZFS_SCRUB_FINISH "scrub_finish"
+#define ESC_ZFS_VDEV_SPARE "vdev_spare"
+#define ESC_ZFS_VDEV_AUTOEXPAND "vdev_autoexpand"
+#define ESC_ZFS_BOOTFS_VDEV_ATTACH "bootfs_vdev_attach"
+#define ESC_ZFS_POOL_REGUID "pool_reguid"
+#define ESC_ZFS_HISTORY_EVENT "history_event"
+
+/*
+ * datalink subclass definitions.
+ */
+#define ESC_DATALINK_PHYS_ADD "datalink_phys_add" /* new physical link */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SYSEVENT_EVENTDEFS_H */
diff --git a/zfs/include/sys/trace_acl.h b/zfs/include/sys/trace_acl.h
index 2308942294e0..1057e560ba6c 100644
--- a/zfs/include/sys/trace_acl.h
+++ b/zfs/include/sys/trace_acl.h
@@ -31,6 +31,7 @@
#define _TRACE_ACL_H
#include <linux/tracepoint.h>
+#include <linux/vfs_compat.h>
#include <sys/types.h>
/*
@@ -41,7 +42,7 @@
* zfs_ace_hdr_t *, ...,
* uint32_t, ...);
*/
-
+/* BEGIN CSTYLED */
DECLARE_EVENT_CLASS(zfs_ace_class,
TP_PROTO(znode_t *zn, zfs_ace_hdr_t *ace, uint32_t mask_matched),
TP_ARGS(zn, ace, mask_matched),
@@ -54,12 +55,8 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
__field(uint_t, z_blksz)
__field(uint_t, z_seq)
__field(uint64_t, z_mapcnt)
- __field(uint64_t, z_gen)
__field(uint64_t, z_size)
- __field(uint64_t, z_links)
__field(uint64_t, z_pflags)
- __field(uint64_t, z_uid)
- __field(uint64_t, z_gid)
__field(uint32_t, z_sync_cnt)
__field(mode_t, z_mode)
__field(boolean_t, z_is_sa)
@@ -67,6 +64,8 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
__field(boolean_t, z_is_ctldir)
__field(boolean_t, z_is_stale)
+ __field(uint32_t, i_uid)
+ __field(uint32_t, i_gid)
__field(unsigned long, i_ino)
__field(unsigned int, i_nlink)
__field(u64, i_version)
@@ -91,12 +90,8 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
__entry->z_blksz = zn->z_blksz;
__entry->z_seq = zn->z_seq;
__entry->z_mapcnt = zn->z_mapcnt;
- __entry->z_gen = zn->z_gen;
__entry->z_size = zn->z_size;
- __entry->z_links = zn->z_links;
__entry->z_pflags = zn->z_pflags;
- __entry->z_uid = zn->z_uid;
- __entry->z_gid = zn->z_gid;
__entry->z_sync_cnt = zn->z_sync_cnt;
__entry->z_mode = zn->z_mode;
__entry->z_is_sa = zn->z_is_sa;
@@ -104,6 +99,8 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
__entry->z_is_ctldir = zn->z_is_ctldir;
__entry->z_is_stale = zn->z_is_stale;
+ __entry->i_uid = KUID_TO_SUID(ZTOI(zn)->i_uid);
+ __entry->i_gid = KGID_TO_SGID(ZTOI(zn)->i_gid);
__entry->i_ino = zn->z_inode.i_ino;
__entry->i_nlink = zn->z_inode.i_nlink;
__entry->i_version = zn->z_inode.i_version;
@@ -121,31 +118,32 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
),
TP_printk("zn { id %llu unlinked %u atime_dirty %u "
"zn_prefetch %u moved %u blksz %u seq %u "
- "mapcnt %llu gen %llu size %llu "
- "links %llu pflags %llu uid %llu gid %llu "
+ "mapcnt %llu size %llu pflags %llu "
"sync_cnt %u mode 0x%x is_sa %d "
"is_mapped %d is_ctldir %d is_stale %d inode { "
- "ino %lu nlink %u version %llu size %lli blkbits %u "
- "bytes %u mode 0x%x generation %x } } ace { type %u "
- "flags %u access_mask %u } mask_matched %u",
+ "uid %u gid %u ino %lu nlink %u version %llu size %lli "
+ "blkbits %u bytes %u mode 0x%x generation %x } } "
+ "ace { type %u flags %u access_mask %u } mask_matched %u",
__entry->z_id, __entry->z_unlinked, __entry->z_atime_dirty,
__entry->z_zn_prefetch, __entry->z_moved, __entry->z_blksz,
- __entry->z_seq, __entry->z_mapcnt, __entry->z_gen,
- __entry->z_size,
- __entry->z_links, __entry->z_pflags, __entry->z_uid,
- __entry->z_gid, __entry->z_sync_cnt, __entry->z_mode,
+ __entry->z_seq, __entry->z_mapcnt, __entry->z_size,
+ __entry->z_pflags, __entry->z_sync_cnt, __entry->z_mode,
__entry->z_is_sa, __entry->z_is_mapped,
- __entry->z_is_ctldir, __entry->z_is_stale, __entry->i_ino,
- __entry->i_nlink, __entry->i_version, __entry->i_size,
- __entry->i_blkbits, __entry->i_bytes, __entry->i_mode,
- __entry->i_generation, __entry->z_type, __entry->z_flags,
- __entry->z_access_mask, __entry->mask_matched)
+ __entry->z_is_ctldir, __entry->z_is_stale, __entry->i_uid,
+ __entry->i_gid, __entry->i_ino, __entry->i_nlink,
+ __entry->i_version, __entry->i_size, __entry->i_blkbits,
+ __entry->i_bytes, __entry->i_mode, __entry->i_generation,
+ __entry->z_type, __entry->z_flags, __entry->z_access_mask,
+ __entry->mask_matched)
);
+/* END CSTYLED */
+/* BEGIN CSTYLED */
#define DEFINE_ACE_EVENT(name) \
DEFINE_EVENT(zfs_ace_class, name, \
TP_PROTO(znode_t *zn, zfs_ace_hdr_t *ace, uint32_t mask_matched), \
TP_ARGS(zn, ace, mask_matched))
+/* END CSTYLED */
DEFINE_ACE_EVENT(zfs_zfs__ace__denies);
DEFINE_ACE_EVENT(zfs_zfs__ace__allows);
diff --git a/zfs/include/sys/trace_arc.h b/zfs/include/sys/trace_arc.h
index 31c3cdcb9b21..74a76520dffd 100644
--- a/zfs/include/sys/trace_arc.h
+++ b/zfs/include/sys/trace_arc.h
@@ -34,6 +34,7 @@
#include <linux/tracepoint.h>
#include <sys/types.h>
+#include <sys/trace_common.h> /* For ZIO macros */
/*
* Generic support for one argument tracepoints of the form:
@@ -41,7 +42,7 @@
* DTRACE_PROBE1(...,
* arc_buf_hdr_t *, ...);
*/
-
+/* BEGIN CSTYLED */
DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
TP_PROTO(arc_buf_hdr_t *ab),
TP_ARGS(ab),
@@ -49,9 +50,10 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
__array(uint64_t, hdr_dva_word, 2)
__field(uint64_t, hdr_birth)
__field(uint32_t, hdr_flags)
- __field(uint32_t, hdr_datacnt)
+ __field(uint32_t, hdr_bufcnt)
__field(arc_buf_contents_t, hdr_type)
- __field(uint64_t, hdr_size)
+ __field(uint16_t, hdr_psize)
+ __field(uint16_t, hdr_lsize)
__field(uint64_t, hdr_spa)
__field(arc_state_type_t, hdr_state_type)
__field(clock_t, hdr_access)
@@ -67,8 +69,9 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
__entry->hdr_dva_word[1] = ab->b_dva.dva_word[1];
__entry->hdr_birth = ab->b_birth;
__entry->hdr_flags = ab->b_flags;
- __entry->hdr_datacnt = ab->b_l1hdr.b_datacnt;
- __entry->hdr_size = ab->b_size;
+ __entry->hdr_bufcnt = ab->b_l1hdr.b_bufcnt;
+ __entry->hdr_psize = ab->b_psize;
+ __entry->hdr_lsize = ab->b_lsize;
__entry->hdr_spa = ab->b_spa;
__entry->hdr_state_type = ab->b_l1hdr.b_state->arcs_state;
__entry->hdr_access = ab->b_l1hdr.b_arc_access;
@@ -80,28 +83,33 @@ DECLARE_EVENT_CLASS(zfs_arc_buf_hdr_class,
__entry->hdr_refcount = ab->b_l1hdr.b_refcnt.rc_count;
),
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
- "flags 0x%x datacnt %u type %u size %llu spa %llu "
+ "flags 0x%x bufcnt %u type %u psize %u lsize %u spa %llu "
"state_type %u access %lu mru_hits %u mru_ghost_hits %u "
"mfu_hits %u mfu_ghost_hits %u l2_hits %u refcount %lli }",
__entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
__entry->hdr_birth, __entry->hdr_flags,
- __entry->hdr_datacnt, __entry->hdr_type, __entry->hdr_size,
- __entry->hdr_spa, __entry->hdr_state_type,
+ __entry->hdr_bufcnt, __entry->hdr_type, __entry->hdr_psize,
+ __entry->hdr_lsize, __entry->hdr_spa, __entry->hdr_state_type,
__entry->hdr_access, __entry->hdr_mru_hits,
__entry->hdr_mru_ghost_hits, __entry->hdr_mfu_hits,
__entry->hdr_mfu_ghost_hits, __entry->hdr_l2_hits,
__entry->hdr_refcount)
);
+/* END CSTYLED */
+/* BEGIN CSTYLED */
#define DEFINE_ARC_BUF_HDR_EVENT(name) \
DEFINE_EVENT(zfs_arc_buf_hdr_class, name, \
TP_PROTO(arc_buf_hdr_t *ab), \
TP_ARGS(ab))
+/* END CSTYLED */
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__hit);
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__evict);
DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__delete);
DEFINE_ARC_BUF_HDR_EVENT(zfs_new_state__mru);
DEFINE_ARC_BUF_HDR_EVENT(zfs_new_state__mfu);
+DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__sync__wait__for__async);
+DEFINE_ARC_BUF_HDR_EVENT(zfs_arc__demand__hit__predictive__prefetch);
DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__hit);
DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__miss);
@@ -112,87 +120,7 @@ DEFINE_ARC_BUF_HDR_EVENT(zfs_l2arc__miss);
* vdev_t *, ...,
* zio_t *, ...);
*/
-
-#define ZIO_TP_STRUCT_ENTRY \
- __field(zio_type_t, zio_type) \
- __field(int, zio_cmd) \
- __field(zio_priority_t, zio_priority) \
- __field(uint64_t, zio_size) \
- __field(uint64_t, zio_orig_size) \
- __field(uint64_t, zio_offset) \
- __field(hrtime_t, zio_timestamp) \
- __field(hrtime_t, zio_delta) \
- __field(uint64_t, zio_delay) \
- __field(enum zio_flag, zio_flags) \
- __field(enum zio_stage, zio_stage) \
- __field(enum zio_stage, zio_pipeline) \
- __field(enum zio_flag, zio_orig_flags) \
- __field(enum zio_stage, zio_orig_stage) \
- __field(enum zio_stage, zio_orig_pipeline) \
- __field(uint8_t, zio_reexecute) \
- __field(uint64_t, zio_txg) \
- __field(int, zio_error) \
- __field(uint64_t, zio_ena) \
- \
- __field(enum zio_checksum, zp_checksum) \
- __field(enum zio_compress, zp_compress) \
- __field(dmu_object_type_t, zp_type) \
- __field(uint8_t, zp_level) \
- __field(uint8_t, zp_copies) \
- __field(boolean_t, zp_dedup) \
- __field(boolean_t, zp_dedup_verify) \
- __field(boolean_t, zp_nopwrite)
-
-#define ZIO_TP_FAST_ASSIGN \
- __entry->zio_type = zio->io_type; \
- __entry->zio_cmd = zio->io_cmd; \
- __entry->zio_priority = zio->io_priority; \
- __entry->zio_size = zio->io_size; \
- __entry->zio_orig_size = zio->io_orig_size; \
- __entry->zio_offset = zio->io_offset; \
- __entry->zio_timestamp = zio->io_timestamp; \
- __entry->zio_delta = zio->io_delta; \
- __entry->zio_delay = zio->io_delay; \
- __entry->zio_flags = zio->io_flags; \
- __entry->zio_stage = zio->io_stage; \
- __entry->zio_pipeline = zio->io_pipeline; \
- __entry->zio_orig_flags = zio->io_orig_flags; \
- __entry->zio_orig_stage = zio->io_orig_stage; \
- __entry->zio_orig_pipeline = zio->io_orig_pipeline; \
- __entry->zio_reexecute = zio->io_reexecute; \
- __entry->zio_txg = zio->io_txg; \
- __entry->zio_error = zio->io_error; \
- __entry->zio_ena = zio->io_ena; \
- \
- __entry->zp_checksum = zio->io_prop.zp_checksum; \
- __entry->zp_compress = zio->io_prop.zp_compress; \
- __entry->zp_type = zio->io_prop.zp_type; \
- __entry->zp_level = zio->io_prop.zp_level; \
- __entry->zp_copies = zio->io_prop.zp_copies; \
- __entry->zp_dedup = zio->io_prop.zp_dedup; \
- __entry->zp_nopwrite = zio->io_prop.zp_nopwrite; \
- __entry->zp_dedup_verify = zio->io_prop.zp_dedup_verify;
-
-#define ZIO_TP_PRINTK_FMT \
- "zio { type %u cmd %i prio %u size %llu orig_size %llu " \
- "offset %llu timestamp %llu delta %llu delay %llu " \
- "flags 0x%x stage 0x%x pipeline 0x%x orig_flags 0x%x " \
- "orig_stage 0x%x orig_pipeline 0x%x reexecute %u " \
- "txg %llu error %d ena %llu prop { checksum %u compress %u " \
- "type %u level %u copies %u dedup %u dedup_verify %u nopwrite %u } }"
-
-#define ZIO_TP_PRINTK_ARGS \
- __entry->zio_type, __entry->zio_cmd, __entry->zio_priority, \
- __entry->zio_size, __entry->zio_orig_size, __entry->zio_offset, \
- __entry->zio_timestamp, __entry->zio_delta, __entry->zio_delay, \
- __entry->zio_flags, __entry->zio_stage, __entry->zio_pipeline, \
- __entry->zio_orig_flags, __entry->zio_orig_stage, \
- __entry->zio_orig_pipeline, __entry->zio_reexecute, \
- __entry->zio_txg, __entry->zio_error, __entry->zio_ena, \
- __entry->zp_checksum, __entry->zp_compress, __entry->zp_type, \
- __entry->zp_level, __entry->zp_copies, __entry->zp_dedup, \
- __entry->zp_dedup_verify, __entry->zp_nopwrite
-
+/* BEGIN CSTYLED */
DECLARE_EVENT_CLASS(zfs_l2arc_rw_class,
TP_PROTO(vdev_t *vd, zio_t *zio),
TP_ARGS(vd, zio),
@@ -212,11 +140,14 @@ DECLARE_EVENT_CLASS(zfs_l2arc_rw_class,
ZIO_TP_PRINTK_FMT, __entry->vdev_id, __entry->vdev_guid,
__entry->vdev_state, ZIO_TP_PRINTK_ARGS)
);
+/* END CSTYLED */
+/* BEGIN CSTYLED */
#define DEFINE_L2ARC_RW_EVENT(name) \
DEFINE_EVENT(zfs_l2arc_rw_class, name, \
TP_PROTO(vdev_t *vd, zio_t *zio), \
TP_ARGS(vd, zio))
+/* END CSTYLED */
DEFINE_L2ARC_RW_EVENT(zfs_l2arc__read);
DEFINE_L2ARC_RW_EVENT(zfs_l2arc__write);
@@ -228,7 +159,7 @@ DEFINE_L2ARC_RW_EVENT(zfs_l2arc__write);
* zio_t *, ...,
* l2arc_write_callback_t *, ...);
*/
-
+/* BEGIN CSTYLED */
DECLARE_EVENT_CLASS(zfs_l2arc_iodone_class,
TP_PROTO(zio_t *zio, l2arc_write_callback_t *cb),
TP_ARGS(zio, cb),
@@ -236,11 +167,14 @@ DECLARE_EVENT_CLASS(zfs_l2arc_iodone_class,
TP_fast_assign(ZIO_TP_FAST_ASSIGN),
TP_printk(ZIO_TP_PRINTK_FMT, ZIO_TP_PRINTK_ARGS)
);
+/* END CSTYLED */
+/* BEGIN CSTYLED */
#define DEFINE_L2ARC_IODONE_EVENT(name) \
DEFINE_EVENT(zfs_l2arc_iodone_class, name, \
TP_PROTO(zio_t *zio, l2arc_write_callback_t *cb), \
TP_ARGS(zio, cb))
+/* END CSTYLED */
DEFINE_L2ARC_IODONE_EVENT(zfs_l2arc__iodone);
@@ -253,7 +187,7 @@ DEFINE_L2ARC_IODONE_EVENT(zfs_l2arc__iodone);
* uint64_t,
* const zbookmark_phys_t *);
*/
-
+/* BEGIN CSTYLED */
DECLARE_EVENT_CLASS(zfs_arc_miss_class,
TP_PROTO(arc_buf_hdr_t *hdr,
const blkptr_t *bp, uint64_t size, const zbookmark_phys_t *zb),
@@ -262,9 +196,10 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
__array(uint64_t, hdr_dva_word, 2)
__field(uint64_t, hdr_birth)
__field(uint32_t, hdr_flags)
- __field(uint32_t, hdr_datacnt)
+ __field(uint32_t, hdr_bufcnt)
__field(arc_buf_contents_t, hdr_type)
- __field(uint64_t, hdr_size)
+ __field(uint16_t, hdr_psize)
+ __field(uint16_t, hdr_lsize)
__field(uint64_t, hdr_spa)
__field(arc_state_type_t, hdr_state_type)
__field(clock_t, hdr_access)
@@ -292,8 +227,9 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
__entry->hdr_dva_word[1] = hdr->b_dva.dva_word[1];
__entry->hdr_birth = hdr->b_birth;
__entry->hdr_flags = hdr->b_flags;
- __entry->hdr_datacnt = hdr->b_l1hdr.b_datacnt;
- __entry->hdr_size = hdr->b_size;
+ __entry->hdr_bufcnt = hdr->b_l1hdr.b_bufcnt;
+ __entry->hdr_psize = hdr->b_psize;
+ __entry->hdr_lsize = hdr->b_lsize;
__entry->hdr_spa = hdr->b_spa;
__entry->hdr_state_type = hdr->b_l1hdr.b_state->arcs_state;
__entry->hdr_access = hdr->b_l1hdr.b_arc_access;
@@ -323,7 +259,7 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
__entry->zb_blkid = zb->zb_blkid;
),
TP_printk("hdr { dva 0x%llx:0x%llx birth %llu "
- "flags 0x%x datacnt %u size %llu spa %llu state_type %u "
+ "flags 0x%x bufcnt %u psize %u lsize %u spa %llu state_type %u "
"access %lu mru_hits %u mru_ghost_hits %u mfu_hits %u "
"mfu_ghost_hits %u l2_hits %u refcount %lli } "
"bp { dva0 0x%llx:0x%llx dva1 0x%llx:0x%llx dva2 "
@@ -332,7 +268,7 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
"blkid %llu }",
__entry->hdr_dva_word[0], __entry->hdr_dva_word[1],
__entry->hdr_birth, __entry->hdr_flags,
- __entry->hdr_datacnt, __entry->hdr_size,
+ __entry->hdr_bufcnt, __entry->hdr_psize, __entry->hdr_lsize,
__entry->hdr_spa, __entry->hdr_state_type, __entry->hdr_access,
__entry->hdr_mru_hits, __entry->hdr_mru_ghost_hits,
__entry->hdr_mfu_hits, __entry->hdr_mfu_ghost_hits,
@@ -345,12 +281,15 @@ DECLARE_EVENT_CLASS(zfs_arc_miss_class,
__entry->bp_lsize, __entry->zb_objset, __entry->zb_object,
__entry->zb_level, __entry->zb_blkid)
);
+/* END CSTYLED */
+/* BEGIN CSTYLED */
#define DEFINE_ARC_MISS_EVENT(name) \
DEFINE_EVENT(zfs_arc_miss_class, name, \
TP_PROTO(arc_buf_hdr_t *hdr, \
const blkptr_t *bp, uint64_t size, const zbookmark_phys_t *zb), \
TP_ARGS(hdr, bp, size, zb))
+/* END CSTYLED */
DEFINE_ARC_MISS_EVENT(zfs_arc__miss);
/*
@@ -362,7 +301,7 @@ DEFINE_ARC_MISS_EVENT(zfs_arc__miss);
* uint64_t, ...,
* boolean_t, ...);
*/
-
+/* BEGIN CSTYLED */
DECLARE_EVENT_CLASS(zfs_l2arc_evict_class,
TP_PROTO(l2arc_dev_t *dev,
list_t *buflist, uint64_t taddr, boolean_t all),
@@ -403,12 +342,15 @@ DECLARE_EVENT_CLASS(zfs_l2arc_evict_class,
__entry->l2ad_end, __entry->l2ad_first, __entry->l2ad_writing,
__entry->taddr, __entry->all)
);
+/* END CSTYLED */
+/* BEGIN CSTYLED */
#define DEFINE_L2ARC_EVICT_EVENT(name) \
DEFINE_EVENT(zfs_l2arc_evict_class, name, \
TP_PROTO(l2arc_dev_t *dev, \
list_t *buflist, uint64_t taddr, boolean_t all), \
TP_ARGS(dev, buflist, taddr, all))
+/* END CSTYLED */
DEFINE_L2ARC_EVICT_EVENT(zfs_l2arc__evict);
#endif /* _TRACE_ARC_H */
diff --git a/zfs/include/sys/trace_common.h b/zfs/include/sys/trace_common.h
new file mode 100644
index 000000000000..6922d1a1810a
--- /dev/null
+++ b/zfs/include/sys/trace_common.h
@@ -0,0 +1,112 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * This file contains commonly used trace macros. Feel free to add and use
+ * them in your tracepoint headers.
+ */
+
+#ifndef _SYS_TRACE_COMMON_H
+#define _SYS_TRACE_COMMON_H
+#include <linux/tracepoint.h>
+
+/* ZIO macros */
+#define ZIO_TP_STRUCT_ENTRY \
+ __field(zio_type_t, zio_type) \
+ __field(int, zio_cmd) \
+ __field(zio_priority_t, zio_priority) \
+ __field(uint64_t, zio_size) \
+ __field(uint64_t, zio_orig_size) \
+ __field(uint64_t, zio_offset) \
+ __field(hrtime_t, zio_timestamp) \
+ __field(hrtime_t, zio_delta) \
+ __field(uint64_t, zio_delay) \
+ __field(enum zio_flag, zio_flags) \
+ __field(enum zio_stage, zio_stage) \
+ __field(enum zio_stage, zio_pipeline) \
+ __field(enum zio_flag, zio_orig_flags) \
+ __field(enum zio_stage, zio_orig_stage) \
+ __field(enum zio_stage, zio_orig_pipeline) \
+ __field(uint8_t, zio_reexecute) \
+ __field(uint64_t, zio_txg) \
+ __field(int, zio_error) \
+ __field(uint64_t, zio_ena) \
+ \
+ __field(enum zio_checksum, zp_checksum) \
+ __field(enum zio_compress, zp_compress) \
+ __field(dmu_object_type_t, zp_type) \
+ __field(uint8_t, zp_level) \
+ __field(uint8_t, zp_copies) \
+ __field(boolean_t, zp_dedup) \
+ __field(boolean_t, zp_dedup_verify) \
+ __field(boolean_t, zp_nopwrite)
+
+#define ZIO_TP_FAST_ASSIGN \
+ __entry->zio_type = zio->io_type; \
+ __entry->zio_cmd = zio->io_cmd; \
+ __entry->zio_priority = zio->io_priority; \
+ __entry->zio_size = zio->io_size; \
+ __entry->zio_orig_size = zio->io_orig_size; \
+ __entry->zio_offset = zio->io_offset; \
+ __entry->zio_timestamp = zio->io_timestamp; \
+ __entry->zio_delta = zio->io_delta; \
+ __entry->zio_delay = zio->io_delay; \
+ __entry->zio_flags = zio->io_flags; \
+ __entry->zio_stage = zio->io_stage; \
+ __entry->zio_pipeline = zio->io_pipeline; \
+ __entry->zio_orig_flags = zio->io_orig_flags; \
+ __entry->zio_orig_stage = zio->io_orig_stage; \
+ __entry->zio_orig_pipeline = zio->io_orig_pipeline; \
+ __entry->zio_reexecute = zio->io_reexecute; \
+ __entry->zio_txg = zio->io_txg; \
+ __entry->zio_error = zio->io_error; \
+ __entry->zio_ena = zio->io_ena; \
+ \
+ __entry->zp_checksum = zio->io_prop.zp_checksum; \
+ __entry->zp_compress = zio->io_prop.zp_compress; \
+ __entry->zp_type = zio->io_prop.zp_type; \
+ __entry->zp_level = zio->io_prop.zp_level; \
+ __entry->zp_copies = zio->io_prop.zp_copies; \
+ __entry->zp_dedup = zio->io_prop.zp_dedup; \
+ __entry->zp_nopwrite = zio->io_prop.zp_nopwrite; \
+ __entry->zp_dedup_verify = zio->io_prop.zp_dedup_verify;
+
+#define ZIO_TP_PRINTK_FMT \
+ "zio { type %u cmd %i prio %u size %llu orig_size %llu " \
+ "offset %llu timestamp %llu delta %llu delay %llu " \
+ "flags 0x%x stage 0x%x pipeline 0x%x orig_flags 0x%x " \
+ "orig_stage 0x%x orig_pipeline 0x%x reexecute %u " \
+ "txg %llu error %d ena %llu prop { checksum %u compress %u " \
+ "type %u level %u copies %u dedup %u dedup_verify %u nopwrite %u } }"
+
+#define ZIO_TP_PRINTK_ARGS \
+ __entry->zio_type, __entry->zio_cmd, __entry->zio_priority, \
+ __entry->zio_size, __entry->zio_orig_size, __entry->zio_offset, \
+ __entry->zio_timestamp, __entry->zio_delta, __entry->zio_delay, \
+ __entry->zio_flags, __entry->zio_stage, __entry->zio_pipeline, \
+ __entry->zio_orig_flags, __entry->zio_orig_stage, \
+ __entry->zio_orig_pipeline, __entry->zio_reexecute, \
+ __entry->zio_txg, __entry->zio_error, __entry->zio_ena, \
+ __entry->zp_checksum, __entry->zp_compress, __entry->zp_type, \
+ __entry->zp_level, __entry->zp_copies, __entry->zp_dedup, \
+ __entry->zp_dedup_verify, __entry->zp_nopwrite
+
+#endif /* _SYS_TRACE_COMMON_H */
diff --git a/zfs/include/sys/trace_dbgmsg.h b/zfs/include/sys/trace_dbgmsg.h
index e493a45802ed..a4aab1e63f75 100644
--- a/zfs/include/sys/trace_dbgmsg.h
+++ b/zfs/include/sys/trace_dbgmsg.h
@@ -37,75 +37,29 @@
*/
/*
- * Generic support for four argument tracepoints of the form:
+ * Generic support for one argument tracepoints of the form:
*
- * DTRACE_PROBE4(...,
- * const char *, ...,
- * const char *, ...,
- * int, ...,
+ * DTRACE_PROBE1(...,
* const char *, ...);
*/
-
+/* BEGIN CSTYLED */
DECLARE_EVENT_CLASS(zfs_dprintf_class,
- TP_PROTO(const char *file, const char *function, int line,
- const char *msg),
- TP_ARGS(file, function, line, msg),
+ TP_PROTO(const char *msg),
+ TP_ARGS(msg),
TP_STRUCT__entry(
- __field(const char *, file)
- __field(const char *, function)
- __field(int, line)
__string(msg, msg)
),
TP_fast_assign(
- __entry->file = file;
- __entry->function = function;
- __entry->line = line;
__assign_str(msg, msg);
),
- TP_printk("%s:%d:%s(): %s", __entry->file, __entry->line,
- __entry->function, __get_str(msg))
+ TP_printk("%s", __get_str(msg))
);
+/* END CSTYLED */
+/* BEGIN CSTYLED */
#define DEFINE_DPRINTF_EVENT(name) \
DEFINE_EVENT(zfs_dprintf_class, name, \
- TP_PROTO(const char *file, const char *function, int line, \
- const char *msg), \
- TP_ARGS(file, function, line, msg))
+ TP_PROTO(const char *msg), \
+ TP_ARGS(msg))
+/* END CSTYLED */
DEFINE_DPRINTF_EVENT(zfs_zfs__dprintf);
-
-/*
- * Generic support for four argument tracepoints of the form:
- *
- * DTRACE_PROBE4(...,
- * const char *, ...,
- * const char *, ...,
- * int, ...,
- * uintptr_t, ...);
- */
-
-DECLARE_EVENT_CLASS(zfs_set_error_class,
- TP_PROTO(const char *file, const char *function, int line,
- uintptr_t error),
- TP_ARGS(file, function, line, error),
- TP_STRUCT__entry(
- __field(const char *, file)
- __field(const char *, function)
- __field(int, line)
- __field(uintptr_t, error)
- ),
- TP_fast_assign(
- __entry->file = strchr(file, '/') ? strrchr(file, '/') + 1 : file;
- __entry->function = function;
- __entry->line = line;
- __entry->error = error;
- ),
- TP_printk("%s:%d:%s(): error 0x%lx", __entry->file, __entry->line,
- __entry->function, __entry->error)
-);
-
-#define DEFINE_SET_ERROR_EVENT(name) \
-DEFINE_EVENT(zfs_set_error_class, name, \
- TP_PROTO(const char *file, const char *function, int line, \
- uintptr_t error), \
- TP_ARGS(file, function, line, error))
-DEFINE_SET_ERROR_EVENT(zfs_set__error);
diff --git a/zfs/include/sys/trace_dbuf.h b/zfs/include/sys/trace_dbuf.h
index 49e35e3dcbfe..c3e70c371aa0 100644
--- a/zfs/include/sys/trace_dbuf.h
+++ b/zfs/include/sys/trace_dbuf.h
@@ -33,6 +33,10 @@
#include <linux/tracepoint.h>
#include <sys/types.h>
+#ifndef TRACE_DBUF_MSG_MAX
+#define TRACE_DBUF_MSG_MAX 512
+#endif
+
/*
* Generic support for two argument tracepoints of the form:
*
@@ -42,7 +46,7 @@
*/
#define DBUF_TP_STRUCT_ENTRY \
- __field(const char *, os_spa) \
+ __dynamic_array(char, os_spa, TRACE_DBUF_MSG_MAX) \
__field(uint64_t, ds_object) \
__field(uint64_t, db_object) \
__field(uint64_t, db_level) \
@@ -51,46 +55,85 @@
__field(uint64_t, db_size) \
__field(uint64_t, db_state) \
__field(int64_t, db_holds) \
+ __dynamic_array(char, msg, TRACE_DBUF_MSG_MAX)
-#define DBUF_TP_FAST_ASSIGN \
- __entry->os_spa = \
- spa_name(DB_DNODE(db)->dn_objset->os_spa); \
- \
- __entry->ds_object = db->db_objset->os_dsl_dataset ? \
- db->db_objset->os_dsl_dataset->ds_object : 0; \
- \
- __entry->db_object = db->db.db_object; \
- __entry->db_level = db->db_level; \
- __entry->db_blkid = db->db_blkid; \
- __entry->db_offset = db->db.db_offset; \
- __entry->db_size = db->db.db_size; \
- __entry->db_state = db->db_state; \
- __entry->db_holds = refcount_count(&db->db_holds);
+#define DBUF_TP_FAST_ASSIGN \
+ if (db != NULL) { \
+ __assign_str(os_spa, \
+ spa_name(DB_DNODE(db)->dn_objset->os_spa)); \
+ \
+ __entry->ds_object = db->db_objset->os_dsl_dataset ? \
+ db->db_objset->os_dsl_dataset->ds_object : 0; \
+ \
+ __entry->db_object = db->db.db_object; \
+ __entry->db_level = db->db_level; \
+ __entry->db_blkid = db->db_blkid; \
+ __entry->db_offset = db->db.db_offset; \
+ __entry->db_size = db->db.db_size; \
+ __entry->db_state = db->db_state; \
+ __entry->db_holds = refcount_count(&db->db_holds); \
+ snprintf(__get_str(msg), TRACE_DBUF_MSG_MAX, \
+ DBUF_TP_PRINTK_FMT, DBUF_TP_PRINTK_ARGS); \
+ } else { \
+ __assign_str(os_spa, "NULL") \
+ __entry->ds_object = 0; \
+ __entry->db_object = 0; \
+ __entry->db_level = 0; \
+ __entry->db_blkid = 0; \
+ __entry->db_offset = 0; \
+ __entry->db_size = 0; \
+ __entry->db_state = 0; \
+ __entry->db_holds = 0; \
+ snprintf(__get_str(msg), TRACE_DBUF_MSG_MAX, \
+ "dbuf { NULL }"); \
+ }
#define DBUF_TP_PRINTK_FMT \
"dbuf { spa \"%s\" objset %llu object %llu level %llu " \
"blkid %llu offset %llu size %llu state %llu holds %lld }"
#define DBUF_TP_PRINTK_ARGS \
- __entry->os_spa, __entry->ds_object, \
+ __get_str(os_spa), __entry->ds_object, \
__entry->db_object, __entry->db_level, \
__entry->db_blkid, __entry->db_offset, \
__entry->db_size, __entry->db_state, __entry->db_holds
+/* BEGIN CSTYLED */
DECLARE_EVENT_CLASS(zfs_dbuf_class,
TP_PROTO(dmu_buf_impl_t *db, zio_t *zio),
TP_ARGS(db, zio),
TP_STRUCT__entry(DBUF_TP_STRUCT_ENTRY),
TP_fast_assign(DBUF_TP_FAST_ASSIGN),
- TP_printk(DBUF_TP_PRINTK_FMT, DBUF_TP_PRINTK_ARGS)
+ TP_printk("%s", __get_str(msg))
);
+/* END CSTYLED */
+/* BEGIN CSTYLED */
#define DEFINE_DBUF_EVENT(name) \
DEFINE_EVENT(zfs_dbuf_class, name, \
TP_PROTO(dmu_buf_impl_t *db, zio_t *zio), \
TP_ARGS(db, zio))
+/* END CSTYLED */
DEFINE_DBUF_EVENT(zfs_blocked__read);
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_dbuf_evict_one_class,
+ TP_PROTO(dmu_buf_impl_t *db, multilist_sublist_t *mls),
+ TP_ARGS(db, mls),
+ TP_STRUCT__entry(DBUF_TP_STRUCT_ENTRY),
+ TP_fast_assign(DBUF_TP_FAST_ASSIGN),
+ TP_printk("%s", __get_str(msg))
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+#define DEFINE_DBUF_EVICT_ONE_EVENT(name) \
+DEFINE_EVENT(zfs_dbuf_evict_one_class, name, \
+ TP_PROTO(dmu_buf_impl_t *db, multilist_sublist_t *mls), \
+ TP_ARGS(db, mls))
+/* END CSTYLED */
+DEFINE_DBUF_EVICT_ONE_EVENT(zfs_dbuf__evict__one);
+
#endif /* _TRACE_DBUF_H */
#undef TRACE_INCLUDE_PATH
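
The trace_dbuf change above replaces a stored pointer field with a
__dynamic_array that is rendered by snprintf() at trace time, so nothing
is dereferenced later when the ring buffer is read. A minimal sketch of
the same pattern (hypothetical "foo" names, not part of this patch):

    /* trace_foo.h - illustrative only */
    #undef TRACE_SYSTEM
    #define TRACE_SYSTEM foo

    #if !defined(_TRACE_FOO_H) || defined(TRACE_HEADER_MULTI_READ)
    #define _TRACE_FOO_H

    #include <linux/tracepoint.h>

    #define FOO_MSG_MAX 64

    TRACE_EVENT(foo_update,
        TP_PROTO(const char *name, int value),
        TP_ARGS(name, value),
        TP_STRUCT__entry(
            __dynamic_array(char, msg, FOO_MSG_MAX)
        ),
        TP_fast_assign(
            /* render now; the buffer travels with the event */
            snprintf(__get_str(msg), FOO_MSG_MAX,
                "foo { name \"%s\" value %d }",
                name ? name : "NULL", value);
        ),
        TP_printk("%s", __get_str(msg))
    );

    #endif /* _TRACE_FOO_H */

    #undef TRACE_INCLUDE_PATH
    #define TRACE_INCLUDE_PATH .
    #define TRACE_INCLUDE_FILE trace_foo
    #include <trace/define_trace.h>
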
diff --git a/zfs/include/sys/trace_dmu.h b/zfs/include/sys/trace_dmu.h
index e070997bca43..5ae59e563358 100644
--- a/zfs/include/sys/trace_dmu.h
+++ b/zfs/include/sys/trace_dmu.h
@@ -41,7 +41,7 @@
* uint64_t, ...,
* uint64_t, ...);
*/
-
+/* BEGIN CSTYLED */
DECLARE_EVENT_CLASS(zfs_delay_mintime_class,
TP_PROTO(dmu_tx_t *tx, uint64_t dirty, uint64_t min_tx_time),
TP_ARGS(tx, dirty, min_tx_time),
@@ -54,14 +54,6 @@ DECLARE_EVENT_CLASS(zfs_delay_mintime_class,
__field(hrtime_t, tx_start)
__field(boolean_t, tx_wait_dirty)
__field(int, tx_err)
-#ifdef DEBUG_DMU_TX
- __field(uint64_t, tx_space_towrite)
- __field(uint64_t, tx_space_tofree)
- __field(uint64_t, tx_space_tooverwrite)
- __field(uint64_t, tx_space_tounref)
- __field(int64_t, tx_space_written)
- __field(int64_t, tx_space_freed)
-#endif
__field(uint64_t, min_tx_time)
__field(uint64_t, dirty)
),
@@ -74,41 +66,57 @@ DECLARE_EVENT_CLASS(zfs_delay_mintime_class,
__entry->tx_start = tx->tx_start;
__entry->tx_wait_dirty = tx->tx_wait_dirty;
__entry->tx_err = tx->tx_err;
-#ifdef DEBUG_DMU_TX
- __entry->tx_space_towrite = tx->tx_space_towrite;
- __entry->tx_space_tofree = tx->tx_space_tofree;
- __entry->tx_space_tooverwrite = tx->tx_space_tooverwrite;
- __entry->tx_space_tounref = tx->tx_space_tounref;
- __entry->tx_space_written = tx->tx_space_written.rc_count;
- __entry->tx_space_freed = tx->tx_space_freed.rc_count;
-#endif
__entry->dirty = dirty;
__entry->min_tx_time = min_tx_time;
),
TP_printk("tx { txg %llu lastsnap_txg %llu tx_lasttried_txg %llu "
"anyobj %d waited %d start %llu wait_dirty %d err %i "
-#ifdef DEBUG_DMU_TX
- "space_towrite %llu space_tofree %llu space_tooverwrite %llu "
- "space_tounref %llu space_written %lli space_freed %lli "
-#endif
"} dirty %llu min_tx_time %llu",
__entry->tx_txg, __entry->tx_lastsnap_txg,
__entry->tx_lasttried_txg, __entry->tx_anyobj, __entry->tx_waited,
__entry->tx_start, __entry->tx_wait_dirty, __entry->tx_err,
-#ifdef DEBUG_DMU_TX
- __entry->tx_space_towrite, __entry->tx_space_tofree,
- __entry->tx_space_tooverwrite, __entry->tx_space_tounref,
- __entry->tx_space_written, __entry->tx_space_freed,
-#endif
__entry->dirty, __entry->min_tx_time)
);
+/* END CSTYLED */
+/* BEGIN CSTYLED */
#define DEFINE_DELAY_MINTIME_EVENT(name) \
DEFINE_EVENT(zfs_delay_mintime_class, name, \
TP_PROTO(dmu_tx_t *tx, uint64_t dirty, uint64_t min_tx_time), \
TP_ARGS(tx, dirty, min_tx_time))
+/* END CSTYLED */
DEFINE_DELAY_MINTIME_EVENT(zfs_delay__mintime);
+/* BEGIN CSTYLED */
+DECLARE_EVENT_CLASS(zfs_free_long_range_class,
+ TP_PROTO(uint64_t long_free_dirty_all_txgs, uint64_t chunk_len, \
+ uint64_t txg),
+ TP_ARGS(long_free_dirty_all_txgs, chunk_len, txg),
+ TP_STRUCT__entry(
+ __field(uint64_t, long_free_dirty_all_txgs)
+ __field(uint64_t, chunk_len)
+ __field(uint64_t, txg)
+ ),
+ TP_fast_assign(
+ __entry->long_free_dirty_all_txgs = long_free_dirty_all_txgs;
+ __entry->chunk_len = chunk_len;
+ __entry->txg = txg;
+ ),
+ TP_printk("long_free_dirty_all_txgs %llu chunk_len %llu txg %llu",
+ __entry->long_free_dirty_all_txgs,
+ __entry->chunk_len, __entry->txg)
+);
+/* END CSTYLED */
+
+/* BEGIN CSTYLED */
+#define DEFINE_FREE_LONG_RANGE_EVENT(name) \
+DEFINE_EVENT(zfs_free_long_range_class, name, \
+ TP_PROTO(uint64_t long_free_dirty_all_txgs, \
+ uint64_t chunk_len, uint64_t txg), \
+ TP_ARGS(long_free_dirty_all_txgs, chunk_len, txg))
+/* END CSTYLED */
+DEFINE_FREE_LONG_RANGE_EVENT(zfs_free__long__range);
+
#endif /* _TRACE_DMU_H */
#undef TRACE_INCLUDE_PATH
diff --git a/zfs/include/sys/trace_dnode.h b/zfs/include/sys/trace_dnode.h
index ee63c29cd119..7196a497d506 100644
--- a/zfs/include/sys/trace_dnode.h
+++ b/zfs/include/sys/trace_dnode.h
@@ -41,7 +41,7 @@
* int64_t, ...,
* uint32_t, ...);
*/
-
+/* BEGIN CSTYLED */
DECLARE_EVENT_CLASS(zfs_dnode_move_class,
TP_PROTO(dnode_t *dn, int64_t refcount, uint32_t dbufs),
TP_ARGS(dn, refcount, dbufs),
@@ -102,11 +102,14 @@ DECLARE_EVENT_CLASS(zfs_dnode_move_class,
__entry->dn_maxblkid, __entry->dn_tx_holds, __entry->dn_holds,
__entry->dn_have_spill, __entry->refcount, __entry->dbufs)
);
+/* END CSTYLED */
+/* BEGIN CSTYLED */
#define DEFINE_DNODE_MOVE_EVENT(name) \
DEFINE_EVENT(zfs_dnode_move_class, name, \
TP_PROTO(dnode_t *dn, int64_t refcount, uint32_t dbufs), \
TP_ARGS(dn, refcount, dbufs))
+/* END CSTYLED */
DEFINE_DNODE_MOVE_EVENT(zfs_dnode__move);
#endif /* _TRACE_DNODE_H */
diff --git a/zfs/include/sys/trace_multilist.h b/zfs/include/sys/trace_multilist.h
index 08e27a9119b7..ed0b38a3f38f 100644
--- a/zfs/include/sys/trace_multilist.h
+++ b/zfs/include/sys/trace_multilist.h
@@ -41,7 +41,7 @@
* unsigned int, ...,
* void *, ...);
*/
-
+/* BEGIN CSTYLED */
DECLARE_EVENT_CLASS(zfs_multilist_insert_remove_class,
TP_PROTO(multilist_t *ml, unsigned sublist_idx, void *obj),
TP_ARGS(ml, sublist_idx, obj),
@@ -60,11 +60,14 @@ DECLARE_EVENT_CLASS(zfs_multilist_insert_remove_class,
TP_printk("ml { offset %ld numsublists %llu sublistidx %u } ",
__entry->ml_offset, __entry->ml_num_sublists, __entry->sublist_idx)
);
+/* END CSTYLED */
+/* BEGIN CSTYLED */
#define DEFINE_MULTILIST_INSERT_REMOVE_EVENT(name) \
DEFINE_EVENT(zfs_multilist_insert_remove_class, name, \
TP_PROTO(multilist_t *ml, unsigned int sublist_idx, void *obj), \
TP_ARGS(ml, sublist_idx, obj))
+/* END CSTYLED */
DEFINE_MULTILIST_INSERT_REMOVE_EVENT(zfs_multilist__insert);
DEFINE_MULTILIST_INSERT_REMOVE_EVENT(zfs_multilist__remove);
diff --git a/zfs/include/sys/trace_txg.h b/zfs/include/sys/trace_txg.h
index 61d650984cd3..f85c3f9ef7e1 100644
--- a/zfs/include/sys/trace_txg.h
+++ b/zfs/include/sys/trace_txg.h
@@ -40,7 +40,7 @@
* dsl_pool_t *, ...,
* uint64_t, ...);
*/
-
+/* BEGIN CSTYLED */
DECLARE_EVENT_CLASS(zfs_txg_class,
TP_PROTO(dsl_pool_t *dp, uint64_t txg),
TP_ARGS(dp, txg),
@@ -52,11 +52,14 @@ DECLARE_EVENT_CLASS(zfs_txg_class,
),
TP_printk("txg %llu", __entry->txg)
);
+/* END CSTYLED */
+/* BEGIN CSTYLED */
#define DEFINE_TXG_EVENT(name) \
DEFINE_EVENT(zfs_txg_class, name, \
TP_PROTO(dsl_pool_t *dp, uint64_t txg), \
TP_ARGS(dp, txg))
+/* END CSTYLED */
DEFINE_TXG_EVENT(zfs_dsl_pool_sync__done);
DEFINE_TXG_EVENT(zfs_txg__quiescing);
DEFINE_TXG_EVENT(zfs_txg__opened);
diff --git a/zfs/include/sys/trace_zil.h b/zfs/include/sys/trace_zil.h
index e97466fde5e3..eaa391270d76 100644
--- a/zfs/include/sys/trace_zil.h
+++ b/zfs/include/sys/trace_zil.h
@@ -39,7 +39,7 @@
* DTRACE_PROBE1(...,
* zilog_t *, ...);
*/
-
+/* BEGIN CSTYLED */
DECLARE_EVENT_CLASS(zfs_zil_class,
TP_PROTO(zilog_t *zilog),
TP_ARGS(zilog),
@@ -63,7 +63,6 @@ DECLARE_EVENT_CLASS(zfs_zil_class,
__field(uint64_t, zl_parse_lr_count)
__field(uint64_t, zl_next_batch)
__field(uint64_t, zl_com_batch)
- __field(uint64_t, zl_itx_list_sz)
__field(uint64_t, zl_cur_used)
__field(clock_t, zl_replay_time)
__field(uint64_t, zl_replay_blks)
@@ -88,7 +87,6 @@ DECLARE_EVENT_CLASS(zfs_zil_class,
__entry->zl_parse_lr_count = zilog->zl_parse_lr_count;
__entry->zl_next_batch = zilog->zl_next_batch;
__entry->zl_com_batch = zilog->zl_com_batch;
- __entry->zl_itx_list_sz = zilog->zl_itx_list_sz;
__entry->zl_cur_used = zilog->zl_cur_used;
__entry->zl_replay_time = zilog->zl_replay_time;
__entry->zl_replay_blks = zilog->zl_replay_blks;
@@ -98,8 +96,7 @@ DECLARE_EVENT_CLASS(zfs_zil_class,
"replay %u stop_sync %u writer %u logbias %u sync %u "
"parse_error %u parse_blk_seq %llu parse_lr_seq %llu "
"parse_blk_count %llu parse_lr_count %llu next_batch %llu "
- "com_batch %llu itx_list_sz %llu cur_used %llu replay_time %lu "
- "replay_blks %llu }",
+ "com_batch %llu cur_used %llu replay_time %lu replay_blks %llu }",
__entry->zl_lr_seq, __entry->zl_commit_lr_seq,
__entry->zl_destroy_txg, __entry->zl_replaying_seq,
__entry->zl_suspend, __entry->zl_suspending, __entry->zl_keep_first,
@@ -107,17 +104,19 @@ DECLARE_EVENT_CLASS(zfs_zil_class,
__entry->zl_logbias, __entry->zl_sync, __entry->zl_parse_error,
__entry->zl_parse_blk_seq, __entry->zl_parse_lr_seq,
__entry->zl_parse_blk_count, __entry->zl_parse_lr_count,
- __entry->zl_next_batch, __entry->zl_com_batch,
- __entry->zl_itx_list_sz, __entry->zl_cur_used,
+ __entry->zl_next_batch, __entry->zl_com_batch, __entry->zl_cur_used,
__entry->zl_replay_time, __entry->zl_replay_blks)
);
+/* END CSTYLED */
+/* BEGIN CSTYLED */
#define DEFINE_ZIL_EVENT(name) \
DEFINE_EVENT(zfs_zil_class, name, \
TP_PROTO(zilog_t *zilog), \
TP_ARGS(zilog))
DEFINE_ZIL_EVENT(zfs_zil__cw1);
DEFINE_ZIL_EVENT(zfs_zil__cw2);
+/* END CSTYLED */
#endif /* _TRACE_ZIL_H */
diff --git a/zfs/include/sys/trace_zio.h b/zfs/include/sys/trace_zio.h
new file mode 100644
index 000000000000..af589b9dfa6d
--- /dev/null
+++ b/zfs/include/sys/trace_zio.h
@@ -0,0 +1,89 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#include <sys/list.h>
+
+#if defined(_KERNEL) && defined(HAVE_DECLARE_EVENT_CLASS)
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM zfs
+
+#undef TRACE_SYSTEM_VAR
+#define TRACE_SYSTEM_VAR zfs_zio
+
+#if !defined(_TRACE_ZIO_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ZIO_H
+
+#include <linux/tracepoint.h>
+#include <sys/types.h>
+#include <sys/trace_common.h> /* For ZIO macros */
+
+/* BEGIN CSTYLED */
+TRACE_EVENT(zfs_zio__delay__miss,
+ TP_PROTO(zio_t *zio, hrtime_t now),
+ TP_ARGS(zio, now),
+ TP_STRUCT__entry(
+ ZIO_TP_STRUCT_ENTRY
+ __field(hrtime_t, now)
+ ),
+ TP_fast_assign(
+ ZIO_TP_FAST_ASSIGN
+ __entry->now = now;
+ ),
+ TP_printk("now %llu " ZIO_TP_PRINTK_FMT, __entry->now,
+ ZIO_TP_PRINTK_ARGS)
+);
+
+TRACE_EVENT(zfs_zio__delay__hit,
+ TP_PROTO(zio_t *zio, hrtime_t now, hrtime_t diff),
+ TP_ARGS(zio, now, diff),
+ TP_STRUCT__entry(
+ ZIO_TP_STRUCT_ENTRY
+ __field(hrtime_t, now)
+ __field(hrtime_t, diff)
+ ),
+ TP_fast_assign(
+ ZIO_TP_FAST_ASSIGN
+ __entry->now = now;
+ __entry->diff = diff;
+ ),
+ TP_printk("now %llu diff %llu " ZIO_TP_PRINTK_FMT, __entry->now,
+ __entry->diff, ZIO_TP_PRINTK_ARGS)
+);
+
+TRACE_EVENT(zfs_zio__delay__skip,
+ TP_PROTO(zio_t *zio),
+ TP_ARGS(zio),
+ TP_STRUCT__entry(ZIO_TP_STRUCT_ENTRY),
+ TP_fast_assign(ZIO_TP_FAST_ASSIGN),
+ TP_printk(ZIO_TP_PRINTK_FMT, ZIO_TP_PRINTK_ARGS)
+);
+/* END CSTYLED */
+
+#endif /* _TRACE_ZIO_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH sys
+#define TRACE_INCLUDE_FILE trace_zio
+#include <trace/define_trace.h>
+
+#endif /* _KERNEL && HAVE_DECLARE_EVENT_CLASS */
diff --git a/zfs/include/sys/trace_zrlock.h b/zfs/include/sys/trace_zrlock.h
index e1399c468a74..eacba759d32e 100644
--- a/zfs/include/sys/trace_zrlock.h
+++ b/zfs/include/sys/trace_zrlock.h
@@ -40,7 +40,7 @@
* zrlock_t *, ...,
* uint32_t, ...);
*/
-
+/* BEGIN CSTYLED */
DECLARE_EVENT_CLASS(zfs_zrlock_class,
TP_PROTO(zrlock_t *zrl, uint32_t n),
TP_ARGS(zrl, n),
@@ -48,7 +48,7 @@ DECLARE_EVENT_CLASS(zfs_zrlock_class,
__field(int32_t, refcount)
#ifdef ZFS_DEBUG
__field(pid_t, owner_pid)
- __field(const char *, caller)
+ __string(caller, zrl->zr_caller)
#endif
__field(uint32_t, n)
),
@@ -56,19 +56,20 @@ DECLARE_EVENT_CLASS(zfs_zrlock_class,
__entry->refcount = zrl->zr_refcount;
#ifdef ZFS_DEBUG
__entry->owner_pid = zrl->zr_owner ? zrl->zr_owner->pid : 0;
- __entry->caller = zrl->zr_caller;
+ __assign_str(caller, zrl->zr_caller);
#endif
__entry->n = n;
),
#ifdef ZFS_DEBUG
TP_printk("zrl { refcount %d owner_pid %d caller %s } n %u",
- __entry->refcount, __entry->owner_pid, __entry->caller,
+ __entry->refcount, __entry->owner_pid, __get_str(caller),
__entry->n)
#else
TP_printk("zrl { refcount %d } n %u",
__entry->refcount, __entry->n)
#endif
);
+/* END CSTYLED */
#define DEFINE_ZRLOCK_EVENT(name) \
DEFINE_EVENT(zfs_zrlock_class, name, \
diff --git a/zfs/include/sys/txg.h b/zfs/include/sys/txg.h
index 44f81beca420..f52197781889 100644
--- a/zfs/include/sys/txg.h
+++ b/zfs/include/sys/txg.h
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
*/
#ifndef _SYS_TXG_H
@@ -60,6 +60,7 @@ typedef struct txg_node {
typedef struct txg_list {
kmutex_t tl_lock;
size_t tl_offset;
+ spa_t *tl_spa;
txg_node_t *tl_head[TXG_SIZE];
} txg_list_t;
@@ -103,6 +104,8 @@ extern boolean_t txg_stalled(struct dsl_pool *dp);
/* returns TRUE if someone is waiting for the next txg to sync */
extern boolean_t txg_sync_waiting(struct dsl_pool *dp);
+extern void txg_verify(spa_t *spa, uint64_t txg);
+
/*
* Wait for pending commit callbacks of already-synced transactions to finish
* processing.
@@ -115,7 +118,7 @@ extern void txg_wait_callbacks(struct dsl_pool *dp);
#define TXG_CLEAN(txg) ((txg) - 1)
-extern void txg_list_create(txg_list_t *tl, size_t offset);
+extern void txg_list_create(txg_list_t *tl, spa_t *spa, size_t offset);
extern void txg_list_destroy(txg_list_t *tl);
extern boolean_t txg_list_empty(txg_list_t *tl, uint64_t txg);
extern boolean_t txg_all_lists_empty(txg_list_t *tl);
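
Since txg_list_create() now takes a spa_t *, each caller passes the pool
so list operations can be checked with txg_verify(spa, txg). A
representative call site (illustrative; the exact field names may differ
from the tree this applies to):

    /* before */
    txg_list_create(&dp->dp_dirty_datasets,
        offsetof(dsl_dataset_t, ds_dirty_link));

    /* after: hand the spa to the list for txg_verify() sanity checks */
    txg_list_create(&dp->dp_dirty_datasets, spa,
        offsetof(dsl_dataset_t, ds_dirty_link));
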
diff --git a/zfs/include/sys/txg_impl.h b/zfs/include/sys/txg_impl.h
index e583d61eac2f..197cf8de7e17 100644
--- a/zfs/include/sys/txg_impl.h
+++ b/zfs/include/sys/txg_impl.h
@@ -65,7 +65,7 @@ extern "C" {
* grab all tc_open_locks, increment the tx_open_txg, and drop the locks.
* The tc_open_lock is held until the transaction is assigned into the
* transaction group. Typically, this is a short operation but if throttling
- * is occuring it may be held for longer periods of time.
+ * is occurring it may be held for longer periods of time.
*/
struct tx_cpu {
kmutex_t tc_open_lock; /* protects tx_open_txg */
diff --git a/zfs/include/sys/uberblock.h b/zfs/include/sys/uberblock.h
index 21e7ae0de7a7..044e438387c0 100644
--- a/zfs/include/sys/uberblock.h
+++ b/zfs/include/sys/uberblock.h
@@ -40,7 +40,8 @@ extern "C" {
typedef struct uberblock uberblock_t;
extern int uberblock_verify(uberblock_t *);
-extern boolean_t uberblock_update(uberblock_t *, vdev_t *, uint64_t);
+extern boolean_t uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg,
+ uint64_t mmp_delay);
#ifdef __cplusplus
}
diff --git a/zfs/include/sys/uberblock_impl.h b/zfs/include/sys/uberblock_impl.h
index 6ab6aa3135a2..08eeabdda9d6 100644
--- a/zfs/include/sys/uberblock_impl.h
+++ b/zfs/include/sys/uberblock_impl.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017 by Delphix. All rights reserved.
*/
#ifndef _SYS_UBERBLOCK_IMPL_H
@@ -43,6 +44,7 @@ extern "C" {
*/
#define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */
#define UBERBLOCK_SHIFT 10 /* up to 1K */
+#define MMP_MAGIC 0xa11cea11 /* all-see-all */
struct uberblock {
uint64_t ub_magic; /* UBERBLOCK_MAGIC */
@@ -54,6 +56,12 @@ struct uberblock {
/* highest SPA_VERSION supported by software that wrote this txg */
uint64_t ub_software_version;
+
+ /* Maybe missing in uberblocks we read, but always written */
+ uint64_t ub_mmp_magic; /* MMP_MAGIC */
+ uint64_t ub_mmp_delay; /* nanosec since last MMP write */
+ uint64_t ub_mmp_seq; /* reserved for sequence number */
+ uint64_t ub_checkpoint_txg;
};
#ifdef __cplusplus
diff --git a/zfs/include/sys/vdev.h b/zfs/include/sys/vdev.h
index 365789e524d6..7157ef43f64d 100644
--- a/zfs/include/sys/vdev.h
+++ b/zfs/include/sys/vdev.h
@@ -65,11 +65,16 @@ extern void vdev_dtl_dirty(vdev_t *vd, vdev_dtl_type_t d,
extern boolean_t vdev_dtl_contains(vdev_t *vd, vdev_dtl_type_t d,
uint64_t txg, uint64_t size);
extern boolean_t vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t d);
+extern boolean_t vdev_dtl_need_resilver(vdev_t *vd, uint64_t off, size_t size);
extern void vdev_dtl_reassess(vdev_t *vd, uint64_t txg, uint64_t scrub_txg,
int scrub_done);
extern boolean_t vdev_dtl_required(vdev_t *vd);
extern boolean_t vdev_resilver_needed(vdev_t *vd,
uint64_t *minp, uint64_t *maxp);
+extern void vdev_destroy_unlink_zap(vdev_t *vd, uint64_t zapobj,
+ dmu_tx_t *tx);
+extern uint64_t vdev_create_link_zap(vdev_t *vd, dmu_tx_t *tx);
+extern void vdev_construct_zaps(vdev_t *vd, dmu_tx_t *tx);
extern void vdev_hold(vdev_t *);
extern void vdev_rele(vdev_t *);
@@ -81,7 +86,7 @@ extern void vdev_expand(vdev_t *vd, uint64_t txg);
extern void vdev_split(vdev_t *vd);
extern void vdev_deadman(vdev_t *vd);
-
+extern void vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx);
extern void vdev_get_stats(vdev_t *vd, vdev_stat_t *vs);
extern void vdev_clear_stats(vdev_t *vd);
extern void vdev_stat_update(zio_t *zio, uint64_t psize);
@@ -119,10 +124,13 @@ extern void vdev_queue_fini(vdev_t *vd);
extern zio_t *vdev_queue_io(zio_t *zio);
extern void vdev_queue_io_done(zio_t *zio);
+extern int vdev_queue_length(vdev_t *vd);
+extern uint64_t vdev_queue_lastoffset(vdev_t *vd);
+extern void vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio);
+
extern void vdev_config_dirty(vdev_t *vd);
extern void vdev_config_clean(vdev_t *vd);
-extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg,
- boolean_t);
+extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg);
extern void vdev_state_dirty(vdev_t *vd);
extern void vdev_state_clean(vdev_t *vd);
@@ -130,7 +138,8 @@ extern void vdev_state_clean(vdev_t *vd);
typedef enum vdev_config_flag {
VDEV_CONFIG_SPARE = 1 << 0,
VDEV_CONFIG_L2CACHE = 1 << 1,
- VDEV_CONFIG_REMOVING = 1 << 2
+ VDEV_CONFIG_REMOVING = 1 << 2,
+ VDEV_CONFIG_MOS = 1 << 3
} vdev_config_flag_t;
extern void vdev_top_config_generate(spa_t *spa, nvlist_t *config);
@@ -145,6 +154,9 @@ extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
extern int vdev_label_number(uint64_t psise, uint64_t offset);
extern nvlist_t *vdev_label_read_config(vdev_t *vd, uint64_t txg);
extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **);
+extern void vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv);
+extern void vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t
+ offset, uint64_t size, zio_done_func_t *done, void *private, int flags);
typedef enum {
VDEV_LABEL_CREATE, /* create/add a new device */
diff --git a/zfs/include/sys/vdev_file.h b/zfs/include/sys/vdev_file.h
index aebcf55cfd9a..9a398c58391e 100644
--- a/zfs/include/sys/vdev_file.h
+++ b/zfs/include/sys/vdev_file.h
@@ -37,6 +37,9 @@ typedef struct vdev_file {
vnode_t *vf_vnode;
} vdev_file_t;
+extern void vdev_file_init(void);
+extern void vdev_file_fini(void);
+
#ifdef __cplusplus
}
#endif
diff --git a/zfs/include/sys/vdev_impl.h b/zfs/include/sys/vdev_impl.h
index 1371a3f0391f..7c5e54b08e19 100644
--- a/zfs/include/sys/vdev_impl.h
+++ b/zfs/include/sys/vdev_impl.h
@@ -34,6 +34,7 @@
#include <sys/vdev.h>
#include <sys/dkio.h>
#include <sys/uberblock_impl.h>
+#include <sys/zfs_ratelimit.h>
#ifdef __cplusplus
extern "C" {
@@ -52,6 +53,10 @@ extern "C" {
typedef struct vdev_queue vdev_queue_t;
typedef struct vdev_cache vdev_cache_t;
typedef struct vdev_cache_entry vdev_cache_entry_t;
+struct abd;
+
+extern int zfs_vdev_queue_depth_pct;
+extern uint32_t zfs_vdev_async_write_max_active;
/*
* Virtual device operations
@@ -63,6 +68,7 @@ typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
typedef void vdev_io_start_func_t(zio_t *zio);
typedef void vdev_io_done_func_t(zio_t *zio);
typedef void vdev_state_change_func_t(vdev_t *vd, int, int);
+typedef boolean_t vdev_need_resilver_func_t(vdev_t *vd, uint64_t, size_t);
typedef void vdev_hold_func_t(vdev_t *vd);
typedef void vdev_rele_func_t(vdev_t *vd);
@@ -73,6 +79,7 @@ typedef const struct vdev_ops {
vdev_io_start_func_t *vdev_op_io_start;
vdev_io_done_func_t *vdev_op_io_done;
vdev_state_change_func_t *vdev_op_state_change;
+ vdev_need_resilver_func_t *vdev_op_need_resilver;
vdev_hold_func_t *vdev_op_hold;
vdev_rele_func_t *vdev_op_rele;
char vdev_op_type[16];
@@ -83,7 +90,7 @@ typedef const struct vdev_ops {
* Virtual device properties
*/
struct vdev_cache_entry {
- char *ve_data;
+ struct abd *ve_abd;
uint64_t ve_offset;
clock_t ve_lastused;
avl_node_t ve_offset_node;
@@ -120,6 +127,7 @@ struct vdev_queue {
hrtime_t vq_io_delta_ts;
zio_t vq_io_search; /* used as local for stack reduction */
kmutex_t vq_lock;
+ uint64_t vq_lastoffset;
};
/*
@@ -149,6 +157,7 @@ struct vdev {
vdev_t **vdev_child; /* array of children */
uint64_t vdev_children; /* number of children */
vdev_stat_t vdev_stat; /* virtual device statistics */
+ vdev_stat_ex_t vdev_stat_ex; /* extended statistics */
boolean_t vdev_expanding; /* expand the vdev? */
boolean_t vdev_reopening; /* reopen in progress? */
boolean_t vdev_nonrot; /* true if solid state */
@@ -175,7 +184,19 @@ struct vdev {
uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */
uint64_t vdev_islog; /* is an intent log device */
uint64_t vdev_removing; /* device is being removed? */
- boolean_t vdev_ishole; /* is a hole in the namespace */
+ boolean_t vdev_ishole; /* is a hole in the namespace */
+ kmutex_t vdev_queue_lock; /* protects vdev_queue_depth */
+ uint64_t vdev_top_zap;
+
+ /*
+ * The queue depth parameters determine how many async writes are
+ * still pending (i.e. allocated by net yet issued to disk) per
+ * top-level (vdev_async_write_queue_depth) and the maximum allowed
+ * (vdev_max_async_write_queue_depth). These values only apply to
+ * top-level vdevs.
+ */
+ uint64_t vdev_async_write_queue_depth;
+ uint64_t vdev_max_async_write_queue_depth;
/*
* Leaf vdev state.
@@ -195,6 +216,7 @@ struct vdev {
char *vdev_path; /* vdev path (if any) */
char *vdev_devid; /* vdev devid (if any) */
char *vdev_physpath; /* vdev device path (if any) */
+ char *vdev_enc_sysfs_path; /* enclosure sysfs path */
char *vdev_fru; /* physical FRU location */
uint64_t vdev_not_present; /* not present during import */
uint64_t vdev_unspare; /* unspare when resilvering done */
@@ -209,11 +231,14 @@ struct vdev {
boolean_t vdev_cant_write; /* vdev is failing all writes */
boolean_t vdev_isspare; /* was a hot spare */
boolean_t vdev_isl2cache; /* was a l2cache device */
+ boolean_t vdev_copy_uberblocks; /* post expand copy uberblocks */
vdev_queue_t vdev_queue; /* I/O deadline schedule queue */
vdev_cache_t vdev_cache; /* physical block cache */
spa_aux_vdev_t *vdev_aux; /* for l2cache and spares vdevs */
zio_t *vdev_probe_zio; /* root of current probe */
vdev_aux_t vdev_label_aux; /* on-disk aux state */
+ uint64_t vdev_leaf_zap;
+ hrtime_t vdev_mmp_pending; /* 0 if write finished */
/*
* For DTrace to work in userland (libzpool) context, these fields must
@@ -225,6 +250,15 @@ struct vdev {
kmutex_t vdev_dtl_lock; /* vdev_dtl_{map,resilver} */
kmutex_t vdev_stat_lock; /* vdev_stat */
kmutex_t vdev_probe_lock; /* protects vdev_probe_zio */
+
+ /*
+ * We rate limit ZIO delay and ZIO checksum events, since they
+ * can flood ZED with tons of events when a drive is acting up.
+ */
+#define DELAYS_PER_SECOND 5
+#define CHECKSUMS_PER_SECOND 5
+ zfs_ratelimit_t vdev_delay_rl;
+ zfs_ratelimit_t vdev_checksum_rl;
};
#define VDEV_RAIDZ_MAXPARITY 3
@@ -235,6 +269,12 @@ struct vdev {
#define VDEV_PHYS_SIZE (112 << 10)
#define VDEV_UBERBLOCK_RING (128 << 10)
+/*
+ * MMP blocks occupy the last MMP_BLOCKS_PER_LABEL slots in the uberblock
+ * ring when MMP is enabled.
+ */
+#define MMP_BLOCKS_PER_LABEL 1
+
/* The largest uberblock we support is 8k. */
#define MAX_UBERBLOCK_SHIFT (13)
#define VDEV_UBERBLOCK_SHIFT(vd) \
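
The two zfs_ratelimit_t fields above cap delay and checksum events at
DELAYS_PER_SECOND / CHECKSUMS_PER_SECOND so a misbehaving drive cannot
flood ZED. A self-contained userland sketch of the idea (not the kernel
zfs_ratelimit implementation, just its shape):

    #include <stdbool.h>
    #include <time.h>

    typedef struct {
        unsigned int burst;   /* events allowed per one-second interval */
        unsigned int count;   /* events seen in the current interval */
        time_t start;         /* start of the current interval */
    } ratelimit_t;

    /* Return true if the event may be emitted, false to drop it. */
    static bool
    ratelimit_allow(ratelimit_t *rl)
    {
        time_t now = time(NULL);

        if (now != rl->start) {   /* a new interval begins */
            rl->start = now;
            rl->count = 0;
        }
        return (rl->count++ < rl->burst);
    }
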
diff --git a/zfs/include/sys/vdev_raidz.h b/zfs/include/sys/vdev_raidz.h
new file mode 100644
index 000000000000..2ce32469d471
--- /dev/null
+++ b/zfs/include/sys/vdev_raidz.h
@@ -0,0 +1,64 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Neskovic <neskovic at compeng.uni-frankfurt.de>.
+ */
+
+#ifndef _SYS_VDEV_RAIDZ_H
+#define _SYS_VDEV_RAIDZ_H
+
+#include <sys/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct zio;
+struct raidz_map;
+#if !defined(_KERNEL)
+struct kernel_param {};
+#endif
+
+/*
+ * vdev_raidz interface
+ */
+struct raidz_map *vdev_raidz_map_alloc(struct zio *, uint64_t, uint64_t,
+ uint64_t);
+void vdev_raidz_map_free(struct raidz_map *);
+void vdev_raidz_generate_parity(struct raidz_map *);
+int vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
+
+/*
+ * vdev_raidz_math interface
+ */
+void vdev_raidz_math_init(void);
+void vdev_raidz_math_fini(void);
+struct raidz_impl_ops *vdev_raidz_math_get_ops(void);
+int vdev_raidz_math_generate(struct raidz_map *);
+int vdev_raidz_math_reconstruct(struct raidz_map *, const int *, const int *,
+ const int);
+int vdev_raidz_impl_set(const char *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_VDEV_RAIDZ_H */
diff --git a/zfs/include/sys/vdev_raidz_impl.h b/zfs/include/sys/vdev_raidz_impl.h
new file mode 100644
index 000000000000..4bd15e3d53c2
--- /dev/null
+++ b/zfs/include/sys/vdev_raidz_impl.h
@@ -0,0 +1,370 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#ifndef _VDEV_RAIDZ_H
+#define _VDEV_RAIDZ_H
+
+#include <sys/types.h>
+#include <sys/debug.h>
+#include <sys/kstat.h>
+#include <sys/abd.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CODE_P (0U)
+#define CODE_Q (1U)
+#define CODE_R (2U)
+
+#define PARITY_P (1U)
+#define PARITY_PQ (2U)
+#define PARITY_PQR (3U)
+
+#define TARGET_X (0U)
+#define TARGET_Y (1U)
+#define TARGET_Z (2U)
+
+/*
+ * Parity generation methods indexes
+ */
+enum raidz_math_gen_op {
+ RAIDZ_GEN_P = 0,
+ RAIDZ_GEN_PQ,
+ RAIDZ_GEN_PQR,
+ RAIDZ_GEN_NUM = 3
+};
+/*
+ * Data reconstruction methods indexes
+ */
+enum raidz_rec_op {
+ RAIDZ_REC_P = 0,
+ RAIDZ_REC_Q,
+ RAIDZ_REC_R,
+ RAIDZ_REC_PQ,
+ RAIDZ_REC_PR,
+ RAIDZ_REC_QR,
+ RAIDZ_REC_PQR,
+ RAIDZ_REC_NUM = 7
+};
+
+extern const char *raidz_gen_name[RAIDZ_GEN_NUM];
+extern const char *raidz_rec_name[RAIDZ_REC_NUM];
+
+/*
+ * Methods used to define raidz implementation
+ *
+ * @raidz_gen_f Parity generation function
+ * @par1 pointer to raidz_map
+ * @raidz_rec_f Data reconstruction function
+ * @par1 pointer to raidz_map
+ * @par2 array of reconstruction targets
+ * @will_work_f Function returns TRUE if impl. is supported on the system
+ * @init_impl_f Function is called once on init
+ * @fini_impl_f Function is called once on fini
+ */
+typedef void (*raidz_gen_f)(void *);
+typedef int (*raidz_rec_f)(void *, const int *);
+typedef boolean_t (*will_work_f)(void);
+typedef void (*init_impl_f)(void);
+typedef void (*fini_impl_f)(void);
+
+#define RAIDZ_IMPL_NAME_MAX (16)
+
+typedef struct raidz_impl_ops {
+ init_impl_f init;
+ fini_impl_f fini;
+ raidz_gen_f gen[RAIDZ_GEN_NUM]; /* Parity generate functions */
+ raidz_rec_f rec[RAIDZ_REC_NUM]; /* Data reconstruction functions */
+ will_work_f is_supported; /* Support check function */
+ char name[RAIDZ_IMPL_NAME_MAX]; /* Name of the implementation */
+} raidz_impl_ops_t;
+
+typedef struct raidz_col {
+ size_t rc_devidx; /* child device index for I/O */
+ size_t rc_offset; /* device offset */
+ size_t rc_size; /* I/O size */
+ abd_t *rc_abd; /* I/O data */
+ void *rc_gdata; /* used to store the "good" version */
+ int rc_error; /* I/O error for this device */
+ unsigned int rc_tried; /* Did we attempt this I/O column? */
+ unsigned int rc_skipped; /* Did we skip this I/O column? */
+} raidz_col_t;
+
+typedef struct raidz_map {
+ size_t rm_cols; /* Regular column count */
+ size_t rm_scols; /* Count including skipped columns */
+ size_t rm_bigcols; /* Number of oversized columns */
+ size_t rm_asize; /* Actual total I/O size */
+ size_t rm_missingdata; /* Count of missing data devices */
+ size_t rm_missingparity; /* Count of missing parity devices */
+ size_t rm_firstdatacol; /* First data column/parity count */
+ size_t rm_nskip; /* Skipped sectors for padding */
+ size_t rm_skipstart; /* Column index of padding start */
+ abd_t *rm_abd_copy; /* rm_asize-buffer of copied data */
+ size_t rm_reports; /* # of referencing checksum reports */
+ unsigned int rm_freed; /* map no longer has referencing ZIO */
+ unsigned int rm_ecksuminjected; /* checksum error was injected */
+ raidz_impl_ops_t *rm_ops; /* RAIDZ math operations */
+ raidz_col_t rm_col[1]; /* Flexible array of I/O columns */
+} raidz_map_t;
+
+#define RAIDZ_ORIGINAL_IMPL (INT_MAX)
+
+extern const raidz_impl_ops_t vdev_raidz_scalar_impl;
+#if defined(__x86_64) && defined(HAVE_SSE2) /* only x86_64 for now */
+extern const raidz_impl_ops_t vdev_raidz_sse2_impl;
+#endif
+#if defined(__x86_64) && defined(HAVE_SSSE3) /* only x86_64 for now */
+extern const raidz_impl_ops_t vdev_raidz_ssse3_impl;
+#endif
+#if defined(__x86_64) && defined(HAVE_AVX2) /* only x86_64 for now */
+extern const raidz_impl_ops_t vdev_raidz_avx2_impl;
+#endif
+#if defined(__x86_64) && defined(HAVE_AVX512F) /* only x86_64 for now */
+extern const raidz_impl_ops_t vdev_raidz_avx512f_impl;
+#endif
+#if defined(__x86_64) && defined(HAVE_AVX512BW) /* only x86_64 for now */
+extern const raidz_impl_ops_t vdev_raidz_avx512bw_impl;
+#endif
+#if defined(__aarch64__)
+extern const raidz_impl_ops_t vdev_raidz_aarch64_neon_impl;
+extern const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl;
+#endif
+
+/*
+ * Commonly used raidz_map helpers
+ *
+ * raidz_parity Returns parity of the RAIDZ block
+ * raidz_ncols Returns number of columns the block spans
+ * raidz_nbigcols Returns number of big (oversized) columns
+ * raidz_col_p Returns pointer to a column
+ * raidz_col_size Returns size of a column
+ * raidz_big_size Returns size of big columns
+ * raidz_short_size Returns size of short columns
+ */
+#define raidz_parity(rm) ((rm)->rm_firstdatacol)
+#define raidz_ncols(rm) ((rm)->rm_cols)
+#define raidz_nbigcols(rm) ((rm)->rm_bigcols)
+#define raidz_col_p(rm, c) ((rm)->rm_col + (c))
+#define raidz_col_size(rm, c) ((rm)->rm_col[c].rc_size)
+#define raidz_big_size(rm) (raidz_col_size(rm, CODE_P))
+#define raidz_short_size(rm) (raidz_col_size(rm, raidz_ncols(rm)-1))
+
+/*
+ * Macro defines a RAIDZ parity generation method
+ *
+ * @code parity code the function produces
+ * @impl name of the implementation
+ */
+#define _RAIDZ_GEN_WRAP(code, impl) \
+static void \
+impl ## _gen_ ## code(void *rmp) \
+{ \
+ raidz_map_t *rm = (raidz_map_t *)rmp; \
+ raidz_generate_## code ## _impl(rm); \
+}
+
+/*
+ * Macro defines a RAIDZ data reconstruction method
+ *
+ * @code parity code the function produces
+ * @impl name of the implementation
+ */
+#define _RAIDZ_REC_WRAP(code, impl) \
+static int \
+impl ## _rec_ ## code(void *rmp, const int *tgtidx) \
+{ \
+ raidz_map_t *rm = (raidz_map_t *)rmp; \
+ return (raidz_reconstruct_## code ## _impl(rm, tgtidx)); \
+}
+
+/*
+ * Define all gen methods for an implementation
+ *
+ * @impl name of the implementation
+ */
+#define DEFINE_GEN_METHODS(impl) \
+ _RAIDZ_GEN_WRAP(p, impl); \
+ _RAIDZ_GEN_WRAP(pq, impl); \
+ _RAIDZ_GEN_WRAP(pqr, impl)
+
+/*
+ * Define all rec functions for an implementation
+ *
+ * @impl name of the implementation
+ */
+#define DEFINE_REC_METHODS(impl) \
+ _RAIDZ_REC_WRAP(p, impl); \
+ _RAIDZ_REC_WRAP(q, impl); \
+ _RAIDZ_REC_WRAP(r, impl); \
+ _RAIDZ_REC_WRAP(pq, impl); \
+ _RAIDZ_REC_WRAP(pr, impl); \
+ _RAIDZ_REC_WRAP(qr, impl); \
+ _RAIDZ_REC_WRAP(pqr, impl)
+
+#define RAIDZ_GEN_METHODS(impl) \
+{ \
+ [RAIDZ_GEN_P] = & impl ## _gen_p, \
+ [RAIDZ_GEN_PQ] = & impl ## _gen_pq, \
+ [RAIDZ_GEN_PQR] = & impl ## _gen_pqr \
+}
+
+#define RAIDZ_REC_METHODS(impl) \
+{ \
+ [RAIDZ_REC_P] = & impl ## _rec_p, \
+ [RAIDZ_REC_Q] = & impl ## _rec_q, \
+ [RAIDZ_REC_R] = & impl ## _rec_r, \
+ [RAIDZ_REC_PQ] = & impl ## _rec_pq, \
+ [RAIDZ_REC_PR] = & impl ## _rec_pr, \
+ [RAIDZ_REC_QR] = & impl ## _rec_qr, \
+ [RAIDZ_REC_PQR] = & impl ## _rec_pqr \
+}
+
+
+typedef struct raidz_impl_kstat {
+ uint64_t gen[RAIDZ_GEN_NUM]; /* gen method speed B/s */
+ uint64_t rec[RAIDZ_REC_NUM]; /* rec method speed B/s */
+} raidz_impl_kstat_t;
+
+/*
+ * Enumerate various multiplication constants
+ * used in reconstruction methods
+ */
+typedef enum raidz_mul_info {
+ /* Reconstruct Q */
+ MUL_Q_X = 0,
+ /* Reconstruct R */
+ MUL_R_X = 0,
+ /* Reconstruct PQ */
+ MUL_PQ_X = 0,
+ MUL_PQ_Y = 1,
+ /* Reconstruct PR */
+ MUL_PR_X = 0,
+ MUL_PR_Y = 1,
+ /* Reconstruct QR */
+ MUL_QR_XQ = 0,
+ MUL_QR_X = 1,
+ MUL_QR_YQ = 2,
+ MUL_QR_Y = 3,
+ /* Reconstruct PQR */
+ MUL_PQR_XP = 0,
+ MUL_PQR_XQ = 1,
+ MUL_PQR_XR = 2,
+ MUL_PQR_YU = 3,
+ MUL_PQR_YP = 4,
+ MUL_PQR_YQ = 5,
+
+ MUL_CNT = 6
+} raidz_mul_info_t;
+
+/*
+ * Powers of 2 in the Galois field.
+ */
+extern const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256)));
+/* Logs of 2 in the Galois field defined above. */
+extern const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256)));
+
+/*
+ * Multiply a given number by 2 raised to the given power.
+ */
+static inline uint8_t
+vdev_raidz_exp2(const uint8_t a, const unsigned exp)
+{
+ if (a == 0)
+ return (0);
+
+ return (vdev_raidz_pow2[(exp + (unsigned)vdev_raidz_log2[a]) % 255]);
+}
+
+/*
+ * Galois Field operations.
+ *
+ * gf_exp2 - computes 2 raised to the given power
+ * gf_exp4 - computes 4 raised to the given power
+ * gf_mul - multiplication
+ * gf_div - division
+ * gf_inv - multiplicative inverse
+ */
+typedef unsigned gf_t;
+typedef unsigned gf_log_t;
+
+static inline gf_t
+gf_mul(const gf_t a, const gf_t b)
+{
+ gf_log_t logsum;
+
+ if (a == 0 || b == 0)
+ return (0);
+
+ logsum = (gf_log_t)vdev_raidz_log2[a] + (gf_log_t)vdev_raidz_log2[b];
+
+ return ((gf_t)vdev_raidz_pow2[logsum % 255]);
+}
+
+static inline gf_t
+gf_div(const gf_t a, const gf_t b)
+{
+ gf_log_t logsum;
+
+ ASSERT3U(b, >, 0);
+ if (a == 0)
+ return (0);
+
+ logsum = (gf_log_t)255 + (gf_log_t)vdev_raidz_log2[a] -
+ (gf_log_t)vdev_raidz_log2[b];
+
+ return ((gf_t)vdev_raidz_pow2[logsum % 255]);
+}
+
+static inline gf_t
+gf_inv(const gf_t a)
+{
+ gf_log_t logsum;
+
+ ASSERT3U(a, >, 0);
+
+ logsum = (gf_log_t)255 - (gf_log_t)vdev_raidz_log2[a];
+
+ return ((gf_t)vdev_raidz_pow2[logsum]);
+}
+
+static inline gf_t
+gf_exp2(gf_log_t exp)
+{
+ return (vdev_raidz_pow2[exp % 255]);
+}
+
+static inline gf_t
+gf_exp4(gf_log_t exp)
+{
+ ASSERT3U(exp, <=, 255);
+ return ((gf_t)vdev_raidz_pow2[(2 * exp) % 255]);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _VDEV_RAIDZ_H */
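
The gf_* helpers above do GF(2^8) arithmetic through the shared
vdev_raidz_pow2/vdev_raidz_log2 tables. A standalone sanity check of the
same arithmetic, regenerating the tables with the generator polynomial
x^8 + x^4 + x^3 + x^2 + 1 (0x11d) that RAID-Z is commonly documented to
use (treat the polynomial as an assumption here):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint8_t pow2[256], logt[256];

    static void
    gf_init(void)
    {
        unsigned v = 1;
        for (int i = 0; i < 255; i++) {
            pow2[i] = (uint8_t)v;
            logt[v] = (uint8_t)i;
            v <<= 1;
            if (v & 0x100)
                v ^= 0x11d;   /* reduce by x^8+x^4+x^3+x^2+1 */
        }
    }

    static uint8_t
    gf_mul(uint8_t a, uint8_t b)
    {
        if (a == 0 || b == 0)
            return (0);
        return (pow2[(logt[a] + logt[b]) % 255]);
    }

    static uint8_t
    gf_inv(uint8_t a)
    {
        return (pow2[(255 - logt[a]) % 255]);
    }

    int
    main(void)
    {
        gf_init();
        for (int a = 1; a < 256; a++)
            assert(gf_mul((uint8_t)a, gf_inv((uint8_t)a)) == 1);
        printf("GF(2^8) inverse round-trip OK\n");
        return (0);
    }
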
diff --git a/zfs/include/sys/xvattr.h b/zfs/include/sys/xvattr.h
index 53945d85b98b..4779b632163f 100644
--- a/zfs/include/sys/xvattr.h
+++ b/zfs/include/sys/xvattr.h
@@ -73,7 +73,7 @@ typedef struct xoptattr {
* - a 32 bit quantity (xva_mapsize) that specifies the size of the
* attribute bitmaps in 32 bit words.
* - A pointer to the returned attribute bitmap (needed because the
- * previous element, the requested attribute bitmap) is variable lenth.
+ * previous element, the requested attribute bitmap) is variable length.
* - The requested attribute bitmap, which is an array of 32 bit words.
* Callers use the XVA_SET_REQ() macro to set the bits corresponding to
* the attributes that are being requested.
@@ -97,7 +97,7 @@ typedef struct xoptattr {
* attributes to be requested/returned. File systems may or may not support
* optional attributes. They do so at their own discretion but if they do
* support optional attributes, they must register the VFSFT_XVATTR feature
- * so that the optional attributes can be set/retrived.
+ * so that the optional attributes can be set/retrieved.
*
* The fields of the xvattr structure are:
*
diff --git a/zfs/include/sys/zap.h b/zfs/include/sys/zap.h
index ed60b86dbd71..43b7fbd263c2 100644
--- a/zfs/include/sys/zap.h
+++ b/zfs/include/sys/zap.h
@@ -18,9 +18,11 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
*/
#ifndef _SYS_ZAP_H
@@ -87,22 +89,15 @@ extern "C" {
/*
* Specifies matching criteria for ZAP lookups.
+ * MT_NORMALIZE Use ZAP normalization flags, which can include both
+ * unicode normalization and case-insensitivity.
+ * MT_MATCH_CASE Do case-sensitive lookups even if MT_NORMALIZE is
+ * specified and ZAP normalization flags include
+ * U8_TEXTPREP_TOUPPER.
*/
-typedef enum matchtype
-{
- /* Only find an exact match (non-normalized) */
- MT_EXACT,
- /*
- * If there is an exact match, find that, otherwise find the
- * first normalized match.
- */
- MT_BEST,
- /*
- * Find the "first" normalized (case and Unicode form) match;
- * the designated "first" match will not change as long as the
- * set of entries with this normalization doesn't change.
- */
- MT_FIRST
+typedef enum matchtype {
+ MT_NORMALIZE = 1 << 0,
+ MT_MATCH_CASE = 1 << 1,
} matchtype_t;
typedef enum zap_flags {
@@ -119,26 +114,27 @@ typedef enum zap_flags {
/*
* Create a new zapobj with no attributes and return its object number.
- * MT_EXACT will cause the zap object to only support MT_EXACT lookups,
- * otherwise any matchtype can be used for lookups.
- *
- * normflags specifies what normalization will be done. values are:
- * 0: no normalization (legacy on-disk format, supports MT_EXACT matching
- * only)
- * U8_TEXTPREP_TOLOWER: case normalization will be performed.
- * MT_FIRST/MT_BEST matching will find entries that match without
- * regard to case (eg. looking for "foo" can find an entry "Foo").
- * Eventually, other flags will permit unicode normalization as well.
*/
uint64_t zap_create(objset_t *ds, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+uint64_t zap_create_dnsize(objset_t *ds, dmu_object_type_t ot,
+ dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+uint64_t zap_create_norm_dnsize(objset_t *ds, int normflags,
+ dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen,
+ int dnodesize, dmu_tx_t *tx);
uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+uint64_t zap_create_flags_dnsize(objset_t *os, int normflags,
+ zap_flags_t flags, dmu_object_type_t ot, int leaf_blockshift,
+ int indirect_blockshift, dmu_object_type_t bonustype, int bonuslen,
+ int dnodesize, dmu_tx_t *tx);
uint64_t zap_create_link(objset_t *os, dmu_object_type_t ot,
uint64_t parent_obj, const char *name, dmu_tx_t *tx);
+uint64_t zap_create_link_dnsize(objset_t *os, dmu_object_type_t ot,
+ uint64_t parent_obj, const char *name, int dnodesize, dmu_tx_t *tx);
/*
* Initialize an already-allocated object.
@@ -152,9 +148,14 @@ void mzap_create_impl(objset_t *os, uint64_t obj, int normflags,
*/
int zap_create_claim(objset_t *ds, uint64_t obj, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+int zap_create_claim_dnsize(objset_t *ds, uint64_t obj, dmu_object_type_t ot,
+ dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
int zap_create_claim_norm(objset_t *ds, uint64_t obj,
int normflags, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
+int zap_create_claim_norm_dnsize(objset_t *ds, uint64_t obj,
+ int normflags, dmu_object_type_t ot,
+ dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx);
/*
* The zapobj passed in must be a valid ZAP object for all of the
@@ -217,8 +218,15 @@ int zap_prefetch(objset_t *os, uint64_t zapobj, const char *name);
int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints);
-int zap_count_write(objset_t *os, uint64_t zapobj, const char *name,
- int add, uint64_t *towrite, uint64_t *tooverwrite);
+int zap_lookup_by_dnode(dnode_t *dn, const char *name,
+ uint64_t integer_size, uint64_t num_integers, void *buf);
+int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
+ uint64_t integer_size, uint64_t num_integers, void *buf,
+ matchtype_t mt, char *realname, int rn_len,
+ boolean_t *ncp);
+
+int zap_count_write_by_dnode(dnode_t *dn, const char *name,
+ int add, refcount_t *towrite, refcount_t *tooverwrite);
/*
* Create an attribute with the given name and value.
@@ -229,6 +237,9 @@ int zap_count_write(objset_t *os, uint64_t zapobj, const char *name,
int zap_add(objset_t *ds, uint64_t zapobj, const char *key,
int integer_size, uint64_t num_integers,
const void *val, dmu_tx_t *tx);
+int zap_add_by_dnode(dnode_t *dn, const char *key,
+ int integer_size, uint64_t num_integers,
+ const void *val, dmu_tx_t *tx);
int zap_add_uint64(objset_t *ds, uint64_t zapobj, const uint64_t *key,
int key_numints, int integer_size, uint64_t num_integers,
const void *val, dmu_tx_t *tx);
@@ -268,6 +279,7 @@ int zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int zap_remove(objset_t *ds, uint64_t zapobj, const char *name, dmu_tx_t *tx);
int zap_remove_norm(objset_t *ds, uint64_t zapobj, const char *name,
matchtype_t mt, dmu_tx_t *tx);
+int zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx);
int zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int key_numints, dmu_tx_t *tx);
@@ -344,7 +356,7 @@ typedef struct {
boolean_t za_normalization_conflict;
uint64_t za_num_integers;
uint64_t za_first_integer; /* no sign extension for <8byte ints */
- char za_name[MAXNAMELEN];
+ char za_name[ZAP_MAXNAMELEN];
} zap_attribute_t;
/*
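
With matchtype_t now a flag set, former MT_EXACT callers pass 0 and
MT_FIRST/MT_BEST callers pass MT_NORMALIZE, optionally OR-ing in
MT_MATCH_CASE. An illustrative lookup (buffer names hypothetical):

    char real[ZAP_MAXNAMELEN];
    boolean_t conflict;
    uint64_t value;
    int error;

    /* match under the ZAP's normalization flags (case/Unicode) */
    error = zap_lookup_norm(os, zapobj, "Foo", sizeof (uint64_t), 1,
        &value, MT_NORMALIZE, real, sizeof (real), &conflict);

    /* normalized, but still case-sensitive */
    error = zap_lookup_norm(os, zapobj, "Foo", sizeof (uint64_t), 1,
        &value, MT_NORMALIZE | MT_MATCH_CASE, real, sizeof (real),
        &conflict);
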
diff --git a/zfs/include/sys/zap_impl.h b/zfs/include/sys/zap_impl.h
index bfd43e31da80..250dde3ce235 100644
--- a/zfs/include/sys/zap_impl.h
+++ b/zfs/include/sys/zap_impl.h
@@ -18,9 +18,12 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
*/
#ifndef _SYS_ZAP_IMPL_H
@@ -187,6 +190,7 @@ typedef struct zap_name {
int zn_key_norm_numints;
uint64_t zn_hash;
matchtype_t zn_matchtype;
+ int zn_normflags;
char zn_normbuf[ZAP_MAXNAMELEN];
} zap_name_t;
@@ -195,9 +199,9 @@ typedef struct zap_name {
boolean_t zap_match(zap_name_t *zn, const char *matchname);
int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
- krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp);
-void zap_unlockdir(zap_t *zap);
-void zap_evict(void *dbu);
+ krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp);
+void zap_unlockdir(zap_t *zap, void *tag);
+void zap_evict_sync(void *dbu);
zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt);
void zap_name_free(zap_name_t *zn);
int zap_hashbits(zap_t *zap);
@@ -212,12 +216,11 @@ int fzap_lookup(zap_name_t *zn,
uint64_t integer_size, uint64_t num_integers, void *buf,
char *realname, int rn_len, boolean_t *normalization_conflictp);
void fzap_prefetch(zap_name_t *zn);
-int fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite,
- uint64_t *tooverwrite);
int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
- const void *val, dmu_tx_t *tx);
+ const void *val, void *tag, dmu_tx_t *tx);
int fzap_update(zap_name_t *zn,
- int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
+ int integer_size, uint64_t num_integers, const void *val,
+ void *tag, dmu_tx_t *tx);
int fzap_length(zap_name_t *zn,
uint64_t *integer_size, uint64_t *num_integers);
int fzap_remove(zap_name_t *zn, dmu_tx_t *tx);
@@ -227,7 +230,7 @@ void zap_put_leaf(struct zap_leaf *l);
int fzap_add_cd(zap_name_t *zn,
uint64_t integer_size, uint64_t num_integers,
- const void *val, uint32_t cd, dmu_tx_t *tx);
+ const void *val, uint32_t cd, void *tag, dmu_tx_t *tx);
void fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags);
#ifdef __cplusplus
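
zap_lockdir() and zap_unlockdir() now carry a hold tag so directory
references can be tracked; callers pass the same tag to both. Sketch of
the expected pairing (FTAG is the usual ZFS hold tag; error handling
abbreviated):

    zap_t *zap;
    int err;

    err = zap_lockdir(os, zapobj, tx, RW_READER,
        B_TRUE, B_FALSE, FTAG, &zap);
    if (err != 0)
        return (err);
    /* ... operate on the locked ZAP object ... */
    zap_unlockdir(zap, FTAG);
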
diff --git a/zfs/include/sys/zfs_acl.h b/zfs/include/sys/zfs_acl.h
index 2c51f096e538..2572fee86306 100644
--- a/zfs/include/sys/zfs_acl.h
+++ b/zfs/include/sys/zfs_acl.h
@@ -202,13 +202,13 @@ typedef struct zfs_acl_ids {
#define ZFS_ACL_PASSTHROUGH_X 5
struct znode;
-struct zfs_sb;
+struct zfsvfs;
#ifdef _KERNEL
int zfs_acl_ids_create(struct znode *, int, vattr_t *,
cred_t *, vsecattr_t *, zfs_acl_ids_t *);
void zfs_acl_ids_free(zfs_acl_ids_t *);
-boolean_t zfs_acl_ids_overquota(struct zfs_sb *, zfs_acl_ids_t *);
+boolean_t zfs_acl_ids_overquota(struct zfsvfs *, zfs_acl_ids_t *);
int zfs_getacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
int zfs_setacl(struct znode *, vsecattr_t *, boolean_t, cred_t *);
void zfs_acl_rele(void *);
@@ -225,7 +225,7 @@ int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *);
int zfs_zaccess_rename(struct znode *, struct znode *,
struct znode *, struct znode *, cred_t *cr);
void zfs_acl_free(zfs_acl_t *);
-int zfs_vsec_2_aclp(struct zfs_sb *, umode_t, vsecattr_t *, cred_t *,
+int zfs_vsec_2_aclp(struct zfsvfs *, umode_t, vsecattr_t *, cred_t *,
struct zfs_fuid_info **, zfs_acl_t **);
int zfs_aclset_common(struct znode *, zfs_acl_t *, cred_t *, dmu_tx_t *);
uint64_t zfs_external_acl(struct znode *);
diff --git a/zfs/include/sys/zfs_context.h b/zfs/include/sys/zfs_context.h
index 4f7e3287f3da..4fe35342dee7 100644
--- a/zfs/include/sys/zfs_context.h
+++ b/zfs/include/sys/zfs_context.h
@@ -19,13 +19,10 @@
* CDDL HEADER END
*/
/*
- * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-/*
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZFS_CONTEXT_H
@@ -62,9 +59,11 @@
#include <vm/seg_kmem.h>
#include <sys/zone.h>
#include <sys/sdt.h>
+#include <sys/kstat.h>
#include <sys/zfs_debug.h>
+#include <sys/sysevent.h>
+#include <sys/sysevent/eventdefs.h>
#include <sys/zfs_delay.h>
-#include <sys/fm/fs/zfs.h>
#include <sys/sunddi.h>
#include <sys/ctype.h>
#include <sys/disp.h>
@@ -118,7 +117,8 @@
#include <sys/sdt.h>
#include <sys/kstat.h>
#include <sys/u8_textprep.h>
-#include <sys/fm/fs/zfs.h>
+#include <sys/sysevent.h>
+#include <sys/sysevent/eventdefs.h>
#include <sys/sunddi.h>
#include <sys/debug.h>
#include <sys/utsname.h>
@@ -128,6 +128,7 @@
*/
#define noinline __attribute__((noinline))
+#define likely(x) __builtin_expect((x), 1)
/*
* Debugging
@@ -160,8 +161,18 @@ extern int aok;
/*
* DTrace SDT probes have different signatures in userland than they do in
- * kernel. If they're being used in kernel code, re-define them out of
+ * the kernel. If they're being used in kernel code, re-define them out of
* existence for their counterparts in libzpool.
+ *
+ * Here's an example of how to use the set-error probes in userland:
+ * zfs$target:::set-error /arg0 == EBUSY/ {stack();}
+ *
+ * Here's an example of how to use DTRACE_PROBE probes in userland:
+ * If there is a probe declared as follows:
+ * DTRACE_PROBE2(zfs__probe_name, uint64_t, blkid, dnode_t *, dn);
+ * Then you can use it as follows:
+ * zfs$target:::probe2 /copyinstr(arg0) == "zfs__probe_name"/
+ * {printf("%u %p\n", arg1, arg2);}
*/
#ifdef DTRACE_PROBE
@@ -195,15 +206,6 @@ extern int aok;
ZFS_PROBE4(#a, (unsigned long)c, (unsigned long)e, (unsigned long)g, \
(unsigned long)i)
-/*
- * We use the comma operator so that this macro can be used without much
- * additional code. For example, "return (EINVAL);" becomes
- * "return (SET_ERROR(EINVAL));". Note that the argument will be evaluated
- * twice, so it should not have side effects (e.g. something like:
- * "return (SET_ERROR(log_error(EINVAL, info)));" would log the error twice).
- */
-#define SET_ERROR(err) (ZFS_SET_ERROR(err), err)
-
/*
* Threads. TS_STACK_MIN is dictated by the minimum allowed pthread stack
* size. While TS_STACK_MAX is somewhat arbitrary, it was selected to be
@@ -212,7 +214,7 @@ extern int aok;
*/
#define TS_MAGIC 0x72f158ab4261e538ull
#define TS_RUN 0x00000002
-#define TS_STACK_MIN PTHREAD_STACK_MIN
+#define TS_STACK_MIN MAX(PTHREAD_STACK_MIN, 32768)
#define TS_STACK_MAX (256 * 1024)
/* in libzpool, p0 exists only to have its address taken */
@@ -248,7 +250,7 @@ typedef struct kthread {
extern kthread_t *zk_thread_current(void);
extern void zk_thread_exit(void);
extern kthread_t *zk_thread_create(caddr_t stk, size_t stksize,
- thread_func_t func, void *arg, size_t len,
+ thread_func_t func, void *arg, uint64_t len,
proc_t *pp, int state, pri_t pri, int detachstate);
extern void zk_thread_join(kt_did_t tid);
@@ -274,6 +276,7 @@ typedef struct kmutex {
} kmutex_t;
#define MUTEX_DEFAULT 0
+#define MUTEX_NOLOCKDEP MUTEX_DEFAULT
#define MUTEX_HELD(m) ((m)->m_owner == curthread)
#define MUTEX_NOT_HELD(m) (!MUTEX_HELD(m))
@@ -305,6 +308,7 @@ typedef int krw_t;
#define RW_READER 0
#define RW_WRITER 1
#define RW_DEFAULT RW_READER
+#define RW_NOLOCKDEP RW_READER
#define RW_READ_HELD(x) ((x)->rw_readers > 0)
#define RW_WRITE_HELD(x) ((x)->rw_wr_owner == curthread)
@@ -341,6 +345,7 @@ typedef struct kcondvar {
} kcondvar_t;
#define CV_DEFAULT 0
+#define CALLOUT_FLAG_ABSOLUTE 0x2
extern void cv_init(kcondvar_t *cv, char *name, int type, void *arg);
extern void cv_destroy(kcondvar_t *cv);
@@ -353,6 +358,8 @@ extern void cv_broadcast(kcondvar_t *cv);
#define cv_timedwait_sig(cv, mp, at) cv_timedwait(cv, mp, at)
#define cv_wait_sig(cv, mp) cv_wait(cv, mp)
#define cv_wait_io(cv, mp) cv_wait(cv, mp)
+#define cv_timedwait_sig_hires(cv, mp, t, r, f) \
+ cv_timedwait_hires(cv, mp, t, r, f)
/*
* Thread-specific data
@@ -375,6 +382,7 @@ extern void cv_broadcast(kcondvar_t *cv);
*/
extern kstat_t *kstat_create(const char *, int,
const char *, const char *, uchar_t, ulong_t, uchar_t);
+extern void kstat_named_init(kstat_named_t *, const char *, uchar_t);
extern void kstat_install(kstat_t *);
extern void kstat_delete(kstat_t *);
extern void kstat_waitq_enter(kstat_io_t *);
@@ -430,7 +438,9 @@ typedef enum kmem_cbrc {
/*
* Task queues
*/
-typedef struct taskq taskq_t;
+
+#define TASKQ_NAMELEN 31
+
typedef uintptr_t taskqid_t;
typedef void (task_func_t)(void *);
@@ -442,6 +452,25 @@ typedef struct taskq_ent {
uintptr_t tqent_flags;
} taskq_ent_t;
+typedef struct taskq {
+ char tq_name[TASKQ_NAMELEN + 1];
+ kmutex_t tq_lock;
+ krwlock_t tq_threadlock;
+ kcondvar_t tq_dispatch_cv;
+ kcondvar_t tq_wait_cv;
+ kthread_t **tq_threadlist;
+ int tq_flags;
+ int tq_active;
+ int tq_nthreads;
+ int tq_nalloc;
+ int tq_minalloc;
+ int tq_maxalloc;
+ kcondvar_t tq_maxalloc_cv;
+ int tq_maxalloc_wait;
+ taskq_ent_t *tq_freelist;
+ taskq_ent_t tq_task;
+} taskq_t;
+
#define TQENT_FLAG_PREALLOC 0x1 /* taskq_dispatch_ent used */
#define TASKQ_PREPOPULATE 0x0001
@@ -455,7 +484,10 @@ typedef struct taskq_ent {
#define TQ_NOQUEUE 0x02 /* Do not enqueue if can't dispatch */
#define TQ_FRONT 0x08 /* Queue in front */
+#define TASKQID_INVALID ((taskqid_t)0)
+
extern taskq_t *system_taskq;
+extern taskq_t *system_delay_taskq;
extern taskq_t *taskq_create(const char *, int, pri_t, int, int, uint_t);
#define taskq_create_proc(a, b, c, d, e, p, f) \
@@ -488,8 +520,10 @@ typedef struct vnode {
uint64_t v_size;
int v_fd;
char *v_path;
+ int v_dump_fd;
} vnode_t;
+extern char *vn_dumpdir;
#define AV_SCANSTAMP_SZ 32 /* length of anti-virus scanstamp */
typedef struct xoptattr {
@@ -623,7 +657,7 @@ extern void delay(clock_t ticks);
#define maxclsyspri -20
#define defclsyspri 0
-#define CPU_SEQID (pthread_self() & (max_ncpus - 1))
+#define CPU_SEQID ((uintptr_t)pthread_self() & (max_ncpus - 1))
#define kcred NULL
#define CRED() NULL
@@ -633,15 +667,23 @@ extern void delay(clock_t ticks);
extern uint64_t physmem;
extern int highbit64(uint64_t i);
+extern int lowbit64(uint64_t i);
+extern int highbit(ulong_t i);
+extern int lowbit(ulong_t i);
extern int random_get_bytes(uint8_t *ptr, size_t len);
extern int random_get_pseudo_bytes(uint8_t *ptr, size_t len);
extern void kernel_init(int);
extern void kernel_fini(void);
+extern void thread_init(void);
+extern void thread_fini(void);
+extern void random_init(void);
+extern void random_fini(void);
struct spa;
extern void nicenum(uint64_t num, char *buf);
extern void show_pool_stats(struct spa *);
+extern int set_global_var(char *arg);
typedef struct callb_cpr {
kmutex_t *cc_lockp;
@@ -718,6 +760,7 @@ extern int zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr);
extern int zfs_secpolicy_rename_perms(const char *from, const char *to,
cred_t *cr);
extern int zfs_secpolicy_destroy_perms(const char *name, cred_t *cr);
+extern int secpolicy_zfs(const cred_t *cr);
extern zoneid_t getzoneid(void);
/* SID stuff */
@@ -747,7 +790,7 @@ typedef int fstrans_cookie_t;
extern fstrans_cookie_t spl_fstrans_mark(void);
extern void spl_fstrans_unmark(fstrans_cookie_t);
-extern int spl_fstrans_check(void);
+extern int __spl_pf_fstrans_check(void);
#endif /* _KERNEL */
#endif /* _SYS_ZFS_CONTEXT_H */
diff --git a/zfs/include/sys/zfs_ctldir.h b/zfs/include/sys/zfs_ctldir.h
index 65c44f3cca54..51933bc4fe47 100644
--- a/zfs/include/sys/zfs_ctldir.h
+++ b/zfs/include/sys/zfs_ctldir.h
@@ -50,9 +50,9 @@
extern int zfs_expire_snapshot;
/* zfsctl generic functions */
-extern int zfsctl_create(zfs_sb_t *zsb);
-extern void zfsctl_destroy(zfs_sb_t *zsb);
-extern struct inode *zfsctl_root(znode_t *zp);
+extern int zfsctl_create(zfsvfs_t *);
+extern void zfsctl_destroy(zfsvfs_t *);
+extern struct inode *zfsctl_root(znode_t *);
extern void zfsctl_init(void);
extern void zfsctl_fini(void);
extern boolean_t zfsctl_is_node(struct inode *ip);
@@ -74,13 +74,12 @@ extern int zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr,
int flags);
extern int zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap,
struct inode **ipp, cred_t *cr, int flags);
-extern void zfsctl_snapdir_inactive(struct inode *ip);
extern int zfsctl_snapshot_mount(struct path *path, int flags);
extern int zfsctl_snapshot_unmount(char *snapname, int flags);
extern int zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid,
int delay);
-extern int zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid,
- zfs_sb_t **zsb);
+extern int zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid,
+ int gen, struct inode **ipp);
/* zfsctl '.zfs/shares' functions */
extern int zfsctl_shares_lookup(struct inode *dip, char *name,
diff --git a/zfs/include/sys/zfs_debug.h b/zfs/include/sys/zfs_debug.h
index 2f0064ee045b..226aaa2b84ab 100644
--- a/zfs/include/sys/zfs_debug.h
+++ b/zfs/include/sys/zfs_debug.h
@@ -42,14 +42,16 @@ extern int zfs_flags;
extern int zfs_recover;
extern int zfs_free_leak_on_eio;
-#define ZFS_DEBUG_DPRINTF (1<<0)
-#define ZFS_DEBUG_DBUF_VERIFY (1<<1)
-#define ZFS_DEBUG_DNODE_VERIFY (1<<2)
-#define ZFS_DEBUG_SNAPNAMES (1<<3)
-#define ZFS_DEBUG_MODIFY (1<<4)
-#define ZFS_DEBUG_SPA (1<<5)
-#define ZFS_DEBUG_ZIO_FREE (1<<6)
-#define ZFS_DEBUG_HISTOGRAM_VERIFY (1<<7)
+#define ZFS_DEBUG_DPRINTF (1 << 0)
+#define ZFS_DEBUG_DBUF_VERIFY (1 << 1)
+#define ZFS_DEBUG_DNODE_VERIFY (1 << 2)
+#define ZFS_DEBUG_SNAPNAMES (1 << 3)
+#define ZFS_DEBUG_MODIFY (1 << 4)
+#define ZFS_DEBUG_SPA (1 << 5)
+#define ZFS_DEBUG_ZIO_FREE (1 << 6)
+#define ZFS_DEBUG_HISTOGRAM_VERIFY (1 << 7)
+#define ZFS_DEBUG_METASLAB_VERIFY (1 << 8)
+#define ZFS_DEBUG_SET_ERROR (1 << 9)
extern void __dprintf(const char *file, const char *func,
int line, const char *fmt, ...);
@@ -72,6 +74,7 @@ extern void zfs_dbgmsg_fini(void);
#ifndef _KERNEL
extern int dprintf_find_string(const char *string);
+extern void zfs_dbgmsg_print(const char *tag);
#endif
#ifdef __cplusplus
diff --git a/zfs/include/sys/zfs_dir.h b/zfs/include/sys/zfs_dir.h
index 8610fbe0861d..9ce3accfce70 100644
--- a/zfs/include/sys/zfs_dir.h
+++ b/zfs/include/sys/zfs_dir.h
@@ -47,6 +47,7 @@ extern "C" {
/* mknode flags */
#define IS_ROOT_NODE 0x01 /* create a root node */
#define IS_XATTR 0x02 /* create an extended attribute node */
+#define IS_TMPFILE 0x04 /* create a tmpfile */
extern int zfs_dirent_lock(zfs_dirlock_t **, znode_t *, char *, znode_t **,
int, int *, pathname_t *);
@@ -62,7 +63,7 @@ extern void zfs_rmnode(znode_t *);
extern void zfs_dl_name_switch(zfs_dirlock_t *dl, char *new, char **old);
extern boolean_t zfs_dirempty(znode_t *);
extern void zfs_unlinked_add(znode_t *, dmu_tx_t *);
-extern void zfs_unlinked_drain(zfs_sb_t *);
+extern void zfs_unlinked_drain(zfsvfs_t *zfsvfs);
extern int zfs_sticky_remove_access(znode_t *, znode_t *, cred_t *cr);
extern int zfs_get_xattrdir(znode_t *, struct inode **, cred_t *, int);
extern int zfs_make_xattrdir(znode_t *, vattr_t *, struct inode **, cred_t *);
diff --git a/zfs/include/sys/zfs_fuid.h b/zfs/include/sys/zfs_fuid.h
index deaebcc82bd0..0feb3ce4bb7c 100644
--- a/zfs/include/sys/zfs_fuid.h
+++ b/zfs/include/sys/zfs_fuid.h
@@ -33,7 +33,6 @@
#include <sys/zfs_vfsops.h>
#endif
#include <sys/avl.h>
-#include <sys/list.h>
#ifdef __cplusplus
extern "C" {
@@ -100,24 +99,24 @@ typedef struct zfs_fuid_info {
#ifdef _KERNEL
struct znode;
-extern uid_t zfs_fuid_map_id(zfs_sb_t *, uint64_t, cred_t *, zfs_fuid_type_t);
+extern uid_t zfs_fuid_map_id(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t);
extern void zfs_fuid_node_add(zfs_fuid_info_t **, const char *, uint32_t,
uint64_t, uint64_t, zfs_fuid_type_t);
-extern void zfs_fuid_destroy(zfs_sb_t *);
-extern uint64_t zfs_fuid_create_cred(zfs_sb_t *, zfs_fuid_type_t,
+extern void zfs_fuid_destroy(zfsvfs_t *);
+extern uint64_t zfs_fuid_create_cred(zfsvfs_t *, zfs_fuid_type_t,
cred_t *, zfs_fuid_info_t **);
-extern uint64_t zfs_fuid_create(zfs_sb_t *, uint64_t, cred_t *, zfs_fuid_type_t,
+extern uint64_t zfs_fuid_create(zfsvfs_t *, uint64_t, cred_t *, zfs_fuid_type_t,
zfs_fuid_info_t **);
extern void zfs_fuid_map_ids(struct znode *zp, cred_t *cr,
uid_t *uid, uid_t *gid);
extern zfs_fuid_info_t *zfs_fuid_info_alloc(void);
extern void zfs_fuid_info_free(zfs_fuid_info_t *);
-extern boolean_t zfs_groupmember(zfs_sb_t *, uint64_t, cred_t *);
-void zfs_fuid_sync(zfs_sb_t *, dmu_tx_t *);
-extern int zfs_fuid_find_by_domain(zfs_sb_t *, const char *domain,
+extern boolean_t zfs_groupmember(zfsvfs_t *, uint64_t, cred_t *);
+void zfs_fuid_sync(zfsvfs_t *, dmu_tx_t *);
+extern int zfs_fuid_find_by_domain(zfsvfs_t *, const char *domain,
char **retdomain, boolean_t addok);
-extern const char *zfs_fuid_find_by_idx(zfs_sb_t *zsb, uint32_t idx);
-extern void zfs_fuid_txhold(zfs_sb_t *zsb, dmu_tx_t *tx);
+extern const char *zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx);
+extern void zfs_fuid_txhold(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
#endif
char *zfs_fuid_idx_domain(avl_tree_t *, uint32_t);
diff --git a/zfs/include/sys/zfs_ioctl.h b/zfs/include/sys/zfs_ioctl.h
index 09a96c043bf0..c68b8770b556 100644
--- a/zfs/include/sys/zfs_ioctl.h
+++ b/zfs/include/sys/zfs_ioctl.h
@@ -20,7 +20,9 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright 2016 RackTop Systems.
+ * Copyright (c) 2017, Intel Corporation.
*/
#ifndef _SYS_ZFS_IOCTL_H
@@ -90,26 +92,35 @@ typedef enum drr_headertype {
* Feature flags for zfs send streams (flags in drr_versioninfo)
*/
-#define DMU_BACKUP_FEATURE_DEDUP (1<<0)
-#define DMU_BACKUP_FEATURE_DEDUPPROPS (1<<1)
-#define DMU_BACKUP_FEATURE_SA_SPILL (1<<2)
+#define DMU_BACKUP_FEATURE_DEDUP (1 << 0)
+#define DMU_BACKUP_FEATURE_DEDUPPROPS (1 << 1)
+#define DMU_BACKUP_FEATURE_SA_SPILL (1 << 2)
/* flags #3 - #15 are reserved for incompatible closed-source implementations */
-#define DMU_BACKUP_FEATURE_EMBED_DATA (1<<16)
-#define DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 (1<<17)
+#define DMU_BACKUP_FEATURE_EMBED_DATA (1 << 16)
+#define DMU_BACKUP_FEATURE_LZ4 (1 << 17)
/* flag #18 is reserved for a Delphix feature */
-#define DMU_BACKUP_FEATURE_LARGE_BLOCKS (1<<19)
+#define DMU_BACKUP_FEATURE_LARGE_BLOCKS (1 << 19)
+#define DMU_BACKUP_FEATURE_RESUMING (1 << 20)
+/* flag #21 is reserved for a Delphix feature */
+#define DMU_BACKUP_FEATURE_COMPRESSED (1 << 22)
+#define DMU_BACKUP_FEATURE_LARGE_DNODE (1 << 23)
/*
* Mask of all supported backup features
*/
#define DMU_BACKUP_FEATURE_MASK (DMU_BACKUP_FEATURE_DEDUP | \
DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL | \
- DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 | \
- DMU_BACKUP_FEATURE_LARGE_BLOCKS)
+ DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_LZ4 | \
+ DMU_BACKUP_FEATURE_RESUMING | DMU_BACKUP_FEATURE_LARGE_BLOCKS | \
+ DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE)
/* Are all features in the given flag word currently supported? */
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))
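A hedged sketch of how DMU_STREAM_SUPPORTED() is meant to be used on the receive path; DMU_GET_FEATUREFLAGS() and SET_ERROR() are assumed to be available from this header and the debug headers respectively:

    static int
    check_stream_features(const struct drr_begin *drrb)
    {
            uint64_t featureflags =
                DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);

            /* reject streams carrying bits outside the supported mask */
            if (!DMU_STREAM_SUPPORTED(featureflags))
                    return (SET_ERROR(ENOTSUP));
            return (0);
    }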
+typedef enum dmu_send_resume_token_version {
+ ZFS_SEND_RESUME_TOKEN_VERSION = 1
+} dmu_send_resume_token_version_t;
+
/*
* The drr_versioninfo field of the dmu_replay_record has the
* following layout:
@@ -129,8 +140,22 @@ typedef enum drr_headertype {
#define DMU_BACKUP_MAGIC 0x2F5bacbacULL
+/*
+ * Send stream flags. Bits 24-31 are reserved for vendor-specific
+ * implementations and should not be used.
+ */
#define DRR_FLAG_CLONE (1<<0)
#define DRR_FLAG_CI_DATA (1<<1)
+/*
+ * This send stream, if it is a full send, includes the FREE and FREEOBJECT
+ * records that are created by the sending process. This means that the send
+ * stream can be received as a clone, even though it is not an incremental.
+ * This is not implemented as a feature flag, because the receiving side does
+ * not need to have implemented it to receive this stream; it is fully backwards
+ * compatible. We need a flag, though, because full send streams without it
+ * cannot necessarily be received as a clone correctly.
+ */
+#define DRR_FLAG_FREERECORDS (1<<2)
/*
* flags in the drr_checksumflags field in the DRR_WRITE and
@@ -140,6 +165,12 @@ typedef enum drr_headertype {
#define DRR_IS_DEDUP_CAPABLE(flags) ((flags) & DRR_CHECKSUM_DEDUP)
+/* deal with compressed drr_write replay records */
+#define DRR_WRITE_COMPRESSED(drrw) ((drrw)->drr_compressiontype != 0)
+#define DRR_WRITE_PAYLOAD_SIZE(drrw) \
+ (DRR_WRITE_COMPRESSED(drrw) ? (drrw)->drr_compressed_size : \
+ (drrw)->drr_logical_size)
+
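To make the new compressed-write macros concrete, a hypothetical receive-side helper (receive_read() is an illustrative name, not a function from this patch):

    static int
    read_write_payload(void *stream, struct drr_write *drrw, void *buf)
    {
            /*
             * Compressed records carry drr_compressed_size bytes on
             * the wire; uncompressed records carry drr_logical_size.
             */
            uint64_t len = DRR_WRITE_PAYLOAD_SIZE(drrw);

            return (receive_read(stream, buf, len));  /* hypothetical */
    }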
/*
* zfs ioctl command structure
*/
@@ -173,7 +204,8 @@ typedef struct dmu_replay_record {
uint32_t drr_bonuslen;
uint8_t drr_checksumtype;
uint8_t drr_compress;
- uint8_t drr_pad[6];
+ uint8_t drr_dn_slots;
+ uint8_t drr_pad[5];
uint64_t drr_toguid;
/* bonus content follows */
} drr_object;
@@ -187,12 +219,16 @@ typedef struct dmu_replay_record {
dmu_object_type_t drr_type;
uint32_t drr_pad;
uint64_t drr_offset;
- uint64_t drr_length;
+ uint64_t drr_logical_size;
uint64_t drr_toguid;
uint8_t drr_checksumtype;
uint8_t drr_checksumflags;
- uint8_t drr_pad2[6];
- ddt_key_t drr_key; /* deduplication key */
+ uint8_t drr_compressiontype;
+ uint8_t drr_pad2[5];
+ /* deduplication key */
+ ddt_key_t drr_key;
+ /* only nonzero if drr_compressiontype is not 0 */
+ uint64_t drr_compressed_size;
/* content follows */
} drr_write;
struct drr_free {
@@ -237,6 +273,22 @@ typedef struct dmu_replay_record {
uint32_t drr_psize; /* compr. (real) size of payload */
/* (possibly compressed) content follows */
} drr_write_embedded;
+
+ /*
+ * Note: drr_checksum is overlaid with all record types
+ * except DRR_BEGIN. Therefore its (non-pad) members
+ * must not overlap with members from the other structs.
+ * We accomplish this by putting its members at the very
+ * end of the struct.
+ */
+ struct drr_checksum {
+ uint64_t drr_pad[34];
+ /*
+ * fletcher-4 checksum of everything preceding the
+ * checksum.
+ */
+ zio_cksum_t drr_checksum;
+ } drr_checksum;
} drr_u;
} dmu_replay_record_t;
@@ -271,6 +323,7 @@ typedef struct zinject_record {
uint32_t zi_iotype;
int32_t zi_duration;
uint64_t zi_timer;
+ uint64_t zi_nlanes;
uint32_t zi_cmd;
uint32_t zi_pad;
} zinject_record_t;
@@ -286,6 +339,10 @@ typedef struct zinject_record {
#define ZEVENT_SEEK_START 0
#define ZEVENT_SEEK_END UINT64_MAX
+/* scaled frequency ranges */
+#define ZI_PERCENTAGE_MIN 4294UL
+#define ZI_PERCENTAGE_MAX UINT32_MAX
+
typedef enum zinject_type {
ZINJECT_UNINITIALIZED,
ZINJECT_DATA_FAULT,
@@ -315,6 +372,12 @@ typedef enum zfs_case {
ZFS_CASE_MIXED
} zfs_case_t;
+/*
+ * Note: this struct must have the same layout in 32-bit and 64-bit, so
+ * that 32-bit processes (like /sbin/zfs) can pass it to the 64-bit
+ * kernel. Therefore, we add padding to it so that no "hidden" padding
+ * is automatically added on 64-bit (but not on 32-bit).
+ */
typedef struct zfs_cmd {
char zc_name[MAXPATHLEN]; /* name of pool or dataset */
uint64_t zc_nvlist_src; /* really (char *) */
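An illustration of the padding rule described above (not code from this patch): a 64-bit compiler aligns a uint64_t to 8 bytes and silently inserts padding that a 32-bit i386 build, where uint64_t is 4-byte aligned, would not, so the explicit pad keeps both ABIs identical:

    struct bad_layout {
            uint32_t a;
            /* 4 hidden pad bytes inserted here on 64-bit only */
            uint64_t b;
    };

    struct good_layout {
            uint32_t a;
            uint32_t a_pad;         /* explicit; identical on both ABIs */
            uint64_t b;
    };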
diff --git a/zfs/include/sys/zfs_ratelimit.h b/zfs/include/sys/zfs_ratelimit.h
new file mode 100644
index 000000000000..f36e07841f92
--- /dev/null
+++ b/zfs/include/sys/zfs_ratelimit.h
@@ -0,0 +1,38 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2016, Lawrence Livermore National Security, LLC.
+ */
+
+#ifndef _SYS_ZFS_RATELIMIT_H
+#define _SYS_ZFS_RATELIMIT_H
+
+#include <sys/zfs_context.h>
+
+typedef struct {
+ hrtime_t start;
+ unsigned int count;
+ unsigned int burst; /* Number to allow per interval */
+ unsigned int interval; /* Interval length in seconds */
+ kmutex_t lock;
+} zfs_ratelimit_t;
+
+int zfs_ratelimit(zfs_ratelimit_t *rl);
+void zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int burst,
+ unsigned int interval);
+void zfs_ratelimit_fini(zfs_ratelimit_t *rl);
+
+#endif /* _SYS_ZFS_RATELIMIT_H */
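A hypothetical consumer of the interface above, assuming zfs_ratelimit() returns nonzero while the caller is still under the burst/interval budget:

    extern void post_event(void);   /* hypothetical event sink */

    static zfs_ratelimit_t event_rl;

    void
    example_setup(void)
    {
            zfs_ratelimit_init(&event_rl, 10, 60);  /* 10 events per minute */
    }

    void
    example_event(void)
    {
            if (zfs_ratelimit(&event_rl))
                    post_event();
    }

    void
    example_teardown(void)
    {
            zfs_ratelimit_fini(&event_rl);
    }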
diff --git a/zfs/include/sys/zfs_rlock.h b/zfs/include/sys/zfs_rlock.h
index 5322f3bc73af..8483b4e8bf03 100644
--- a/zfs/include/sys/zfs_rlock.h
+++ b/zfs/include/sys/zfs_rlock.h
@@ -30,11 +30,14 @@
extern "C" {
#endif
-#ifdef _KERNEL
-
#include <sys/list.h>
#include <sys/avl.h>
+
+#ifdef _KERNEL
#include <sys/condvar.h>
+#else
+#include <sys/zfs_context.h>
+#endif
typedef enum {
RL_READER,
@@ -47,7 +50,7 @@ typedef struct zfs_rlock {
avl_tree_t zr_avl; /* avl tree of range locks */
uint64_t *zr_size; /* points to znode->z_size */
uint_t *zr_blksz; /* points to znode->z_blksz */
- uint64_t *zr_max_blksz; /* points to zsb->z_max_blksz */
+ uint64_t *zr_max_blksz; /* points to zfsvfs->z_max_blksz */
} zfs_rlock_t;
typedef struct rl {
@@ -106,7 +109,6 @@ zfs_rlock_destroy(zfs_rlock_t *zrl)
avl_destroy(&zrl->zr_avl);
mutex_destroy(&zrl->zr_mutex);
}
-#endif /* _KERNEL */
#ifdef __cplusplus
}
diff --git a/zfs/include/sys/zfs_vfsops.h b/zfs/include/sys/zfs_vfsops.h
index d971c082c44c..2326da422183 100644
--- a/zfs/include/sys/zfs_vfsops.h
+++ b/zfs/include/sys/zfs_vfsops.h
@@ -38,35 +38,45 @@
extern "C" {
#endif
-struct zfs_sb;
+typedef struct zfsvfs zfsvfs_t;
struct znode;
-typedef struct zfs_mntopts {
- char *z_osname; /* Objset name */
- char *z_mntpoint; /* Primary mount point */
- uint64_t z_xattr;
- boolean_t z_readonly;
- boolean_t z_do_readonly;
- boolean_t z_setuid;
- boolean_t z_do_setuid;
- boolean_t z_exec;
- boolean_t z_do_exec;
- boolean_t z_devices;
- boolean_t z_do_devices;
- boolean_t z_do_xattr;
- boolean_t z_atime;
- boolean_t z_do_atime;
- boolean_t z_relatime;
- boolean_t z_do_relatime;
- boolean_t z_nbmand;
- boolean_t z_do_nbmand;
-} zfs_mntopts_t;
-
-typedef struct zfs_sb {
+/*
+ * This structure emulates the vfs_t from other platforms. Its purpose
+ * is to facilitate the handling of mount options and minimize structural
+ * differences between the platforms.
+ */
+typedef struct vfs {
+ struct zfsvfs *vfs_data;
+ char *vfs_mntpoint; /* Primary mount point */
+ uint64_t vfs_xattr;
+ boolean_t vfs_readonly;
+ boolean_t vfs_do_readonly;
+ boolean_t vfs_setuid;
+ boolean_t vfs_do_setuid;
+ boolean_t vfs_exec;
+ boolean_t vfs_do_exec;
+ boolean_t vfs_devices;
+ boolean_t vfs_do_devices;
+ boolean_t vfs_do_xattr;
+ boolean_t vfs_atime;
+ boolean_t vfs_do_atime;
+ boolean_t vfs_relatime;
+ boolean_t vfs_do_relatime;
+ boolean_t vfs_nbmand;
+ boolean_t vfs_do_nbmand;
+} vfs_t;
+
+typedef struct zfs_mnt {
+ const char *mnt_osname; /* Objset name */
+ char *mnt_data; /* Raw mount options */
+} zfs_mnt_t;
+
+struct zfsvfs {
+ vfs_t *z_vfs; /* generic fs struct */
struct super_block *z_sb; /* generic super_block */
- struct zfs_sb *z_parent; /* parent fs */
+ struct zfsvfs *z_parent; /* parent fs */
objset_t *z_os; /* objset reference */
- zfs_mntopts_t *z_mntopts; /* passed mount options */
uint64_t z_flags; /* super_block flags */
uint64_t z_root; /* id of root znode */
uint64_t z_unlinkedobj; /* id of unlinked zapobj */
@@ -109,14 +119,14 @@ typedef struct zfs_sb {
kmutex_t z_lock;
uint64_t z_userquota_obj;
uint64_t z_groupquota_obj;
+ uint64_t z_userobjquota_obj;
+ uint64_t z_groupobjquota_obj;
uint64_t z_replay_eof; /* New end of file - replay only */
sa_attr_type_t *z_attr_table; /* SA attr mapping->id */
uint64_t z_hold_size; /* znode hold array size */
avl_tree_t *z_hold_trees; /* znode hold trees */
kmutex_t *z_hold_locks; /* znode hold locks */
-} zfs_sb_t;
-
-#define ZFS_SUPER_MAGIC 0x2fc12fc1
+};
#define ZSB_XATTR 0x0001 /* Enable user xattrs */
@@ -177,42 +187,34 @@ typedef struct zfid_long {
extern uint_t zfs_fsyncer_key;
-extern int zfs_suspend_fs(zfs_sb_t *zsb);
-extern int zfs_resume_fs(zfs_sb_t *zsb, const char *osname);
-extern int zfs_userspace_one(zfs_sb_t *zsb, zfs_userquota_prop_t type,
+extern int zfs_suspend_fs(zfsvfs_t *zfsvfs);
+extern int zfs_resume_fs(zfsvfs_t *zfsvfs, struct dsl_dataset *ds);
+extern int zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
const char *domain, uint64_t rid, uint64_t *valuep);
-extern int zfs_userspace_many(zfs_sb_t *zsb, zfs_userquota_prop_t type,
+extern int zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
uint64_t *cookiep, void *vbuf, uint64_t *bufsizep);
-extern int zfs_set_userquota(zfs_sb_t *zsb, zfs_userquota_prop_t type,
+extern int zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
const char *domain, uint64_t rid, uint64_t quota);
-extern boolean_t zfs_owner_overquota(zfs_sb_t *zsb, struct znode *,
+extern boolean_t zfs_owner_overquota(zfsvfs_t *zfsvfs, struct znode *,
boolean_t isgroup);
-extern boolean_t zfs_fuid_overquota(zfs_sb_t *zsb, boolean_t isgroup,
+extern boolean_t zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup,
uint64_t fuid);
-extern int zfs_set_version(zfs_sb_t *zsb, uint64_t newvers);
-extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop,
- uint64_t *value);
-extern zfs_mntopts_t *zfs_mntopts_alloc(void);
-extern void zfs_mntopts_free(zfs_mntopts_t *zmo);
-extern int zfs_sb_create(const char *name, zfs_mntopts_t *zmo,
- zfs_sb_t **zsbp);
-extern int zfs_sb_setup(zfs_sb_t *zsb, boolean_t mounting);
-extern void zfs_sb_free(zfs_sb_t *zsb);
-extern int zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan,
- int *objects);
-extern int zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting);
+extern boolean_t zfs_fuid_overobjquota(zfsvfs_t *zfsvfs, boolean_t isgroup,
+ uint64_t fuid);
+extern int zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers);
+extern int zfsvfs_create(const char *name, zfsvfs_t **zfvp);
+extern void zfsvfs_free(zfsvfs_t *zfsvfs);
extern int zfs_check_global_label(const char *dsname, const char *hexsl);
-extern boolean_t zfs_is_readonly(zfs_sb_t *zsb);
-extern int zfs_register_callbacks(zfs_sb_t *zsb);
-extern void zfs_unregister_callbacks(zfs_sb_t *zsb);
-extern int zfs_domount(struct super_block *sb, zfs_mntopts_t *zmo, int silent);
+extern boolean_t zfs_is_readonly(zfsvfs_t *zfsvfs);
+extern int zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent);
extern void zfs_preumount(struct super_block *sb);
extern int zfs_umount(struct super_block *sb);
-extern int zfs_remount(struct super_block *sb, int *flags, zfs_mntopts_t *zmo);
-extern int zfs_root(zfs_sb_t *zsb, struct inode **ipp);
+extern int zfs_remount(struct super_block *sb, int *flags, zfs_mnt_t *zm);
extern int zfs_statvfs(struct dentry *dentry, struct kstatfs *statp);
extern int zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp);
+extern int zfs_prune(struct super_block *sb, unsigned long nr_to_scan,
+ int *objects);
#ifdef __cplusplus
}
diff --git a/zfs/include/sys/zfs_vnops.h b/zfs/include/sys/zfs_vnops.h
index c331035c544a..f2f4d13f4ed2 100644
--- a/zfs/include/sys/zfs_vnops.h
+++ b/zfs/include/sys/zfs_vnops.h
@@ -47,7 +47,9 @@ extern int zfs_lookup(struct inode *dip, char *nm, struct inode **ipp,
int flags, cred_t *cr, int *direntflags, pathname_t *realpnp);
extern int zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl,
int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp);
-extern int zfs_remove(struct inode *dip, char *name, cred_t *cr);
+extern int zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
+ int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp);
+extern int zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags);
extern int zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap,
struct inode **ipp, cred_t *cr, int flags, vsecattr_t *vsecp);
extern int zfs_rmdir(struct inode *dip, char *name, struct inode *cwd,
@@ -61,10 +63,9 @@ extern int zfs_rename(struct inode *sdip, char *snm, struct inode *tdip,
char *tnm, cred_t *cr, int flags);
extern int zfs_symlink(struct inode *dip, char *name, vattr_t *vap,
char *link, struct inode **ipp, cred_t *cr, int flags);
-extern int zfs_follow_link(struct dentry *dentry, struct nameidata *nd);
extern int zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr);
extern int zfs_link(struct inode *tdip, struct inode *sip,
- char *name, cred_t *cr);
+ char *name, cred_t *cr, int flags);
extern void zfs_inactive(struct inode *ip);
extern int zfs_space(struct inode *ip, int cmd, flock64_t *bfp, int flag,
offset_t offset, cred_t *cr);
diff --git a/zfs/include/sys/zfs_znode.h b/zfs/include/sys/zfs_znode.h
index 0b8bd24c541e..c292f03739e3 100644
--- a/zfs/include/sys/zfs_znode.h
+++ b/zfs/include/sys/zfs_znode.h
@@ -20,7 +20,8 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
*/
#ifndef _SYS_FS_ZFS_ZNODE_H
@@ -138,17 +139,6 @@ extern "C" {
#define ZFS_SHARES_DIR "SHARES"
#define ZFS_SA_ATTRS "SA_ATTRS"
-/*
- * Path component length
- *
- * The generic fs code uses MAXNAMELEN to represent
- * what the largest component length is. Unfortunately,
- * this length includes the terminating NULL. ZFS needs
- * to tell the users via pathconf() and statvfs() what the
- * true maximum length of a component is, excluding the NULL.
- */
-#define ZFS_MAXNAMELEN (MAXNAMELEN - 1)
-
/*
* Convert mode bits (zp_mode) to BSD-style DT_* values for storing in
* the directory entries. On Linux systems this value is already
@@ -196,18 +186,16 @@ typedef struct znode {
uint_t z_blksz; /* block size in bytes */
uint_t z_seq; /* modification sequence number */
uint64_t z_mapcnt; /* number of pages mapped to file */
- uint64_t z_gen; /* generation (cached) */
+ uint64_t z_dnodesize; /* dnode size */
uint64_t z_size; /* file size (cached) */
- uint64_t z_links; /* file links (cached) */
uint64_t z_pflags; /* pflags (cached) */
- uint64_t z_uid; /* uid fuid (cached) */
- uint64_t z_gid; /* gid fuid (cached) */
uint32_t z_sync_cnt; /* synchronous open count */
mode_t z_mode; /* mode (cached) */
kmutex_t z_acl_lock; /* acl data lock */
zfs_acl_t *z_acl_cached; /* cached acl */
krwlock_t z_xattr_lock; /* xattr data lock */
nvlist_t *z_xattr_cached; /* cached xattrs */
+ uint64_t z_xattr_parent; /* parent obj for this xattr */
list_node_t z_link_node; /* all znodes in fs link */
sa_handle_t *z_sa_hdl; /* handle to sa data */
boolean_t z_is_sa; /* are we native sa? */
@@ -245,25 +233,25 @@ typedef struct znode_hold {
*/
#define ZTOI(znode) (&((znode)->z_inode))
#define ITOZ(inode) (container_of((inode), znode_t, z_inode))
-#define ZTOZSB(znode) ((zfs_sb_t *)(ZTOI(znode)->i_sb->s_fs_info))
-#define ITOZSB(inode) ((zfs_sb_t *)((inode)->i_sb->s_fs_info))
+#define ZTOZSB(znode) ((zfsvfs_t *)(ZTOI(znode)->i_sb->s_fs_info))
+#define ITOZSB(inode) ((zfsvfs_t *)((inode)->i_sb->s_fs_info))
#define S_ISDEV(mode) (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode))
/* Called on entry to each ZFS vnode and vfs operation */
-#define ZFS_ENTER(zsb) \
+#define ZFS_ENTER(zfsvfs) \
{ \
- rrm_enter_read(&(zsb)->z_teardown_lock, FTAG); \
- if ((zsb)->z_unmounted) { \
- ZFS_EXIT(zsb); \
+ rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
+ if ((zfsvfs)->z_unmounted) { \
+ ZFS_EXIT(zfsvfs); \
return (EIO); \
} \
}
/* Must be called before exiting the vop */
-#define ZFS_EXIT(zsb) \
+#define ZFS_EXIT(zfsvfs) \
{ \
- rrm_exit(&(zsb)->z_teardown_lock, FTAG); \
+ rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG); \
}
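A sketch of the entry/exit discipline these macros impose on every vnode operation; the vop itself is hypothetical, and ZFS_VERIFY_ZP is assumed from the surrounding header:

    static int
    zfs_example_vop(struct inode *ip)
    {
            znode_t *zp = ITOZ(ip);
            zfsvfs_t *zfsvfs = ITOZSB(ip);
            int error = 0;

            ZFS_ENTER(zfsvfs);      /* returns EIO if already unmounted */
            ZFS_VERIFY_ZP(zp);
            /* ... real work happens under the teardown read lock ... */
            ZFS_EXIT(zfsvfs);
            return (error);
    }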
/* Verifies the znode is valid */
@@ -278,7 +266,7 @@ typedef struct znode_hold {
*/
#define ZFS_OBJ_MTX_SZ 64
#define ZFS_OBJ_MTX_MAX (1024 * 1024)
-#define ZFS_OBJ_HASH(zsb, obj) ((obj) & ((zsb->z_hold_size) - 1))
+#define ZFS_OBJ_HASH(zfsvfs, obj) ((obj) & ((zfsvfs->z_hold_size) - 1))
extern unsigned int zfs_object_mutex_size;
@@ -303,7 +291,7 @@ extern unsigned int zfs_object_mutex_size;
#define STATE_CHANGED (ATTR_CTIME)
#define CONTENT_MODIFIED (ATTR_MTIME | ATTR_CTIME)
-extern int zfs_init_fs(zfs_sb_t *, znode_t **);
+extern int zfs_init_fs(zfsvfs_t *, znode_t **);
extern void zfs_set_dataprop(objset_t *);
extern void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *,
dmu_tx_t *tx);
@@ -314,7 +302,7 @@ extern int zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t);
extern void zfs_znode_init(void);
extern void zfs_znode_fini(void);
extern int zfs_znode_hold_compare(const void *, const void *);
-extern int zfs_zget(zfs_sb_t *, uint64_t, znode_t **);
+extern int zfs_zget(zfsvfs_t *, uint64_t, znode_t **);
extern int zfs_rezget(znode_t *);
extern void zfs_zinactive(znode_t *);
extern void zfs_znode_delete(znode_t *, dmu_tx_t *);
@@ -324,6 +312,7 @@ extern int zfs_sync(struct super_block *, int, cred_t *);
extern dev_t zfs_cmpldev(uint64_t);
extern int zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
extern int zfs_get_stats(objset_t *os, nvlist_t *nv);
+extern boolean_t zfs_get_vfs_flag_unmounted(objset_t *os);
extern void zfs_znode_dmu_fini(znode_t *);
extern int zfs_inode_alloc(struct super_block *, struct inode **ip);
extern void zfs_inode_destroy(struct inode *);
@@ -354,8 +343,8 @@ extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx);
-extern void zfs_upgrade(zfs_sb_t *zsb, dmu_tx_t *tx);
-extern int zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx);
+extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
+extern int zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
#if defined(HAVE_UIO_RW)
extern caddr_t zfs_map_page(page_t *, enum seg_rw);
diff --git a/zfs/include/sys/zil.h b/zfs/include/sys/zil.h
index 65b14f1cd6a2..95fd324b4abf 100644
--- a/zfs/include/sys/zil.h
+++ b/zfs/include/sys/zil.h
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -94,6 +94,15 @@ typedef struct zil_chain {
#define ZIL_MIN_BLKSZ 4096ULL
+/*
+ * ziltest is by and large an ugly hack, but very useful in
+ * checking replay without tedious work.
+ * When running ziltest we want to keep all itx's and so maintain
+ * a single list in the zl_itxg[] that uses a high txg: ZILTEST_TXG.
+ * We subtract TXG_CONCURRENT_STATES to allow for common code.
+ */
+#define ZILTEST_TXG (UINT64_MAX - TXG_CONCURRENT_STATES)
+
/*
* The words of a log block checksum.
*/
@@ -172,6 +181,19 @@ typedef enum zil_create {
(txtype) == TX_ACL || \
(txtype) == TX_WRITE2)
+/*
+ * The number of dnode slots consumed by the object is stored in the 8
+ * unused upper bits of the object ID. We subtract 1 from the value
+ * stored on disk for compatibility with implementations that don't
+ * support large dnodes. The slot count for a single-slot dnode will
+ * contain 0 for those bits to preserve the log record format for
+ * "small" dnodes.
+ */
+#define LR_FOID_GET_SLOTS(oid) (BF64_GET((oid), 56, 8) + 1)
+#define LR_FOID_SET_SLOTS(oid, x) BF64_SET((oid), 56, 8, (x) - 1)
+#define LR_FOID_GET_OBJ(oid) BF64_GET((oid), 0, DN_MAX_OBJECT_SHIFT)
+#define LR_FOID_SET_OBJ(oid, x) BF64_SET((oid), 0, DN_MAX_OBJECT_SHIFT, (x))
+
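A worked example of the slot encoding above (values illustrative; ASSERT3U is assumed from the debug headers):

    static void
    lr_foid_example(void)
    {
            uint64_t foid = 0x1234;         /* object id in bits 0..55 */

            LR_FOID_SET_SLOTS(foid, 2);     /* 2 - 1 = 1 stored in bits 56..63 */
            ASSERT3U(LR_FOID_GET_SLOTS(foid), ==, 2);
            ASSERT3U(LR_FOID_GET_OBJ(foid), ==, 0x1234);
    }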
/*
* Format of log records.
* The fields are carefully defined to allow them to be aligned
@@ -348,7 +370,7 @@ typedef struct {
* - the write occupies only one block
* WR_COPIED:
* If we know we'll immediately be committing the
- * transaction (FSYNC or FDSYNC), the we allocate a larger
+ * transaction (FSYNC or FDSYNC), then we allocate a larger
* log record here for the data and copy the data in.
* WR_NEED_COPY:
* Otherwise we don't allocate a buffer, and *if* we need to
@@ -372,7 +394,6 @@ typedef struct itx {
uint8_t itx_sync; /* synchronous transaction */
zil_callback_t itx_callback; /* Called when the itx is persistent */
void *itx_callback_data; /* User data for the callback */
- uint64_t itx_sod; /* record size on disk */
uint64_t itx_oid; /* object id */
lr_t itx_lr; /* common part of log record */
/* followed by type-specific part of lr_xx_t and its immediate data */
diff --git a/zfs/include/sys/zil_impl.h b/zfs/include/sys/zil_impl.h
index 0c426a15dd06..13ecca3c8b04 100644
--- a/zfs/include/sys/zil_impl.h
+++ b/zfs/include/sys/zil_impl.h
@@ -42,6 +42,7 @@ typedef struct lwb {
zilog_t *lwb_zilog; /* back pointer to log struct */
blkptr_t lwb_blk; /* on disk address of this log blk */
boolean_t lwb_fastwrite; /* is blk marked for fastwrite? */
+ boolean_t lwb_slog; /* lwb_blk is on SLOG device */
int lwb_nused; /* # used bytes in buffer */
int lwb_sz; /* size of block and buffer */
char *lwb_buf; /* log write buffer */
@@ -62,7 +63,6 @@ typedef struct itxs {
typedef struct itxg {
kmutex_t itxg_lock; /* lock for this structure */
uint64_t itxg_txg; /* txg for this chain */
- uint64_t itxg_sod; /* total size on disk for this txg */
itxs_t *itxg_itxs; /* sync and async itxs */
} itxg_t;
@@ -120,7 +120,6 @@ struct zilog {
kcondvar_t zl_cv_batch[2]; /* batch condition variables */
itxg_t zl_itxg[TXG_SIZE]; /* intent log txg chains */
list_t zl_itx_commit_list; /* itx list to be committed */
- uint64_t zl_itx_list_sz; /* total size of records on list */
uint64_t zl_cur_used; /* current commit log size used */
list_t zl_lwb_list; /* in-flight log write list */
kmutex_t zl_vdev_lock; /* protects zl_vdev_tree */
@@ -140,9 +139,26 @@ typedef struct zil_bp_node {
avl_node_t zn_node;
} zil_bp_node_t;
+/*
+ * Maximum amount of write data that can be put into a single log block.
+ */
#define ZIL_MAX_LOG_DATA (SPA_OLD_MAXBLOCKSIZE - sizeof (zil_chain_t) - \
sizeof (lr_write_t))
+/*
+ * Maximum amount of log space we agree to waste in order to reduce the
+ * number of WR_NEED_COPY chunks and thereby cut zl_get_data() overhead (~12%).
+ */
+#define ZIL_MAX_WASTE_SPACE (ZIL_MAX_LOG_DATA / 8)
+
+/*
+ * Maximum amount of write data for WR_COPIED. Fall back to WR_NEED_COPY,
+ * which is more space efficient, if we can't fit at least two log records
+ * into a maximum-sized log block.
+ */
+#define ZIL_MAX_COPIED_DATA ((SPA_OLD_MAXBLOCKSIZE - \
+ sizeof (zil_chain_t)) / 2 - sizeof (lr_write_t))
+
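A sanity check implied by the arithmetic above (CTASSERT is assumed from the debug headers): two maximum WR_COPIED records, each with its lr_write_t header, plus the zil_chain_t header, still fit in one maximum-sized log block:

    CTASSERT(2 * (ZIL_MAX_COPIED_DATA + sizeof (lr_write_t)) +
        sizeof (zil_chain_t) <= SPA_OLD_MAXBLOCKSIZE);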
#ifdef __cplusplus
}
#endif
diff --git a/zfs/include/sys/zio.h b/zfs/include/sys/zio.h
index 4916d87249ea..4eaabc38c86f 100644
--- a/zfs/include/sys/zio.h
+++ b/zfs/include/sys/zio.h
@@ -22,13 +22,16 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright 2016 Toomas Soome <tsoome at me.com>
*/
#ifndef _ZIO_H
#define _ZIO_H
+#include <sys/zio_priority.h>
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/txg.h>
@@ -79,6 +82,10 @@ enum zio_checksum {
ZIO_CHECKSUM_FLETCHER_4,
ZIO_CHECKSUM_SHA256,
ZIO_CHECKSUM_ZILOG2,
+ ZIO_CHECKSUM_NOPARITY,
+ ZIO_CHECKSUM_SHA512,
+ ZIO_CHECKSUM_SKEIN,
+ ZIO_CHECKSUM_EDONR,
ZIO_CHECKSUM_FUNCTIONS
};
@@ -97,26 +104,6 @@ enum zio_checksum {
#define ZIO_DEDUPCHECKSUM ZIO_CHECKSUM_SHA256
#define ZIO_DEDUPDITTO_MIN 100
-enum zio_compress {
- ZIO_COMPRESS_INHERIT = 0,
- ZIO_COMPRESS_ON,
- ZIO_COMPRESS_OFF,
- ZIO_COMPRESS_LZJB,
- ZIO_COMPRESS_EMPTY,
- ZIO_COMPRESS_GZIP_1,
- ZIO_COMPRESS_GZIP_2,
- ZIO_COMPRESS_GZIP_3,
- ZIO_COMPRESS_GZIP_4,
- ZIO_COMPRESS_GZIP_5,
- ZIO_COMPRESS_GZIP_6,
- ZIO_COMPRESS_GZIP_7,
- ZIO_COMPRESS_GZIP_8,
- ZIO_COMPRESS_GZIP_9,
- ZIO_COMPRESS_ZLE,
- ZIO_COMPRESS_LZ4,
- ZIO_COMPRESS_FUNCTIONS
-};
-
/*
* The number of "legacy" compression functions which can be set on individual
* objects.
@@ -135,6 +122,16 @@ enum zio_compress {
#define BOOTFS_COMPRESS_VALID(compress) \
((compress) == ZIO_COMPRESS_LZJB || \
(compress) == ZIO_COMPRESS_LZ4 || \
+ (compress) == ZIO_COMPRESS_GZIP_1 || \
+ (compress) == ZIO_COMPRESS_GZIP_2 || \
+ (compress) == ZIO_COMPRESS_GZIP_3 || \
+ (compress) == ZIO_COMPRESS_GZIP_4 || \
+ (compress) == ZIO_COMPRESS_GZIP_5 || \
+ (compress) == ZIO_COMPRESS_GZIP_6 || \
+ (compress) == ZIO_COMPRESS_GZIP_7 || \
+ (compress) == ZIO_COMPRESS_GZIP_8 || \
+ (compress) == ZIO_COMPRESS_GZIP_9 || \
+ (compress) == ZIO_COMPRESS_ZLE || \
(compress) == ZIO_COMPRESS_ON || \
(compress) == ZIO_COMPRESS_OFF)
@@ -147,17 +144,6 @@ enum zio_compress {
#define ZIO_FAILURE_MODE_CONTINUE 1
#define ZIO_FAILURE_MODE_PANIC 2
-typedef enum zio_priority {
- ZIO_PRIORITY_SYNC_READ,
- ZIO_PRIORITY_SYNC_WRITE, /* ZIL */
- ZIO_PRIORITY_ASYNC_READ, /* prefetch */
- ZIO_PRIORITY_ASYNC_WRITE, /* spa_sync() */
- ZIO_PRIORITY_SCRUB, /* asynchronous scrub/resilver reads */
- ZIO_PRIORITY_NUM_QUEUEABLE,
-
- ZIO_PRIORITY_NOW /* non-queued i/os (e.g. free) */
-} zio_priority_t;
-
enum zio_flag {
/*
* Flags inherited by gang, ddt, and vdev children,
@@ -183,6 +169,7 @@ enum zio_flag {
ZIO_FLAG_DONT_CACHE = 1 << 11,
ZIO_FLAG_NODATA = 1 << 12,
ZIO_FLAG_INDUCE_DAMAGE = 1 << 13,
+ ZIO_FLAG_IO_ALLOCATING = 1 << 14,
#define ZIO_FLAG_DDT_INHERIT (ZIO_FLAG_IO_RETRY - 1)
#define ZIO_FLAG_GANG_INHERIT (ZIO_FLAG_IO_RETRY - 1)
@@ -190,28 +177,28 @@ enum zio_flag {
/*
* Flags inherited by vdev children.
*/
- ZIO_FLAG_IO_RETRY = 1 << 14, /* must be first for INHERIT */
- ZIO_FLAG_PROBE = 1 << 15,
- ZIO_FLAG_TRYHARD = 1 << 16,
- ZIO_FLAG_OPTIONAL = 1 << 17,
+ ZIO_FLAG_IO_RETRY = 1 << 15, /* must be first for INHERIT */
+ ZIO_FLAG_PROBE = 1 << 16,
+ ZIO_FLAG_TRYHARD = 1 << 17,
+ ZIO_FLAG_OPTIONAL = 1 << 18,
#define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1)
/*
* Flags not inherited by any children.
*/
- ZIO_FLAG_DONT_QUEUE = 1 << 18, /* must be first for INHERIT */
- ZIO_FLAG_DONT_PROPAGATE = 1 << 19,
- ZIO_FLAG_IO_BYPASS = 1 << 20,
- ZIO_FLAG_IO_REWRITE = 1 << 21,
- ZIO_FLAG_RAW = 1 << 22,
- ZIO_FLAG_GANG_CHILD = 1 << 23,
- ZIO_FLAG_DDT_CHILD = 1 << 24,
- ZIO_FLAG_GODFATHER = 1 << 25,
- ZIO_FLAG_NOPWRITE = 1 << 26,
- ZIO_FLAG_REEXECUTED = 1 << 27,
- ZIO_FLAG_DELEGATED = 1 << 28,
- ZIO_FLAG_FASTWRITE = 1 << 29,
+ ZIO_FLAG_DONT_QUEUE = 1 << 19, /* must be first for INHERIT */
+ ZIO_FLAG_DONT_PROPAGATE = 1 << 20,
+ ZIO_FLAG_IO_BYPASS = 1 << 21,
+ ZIO_FLAG_IO_REWRITE = 1 << 22,
+ ZIO_FLAG_RAW = 1 << 23,
+ ZIO_FLAG_GANG_CHILD = 1 << 24,
+ ZIO_FLAG_DDT_CHILD = 1 << 25,
+ ZIO_FLAG_GODFATHER = 1 << 26,
+ ZIO_FLAG_NOPWRITE = 1 << 27,
+ ZIO_FLAG_REEXECUTED = 1 << 28,
+ ZIO_FLAG_DELEGATED = 1 << 29,
+ ZIO_FLAG_FASTWRITE = 1 << 30
};
#define ZIO_FLAG_MUSTSUCCEED 0
@@ -251,6 +238,7 @@ enum zio_wait_type {
typedef void zio_done_func_t(zio_t *zio);
+extern int zio_dva_throttle_enabled;
extern const char *zio_type_name[ZIO_TYPES];
/*
@@ -262,6 +250,7 @@ extern const char *zio_type_name[ZIO_TYPES];
* Root blocks (objset_phys_t) are object 0, level -1: <objset, 0, -1, 0>.
* ZIL blocks are bookmarked <objset, 0, -2, blkid == ZIL sequence number>.
* dmu_sync()ed ZIL data blocks are bookmarked <objset, object, -2, blkid>.
+ * dnode visit bookmarks are <objset, object id of dnode, -3, 0>.
*
* Note: this structure is called a bookmark because its original purpose
* was to remember where to resume a pool-wide traverse.
@@ -294,6 +283,9 @@ struct zbookmark_phys {
#define ZB_ZIL_OBJECT (0ULL)
#define ZB_ZIL_LEVEL (-2LL)
+#define ZB_DNODE_LEVEL (-3LL)
+#define ZB_DNODE_BLKID (0ULL)
+
#define ZB_IS_ZERO(zb) \
((zb)->zb_objset == 0 && (zb)->zb_object == 0 && \
(zb)->zb_level == 0 && (zb)->zb_blkid == 0)
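Illustrative construction of the special bookmarks described above, assuming the SET_BOOKMARK() helper from this header; the arguments are placeholders:

    static void
    bookmark_example(uint64_t objset, uint64_t dnode_obj, uint64_t seq)
    {
            zbookmark_phys_t zil_zb, dnode_zb;

            /* ZIL block: object 0, level -2, blkid = ZIL sequence number */
            SET_BOOKMARK(&zil_zb, objset, ZB_ZIL_OBJECT, ZB_ZIL_LEVEL, seq);

            /* dnode visit: the dnode's object id at level -3, blkid 0 */
            SET_BOOKMARK(&dnode_zb, objset, dnode_obj, ZB_DNODE_LEVEL,
                ZB_DNODE_BLKID);
    }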
@@ -316,11 +308,12 @@ typedef struct zio_prop {
typedef struct zio_cksum_report zio_cksum_report_t;
typedef void zio_cksum_finish_f(zio_cksum_report_t *rep,
- const void *good_data);
+ const abd_t *good_data);
typedef void zio_cksum_free_f(void *cbdata, size_t size);
struct zio_bad_cksum; /* defined in zio_checksum.h */
struct dnode_phys;
+struct abd;
struct zio_cksum_report {
struct zio_cksum_report *zcr_next;
@@ -353,12 +346,12 @@ typedef struct zio_gang_node {
} zio_gang_node_t;
typedef zio_t *zio_gang_issue_func_t(zio_t *zio, blkptr_t *bp,
- zio_gang_node_t *gn, void *data);
+ zio_gang_node_t *gn, struct abd *data, uint64_t offset);
-typedef void zio_transform_func_t(zio_t *zio, void *data, uint64_t size);
+typedef void zio_transform_func_t(zio_t *zio, struct abd *data, uint64_t size);
typedef struct zio_transform {
- void *zt_orig_data;
+ struct abd *zt_orig_abd;
uint64_t zt_orig_size;
uint64_t zt_bufsize;
zio_transform_func_t *zt_transform;
@@ -377,6 +370,11 @@ typedef int zio_pipe_stage_t(zio_t *zio);
#define ZIO_REEXECUTE_NOW 0x01
#define ZIO_REEXECUTE_SUSPEND 0x02
+typedef struct zio_alloc_list {
+ list_t zal_list;
+ uint64_t zal_size;
+} zio_alloc_list_t;
+
typedef struct zio_link {
zio_t *zl_parent;
zio_t *zl_child;
@@ -401,21 +399,23 @@ struct zio {
blkptr_t io_bp_copy;
list_t io_parent_list;
list_t io_child_list;
- zio_link_t *io_walk_link;
zio_t *io_logical;
zio_transform_t *io_transform_stack;
/* Callback info */
- zio_done_func_t *io_ready;
+ zio_done_func_t *io_ready;
+ zio_done_func_t *io_children_ready;
zio_done_func_t *io_physdone;
zio_done_func_t *io_done;
void *io_private;
int64_t io_prev_space_delta; /* DMU private */
blkptr_t io_bp_orig;
+ /* io_lsize != io_orig_size iff this is a raw write */
+ uint64_t io_lsize;
/* Data represented by this I/O */
- void *io_data;
- void *io_orig_data;
+ struct abd *io_abd;
+ struct abd *io_orig_abd;
uint64_t io_size;
uint64_t io_orig_size;
@@ -426,10 +426,15 @@ struct zio {
uint64_t io_offset;
hrtime_t io_timestamp; /* submitted at */
+ hrtime_t io_queued_timestamp;
+ hrtime_t io_target_timestamp;
hrtime_t io_delta; /* vdev queue service delta */
- uint64_t io_delay; /* vdev disk service delta (ticks) */
+ hrtime_t io_delay; /* Device access time (disk or */
+ /* file). */
avl_node_t io_queue_node;
avl_node_t io_offset_node;
+ avl_node_t io_alloc_node;
+ zio_alloc_list_t io_alloc_list;
/* Internal pipeline state */
enum zio_flag io_flags;
@@ -438,6 +443,7 @@ struct zio {
enum zio_flag io_orig_flags;
enum zio_stage io_orig_stage;
enum zio_stage io_orig_pipeline;
+ enum zio_stage io_pipeline_trace;
int io_error;
int io_child_error[ZIO_CHILD_TYPES];
uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
@@ -460,24 +466,27 @@ struct zio {
taskq_ent_t io_tqent;
};
+extern int zio_bookmark_compare(const void *, const void *);
+
extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_root(spa_t *spa,
zio_done_func_t *done, void *private, enum zio_flag flags);
-extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, void *data,
- uint64_t size, zio_done_func_t *done, void *private,
+extern zio_t *zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
+ struct abd *data, uint64_t lsize, zio_done_func_t *done, void *private,
zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb);
extern zio_t *zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
- void *data, uint64_t size, const zio_prop_t *zp,
- zio_done_func_t *ready, zio_done_func_t *physdone, zio_done_func_t *done,
- void *private,
- zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb);
+ struct abd *data, uint64_t size, uint64_t psize, const zio_prop_t *zp,
+ zio_done_func_t *ready, zio_done_func_t *children_ready,
+ zio_done_func_t *physdone, zio_done_func_t *done,
+ void *private, zio_priority_t priority, enum zio_flag flags,
+ const zbookmark_phys_t *zb);
extern zio_t *zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
- void *data, uint64_t size, zio_done_func_t *done, void *private,
+ struct abd *data, uint64_t size, zio_done_func_t *done, void *private,
zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb);
extern void zio_write_override(zio_t *zio, blkptr_t *bp, int copies,
@@ -493,12 +502,12 @@ extern zio_t *zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
zio_done_func_t *done, void *private, enum zio_flag flags);
extern zio_t *zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
- uint64_t size, void *data, int checksum,
+ uint64_t size, struct abd *data, int checksum,
zio_done_func_t *done, void *private, zio_priority_t priority,
enum zio_flag flags, boolean_t labels);
extern zio_t *zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset,
- uint64_t size, void *data, int checksum,
+ uint64_t size, struct abd *data, int checksum,
zio_done_func_t *done, void *private, zio_priority_t priority,
enum zio_flag flags, boolean_t labels);
@@ -506,7 +515,7 @@ extern zio_t *zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg,
const blkptr_t *bp, enum zio_flag flags);
extern int zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp,
- uint64_t size, boolean_t use_slog);
+ uint64_t size, boolean_t *slog);
extern void zio_free_zil(spa_t *spa, uint64_t txg, blkptr_t *bp);
extern void zio_flush(zio_t *zio, vdev_t *vd);
extern void zio_shrink(zio_t *zio, uint64_t size);
@@ -515,9 +524,11 @@ extern int zio_wait(zio_t *zio);
extern void zio_nowait(zio_t *zio);
extern void zio_execute(zio_t *zio);
extern void zio_interrupt(zio_t *zio);
+extern void zio_delay_init(zio_t *zio);
+extern void zio_delay_interrupt(zio_t *zio);
-extern zio_t *zio_walk_parents(zio_t *cio);
-extern zio_t *zio_walk_children(zio_t *pio);
+extern zio_t *zio_walk_parents(zio_t *cio, zio_link_t **);
+extern zio_t *zio_walk_children(zio_t *pio, zio_link_t **);
extern zio_t *zio_unique_parent(zio_t *cio);
extern void zio_add_child(zio_t *pio, zio_t *cio);
@@ -525,17 +536,20 @@ extern void *zio_buf_alloc(size_t size);
extern void zio_buf_free(void *buf, size_t size);
extern void *zio_data_buf_alloc(size_t size);
extern void zio_data_buf_free(void *buf, size_t size);
-extern void *zio_buf_alloc_flags(size_t size, int flags);
+
+extern void zio_push_transform(zio_t *zio, struct abd *abd, uint64_t size,
+ uint64_t bufsize, zio_transform_func_t *transform);
+extern void zio_pop_transforms(zio_t *zio);
extern void zio_resubmit_stage_async(void *);
extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
- uint64_t offset, void *data, uint64_t size, int type,
+ uint64_t offset, struct abd *data, uint64_t size, int type,
zio_priority_t priority, enum zio_flag flags,
zio_done_func_t *done, void *private);
extern zio_t *zio_vdev_delegated_io(vdev_t *vd, uint64_t offset,
- void *data, uint64_t size, int type, zio_priority_t priority,
+ struct abd *data, uint64_t size, int type, zio_priority_t priority,
enum zio_flag flags, zio_done_func_t *done, void *private);
extern void zio_vdev_io_bypass(zio_t *zio);
@@ -577,7 +591,7 @@ extern int zio_handle_fault_injection(zio_t *zio, int error);
extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
extern int zio_handle_label_injection(zio_t *zio, int error);
extern void zio_handle_ignored_writes(zio_t *zio);
-extern uint64_t zio_handle_io_delay(zio_t *zio);
+extern hrtime_t zio_handle_io_delay(zio_t *zio);
/*
* Checksum ereport functions
@@ -585,22 +599,23 @@ extern uint64_t zio_handle_io_delay(zio_t *zio);
extern void zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, struct zio *zio,
uint64_t offset, uint64_t length, void *arg, struct zio_bad_cksum *info);
extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report,
- const void *good_data, const void *bad_data, boolean_t drop_if_identical);
+ const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical);
-extern void zfs_ereport_send_interim_checksum(zio_cksum_report_t *report);
extern void zfs_ereport_free_checksum(zio_cksum_report_t *report);
/* If we have the good data in hand, this function can be used */
extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
struct zio *zio, uint64_t offset, uint64_t length,
- const void *good_data, const void *bad_data, struct zio_bad_cksum *info);
+ const abd_t *good_data, const abd_t *bad_data, struct zio_bad_cksum *info);
/* Called from spa_sync(), but primarily an injection handler */
extern void spa_handle_ignored_writes(spa_t *spa);
/* zbookmark_phys functions */
-boolean_t zbookmark_is_before(const struct dnode_phys *dnp,
- const zbookmark_phys_t *zb1, const zbookmark_phys_t *zb2);
+boolean_t zbookmark_subtree_completed(const struct dnode_phys *dnp,
+ const zbookmark_phys_t *subtree_root, const zbookmark_phys_t *last_block);
+int zbookmark_compare(uint16_t dbss1, uint8_t ibs1, uint16_t dbss2,
+ uint8_t ibs2, const zbookmark_phys_t *zb1, const zbookmark_phys_t *zb2);
#ifdef __cplusplus
}
diff --git a/zfs/include/sys/zio_checksum.h b/zfs/include/sys/zio_checksum.h
index de89bc9a7967..45abd3bd313b 100644
--- a/zfs/include/sys/zio_checksum.h
+++ b/zfs/include/sys/zio_checksum.h
@@ -20,31 +20,76 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
+ * Copyright Saso Kiselkov 2013, All rights reserved.
*/
#ifndef _SYS_ZIO_CHECKSUM_H
#define _SYS_ZIO_CHECKSUM_H
#include <sys/zio.h>
+#include <zfeature_common.h>
+#include <zfs_fletcher.h>
#ifdef __cplusplus
extern "C" {
#endif
+struct abd;
+
/*
* Signature for checksum functions.
*/
-typedef void zio_checksum_t(const void *data, uint64_t size, zio_cksum_t *zcp);
+typedef void zio_checksum_t(struct abd *abd, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp);
+typedef void *zio_checksum_tmpl_init_t(const zio_cksum_salt_t *salt);
+typedef void zio_checksum_tmpl_free_t(void *ctx_template);
+
+typedef enum zio_checksum_flags {
+ /* Strong enough for metadata? */
+ ZCHECKSUM_FLAG_METADATA = (1 << 1),
+ /* ZIO embedded checksum */
+ ZCHECKSUM_FLAG_EMBEDDED = (1 << 2),
+ /* Strong enough for dedup (without verification)? */
+ ZCHECKSUM_FLAG_DEDUP = (1 << 3),
+ /* Uses salt value */
+ ZCHECKSUM_FLAG_SALTED = (1 << 4),
+ /* Strong enough for nopwrite? */
+ ZCHECKSUM_FLAG_NOPWRITE = (1 << 5)
+} zio_checksum_flags_t;
+
+typedef enum {
+ ZIO_CHECKSUM_NATIVE,
+ ZIO_CHECKSUM_BYTESWAP
+} zio_byteorder_t;
+
+typedef struct zio_abd_checksum_data {
+ zio_byteorder_t acd_byteorder;
+ fletcher_4_ctx_t *acd_ctx;
+ zio_cksum_t *acd_zcp;
+ void *acd_private;
+} zio_abd_checksum_data_t;
+
+typedef void zio_abd_checksum_init_t(zio_abd_checksum_data_t *);
+typedef void zio_abd_checksum_fini_t(zio_abd_checksum_data_t *);
+typedef int zio_abd_checksum_iter_t(void *, size_t, void *);
+
+typedef const struct zio_abd_checksum_func {
+ zio_abd_checksum_init_t *acf_init;
+ zio_abd_checksum_fini_t *acf_fini;
+ zio_abd_checksum_iter_t *acf_iter;
+} zio_abd_checksum_func_t;
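A sketch of how these ops are expected to be driven over an ABD, assuming abd_iterate_func() from sys/abd.h; this mirrors the intent of fletcher_4_abd_ops rather than copying its implementation:

    static void
    checksum_abd(const zio_abd_checksum_func_t *ops, struct abd *abd,
        uint64_t size, zio_abd_checksum_data_t *data)
    {
            ops->acf_init(data);
            (void) abd_iterate_func(abd, 0, size, ops->acf_iter, data);
            ops->acf_fini(data);
    }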
/*
* Information about each checksum function.
*/
typedef const struct zio_checksum_info {
- zio_checksum_t *ci_func[2]; /* checksum function for each byteorder */
- int ci_correctable; /* number of correctable bits */
- int ci_eck; /* uses zio embedded checksum? */
- int ci_dedup; /* strong enough for dedup? */
- char *ci_name; /* descriptive name */
+ /* checksum function for each byteorder */
+ zio_checksum_t *ci_func[2];
+ zio_checksum_tmpl_init_t *ci_tmpl_init;
+ zio_checksum_tmpl_free_t *ci_tmpl_free;
+ zio_checksum_flags_t ci_flags;
+ char *ci_name; /* descriptive name */
} zio_checksum_info_t;
typedef struct zio_bad_cksum {
@@ -61,12 +106,36 @@ extern zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS];
/*
* Checksum routines.
*/
-extern zio_checksum_t zio_checksum_SHA256;
+extern zio_checksum_t abd_checksum_SHA256;
+extern zio_checksum_t abd_checksum_SHA512_native;
+extern zio_checksum_t abd_checksum_SHA512_byteswap;
+
+/* Skein */
+extern zio_checksum_t abd_checksum_skein_native;
+extern zio_checksum_t abd_checksum_skein_byteswap;
+extern zio_checksum_tmpl_init_t abd_checksum_skein_tmpl_init;
+extern zio_checksum_tmpl_free_t abd_checksum_skein_tmpl_free;
+
+/* Edon-R */
+extern zio_checksum_t abd_checksum_edonr_native;
+extern zio_checksum_t abd_checksum_edonr_byteswap;
+extern zio_checksum_tmpl_init_t abd_checksum_edonr_tmpl_init;
+extern zio_checksum_tmpl_free_t abd_checksum_edonr_tmpl_free;
+
+extern zio_abd_checksum_func_t fletcher_4_abd_ops;
+extern zio_checksum_t abd_fletcher_4_native;
+extern zio_checksum_t abd_fletcher_4_byteswap;
-extern void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
- void *data, uint64_t size);
+extern int zio_checksum_equal(spa_t *, blkptr_t *, enum zio_checksum,
+ void *, uint64_t, uint64_t, zio_bad_cksum_t *);
+extern void zio_checksum_compute(zio_t *, enum zio_checksum,
+ struct abd *, uint64_t);
+extern int zio_checksum_error_impl(spa_t *, const blkptr_t *, enum zio_checksum,
+ struct abd *, uint64_t, uint64_t, zio_bad_cksum_t *);
extern int zio_checksum_error(zio_t *zio, zio_bad_cksum_t *out);
extern enum zio_checksum spa_dedup_checksum(spa_t *spa);
+extern void zio_checksum_templates_free(spa_t *spa);
+extern spa_feature_t zio_checksum_to_feature(enum zio_checksum cksum);
#ifdef __cplusplus
}
diff --git a/zfs/include/sys/zio_compress.h b/zfs/include/sys/zio_compress.h
index 63863c713c18..1642823d3d42 100644
--- a/zfs/include/sys/zio_compress.h
+++ b/zfs/include/sys/zio_compress.h
@@ -22,17 +22,38 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (c) 2015, 2016 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZIO_COMPRESS_H
#define _SYS_ZIO_COMPRESS_H
-#include <sys/zio.h>
+#include <sys/abd.h>
#ifdef __cplusplus
extern "C" {
#endif
+enum zio_compress {
+ ZIO_COMPRESS_INHERIT = 0,
+ ZIO_COMPRESS_ON,
+ ZIO_COMPRESS_OFF,
+ ZIO_COMPRESS_LZJB,
+ ZIO_COMPRESS_EMPTY,
+ ZIO_COMPRESS_GZIP_1,
+ ZIO_COMPRESS_GZIP_2,
+ ZIO_COMPRESS_GZIP_3,
+ ZIO_COMPRESS_GZIP_4,
+ ZIO_COMPRESS_GZIP_5,
+ ZIO_COMPRESS_GZIP_6,
+ ZIO_COMPRESS_GZIP_7,
+ ZIO_COMPRESS_GZIP_8,
+ ZIO_COMPRESS_GZIP_9,
+ ZIO_COMPRESS_ZLE,
+ ZIO_COMPRESS_LZ4,
+ ZIO_COMPRESS_FUNCTIONS
+};
+
/* Common signature for all zio compress functions. */
typedef size_t zio_compress_func_t(void *src, void *dst,
size_t s_len, size_t d_len, int);
@@ -40,14 +61,21 @@ typedef size_t zio_compress_func_t(void *src, void *dst,
typedef int zio_decompress_func_t(void *src, void *dst,
size_t s_len, size_t d_len, int);
+/*
+ * Common signature for all zio decompress functions using an ABD as input.
+ * This is helpful if you have both compressed ARC and scatter ABDs enabled,
+ * but is not a requirement for all compression algorithms.
+ */
+typedef int zio_decompress_abd_func_t(abd_t *src, void *dst,
+ size_t s_len, size_t d_len, int);
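A hypothetical call site for the ABD-input decompress path; zio_decompress_data() with this signature is declared later in this header:

    static int
    decompress_block(enum zio_compress c, abd_t *src, void *dst,
        size_t psize, size_t lsize)
    {
            /* src holds psize compressed bytes; dst receives lsize bytes */
            return (zio_decompress_data(c, src, dst, psize, lsize));
    }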
/*
* Information about each compression function.
*/
typedef const struct zio_compress_info {
- zio_compress_func_t *ci_compress; /* compression function */
- zio_decompress_func_t *ci_decompress; /* decompression function */
- int ci_level; /* level parameter */
- char *ci_name; /* algorithm name */
+ char *ci_name;
+ int ci_level;
+ zio_compress_func_t *ci_compress;
+ zio_decompress_func_t *ci_decompress;
} zio_compress_info_t;
extern zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS];
@@ -77,13 +105,16 @@ extern size_t lz4_compress_zfs(void *src, void *dst, size_t s_len, size_t d_len,
int level);
extern int lz4_decompress_zfs(void *src, void *dst, size_t s_len, size_t d_len,
int level);
-
+extern int lz4_decompress_abd(abd_t *src, void *dst, size_t s_len, size_t d_len,
+ int level);
/*
* Compress and decompress data if necessary.
*/
-extern size_t zio_compress_data(enum zio_compress c, void *src, void *dst,
+extern size_t zio_compress_data(enum zio_compress c, abd_t *src, void *dst,
size_t s_len);
-extern int zio_decompress_data(enum zio_compress c, void *src, void *dst,
+extern int zio_decompress_data(enum zio_compress c, abd_t *src, void *dst,
+ size_t s_len, size_t d_len);
+extern int zio_decompress_data_buf(enum zio_compress c, void *src, void *dst,
size_t s_len, size_t d_len);
#ifdef __cplusplus
diff --git a/zfs/include/sys/zio_impl.h b/zfs/include/sys/zio_impl.h
index 08f820103e82..4d56e906666f 100644
--- a/zfs/include/sys/zio_impl.h
+++ b/zfs/include/sys/zio_impl.h
@@ -24,15 +24,12 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
*/
#ifndef _ZIO_IMPL_H
#define _ZIO_IMPL_H
-#include <sys/zfs_context.h>
-#include <sys/zio.h>
-
#ifdef __cplusplus
extern "C" {
#endif
@@ -108,35 +105,37 @@ enum zio_stage {
ZIO_STAGE_OPEN = 1 << 0, /* RWFCI */
ZIO_STAGE_READ_BP_INIT = 1 << 1, /* R---- */
- ZIO_STAGE_FREE_BP_INIT = 1 << 2, /* --F-- */
- ZIO_STAGE_ISSUE_ASYNC = 1 << 3, /* RWF-- */
- ZIO_STAGE_WRITE_BP_INIT = 1 << 4, /* -W--- */
+ ZIO_STAGE_WRITE_BP_INIT = 1 << 2, /* -W--- */
+ ZIO_STAGE_FREE_BP_INIT = 1 << 3, /* --F-- */
+ ZIO_STAGE_ISSUE_ASYNC = 1 << 4, /* RWF-- */
+ ZIO_STAGE_WRITE_COMPRESS = 1 << 5, /* -W--- */
- ZIO_STAGE_CHECKSUM_GENERATE = 1 << 5, /* -W--- */
+ ZIO_STAGE_CHECKSUM_GENERATE = 1 << 6, /* -W--- */
- ZIO_STAGE_NOP_WRITE = 1 << 6, /* -W--- */
+ ZIO_STAGE_NOP_WRITE = 1 << 7, /* -W--- */
- ZIO_STAGE_DDT_READ_START = 1 << 7, /* R---- */
- ZIO_STAGE_DDT_READ_DONE = 1 << 8, /* R---- */
- ZIO_STAGE_DDT_WRITE = 1 << 9, /* -W--- */
- ZIO_STAGE_DDT_FREE = 1 << 10, /* --F-- */
+ ZIO_STAGE_DDT_READ_START = 1 << 8, /* R---- */
+ ZIO_STAGE_DDT_READ_DONE = 1 << 9, /* R---- */
+ ZIO_STAGE_DDT_WRITE = 1 << 10, /* -W--- */
+ ZIO_STAGE_DDT_FREE = 1 << 11, /* --F-- */
- ZIO_STAGE_GANG_ASSEMBLE = 1 << 11, /* RWFC- */
- ZIO_STAGE_GANG_ISSUE = 1 << 12, /* RWFC- */
+ ZIO_STAGE_GANG_ASSEMBLE = 1 << 12, /* RWFC- */
+ ZIO_STAGE_GANG_ISSUE = 1 << 13, /* RWFC- */
- ZIO_STAGE_DVA_ALLOCATE = 1 << 13, /* -W--- */
- ZIO_STAGE_DVA_FREE = 1 << 14, /* --F-- */
- ZIO_STAGE_DVA_CLAIM = 1 << 15, /* ---C- */
+ ZIO_STAGE_DVA_THROTTLE = 1 << 14, /* -W--- */
+ ZIO_STAGE_DVA_ALLOCATE = 1 << 15, /* -W--- */
+ ZIO_STAGE_DVA_FREE = 1 << 16, /* --F-- */
+ ZIO_STAGE_DVA_CLAIM = 1 << 17, /* ---C- */
- ZIO_STAGE_READY = 1 << 16, /* RWFCI */
+ ZIO_STAGE_READY = 1 << 18, /* RWFCI */
- ZIO_STAGE_VDEV_IO_START = 1 << 17, /* RW--I */
- ZIO_STAGE_VDEV_IO_DONE = 1 << 18, /* RW--I */
- ZIO_STAGE_VDEV_IO_ASSESS = 1 << 19, /* RW--I */
+ ZIO_STAGE_VDEV_IO_START = 1 << 19, /* RW--I */
+ ZIO_STAGE_VDEV_IO_DONE = 1 << 20, /* RW--I */
+ ZIO_STAGE_VDEV_IO_ASSESS = 1 << 21, /* RW--I */
- ZIO_STAGE_CHECKSUM_VERIFY = 1 << 20, /* R---- */
+ ZIO_STAGE_CHECKSUM_VERIFY = 1 << 22, /* R---- */
- ZIO_STAGE_DONE = 1 << 21 /* RWFCI */
+ ZIO_STAGE_DONE = 1 << 23 /* RWFCI */
};
#define ZIO_INTERLOCK_STAGES \
@@ -187,22 +186,27 @@ enum zio_stage {
#define ZIO_REWRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \
+ ZIO_STAGE_WRITE_COMPRESS | \
ZIO_STAGE_WRITE_BP_INIT)
#define ZIO_WRITE_PIPELINE \
(ZIO_WRITE_COMMON_STAGES | \
ZIO_STAGE_WRITE_BP_INIT | \
+ ZIO_STAGE_WRITE_COMPRESS | \
+ ZIO_STAGE_DVA_THROTTLE | \
ZIO_STAGE_DVA_ALLOCATE)
#define ZIO_DDT_CHILD_WRITE_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
ZIO_VDEV_IO_STAGES | \
+ ZIO_STAGE_DVA_THROTTLE | \
ZIO_STAGE_DVA_ALLOCATE)
#define ZIO_DDT_WRITE_PIPELINE \
(ZIO_INTERLOCK_STAGES | \
- ZIO_STAGE_ISSUE_ASYNC | \
ZIO_STAGE_WRITE_BP_INIT | \
+ ZIO_STAGE_ISSUE_ASYNC | \
+ ZIO_STAGE_WRITE_COMPRESS | \
ZIO_STAGE_CHECKSUM_GENERATE | \
ZIO_STAGE_DDT_WRITE)
diff --git a/zfs/include/sys/zio_priority.h b/zfs/include/sys/zio_priority.h
new file mode 100644
index 000000000000..3fc3589be0c1
--- /dev/null
+++ b/zfs/include/sys/zio_priority.h
@@ -0,0 +1,39 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2014 by Delphix. All rights reserved.
+ */
+#ifndef _ZIO_PRIORITY_H
+#define _ZIO_PRIORITY_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum zio_priority {
+ ZIO_PRIORITY_SYNC_READ,
+ ZIO_PRIORITY_SYNC_WRITE, /* ZIL */
+ ZIO_PRIORITY_ASYNC_READ, /* prefetch */
+ ZIO_PRIORITY_ASYNC_WRITE, /* spa_sync() */
+ ZIO_PRIORITY_SCRUB, /* asynchronous scrub/resilver reads */
+ ZIO_PRIORITY_NUM_QUEUEABLE,
+ ZIO_PRIORITY_NOW, /* non-queued i/os (e.g. free) */
+} zio_priority_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _ZIO_PRIORITY_H */
diff --git a/zfs/include/sys/zpl.h b/zfs/include/sys/zpl.h
index 9f0ac770df9c..1f97f2511a37 100644
--- a/zfs/include/sys/zpl.h
+++ b/zfs/include/sys/zpl.h
@@ -148,22 +148,21 @@ static inline bool
dir_emit(struct dir_context *ctx, const char *name, int namelen,
uint64_t ino, unsigned type)
{
- return (ctx->actor(ctx->dirent, name, namelen, ctx->pos, ino, type)
- == 0);
+ return (!ctx->actor(ctx->dirent, name, namelen, ctx->pos, ino, type));
}
static inline bool
dir_emit_dot(struct file *file, struct dir_context *ctx)
{
return (ctx->actor(ctx->dirent, ".", 1, ctx->pos,
- file->f_path.dentry->d_inode->i_ino, DT_DIR) == 0);
+ file_inode(file)->i_ino, DT_DIR) == 0);
}
static inline bool
dir_emit_dotdot(struct file *file, struct dir_context *ctx)
{
return (ctx->actor(ctx->dirent, "..", 2, ctx->pos,
- parent_ino(file->f_path.dentry), DT_DIR) == 0);
+ parent_ino(file_dentry(file)), DT_DIR) == 0);
}
static inline bool
diff --git a/zfs/include/sys/zrlock.h b/zfs/include/sys/zrlock.h
index dcd63f7b5b91..b6eba1a18ff4 100644
--- a/zfs/include/sys/zrlock.h
+++ b/zfs/include/sys/zrlock.h
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZRLOCK_H
@@ -44,12 +45,8 @@ typedef struct zrlock {
extern void zrl_init(zrlock_t *);
extern void zrl_destroy(zrlock_t *);
-#ifdef ZFS_DEBUG
-#define zrl_add(_z) zrl_add_debug((_z), __func__)
-extern void zrl_add_debug(zrlock_t *, const char *);
-#else
-extern void zrl_add(zrlock_t *);
-#endif
+#define zrl_add(_z) zrl_add_impl((_z), __func__)
+extern void zrl_add_impl(zrlock_t *, const char *);
extern void zrl_remove(zrlock_t *);
extern int zrl_tryenter(zrlock_t *);
extern void zrl_exit(zrlock_t *);
diff --git a/zfs/include/sys/zvol.h b/zfs/include/sys/zvol.h
index c3e386f0b79e..e8b084762a2d 100644
--- a/zfs/include/sys/zvol.h
+++ b/zfs/include/sys/zvol.h
@@ -32,12 +32,17 @@
#define ZVOL_OBJ 1ULL
#define ZVOL_ZAP_OBJ 2ULL
+#define SPEC_MAXOFFSET_T ((1LL << ((NBBY * sizeof (daddr32_t)) + \
+ DEV_BSHIFT - 1)) - 1)
+
extern void zvol_create_minors(spa_t *spa, const char *name, boolean_t async);
extern void zvol_remove_minors(spa_t *spa, const char *name, boolean_t async);
extern void zvol_rename_minors(spa_t *spa, const char *oldname,
const char *newname, boolean_t async);
#ifdef _KERNEL
+typedef struct zvol_state zvol_state_t;
+
extern int zvol_check_volsize(uint64_t volsize, uint64_t blocksize);
extern int zvol_check_volblocksize(const char *name, uint64_t volblocksize);
extern int zvol_get_stats(objset_t *os, nvlist_t *nv);
@@ -46,6 +51,10 @@ extern void zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
extern int zvol_set_volsize(const char *, uint64_t);
extern int zvol_set_volblocksize(const char *, uint64_t);
extern int zvol_set_snapdev(const char *, zprop_source_t, uint64_t);
+extern int zvol_set_volmode(const char *, zprop_source_t, uint64_t);
+extern zvol_state_t *zvol_suspend(const char *);
+extern int zvol_resume(zvol_state_t *);
+extern void *zvol_tag(zvol_state_t *);
extern int zvol_init(void);
extern void zvol_fini(void);
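
The zvol_suspend()/zvol_resume() pair added here brackets operations that
need the volume quiesced. A minimal sketch of the intended pairing (the
dataset name is illustrative and error handling is elided):

    zvol_state_t *zv;

    zv = zvol_suspend("pool/vol");
    /* ... operate on the dataset while the zvol is quiesced ... */
    if (zv != NULL)
        (void) zvol_resume(zv);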
diff --git a/zfs/include/zfeature_common.h b/zfs/include/zfeature_common.h
index e383c4ff7887..25d680ffcebd 100644
--- a/zfs/include/zfeature_common.h
+++ b/zfs/include/zfeature_common.h
@@ -20,8 +20,9 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
*/
#ifndef _ZFEATURE_COMMON_H
@@ -42,6 +43,7 @@ typedef enum spa_feature {
SPA_FEATURE_ASYNC_DESTROY,
SPA_FEATURE_EMPTY_BPOBJ,
SPA_FEATURE_LZ4_COMPRESS,
+ SPA_FEATURE_MULTI_VDEV_CRASH_DUMP,
SPA_FEATURE_SPACEMAP_HISTOGRAM,
SPA_FEATURE_ENABLED_TXG,
SPA_FEATURE_HOLE_BIRTH,
@@ -50,20 +52,33 @@ typedef enum spa_feature {
SPA_FEATURE_BOOKMARKS,
SPA_FEATURE_FS_SS_LIMIT,
SPA_FEATURE_LARGE_BLOCKS,
+ SPA_FEATURE_LARGE_DNODE,
+ SPA_FEATURE_SHA512,
+ SPA_FEATURE_SKEIN,
+ SPA_FEATURE_EDONR,
+ SPA_FEATURE_USEROBJ_ACCOUNTING,
SPA_FEATURES
} spa_feature_t;
#define SPA_FEATURE_DISABLED (-1ULL)
+typedef enum zfeature_flags {
+ /* Can open pool readonly even if this feature is not supported. */
+ ZFEATURE_FLAG_READONLY_COMPAT = (1 << 0),
+ /* Is this feature necessary to read the MOS? */
+ ZFEATURE_FLAG_MOS = (1 << 1),
+ /* Activate this feature at the same time it is enabled. */
+ ZFEATURE_FLAG_ACTIVATE_ON_ENABLE = (1 << 2),
+ /* Each dataset has a field set if it has ever used this feature. */
+ ZFEATURE_FLAG_PER_DATASET = (1 << 3)
+} zfeature_flags_t;
+
typedef struct zfeature_info {
spa_feature_t fi_feature;
const char *fi_uname; /* User-facing feature name */
const char *fi_guid; /* On-disk feature identifier */
const char *fi_desc; /* Feature description */
- boolean_t fi_can_readonly; /* Can open pool readonly w/o support? */
- boolean_t fi_mos; /* Is the feature necessary to read the MOS? */
- /* Activate this feature at the same time it is enabled */
- boolean_t fi_activate_on_enable;
+ zfeature_flags_t fi_flags;
/* array of dependencies, terminated by SPA_FEATURE_NONE */
const spa_feature_t *fi_depends;
} zfeature_info_t;
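
With the three booleans collapsed into fi_flags, call sites become mask
tests against zfeature_flags_t. A sketch of the resulting idiom (feature is
a hypothetical zfeature_info_t pointer):

    /* previously: if (feature->fi_can_readonly) */
    if (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) {
        /* pool may be opened read-only without this feature */
    }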
diff --git a/zfs/include/zfs_deleg.h b/zfs/include/zfs_deleg.h
index 16133c59f33f..95db9921f574 100644
--- a/zfs/include/zfs_deleg.h
+++ b/zfs/include/zfs_deleg.h
@@ -63,6 +63,10 @@ typedef enum {
ZFS_DELEG_NOTE_GROUPQUOTA,
ZFS_DELEG_NOTE_USERUSED,
ZFS_DELEG_NOTE_GROUPUSED,
+ ZFS_DELEG_NOTE_USEROBJQUOTA,
+ ZFS_DELEG_NOTE_GROUPOBJQUOTA,
+ ZFS_DELEG_NOTE_USEROBJUSED,
+ ZFS_DELEG_NOTE_GROUPOBJUSED,
ZFS_DELEG_NOTE_HOLD,
ZFS_DELEG_NOTE_RELEASE,
ZFS_DELEG_NOTE_DIFF,
diff --git a/zfs/include/zfs_fletcher.h b/zfs/include/zfs_fletcher.h
index b49df0cf4f0f..5c7a61c56259 100644
--- a/zfs/include/zfs_fletcher.h
+++ b/zfs/include/zfs_fletcher.h
@@ -22,12 +22,15 @@
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
#ifndef _ZFS_FLETCHER_H
#define _ZFS_FLETCHER_H
#include <sys/types.h>
-#include <sys/spa.h>
+#include <sys/spa_checksum.h>
#ifdef __cplusplus
extern "C" {
@@ -35,16 +38,114 @@ extern "C" {
/*
* fletcher checksum functions
+ *
+ * Note: Fletcher checksum methods expect the buffer size to be 4-byte
+ * aligned. This limitation stems from the algorithm design: performing an
+ * incremental checksum without that alignment would yield different
+ * results, so the code asserts that sizes are aligned.
+ * For compatibility, some code paths must checksum buffers whose sizes are
+ * not 4-byte aligned. The fletcher_4_native_varsize() method is provided
+ * for this purpose; it ignores the last (size % 4) bytes of the data
+ * buffer.
+ */
+void fletcher_init(zio_cksum_t *);
+void fletcher_2_native(const void *, uint64_t, const void *, zio_cksum_t *);
+void fletcher_2_byteswap(const void *, uint64_t, const void *, zio_cksum_t *);
+void fletcher_4_native(const void *, uint64_t, const void *, zio_cksum_t *);
+int fletcher_2_incremental_native(void *, size_t, void *);
+int fletcher_2_incremental_byteswap(void *, size_t, void *);
+void fletcher_4_native_varsize(const void *, uint64_t, zio_cksum_t *);
+void fletcher_4_byteswap(const void *, uint64_t, const void *, zio_cksum_t *);
+int fletcher_4_incremental_native(void *, size_t, void *);
+int fletcher_4_incremental_byteswap(void *, size_t, void *);
+int fletcher_4_impl_set(const char *selector);
+void fletcher_4_init(void);
+void fletcher_4_fini(void);
+
+
+
+/* Internal fletcher ctx */
+
+typedef struct zfs_fletcher_superscalar {
+ uint64_t v[4];
+} zfs_fletcher_superscalar_t;
+
+typedef struct zfs_fletcher_sse {
+ uint64_t v[2] __attribute__((aligned(16)));
+} zfs_fletcher_sse_t;
+
+typedef struct zfs_fletcher_avx {
+ uint64_t v[4] __attribute__((aligned(32)));
+} zfs_fletcher_avx_t;
+
+typedef struct zfs_fletcher_avx512 {
+ uint64_t v[8] __attribute__((aligned(64)));
+} zfs_fletcher_avx512_t;
+
+typedef struct zfs_fletcher_aarch64_neon {
+ uint64_t v[2] __attribute__((aligned(16)));
+} zfs_fletcher_aarch64_neon_t;
+
+
+typedef union fletcher_4_ctx {
+ zio_cksum_t scalar;
+ zfs_fletcher_superscalar_t superscalar[4];
+
+#if defined(HAVE_SSE2) || (defined(HAVE_SSE2) && defined(HAVE_SSSE3))
+ zfs_fletcher_sse_t sse[4];
+#endif
+#if defined(HAVE_AVX) && defined(HAVE_AVX2)
+ zfs_fletcher_avx_t avx[4];
+#endif
+#if defined(__x86_64) && defined(HAVE_AVX512F)
+ zfs_fletcher_avx512_t avx512[4];
+#endif
+#if defined(__aarch64__)
+ zfs_fletcher_aarch64_neon_t aarch64_neon[4];
+#endif
+} fletcher_4_ctx_t;
+
+/*
+ * fletcher checksum operations (per-implementation init/fini/compute)
*/
+typedef void (*fletcher_4_init_f)(fletcher_4_ctx_t *);
+typedef void (*fletcher_4_fini_f)(fletcher_4_ctx_t *, zio_cksum_t *);
+typedef void (*fletcher_4_compute_f)(fletcher_4_ctx_t *,
+ const void *, uint64_t);
+
+typedef struct fletcher_4_func {
+ fletcher_4_init_f init_native;
+ fletcher_4_fini_f fini_native;
+ fletcher_4_compute_f compute_native;
+ fletcher_4_init_f init_byteswap;
+ fletcher_4_fini_f fini_byteswap;
+ fletcher_4_compute_f compute_byteswap;
+ boolean_t (*valid)(void);
+ const char *name;
+} fletcher_4_ops_t;
+
+extern const fletcher_4_ops_t fletcher_4_superscalar_ops;
+extern const fletcher_4_ops_t fletcher_4_superscalar4_ops;
+
+#if defined(HAVE_SSE2)
+extern const fletcher_4_ops_t fletcher_4_sse2_ops;
+#endif
+
+#if defined(HAVE_SSE2) && defined(HAVE_SSSE3)
+extern const fletcher_4_ops_t fletcher_4_ssse3_ops;
+#endif
+
+#if defined(HAVE_AVX) && defined(HAVE_AVX2)
+extern const fletcher_4_ops_t fletcher_4_avx2_ops;
+#endif
+
+#if defined(__x86_64) && defined(HAVE_AVX512F)
+extern const fletcher_4_ops_t fletcher_4_avx512f_ops;
+#endif
-void fletcher_2_native(const void *, uint64_t, zio_cksum_t *);
-void fletcher_2_byteswap(const void *, uint64_t, zio_cksum_t *);
-void fletcher_4_native(const void *, uint64_t, zio_cksum_t *);
-void fletcher_4_byteswap(const void *, uint64_t, zio_cksum_t *);
-void fletcher_4_incremental_native(const void *, uint64_t,
- zio_cksum_t *);
-void fletcher_4_incremental_byteswap(const void *, uint64_t,
- zio_cksum_t *);
+#if defined(__aarch64__)
+extern const fletcher_4_ops_t fletcher_4_aarch64_neon_ops;
+#endif
#ifdef __cplusplus
}
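
Per the comment above, the incremental entry points carry their running
state in a zio_cksum_t, and each chunk fed to them must be 4-byte aligned.
A minimal usage sketch, assuming buf1/buf2 are buffers whose sizes are
multiples of four:

    zio_cksum_t zc;

    fletcher_init(&zc);
    (void) fletcher_4_incremental_native(buf1, size1, &zc);
    (void) fletcher_4_incremental_native(buf2, size2, &zc);
    /* zc now holds the fletcher-4 checksum of buf1 followed by buf2 */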
diff --git a/zfs/include/zfs_namecheck.h b/zfs/include/zfs_namecheck.h
index cbefbaa0d5ab..db70641dbab2 100644
--- a/zfs/include/zfs_namecheck.h
+++ b/zfs/include/zfs_namecheck.h
@@ -38,7 +38,7 @@ typedef enum {
NAME_ERR_EMPTY_COMPONENT, /* name contains an empty component */
NAME_ERR_TRAILING_SLASH, /* name ends with a slash */
NAME_ERR_INVALCHAR, /* invalid character found */
- NAME_ERR_MULTIPLE_AT, /* multiple '@' characters found */
+ NAME_ERR_MULTIPLE_DELIMITERS, /* multiple '@'/'#' delimiters found */
NAME_ERR_NOLETTER, /* pool doesn't begin with a letter */
NAME_ERR_RESERVED, /* entire name is reserved */
NAME_ERR_DISKLIKE, /* reserved disk name (c[0-9].*) */
@@ -49,6 +49,7 @@ typedef enum {
#define ZFS_PERMSET_MAXLEN 64
int pool_namecheck(const char *, namecheck_err_t *, char *);
+int entity_namecheck(const char *, namecheck_err_t *, char *);
int dataset_namecheck(const char *, namecheck_err_t *, char *);
int mountpoint_namecheck(const char *, namecheck_err_t *);
int zfs_component_namecheck(const char *, namecheck_err_t *, char *);
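
entity_namecheck() generalizes dataset_namecheck() to names that may carry
an '@' snapshot or '#' bookmark delimiter, which is also why
NAME_ERR_MULTIPLE_AT becomes NAME_ERR_MULTIPLE_DELIMITERS above. A minimal
usage sketch:

    namecheck_err_t why;
    char what;

    if (entity_namecheck("pool/fs#bookmark", &why, &what) != 0) {
        /* why identifies the failure, e.g. NAME_ERR_MULTIPLE_DELIMITERS;
         * for NAME_ERR_INVALCHAR, what holds the offending character. */
    }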
diff --git a/zfs/include/zpios-ctl.h b/zfs/include/zpios-ctl.h
index 9a47ff91d55c..f17f11532608 100644
--- a/zfs/include/zpios-ctl.h
+++ b/zfs/include/zpios-ctl.h
@@ -1,7 +1,7 @@
/*
* ZPIOS is a heavily modified version of the original PIOS test code.
* It is designed to have the test code running in the Linux kernel
- * against ZFS while still being flexibly controled from user space.
+ * against ZFS while still being flexibly controlled from user space.
*
* Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -29,6 +29,8 @@
*
* You should have received a copy of the GNU General Public License along
* with ZPIOS. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (c) 2015, Intel Corporation.
*/
#ifndef _ZPIOS_CTL_H
@@ -116,6 +118,7 @@ typedef struct zpios_cmd {
uint32_t cmd_chunk_noise; /* Chunk noise */
uint32_t cmd_thread_delay; /* Thread delay */
uint32_t cmd_flags; /* Test flags */
+ uint32_t cmd_block_size; /* ZFS block size */
char cmd_pre[ZPIOS_PATH_SIZE]; /* Pre-exec hook */
char cmd_post[ZPIOS_PATH_SIZE]; /* Post-exec hook */
char cmd_log[ZPIOS_PATH_SIZE]; /* Requested log dir */
@@ -143,7 +146,7 @@ zpios_timespec_normalize(zpios_timespec_t *ts, uint32_t sec, uint32_t nsec)
nsec -= NSEC_PER_SEC;
sec++;
}
- while (nsec < 0) {
+ while (((int32_t)nsec) < 0) {
nsec += NSEC_PER_SEC;
sec--;
}
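
The cast above matters because nsec is a uint32_t: the old `while (nsec < 0)`
test could never be true, so a subtraction that logically went negative was
left as a huge wrapped value. Reinterpreting the same bit pattern as int32_t
recovers the negative count and lets the loop renormalize. A worked example:

    uint32_t nsec = 200000000 - 500000000;   /* wraps to 3994967296 */

    /* (int32_t)nsec == -300000000, so one iteration of the fixed loop
     * yields nsec = 700000000 and decrements sec by one. */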
diff --git a/zfs/include/zpios-internal.h b/zfs/include/zpios-internal.h
index 4b99b4ce31f3..bb8cee0d7f40 100644
--- a/zfs/include/zpios-internal.h
+++ b/zfs/include/zpios-internal.h
@@ -1,7 +1,7 @@
/*
* ZPIOS is a heavily modified version of the original PIOS test code.
* It is designed to have the test code running in the Linux kernel
- * against ZFS while still being flexibly controled from user space.
+ * against ZFS while still being flexibly controlled from user space.
*
* Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -29,6 +29,8 @@
*
* You should have received a copy of the GNU General Public License along
* with ZPIOS. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (c) 2015, Intel Corporation.
*/
#ifndef _ZPIOS_INTERNAL_H
@@ -79,6 +81,7 @@ typedef struct run_args {
__u32 chunk_noise;
__u32 thread_delay;
__u32 flags;
+ __u32 block_size;
char pre[ZPIOS_PATH_SIZE];
char post[ZPIOS_PATH_SIZE];
char log[ZPIOS_PATH_SIZE];
diff --git a/zfs/module/Makefile.in b/zfs/module/Makefile.in
index d4ddee2f429f..093cf2286cef 100644
--- a/zfs/module/Makefile.in
+++ b/zfs/module/Makefile.in
@@ -4,23 +4,28 @@ subdir-m += unicode
subdir-m += zcommon
subdir-m += zfs
subdir-m += zpios
+subdir-m += icp
INSTALL_MOD_DIR ?= extra
ZFS_MODULE_CFLAGS += -include @SPL_OBJ@/spl_config.h
ZFS_MODULE_CFLAGS += -include @abs_top_builddir@/zfs_config.h
ZFS_MODULE_CFLAGS += -I@abs_top_srcdir@/include -I@SPL@/include -I@SPL@
+@CONFIG_QAT_TRUE@ZFS_MODULE_CFLAGS += -I@QAT_SRC@/include
+@CONFIG_QAT_TRUE@KBUILD_EXTRA_SYMBOLS += @QAT_SYMBOLS@
export ZFS_MODULE_CFLAGS
+SUBDIR_TARGETS = icp
+
modules:
@# Make the exported SPL symbols available to these modules.
@# They may be in the root of SPL_OBJ when building against
@# installed devel headers, or they may be in the module
@# subdirectory when building against the spl source tree.
@if [ -f @SPL_OBJ@/@SPL_SYMBOLS@ ]; then \
- /bin/cp @SPL_OBJ@/@SPL_SYMBOLS@ .; \
+ cp @SPL_OBJ@/@SPL_SYMBOLS@ .; \
elif [ -f @SPL_OBJ@/module/@SPL_SYMBOLS@ ]; then \
- /bin/cp @SPL_OBJ@/module/@SPL_SYMBOLS@ .; \
+ cp @SPL_OBJ@/module/@SPL_SYMBOLS@ .; \
else \
echo -e "\n" \
"*** Missing spl symbols ensure you have built the spl:\n" \
@@ -28,6 +33,9 @@ modules:
"*** - @SPL_OBJ@/module/@SPL_SYMBOLS@\n"; \
exit 1; \
fi
+ list='$(SUBDIR_TARGETS)'; for targetdir in $$list; do \
+ $(MAKE) -C $$targetdir; \
+ done
$(MAKE) -C @LINUX_OBJ@ SUBDIRS=`pwd` @KERNELMAKE_PARAMS@ CONFIG_ZFS=m $@
clean:
@@ -64,8 +72,8 @@ modules_uninstall:
distdir:
list='$(subdir-m)'; for subdir in $$list; do \
- (find @top_srcdir@/module/$$subdir -name '*.c' -o -name '*.h' |\
- xargs /bin/cp -t $$distdir/$$subdir); \
+ (cd @top_srcdir@/module && find $$subdir -name '*.c' -o -name '*.h' -o -name '*.S' |\
+ xargs cp --parents -t $$distdir); \
done
distclean maintainer-clean: clean
diff --git a/zfs/module/icp/Makefile.in b/zfs/module/icp/Makefile.in
new file mode 100644
index 000000000000..77b2ec1b546e
--- /dev/null
+++ b/zfs/module/icp/Makefile.in
@@ -0,0 +1,93 @@
+src = @abs_top_srcdir@/module/icp
+obj = @abs_builddir@
+
+MODULE := icp
+
+TARGET_ASM_DIR = @TARGET_ASM_DIR@
+
+ifeq ($(TARGET_ASM_DIR), asm-x86_64)
+ASM_SOURCES := asm-x86_64/aes/aeskey.o
+ASM_SOURCES += asm-x86_64/aes/aes_amd64.o
+ASM_SOURCES += asm-x86_64/aes/aes_intel.o
+ASM_SOURCES += asm-x86_64/modes/gcm_intel.o
+ASM_SOURCES += asm-x86_64/sha1/sha1-x86_64.o
+ASM_SOURCES += asm-x86_64/sha2/sha256_impl.o
+ASM_SOURCES += asm-x86_64/sha2/sha512_impl.o
+endif
+
+ifeq ($(TARGET_ASM_DIR), asm-i386)
+ASM_SOURCES :=
+endif
+
+ifeq ($(TARGET_ASM_DIR), asm-generic)
+ASM_SOURCES :=
+endif
+
+EXTRA_CFLAGS = $(ZFS_MODULE_CFLAGS) @KERNELCPPFLAGS@
+EXTRA_AFLAGS = $(ZFS_MODULE_CFLAGS) @KERNELCPPFLAGS@
+
+obj-$(CONFIG_ZFS) := $(MODULE).o
+
+ccflags-y += -I$(src)/include
+asflags-y += -I$(src)/include
+asflags-y += $(ZFS_MODULE_CFLAGS)
+
+$(MODULE)-objs += illumos-crypto.o
+$(MODULE)-objs += api/kcf_cipher.o
+$(MODULE)-objs += api/kcf_digest.o
+$(MODULE)-objs += api/kcf_mac.o
+$(MODULE)-objs += api/kcf_miscapi.o
+$(MODULE)-objs += api/kcf_ctxops.o
+$(MODULE)-objs += core/kcf_callprov.o
+$(MODULE)-objs += core/kcf_prov_tabs.o
+$(MODULE)-objs += core/kcf_sched.o
+$(MODULE)-objs += core/kcf_mech_tabs.o
+$(MODULE)-objs += core/kcf_prov_lib.o
+$(MODULE)-objs += spi/kcf_spi.o
+$(MODULE)-objs += io/aes.o
+$(MODULE)-objs += io/edonr_mod.o
+$(MODULE)-objs += io/sha1_mod.o
+$(MODULE)-objs += io/sha2_mod.o
+$(MODULE)-objs += io/skein_mod.o
+$(MODULE)-objs += os/modhash.o
+$(MODULE)-objs += os/modconf.o
+$(MODULE)-objs += algs/modes/cbc.o
+$(MODULE)-objs += algs/modes/ccm.o
+$(MODULE)-objs += algs/modes/ctr.o
+$(MODULE)-objs += algs/modes/ecb.o
+$(MODULE)-objs += algs/modes/gcm.o
+$(MODULE)-objs += algs/modes/modes.o
+$(MODULE)-objs += algs/aes/aes_impl.o
+$(MODULE)-objs += algs/aes/aes_modes.o
+$(MODULE)-objs += algs/edonr/edonr.o
+$(MODULE)-objs += algs/sha1/sha1.o
+$(MODULE)-objs += algs/sha2/sha2.o
+$(MODULE)-objs += algs/skein/skein.o
+$(MODULE)-objs += algs/skein/skein_block.o
+$(MODULE)-objs += algs/skein/skein_iv.o
+$(MODULE)-objs += $(ASM_SOURCES)
+
+ICP_DIRS = \
+ api \
+ core \
+ spi \
+ io \
+ os \
+ algs \
+ algs/aes \
+ algs/edonr \
+ algs/modes \
+ algs/sha1 \
+ algs/sha2 \
+ algs/skein \
+ asm-x86_64 \
+ asm-x86_64/aes \
+ asm-x86_64/modes \
+ asm-x86_64/sha1 \
+ asm-x86_64/sha2 \
+ asm-i386 \
+ asm-generic
+
+all:
+ mkdir -p $(ICP_DIRS)
diff --git a/zfs/module/icp/algs/aes/aes_impl.c b/zfs/module/icp/algs/aes/aes_impl.c
new file mode 100644
index 000000000000..4c17e2a3001e
--- /dev/null
+++ b/zfs/module/icp/algs/aes/aes_impl.c
@@ -0,0 +1,1618 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#if defined(_KERNEL) && defined(__amd64)
+#include <linux/simd_x86.h>
+
+#define KPREEMPT_DISABLE kfpu_begin()
+#define KPREEMPT_ENABLE kfpu_end()
+
+#else
+#define KPREEMPT_DISABLE
+#define KPREEMPT_ENABLE
+#endif /* _KERNEL */
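+/*
+ * These macros bracket SIMD code: kfpu_begin()/kfpu_end() save the FPU
+ * state and disable preemption while vector instructions run, e.g.
+ *
+ * KPREEMPT_DISABLE;
+ * aes_encrypt_intel(rk, Nr, pt, ct);
+ * KPREEMPT_ENABLE;
+ */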
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/spi.h>
+#include <modes/modes.h>
+#include <aes/aes_impl.h>
+
+
+
+/*
+ * This file is derived from the file rijndael-alg-fst.c taken from the
+ * "optimized C code v3.0" on the "rijndael home page"
+ * http://www.iaik.tu-graz.ac.at/research/krypto/AES/old/~rijmen/rijndael/
+ * pointed to by the NIST web site http://csrc.nist.gov/archive/aes/
+ *
+ * The following note is from the original file:
+ */
+
+/*
+ * rijndael-alg-fst.c
+ *
+ * @version 3.0 (December 2000)
+ *
+ * Optimised ANSI C code for the Rijndael cipher (now AES)
+ *
+ * @author Vincent Rijmen <vincent.rijmen at esat.kuleuven.ac.be>
+ * @author Antoon Bosselaers <antoon.bosselaers at esat.kuleuven.ac.be>
+ * @author Paulo Barreto <paulo.barreto at terra.com.br>
+ *
+ * This code is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#if defined(__amd64)
+
+/* These functions are used to execute amd64 instructions for AMD or Intel: */
+extern int rijndael_key_setup_enc_amd64(uint32_t rk[],
+ const uint32_t cipherKey[], int keyBits);
+extern int rijndael_key_setup_dec_amd64(uint32_t rk[],
+ const uint32_t cipherKey[], int keyBits);
+extern void aes_encrypt_amd64(const uint32_t rk[], int Nr,
+ const uint32_t pt[4], uint32_t ct[4]);
+extern void aes_decrypt_amd64(const uint32_t rk[], int Nr,
+ const uint32_t ct[4], uint32_t pt[4]);
+
+/* These functions are used to execute Intel-specific AES-NI instructions: */
+extern int rijndael_key_setup_enc_intel(uint32_t rk[],
+ const uint32_t cipherKey[], uint64_t keyBits);
+extern int rijndael_key_setup_dec_intel(uint32_t rk[],
+ const uint32_t cipherKey[], uint64_t keyBits);
+extern void aes_encrypt_intel(const uint32_t rk[], int Nr,
+ const uint32_t pt[4], uint32_t ct[4]);
+extern void aes_decrypt_intel(const uint32_t rk[], int Nr,
+ const uint32_t ct[4], uint32_t pt[4]);
+
+static int intel_aes_instructions_present(void);
+
+#define AES_ENCRYPT_IMPL(a, b, c, d, e) rijndael_encrypt(a, b, c, d, e)
+#define AES_DECRYPT_IMPL(a, b, c, d, e) rijndael_decrypt(a, b, c, d, e)
+
+#else /* Generic C implementation */
+
+#define AES_ENCRYPT_IMPL(a, b, c, d, e) rijndael_encrypt(a, b, c, d)
+#define AES_DECRYPT_IMPL(a, b, c, d, e) rijndael_decrypt(a, b, c, d)
+#define rijndael_key_setup_enc_raw rijndael_key_setup_enc
+#endif /* __amd64 */
+
+#if defined(_LITTLE_ENDIAN) && !defined(__amd64)
+#define AES_BYTE_SWAP
+#endif
+
+
+#if !defined(__amd64)
+/*
+ * Constant tables
+ */
+
+/*
+ * Te0[x] = S [x].[02, 01, 01, 03];
+ * Te1[x] = S [x].[03, 02, 01, 01];
+ * Te2[x] = S [x].[01, 03, 02, 01];
+ * Te3[x] = S [x].[01, 01, 03, 02];
+ * Te4[x] = S [x].[01, 01, 01, 01];
+ *
+ * Td0[x] = Si[x].[0e, 09, 0d, 0b];
+ * Td1[x] = Si[x].[0b, 0e, 09, 0d];
+ * Td2[x] = Si[x].[0d, 0b, 0e, 09];
+ * Td3[x] = Si[x].[09, 0d, 0b, 0e];
+ * Td4[x] = Si[x].[01, 01, 01, 01];
+ */
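+/*
+ * For example, S[0x00] = 0x63; in GF(2^8), 0x63 . {02, 01, 01, 03} =
+ * {c6, 63, 63, a5}, hence Te0[0x00] = 0xc66363a5, the first entry of
+ * Te0 below.
+ */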
+
+/* Encrypt Sbox constants (for the substitute bytes operation) */
+
+static const uint32_t Te0[256] =
+{
+ 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
+ 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
+ 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
+ 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
+ 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
+ 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
+ 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
+ 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
+ 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
+ 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
+ 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
+ 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
+ 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
+ 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
+ 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
+ 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
+ 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
+ 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
+ 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
+ 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
+ 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
+ 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
+ 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
+ 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
+ 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
+ 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
+ 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
+ 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
+ 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
+ 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
+ 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
+ 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
+ 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
+ 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
+ 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
+ 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
+ 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
+ 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
+ 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
+ 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
+ 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
+ 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
+ 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
+ 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
+ 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
+ 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
+ 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
+ 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
+ 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
+ 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
+ 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
+ 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
+ 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
+ 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
+ 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
+ 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
+ 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
+ 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
+ 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
+ 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
+ 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
+ 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
+ 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
+ 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU
+};
+
+
+static const uint32_t Te1[256] =
+{
+ 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
+ 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
+ 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
+ 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
+ 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
+ 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
+ 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
+ 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
+ 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
+ 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
+ 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
+ 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
+ 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
+ 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
+ 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
+ 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
+ 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
+ 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
+ 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
+ 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
+ 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
+ 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
+ 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
+ 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
+ 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
+ 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
+ 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
+ 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
+ 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
+ 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
+ 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
+ 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
+ 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
+ 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
+ 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
+ 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
+ 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
+ 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
+ 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
+ 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
+ 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
+ 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
+ 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
+ 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
+ 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
+ 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
+ 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
+ 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
+ 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
+ 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
+ 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
+ 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
+ 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
+ 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
+ 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
+ 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
+ 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
+ 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
+ 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
+ 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
+ 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
+ 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
+ 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
+ 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U
+};
+
+
+static const uint32_t Te2[256] =
+{
+ 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
+ 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
+ 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
+ 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
+ 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
+ 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
+ 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
+ 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
+ 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
+ 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
+ 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
+ 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
+ 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
+ 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
+ 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
+ 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
+ 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
+ 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
+ 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
+ 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
+ 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
+ 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
+ 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
+ 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
+ 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
+ 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
+ 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
+ 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
+ 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
+ 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
+ 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
+ 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
+ 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
+ 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
+ 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
+ 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
+ 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
+ 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
+ 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
+ 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
+ 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
+ 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
+ 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
+ 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
+ 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
+ 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
+ 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
+ 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
+ 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
+ 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
+ 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
+ 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
+ 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
+ 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
+ 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
+ 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
+ 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
+ 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
+ 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
+ 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
+ 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
+ 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
+ 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
+ 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U
+};
+
+
+static const uint32_t Te3[256] =
+{
+ 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
+ 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
+ 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
+ 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
+ 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
+ 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
+ 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
+ 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
+ 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
+ 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
+ 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
+ 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
+ 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
+ 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
+ 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
+ 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
+ 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
+ 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
+ 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
+ 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
+ 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
+ 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
+ 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
+ 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
+ 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
+ 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
+ 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
+ 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
+ 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
+ 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
+ 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
+ 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
+ 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
+ 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
+ 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
+ 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
+ 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
+ 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
+ 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
+ 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
+ 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
+ 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
+ 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
+ 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
+ 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
+ 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
+ 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
+ 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
+ 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
+ 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
+ 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
+ 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
+ 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
+ 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
+ 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
+ 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
+ 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
+ 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
+ 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
+ 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
+ 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
+ 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
+ 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
+ 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU
+};
+
+static const uint32_t Te4[256] =
+{
+ 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
+ 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
+ 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
+ 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
+ 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
+ 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
+ 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
+ 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
+ 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
+ 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
+ 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
+ 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
+ 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
+ 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
+ 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
+ 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
+ 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
+ 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
+ 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
+ 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
+ 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
+ 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
+ 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
+ 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
+ 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
+ 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
+ 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
+ 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
+ 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
+ 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
+ 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
+ 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
+ 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
+ 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
+ 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
+ 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
+ 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
+ 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
+ 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
+ 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
+ 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
+ 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
+ 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
+ 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
+ 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
+ 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
+ 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
+ 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
+ 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
+ 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
+ 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
+ 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
+ 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
+ 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
+ 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
+ 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
+ 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
+ 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
+ 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
+ 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
+ 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
+ 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
+ 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
+ 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U
+};
+
+/* Decrypt Sbox constants (for the substitute bytes operation) */
+
+static const uint32_t Td0[256] =
+{
+ 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
+ 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
+ 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
+ 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
+ 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
+ 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
+ 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
+ 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
+ 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
+ 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
+ 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
+ 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
+ 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
+ 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
+ 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
+ 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
+ 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
+ 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
+ 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
+ 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
+ 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
+ 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
+ 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
+ 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
+ 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
+ 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
+ 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
+ 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
+ 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
+ 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
+ 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
+ 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
+ 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
+ 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
+ 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
+ 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
+ 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
+ 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
+ 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
+ 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
+ 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
+ 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
+ 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
+ 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
+ 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
+ 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
+ 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
+ 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
+ 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
+ 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
+ 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
+ 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
+ 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
+ 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
+ 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
+ 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
+ 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
+ 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
+ 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
+ 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
+ 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
+ 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
+ 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
+ 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U
+};
+
+static const uint32_t Td1[256] =
+{
+ 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
+ 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
+ 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
+ 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
+ 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
+ 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
+ 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
+ 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
+ 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
+ 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
+ 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
+ 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
+ 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
+ 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
+ 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
+ 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
+ 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
+ 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
+ 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
+ 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
+ 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
+ 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
+ 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
+ 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
+ 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
+ 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
+ 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
+ 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
+ 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
+ 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
+ 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
+ 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
+ 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
+ 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
+ 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
+ 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
+ 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
+ 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
+ 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
+ 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
+ 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
+ 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
+ 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
+ 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
+ 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
+ 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
+ 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
+ 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
+ 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
+ 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
+ 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
+ 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
+ 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
+ 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
+ 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
+ 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
+ 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
+ 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
+ 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
+ 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
+ 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
+ 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
+ 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
+ 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U
+};
+
+static const uint32_t Td2[256] =
+{
+ 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
+ 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
+ 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
+ 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
+ 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
+ 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
+ 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
+ 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
+ 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
+ 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
+ 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
+ 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
+ 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
+ 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
+ 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
+ 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
+ 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
+ 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
+ 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
+ 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
+ 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
+ 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
+ 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
+ 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
+ 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
+ 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
+ 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
+ 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
+ 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
+ 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
+ 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
+ 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
+ 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
+ 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
+ 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
+ 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
+ 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
+ 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
+ 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
+ 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
+ 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
+ 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
+ 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
+ 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
+ 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
+ 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
+ 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
+ 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
+ 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
+ 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
+ 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
+ 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
+ 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
+ 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
+ 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
+ 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
+ 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
+ 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
+ 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
+ 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
+ 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
+ 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
+ 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
+ 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U
+};
+
+static const uint32_t Td3[256] =
+{
+ 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
+ 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
+ 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
+ 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
+ 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
+ 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
+ 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
+ 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
+ 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
+ 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
+ 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
+ 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
+ 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
+ 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
+ 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
+ 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
+ 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
+ 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
+ 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
+ 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
+ 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
+ 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
+ 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
+ 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
+ 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
+ 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
+ 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
+ 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
+ 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
+ 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
+ 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
+ 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
+ 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
+ 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
+ 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
+ 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
+ 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
+ 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
+ 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
+ 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
+ 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
+ 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
+ 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
+ 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
+ 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
+ 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
+ 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
+ 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
+ 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
+ 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
+ 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
+ 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
+ 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
+ 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
+ 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
+ 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
+ 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
+ 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
+ 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
+ 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
+ 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
+ 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
+ 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
+ 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U
+};
+
+static const uint32_t Td4[256] =
+{
+ 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
+ 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
+ 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
+ 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
+ 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U,
+ 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
+ 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U,
+ 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
+ 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U,
+ 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
+ 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU,
+ 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
+ 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U,
+ 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
+ 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U,
+ 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
+ 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U,
+ 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
+ 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU,
+ 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
+ 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U,
+ 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
+ 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U,
+ 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
+ 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U,
+ 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
+ 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U,
+ 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
+ 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU,
+ 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
+ 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U,
+ 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
+ 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
+ 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
+ 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
+ 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
+ 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
+ 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
+ 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
+ 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
+ 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
+ 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
+ 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
+ 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
+ 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
+ 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
+ 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
+ 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
+ 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
+ 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
+ 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
+ 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
+ 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
+ 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
+ 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
+ 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
+ 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
+ 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
+ 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
+ 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
+ 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
+ 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
+ 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
+ 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU
+};
+
+/* Rcon is Round Constant; used for encryption key expansion */
+static const uint32_t rcon[RC_LENGTH] =
+{
+ /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+ 0x01000000, 0x02000000, 0x04000000, 0x08000000,
+ 0x10000000, 0x20000000, 0x40000000, 0x80000000,
+ 0x1B000000, 0x36000000
+};
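+
+/*
+ * Editorial sketch, not part of the original patch: the rcon values are
+ * successive powers of x in GF(2^8) modulo x^8 + x^4 + x^3 + x + 1,
+ * placed in the most significant byte. A hypothetical self-check
+ * (ASSERT3U from zfs_context is assumed available):
+ */
+#ifdef AES_RCON_SELFTEST	/* illustrative only; never defined */
+static uint8_t
+xtime(uint8_t b)		/* multiply by x in GF(2^8) */
+{
+	return ((uint8_t)((b << 1) ^ ((b & 0x80) ? 0x1b : 0)));
+}
+
+static void
+rcon_selftest(void)
+{
+	uint8_t c = 0x01;
+	int i;
+
+	for (i = 0; i < RC_LENGTH; i++) {
+		ASSERT3U(rcon[i], ==, (uint32_t)c << 24);
+		c = xtime(c);
+	}
+}
+#endif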
+
+
+/*
+ * Expand the cipher key into the encryption key schedule.
+ *
+ * Return the number of rounds for the given cipher key size.
+ * The size of the key schedule depends on the number of rounds
+ * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
+ *
+ * Parameters:
+ * rk AES key schedule 32-bit array to be initialized
+ * cipherKey User key
+ * keyBits AES key size (128, 192, or 256 bits)
+ */
+static int
+rijndael_key_setup_enc_raw(uint32_t rk[], const uint32_t cipherKey[],
+ int keyBits)
+{
+ int i = 0;
+ uint32_t temp;
+
+ rk[0] = cipherKey[0];
+ rk[1] = cipherKey[1];
+ rk[2] = cipherKey[2];
+ rk[3] = cipherKey[3];
+
+ if (keyBits == 128) {
+ for (;;) {
+ temp = rk[3];
+ rk[4] = rk[0] ^
+ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te4[temp & 0xff] & 0x0000ff00) ^
+ (Te4[temp >> 24] & 0x000000ff) ^
+ rcon[i];
+ rk[5] = rk[1] ^ rk[4];
+ rk[6] = rk[2] ^ rk[5];
+ rk[7] = rk[3] ^ rk[6];
+
+ if (++i == 10) {
+ return (10);
+ }
+ rk += 4;
+ }
+ }
+
+ rk[4] = cipherKey[4];
+ rk[5] = cipherKey[5];
+
+ if (keyBits == 192) {
+ for (;;) {
+ temp = rk[5];
+ rk[6] = rk[0] ^
+ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te4[temp & 0xff] & 0x0000ff00) ^
+ (Te4[temp >> 24] & 0x000000ff) ^
+ rcon[i];
+ rk[7] = rk[1] ^ rk[6];
+ rk[8] = rk[2] ^ rk[7];
+ rk[9] = rk[3] ^ rk[8];
+
+ if (++i == 8) {
+ return (12);
+ }
+
+ rk[10] = rk[4] ^ rk[9];
+ rk[11] = rk[5] ^ rk[10];
+ rk += 6;
+ }
+ }
+
+ rk[6] = cipherKey[6];
+ rk[7] = cipherKey[7];
+
+ if (keyBits == 256) {
+ for (;;) {
+ temp = rk[7];
+ rk[8] = rk[0] ^
+ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te4[temp & 0xff] & 0x0000ff00) ^
+ (Te4[temp >> 24] & 0x000000ff) ^
+ rcon[i];
+ rk[9] = rk[1] ^ rk[8];
+ rk[10] = rk[2] ^ rk[9];
+ rk[11] = rk[3] ^ rk[10];
+
+ if (++i == 7) {
+ return (14);
+ }
+ temp = rk[11];
+ rk[12] = rk[4] ^
+ (Te4[temp >> 24] & 0xff000000) ^
+ (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[temp & 0xff] & 0x000000ff);
+ rk[13] = rk[5] ^ rk[12];
+ rk[14] = rk[6] ^ rk[13];
+ rk[15] = rk[7] ^ rk[14];
+
+ rk += 8;
+ }
+ }
+
+ return (0);
+}
+#endif /* !__amd64 */
+
+#if defined(__amd64)
+
+/*
+ * Expand the 32-bit AES cipher key array into the encryption and decryption
+ * key schedules.
+ *
+ * Parameters:
+ * key AES key schedule to be initialized
+ * keyarr32 User key
+ * keyBits AES key size (128, 192, or 256 bits)
+ */
+static void
+aes_setupkeys(aes_key_t *key, const uint32_t *keyarr32, int keybits)
+{
+ if (intel_aes_instructions_present()) {
+ key->flags = INTEL_AES_NI_CAPABLE;
+ KPREEMPT_DISABLE;
+ key->nr = rijndael_key_setup_enc_intel(&(key->encr_ks.ks32[0]),
+ keyarr32, keybits);
+ key->nr = rijndael_key_setup_dec_intel(&(key->decr_ks.ks32[0]),
+ keyarr32, keybits);
+ KPREEMPT_ENABLE;
+ } else {
+ key->flags = 0;
+ key->nr = rijndael_key_setup_enc_amd64(&(key->encr_ks.ks32[0]),
+ keyarr32, keybits);
+ key->nr = rijndael_key_setup_dec_amd64(&(key->decr_ks.ks32[0]),
+ keyarr32, keybits);
+ }
+
+ key->type = AES_32BIT_KS;
+}
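+
+/*
+ * Editorial note, not part of the original patch: key->nr is assigned
+ * twice above; this is harmless because the encryption and decryption
+ * setup routines return the same round count for a given key size.
+ */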
+
+/*
+ * Encrypt one block of data. The block is assumed to be an array
+ * of four uint32_t values, so copying for alignment (and byte-order
+ * reversal on little-endian systems) might be necessary on the
+ * input and output byte streams.
+ * The size of the key schedule depends on the number of rounds
+ * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
+ *
+ * Parameters:
+ * rk Key schedule, of aes_ks_t (60 32-bit integers)
+ * Nr Number of rounds
+ * pt Input block (plain text)
+ * ct Output block (crypto text). Can overlap with pt
+ * flags Indicates whether we're on Intel AES-NI-capable hardware
+ */
+static void
+rijndael_encrypt(const uint32_t rk[], int Nr, const uint32_t pt[4],
+ uint32_t ct[4], int flags)
+{
+ if (flags & INTEL_AES_NI_CAPABLE) {
+ KPREEMPT_DISABLE;
+ aes_encrypt_intel(rk, Nr, pt, ct);
+ KPREEMPT_ENABLE;
+ } else {
+ aes_encrypt_amd64(rk, Nr, pt, ct);
+ }
+}
+
+/*
+ * Decrypt one block of data. The block is assumed to be an array
+ * of four uint32_t values, so copying for alignment (and byte-order
+ * reversal on little-endian systems) might be necessary on the
+ * input and output byte streams.
+ * The size of the key schedule depends on the number of rounds
+ * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
+ *
+ * Parameters:
+ * rk Key schedule, of aes_ks_t (60 32-bit integers)
+ * Nr Number of rounds
+ * ct Input block (crypto text)
+ * pt Output block (plain text). Can overlap with ct
+ * flags Indicates whether we're on Intel AES-NI-capable hardware
+ */
+static void
+rijndael_decrypt(const uint32_t rk[], int Nr, const uint32_t ct[4],
+ uint32_t pt[4], int flags)
+{
+ if (flags & INTEL_AES_NI_CAPABLE) {
+ KPREEMPT_DISABLE;
+ aes_decrypt_intel(rk, Nr, ct, pt);
+ KPREEMPT_ENABLE;
+ } else {
+ aes_decrypt_amd64(rk, Nr, ct, pt);
+ }
+}
+
+
+#else /* generic C implementation */
+
+/*
+ * Expand the cipher key into the decryption key schedule.
+ * Return the number of rounds for the given cipher key size.
+ * The size of the key schedule depends on the number of rounds
+ * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
+ *
+ * Parameters:
+ * rk AES key schedule 32-bit array to be initialized
+ * cipherKey User key
+ * keyBits AES key size (128, 192, or 256 bits)
+ */
+static int
+rijndael_key_setup_dec(uint32_t rk[], const uint32_t cipherKey[], int keyBits)
+{
+ int Nr, i, j;
+ uint32_t temp;
+
+ /* expand the cipher key: */
+ Nr = rijndael_key_setup_enc_raw(rk, cipherKey, keyBits);
+
+ /* invert the order of the round keys: */
+ for (i = 0, j = 4 * Nr; i < j; i += 4, j -= 4) {
+ temp = rk[i];
+ rk[i] = rk[j];
+ rk[j] = temp;
+ temp = rk[i + 1];
+ rk[i + 1] = rk[j + 1];
+ rk[j + 1] = temp;
+ temp = rk[i + 2];
+ rk[i + 2] = rk[j + 2];
+ rk[j + 2] = temp;
+ temp = rk[i + 3];
+ rk[i + 3] = rk[j + 3];
+ rk[j + 3] = temp;
+ }
+
+ /*
+ * apply the inverse MixColumn transform to all
+ * round keys but the first and the last:
+ */
+ for (i = 1; i < Nr; i++) {
+ rk += 4;
+ rk[0] = Td0[Te4[rk[0] >> 24] & 0xff] ^
+ Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[rk[0] & 0xff] & 0xff];
+ rk[1] = Td0[Te4[rk[1] >> 24] & 0xff] ^
+ Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[rk[1] & 0xff] & 0xff];
+ rk[2] = Td0[Te4[rk[2] >> 24] & 0xff] ^
+ Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[rk[2] & 0xff] & 0xff];
+ rk[3] = Td0[Te4[rk[3] >> 24] & 0xff] ^
+ Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[rk[3] & 0xff] & 0xff];
+ }
+
+ return (Nr);
+}
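+
+/*
+ * Editorial note, not part of the original patch: the Td0..Td3 tables
+ * fold InvSubBytes into InvMixColumns, while Te4 applies SubBytes, so
+ * each Td[Te4[b] & 0xff] lookup above reduces to InvMixColumns acting
+ * on the raw key byte b, which is exactly the transform the equivalent
+ * inverse cipher needs on its round keys.
+ */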
+
+
+/*
+ * Expand the 32-bit AES cipher key array into the encryption and decryption
+ * key schedules.
+ *
+ * Parameters:
+ * key AES key schedule to be initialized
+ * keyarr32 User key
+ * keyBits AES key size (128, 192, or 256 bits)
+ */
+static void
+aes_setupkeys(aes_key_t *key, const uint32_t *keyarr32, int keybits)
+{
+ key->nr = rijndael_key_setup_enc(&(key->encr_ks.ks32[0]), keyarr32,
+ keybits);
+ key->nr = rijndael_key_setup_dec(&(key->decr_ks.ks32[0]), keyarr32,
+ keybits);
+ key->type = AES_32BIT_KS;
+}
+
+
+/*
+ * Encrypt one block of data. The block is assumed to be an array
+ * of four uint32_t values, so copying for alignment (and byte-order
+ * reversal on little-endian systems) might be necessary on the
+ * input and output byte streams.
+ * The size of the key schedule depends on the number of rounds
+ * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
+ *
+ * Parameters:
+ * rk Key schedule, of aes_ks_t (60 32-bit integers)
+ * Nr Number of rounds
+ * pt Input block (plain text)
+ * ct Output block (crypto text). Can overlap with pt
+ */
+static void
+rijndael_encrypt(const uint32_t rk[], int Nr, const uint32_t pt[4],
+ uint32_t ct[4])
+{
+ uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
+ int r;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+
+ s0 = pt[0] ^ rk[0];
+ s1 = pt[1] ^ rk[1];
+ s2 = pt[2] ^ rk[2];
+ s3 = pt[3] ^ rk[3];
+
+	/*
+	 * Nr - 1 full rounds, two per iteration of the loop below
+	 * (r = Nr / 2); the last iteration breaks out after its first
+	 * half so that the final round can be applied with Te4 below:
+	 */
+
+ r = Nr >> 1;
+
+ for (;;) {
+ t0 = Te0[s0 >> 24] ^
+ Te1[(s1 >> 16) & 0xff] ^
+ Te2[(s2 >> 8) & 0xff] ^
+ Te3[s3 & 0xff] ^
+ rk[4];
+
+ t1 = Te0[s1 >> 24] ^
+ Te1[(s2 >> 16) & 0xff] ^
+ Te2[(s3 >> 8) & 0xff] ^
+ Te3[s0 & 0xff] ^
+ rk[5];
+
+ t2 = Te0[s2 >> 24] ^
+ Te1[(s3 >> 16) & 0xff] ^
+ Te2[(s0 >> 8) & 0xff] ^
+ Te3[s1 & 0xff] ^
+ rk[6];
+
+ t3 = Te0[s3 >> 24] ^
+ Te1[(s0 >> 16) & 0xff] ^
+ Te2[(s1 >> 8) & 0xff] ^
+ Te3[s2 & 0xff] ^
+ rk[7];
+
+ rk += 8;
+
+ if (--r == 0) {
+ break;
+ }
+
+ s0 = Te0[t0 >> 24] ^
+ Te1[(t1 >> 16) & 0xff] ^
+ Te2[(t2 >> 8) & 0xff] ^
+ Te3[t3 & 0xff] ^
+ rk[0];
+
+ s1 = Te0[t1 >> 24] ^
+ Te1[(t2 >> 16) & 0xff] ^
+ Te2[(t3 >> 8) & 0xff] ^
+ Te3[t0 & 0xff] ^
+ rk[1];
+
+ s2 = Te0[t2 >> 24] ^
+ Te1[(t3 >> 16) & 0xff] ^
+ Te2[(t0 >> 8) & 0xff] ^
+ Te3[t1 & 0xff] ^
+ rk[2];
+
+ s3 = Te0[t3 >> 24] ^
+ Te1[(t0 >> 16) & 0xff] ^
+ Te2[(t1 >> 8) & 0xff] ^
+ Te3[t2 & 0xff] ^
+ rk[3];
+ }
+
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+
+ s0 = (Te4[(t0 >> 24)] & 0xff000000) ^
+ (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[t3 & 0xff] & 0x000000ff) ^
+ rk[0];
+ ct[0] = s0;
+
+ s1 = (Te4[(t1 >> 24)] & 0xff000000) ^
+ (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[t0 & 0xff] & 0x000000ff) ^
+ rk[1];
+ ct[1] = s1;
+
+ s2 = (Te4[(t2 >> 24)] & 0xff000000) ^
+ (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[t1 & 0xff] & 0x000000ff) ^
+ rk[2];
+ ct[2] = s2;
+
+ s3 = (Te4[(t3 >> 24)] & 0xff000000) ^
+ (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[t2 & 0xff] & 0x000000ff) ^
+ rk[3];
+ ct[3] = s3;
+}
+
+
+/*
+ * Decrypt one block of data. The block is assumed to be an array
+ * of four uint32_t values, so copying for alignment (and byte-order
+ * reversal on little-endian systems) might be necessary on the
+ * input and output byte streams.
+ * The size of the key schedule depends on the number of rounds
+ * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
+ *
+ * Parameters:
+ * rk Key schedule, of aes_ks_t (60 32-bit integers)
+ * Nr Number of rounds
+ * ct Input block (crypto text)
+ * pt Output block (plain text). Can overlap with ct
+ */
+static void
+rijndael_decrypt(const uint32_t rk[], int Nr, const uint32_t ct[4],
+ uint32_t pt[4])
+{
+ uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
+ int r;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = ct[0] ^ rk[0];
+ s1 = ct[1] ^ rk[1];
+ s2 = ct[2] ^ rk[2];
+ s3 = ct[3] ^ rk[3];
+
+	/*
+	 * Nr - 1 full rounds, two per iteration of the loop below
+	 * (r = Nr / 2); the last iteration breaks out after its first
+	 * half so that the final round can be applied with Td4 below:
+	 */
+
+ r = Nr >> 1;
+
+ for (;;) {
+ t0 = Td0[s0 >> 24] ^
+ Td1[(s3 >> 16) & 0xff] ^
+ Td2[(s2 >> 8) & 0xff] ^
+ Td3[s1 & 0xff] ^
+ rk[4];
+
+ t1 = Td0[s1 >> 24] ^
+ Td1[(s0 >> 16) & 0xff] ^
+ Td2[(s3 >> 8) & 0xff] ^
+ Td3[s2 & 0xff] ^
+ rk[5];
+
+ t2 = Td0[s2 >> 24] ^
+ Td1[(s1 >> 16) & 0xff] ^
+ Td2[(s0 >> 8) & 0xff] ^
+ Td3[s3 & 0xff] ^
+ rk[6];
+
+ t3 = Td0[s3 >> 24] ^
+ Td1[(s2 >> 16) & 0xff] ^
+ Td2[(s1 >> 8) & 0xff] ^
+ Td3[s0 & 0xff] ^
+ rk[7];
+
+ rk += 8;
+
+ if (--r == 0) {
+ break;
+ }
+
+ s0 = Td0[t0 >> 24] ^
+ Td1[(t3 >> 16) & 0xff] ^
+ Td2[(t2 >> 8) & 0xff] ^
+ Td3[t1 & 0xff] ^
+ rk[0];
+
+ s1 = Td0[t1 >> 24] ^
+ Td1[(t0 >> 16) & 0xff] ^
+ Td2[(t3 >> 8) & 0xff] ^
+ Td3[t2 & 0xff] ^
+ rk[1];
+
+ s2 = Td0[t2 >> 24] ^
+ Td1[(t1 >> 16) & 0xff] ^
+ Td2[(t0 >> 8) & 0xff] ^
+ Td3[t3 & 0xff] ^
+ rk[2];
+
+ s3 = Td0[t3 >> 24] ^
+ Td1[(t2 >> 16) & 0xff] ^
+ Td2[(t1 >> 8) & 0xff] ^
+ Td3[t0 & 0xff] ^
+ rk[3];
+ }
+
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+
+ s0 = (Td4[t0 >> 24] & 0xff000000) ^
+ (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[t1 & 0xff] & 0x000000ff) ^
+ rk[0];
+ pt[0] = s0;
+
+ s1 = (Td4[t1 >> 24] & 0xff000000) ^
+ (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[t2 & 0xff] & 0x000000ff) ^
+ rk[1];
+ pt[1] = s1;
+
+ s2 = (Td4[t2 >> 24] & 0xff000000) ^
+ (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[t3 & 0xff] & 0x000000ff) ^
+ rk[2];
+ pt[2] = s2;
+
+ s3 = (Td4[t3 >> 24] & 0xff000000) ^
+ (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[t0 & 0xff] & 0x000000ff) ^
+ rk[3];
+ pt[3] = s3;
+}
+#endif /* __amd64 */
+
+
+/*
+ * Initialize AES encryption and decryption key schedules.
+ *
+ * Parameters:
+ * cipherKey User key
+ * keyBits AES key size (128, 192, or 256 bits)
+ * keysched AES key schedule to be initialized, of type aes_key_t.
+ * Allocated by aes_alloc_keysched().
+ */
+void
+aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits, void *keysched)
+{
+ aes_key_t *newbie = keysched;
+ uint_t keysize, i, j;
+ union {
+ uint64_t ka64[4];
+ uint32_t ka32[8];
+ } keyarr;
+
+ switch (keyBits) {
+ case 128:
+ newbie->nr = 10;
+ break;
+
+ case 192:
+ newbie->nr = 12;
+ break;
+
+ case 256:
+ newbie->nr = 14;
+ break;
+
+ default:
+ /* should never get here */
+ return;
+ }
+ keysize = CRYPTO_BITS2BYTES(keyBits);
+
+ /*
+ * For _LITTLE_ENDIAN machines (except AMD64), reverse every
+ * 4 bytes in the key. On _BIG_ENDIAN and AMD64, copy the key
+ * without reversing bytes.
+ * For AMD64, do not byte swap for aes_setupkeys().
+ *
+ * SPARCv8/v9 uses a key schedule array with 64-bit elements.
+ * X86/AMD64 uses a key schedule array with 32-bit elements.
+ */
+#ifndef AES_BYTE_SWAP
+ if (IS_P2ALIGNED(cipherKey, sizeof (uint64_t))) {
+ for (i = 0, j = 0; j < keysize; i++, j += 8) {
+ /* LINTED: pointer alignment */
+ keyarr.ka64[i] = *((uint64_t *)&cipherKey[j]);
+ }
+ } else {
+ bcopy(cipherKey, keyarr.ka32, keysize);
+ }
+
+#else /* byte swap */
+ for (i = 0, j = 0; j < keysize; i++, j += 4) {
+ keyarr.ka32[i] = htonl(*(uint32_t *)(void *)&cipherKey[j]);
+ }
+#endif
+
+ aes_setupkeys(newbie, keyarr.ka32, keyBits);
+}
+
+
+/*
+ * Encrypt one block using AES.
+ * Align if needed and (for x86 32-bit only) byte-swap.
+ *
+ * Parameters:
+ * ks Key schedule, of type aes_key_t
+ * pt Input block (plain text)
+ * ct Output block (crypto text). Can overlap with pt
+ */
+int
+aes_encrypt_block(const void *ks, const uint8_t *pt, uint8_t *ct)
+{
+ aes_key_t *ksch = (aes_key_t *)ks;
+
+#ifndef AES_BYTE_SWAP
+ if (IS_P2ALIGNED2(pt, ct, sizeof (uint32_t))) {
+ /* LINTED: pointer alignment */
+ AES_ENCRYPT_IMPL(&ksch->encr_ks.ks32[0], ksch->nr,
+ /* LINTED: pointer alignment */
+ (uint32_t *)pt, (uint32_t *)ct, ksch->flags);
+ } else {
+#endif
+ uint32_t buffer[AES_BLOCK_LEN / sizeof (uint32_t)];
+
+ /* Copy input block into buffer */
+#ifndef AES_BYTE_SWAP
+ bcopy(pt, &buffer, AES_BLOCK_LEN);
+
+#else /* byte swap */
+ buffer[0] = htonl(*(uint32_t *)(void *)&pt[0]);
+ buffer[1] = htonl(*(uint32_t *)(void *)&pt[4]);
+ buffer[2] = htonl(*(uint32_t *)(void *)&pt[8]);
+ buffer[3] = htonl(*(uint32_t *)(void *)&pt[12]);
+#endif
+
+ AES_ENCRYPT_IMPL(&ksch->encr_ks.ks32[0], ksch->nr,
+ buffer, buffer, ksch->flags);
+
+ /* Copy result from buffer to output block */
+#ifndef AES_BYTE_SWAP
+ bcopy(&buffer, ct, AES_BLOCK_LEN);
+ }
+
+#else /* byte swap */
+ *(uint32_t *)(void *)&ct[0] = htonl(buffer[0]);
+ *(uint32_t *)(void *)&ct[4] = htonl(buffer[1]);
+ *(uint32_t *)(void *)&ct[8] = htonl(buffer[2]);
+ *(uint32_t *)(void *)&ct[12] = htonl(buffer[3]);
+#endif
+ return (CRYPTO_SUCCESS);
+}
+
+
+/*
+ * Decrypt one block using AES.
+ * Align and byte-swap if needed.
+ *
+ * Parameters:
+ * ks Key schedule, of type aes_key_t
+ * ct Input block (crypto text)
+ * pt Output block (plain text). Can overlap with ct
+ */
+int
+aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt)
+{
+ aes_key_t *ksch = (aes_key_t *)ks;
+
+#ifndef AES_BYTE_SWAP
+ if (IS_P2ALIGNED2(ct, pt, sizeof (uint32_t))) {
+ /* LINTED: pointer alignment */
+ AES_DECRYPT_IMPL(&ksch->decr_ks.ks32[0], ksch->nr,
+ /* LINTED: pointer alignment */
+ (uint32_t *)ct, (uint32_t *)pt, ksch->flags);
+ } else {
+#endif
+ uint32_t buffer[AES_BLOCK_LEN / sizeof (uint32_t)];
+
+ /* Copy input block into buffer */
+#ifndef AES_BYTE_SWAP
+ bcopy(ct, &buffer, AES_BLOCK_LEN);
+
+#else /* byte swap */
+ buffer[0] = htonl(*(uint32_t *)(void *)&ct[0]);
+ buffer[1] = htonl(*(uint32_t *)(void *)&ct[4]);
+ buffer[2] = htonl(*(uint32_t *)(void *)&ct[8]);
+ buffer[3] = htonl(*(uint32_t *)(void *)&ct[12]);
+#endif
+
+ AES_DECRYPT_IMPL(&ksch->decr_ks.ks32[0], ksch->nr,
+ buffer, buffer, ksch->flags);
+
+ /* Copy result from buffer to output block */
+#ifndef AES_BYTE_SWAP
+ bcopy(&buffer, pt, AES_BLOCK_LEN);
+ }
+
+#else /* byte swap */
+ *(uint32_t *)(void *)&pt[0] = htonl(buffer[0]);
+ *(uint32_t *)(void *)&pt[4] = htonl(buffer[1]);
+ *(uint32_t *)(void *)&pt[8] = htonl(buffer[2]);
+ *(uint32_t *)(void *)&pt[12] = htonl(buffer[3]);
+#endif
+
+ return (CRYPTO_SUCCESS);
+}
+
+
+/*
+ * Allocate key schedule for AES.
+ *
+ * Return the pointer and set size to the number of bytes allocated.
+ * Memory allocated must be freed by the caller when done.
+ *
+ * Parameters:
+ * size Size of key schedule allocated, in bytes
+ * kmflag Flag passed to kmem_alloc(9F); ignored in userland.
+ */
+/* ARGSUSED */
+void *
+aes_alloc_keysched(size_t *size, int kmflag)
+{
+ aes_key_t *keysched;
+
+ keysched = (aes_key_t *)kmem_alloc(sizeof (aes_key_t), kmflag);
+ if (keysched != NULL) {
+ *size = sizeof (aes_key_t);
+ return (keysched);
+ }
+ return (NULL);
+}
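+
+/*
+ * Editorial sketch, not part of the original patch: typical use of the
+ * three entry points above for a single-block round trip. KM_SLEEP and
+ * the 16-byte buffers are assumptions made for the example.
+ */
+#ifdef AES_USAGE_EXAMPLE	/* illustrative only; never defined */
+static void
+aes_block_roundtrip(const uint8_t key[16], const uint8_t in[16],
+    uint8_t out[16])
+{
+	size_t size;
+	void *ks = aes_alloc_keysched(&size, KM_SLEEP);
+
+	if (ks == NULL)
+		return;
+	aes_init_keysched(key, 128, ks);	/* 128-bit key */
+	(void) aes_encrypt_block(ks, in, out);
+	(void) aes_decrypt_block(ks, out, out);	/* out == in again */
+	bzero(ks, size);
+	kmem_free(ks, size);
+}
+#endif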
+
+
+#ifdef __amd64
+
+#define INTEL_AESNI_FLAG (1 << 25)
+
+/*
+ * Return 1 if executing on Intel with AES-NI instructions,
+ * otherwise 0 (i.e., Intel without AES-NI, or a non-Intel CPU such as AMD).
+ * Cache the result, as the CPU can't change.
+ */
+static int
+intel_aes_instructions_present(void)
+{
+ static int cached_result = -1;
+ unsigned eax, ebx, ecx, edx;
+ unsigned func, subfunc;
+
+ if (cached_result == -1) { /* first time */
+ /* check for an intel cpu */
+ func = 0;
+ subfunc = 0;
+
+ __asm__ __volatile__(
+ "cpuid"
+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+ : "a"(func), "c"(subfunc));
+
+ if (memcmp((char *)(&ebx), "Genu", 4) == 0 &&
+ memcmp((char *)(&edx), "ineI", 4) == 0 &&
+ memcmp((char *)(&ecx), "ntel", 4) == 0) {
+ func = 1;
+ subfunc = 0;
+
+ /* check for aes-ni instruction set */
+ __asm__ __volatile__(
+ "cpuid"
+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+ : "a"(func), "c"(subfunc));
+
+ cached_result = !!(ecx & INTEL_AESNI_FLAG);
+ } else {
+ cached_result = 0;
+ }
+ }
+
+ return (cached_result);
+}
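+
+/*
+ * Editorial note, not part of the original patch: in userland the same
+ * test could be written with the GCC/Clang <cpuid.h> helpers instead of
+ * raw inline assembly, e.g.
+ *
+ *	unsigned eax, ebx, ecx, edx;
+ *	if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))
+ *		return (!!(ecx & bit_AES));	(bit_AES == 1 << 25)
+ *
+ * The open-coded CPUID above avoids that header dependency in kernel
+ * context.
+ */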
+
+#endif /* __amd64 */
diff --git a/zfs/module/icp/algs/aes/aes_modes.c b/zfs/module/icp/algs/aes/aes_modes.c
new file mode 100644
index 000000000000..9e4b498fffcb
--- /dev/null
+++ b/zfs/module/icp/algs/aes/aes_modes.c
@@ -0,0 +1,135 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <modes/modes.h>
+#include <aes/aes_impl.h>
+
+/* Copy a 16-byte AES block from "in" to "out" */
+void
+aes_copy_block(uint8_t *in, uint8_t *out)
+{
+ if (IS_P2ALIGNED2(in, out, sizeof (uint32_t))) {
+ /* LINTED: pointer alignment */
+ *(uint32_t *)&out[0] = *(uint32_t *)&in[0];
+ /* LINTED: pointer alignment */
+ *(uint32_t *)&out[4] = *(uint32_t *)&in[4];
+ /* LINTED: pointer alignment */
+ *(uint32_t *)&out[8] = *(uint32_t *)&in[8];
+ /* LINTED: pointer alignment */
+ *(uint32_t *)&out[12] = *(uint32_t *)&in[12];
+ } else {
+ AES_COPY_BLOCK(in, out);
+ }
+}
+
+
+/* XOR a 16-byte AES block of data into dst */
+void
+aes_xor_block(uint8_t *data, uint8_t *dst)
+{
+ if (IS_P2ALIGNED2(dst, data, sizeof (uint32_t))) {
+ /* LINTED: pointer alignment */
+ *(uint32_t *)&dst[0] ^= *(uint32_t *)&data[0];
+ /* LINTED: pointer alignment */
+ *(uint32_t *)&dst[4] ^= *(uint32_t *)&data[4];
+ /* LINTED: pointer alignment */
+ *(uint32_t *)&dst[8] ^= *(uint32_t *)&data[8];
+ /* LINTED: pointer alignment */
+ *(uint32_t *)&dst[12] ^= *(uint32_t *)&data[12];
+ } else {
+ AES_XOR_BLOCK(data, dst);
+ }
+}
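+
+/*
+ * Editorial note, not part of the original patch: both helpers above
+ * take a fast path of four word-sized loads/stores when source and
+ * destination are 32-bit aligned, and fall back to the generic
+ * AES_COPY_BLOCK/AES_XOR_BLOCK macros otherwise.
+ */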
+
+
+/*
+ * Encrypt multiple blocks of data according to mode.
+ */
+int
+aes_encrypt_contiguous_blocks(void *ctx, char *data, size_t length,
+ crypto_data_t *out)
+{
+ aes_ctx_t *aes_ctx = ctx;
+ int rv;
+
+ if (aes_ctx->ac_flags & CTR_MODE) {
+ rv = ctr_mode_contiguous_blocks(ctx, data, length, out,
+ AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block);
+ } else if (aes_ctx->ac_flags & CCM_MODE) {
+ rv = ccm_mode_encrypt_contiguous_blocks(ctx, data, length,
+ out, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
+ aes_xor_block);
+ } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) {
+ rv = gcm_mode_encrypt_contiguous_blocks(ctx, data, length,
+ out, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
+ aes_xor_block);
+ } else if (aes_ctx->ac_flags & CBC_MODE) {
+ rv = cbc_encrypt_contiguous_blocks(ctx,
+ data, length, out, AES_BLOCK_LEN, aes_encrypt_block,
+ aes_copy_block, aes_xor_block);
+ } else {
+ rv = ecb_cipher_contiguous_blocks(ctx, data, length, out,
+ AES_BLOCK_LEN, aes_encrypt_block);
+ }
+ return (rv);
+}
+
+
+/*
+ * Decrypt multiple blocks of data according to mode.
+ */
+int
+aes_decrypt_contiguous_blocks(void *ctx, char *data, size_t length,
+ crypto_data_t *out)
+{
+ aes_ctx_t *aes_ctx = ctx;
+ int rv;
+
+ if (aes_ctx->ac_flags & CTR_MODE) {
+ rv = ctr_mode_contiguous_blocks(ctx, data, length, out,
+ AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block);
+ if (rv == CRYPTO_DATA_LEN_RANGE)
+ rv = CRYPTO_ENCRYPTED_DATA_LEN_RANGE;
+ } else if (aes_ctx->ac_flags & CCM_MODE) {
+ rv = ccm_mode_decrypt_contiguous_blocks(ctx, data, length,
+ out, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
+ aes_xor_block);
+ } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) {
+ rv = gcm_mode_decrypt_contiguous_blocks(ctx, data, length,
+ out, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
+ aes_xor_block);
+ } else if (aes_ctx->ac_flags & CBC_MODE) {
+ rv = cbc_decrypt_contiguous_blocks(ctx, data, length, out,
+ AES_BLOCK_LEN, aes_decrypt_block, aes_copy_block,
+ aes_xor_block);
+ } else {
+ rv = ecb_cipher_contiguous_blocks(ctx, data, length, out,
+ AES_BLOCK_LEN, aes_decrypt_block);
+ if (rv == CRYPTO_DATA_LEN_RANGE)
+ rv = CRYPTO_ENCRYPTED_DATA_LEN_RANGE;
+ }
+ return (rv);
+}
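+
+/*
+ * Editorial note, not part of the original patch: CTR, CCM, GCM and
+ * GMAC pass aes_encrypt_block on the decrypt path above as well; those
+ * modes use the block cipher as a keystream generator, so only the
+ * forward AES transform is ever required.
+ */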
diff --git a/zfs/module/icp/algs/edonr/edonr.c b/zfs/module/icp/algs/edonr/edonr.c
new file mode 100644
index 000000000000..8ae989890867
--- /dev/null
+++ b/zfs/module/icp/algs/edonr/edonr.c
@@ -0,0 +1,751 @@
+/*
+ * IDI,NTNU
+ *
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Copyright (C) 2009, 2010, Jorn Amundsen <jorn.amundsen at ntnu.no>
+ * Tweaked Edon-R implementation for SUPERCOP, based on NIST API.
+ *
+ * $Id: edonr.c 517 2013-02-17 20:34:39Z joern $
+ */
+/*
+ * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved
+ */
+
+/* determine where we can get bcopy/bzero declarations */
+#ifdef _KERNEL
+#include <sys/systm.h>
+#else
+#include <strings.h>
+#endif
+#include <sys/edonr.h>
+#include <sys/debug.h>
+
+/* big endian support, provides no-ops if run on little endian hosts */
+#include "edonr_byteorder.h"
+
+#define hashState224(x) ((x)->pipe->p256)
+#define hashState256(x) ((x)->pipe->p256)
+#define hashState384(x) ((x)->pipe->p512)
+#define hashState512(x) ((x)->pipe->p512)
+
+/* shift and rotate shortcuts */
+#define shl(x, n) ((x) << (n))
+#define shr(x, n) ((x) >> (n))
+
+#define rotl32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
+#define rotr32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
+
+#define rotl64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
+#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
+
+#if !defined(__C99_RESTRICT)
+#define restrict /* restrict */
+#endif
+
+#define EDONR_VALID_HASHBITLEN(x) \
+ ((x) == 512 || (x) == 384 || (x) == 256 || (x) == 224)
+
+/* EdonR224 initial double chaining pipe */
+static const uint32_t i224p2[16] = {
+ 0x00010203ul, 0x04050607ul, 0x08090a0bul, 0x0c0d0e0ful,
+ 0x10111213ul, 0x14151617ul, 0x18191a1bul, 0x1c1d1e1ful,
+ 0x20212223ul, 0x24252627ul, 0x28292a2bul, 0x2c2d2e2ful,
+ 0x30313233ul, 0x34353637ul, 0x38393a3bul, 0x3c3d3e3ful,
+};
+
+/* EdonR256 initial double chaining pipe */
+static const uint32_t i256p2[16] = {
+ 0x40414243ul, 0x44454647ul, 0x48494a4bul, 0x4c4d4e4ful,
+ 0x50515253ul, 0x54555657ul, 0x58595a5bul, 0x5c5d5e5ful,
+ 0x60616263ul, 0x64656667ul, 0x68696a6bul, 0x6c6d6e6ful,
+ 0x70717273ul, 0x74757677ul, 0x78797a7bul, 0x7c7d7e7ful,
+};
+
+/* EdonR384 initial double chaining pipe */
+static const uint64_t i384p2[16] = {
+ 0x0001020304050607ull, 0x08090a0b0c0d0e0full,
+ 0x1011121314151617ull, 0x18191a1b1c1d1e1full,
+ 0x2021222324252627ull, 0x28292a2b2c2d2e2full,
+ 0x3031323334353637ull, 0x38393a3b3c3d3e3full,
+ 0x4041424344454647ull, 0x48494a4b4c4d4e4full,
+ 0x5051525354555657ull, 0x58595a5b5c5d5e5full,
+ 0x6061626364656667ull, 0x68696a6b6c6d6e6full,
+ 0x7071727374757677ull, 0x78797a7b7c7d7e7full
+};
+
+/* EdonR512 initial double chaining pipe */
+static const uint64_t i512p2[16] = {
+ 0x8081828384858687ull, 0x88898a8b8c8d8e8full,
+ 0x9091929394959697ull, 0x98999a9b9c9d9e9full,
+ 0xa0a1a2a3a4a5a6a7ull, 0xa8a9aaabacadaeafull,
+ 0xb0b1b2b3b4b5b6b7ull, 0xb8b9babbbcbdbebfull,
+ 0xc0c1c2c3c4c5c6c7ull, 0xc8c9cacbcccdcecfull,
+ 0xd0d1d2d3d4d5d6d7ull, 0xd8d9dadbdcdddedfull,
+ 0xe0e1e2e3e4e5e6e7ull, 0xe8e9eaebecedeeefull,
+ 0xf0f1f2f3f4f5f6f7ull, 0xf8f9fafbfcfdfeffull
+};
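+
+/*
+ * Editorial note, not part of the original patch: the four initial
+ * pipes are plain byte counters; i224p2/i256p2 split the bytes
+ * 0x00..0x7f across two 32-bit tables, and i384p2/i512p2 split
+ * 0x00..0xff across two 64-bit ones.
+ */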
+
+/*
+ * First Latin Square
+ * 0 7 1 3 2 4 6 5
+ * 4 1 7 6 3 0 5 2
+ * 7 0 4 2 5 3 1 6
+ * 1 4 0 5 6 2 7 3
+ * 2 3 6 7 1 5 0 4
+ * 5 2 3 1 7 6 4 0
+ * 3 6 5 0 4 7 2 1
+ * 6 5 2 4 0 1 3 7
+ */
+#define LS1_256(c, x0, x1, x2, x3, x4, x5, x6, x7) \
+{ \
+ uint32_t x04, x17, x23, x56, x07, x26; \
+ x04 = x0+x4, x17 = x1+x7, x07 = x04+x17; \
+ s0 = c + x07 + x2; \
+ s1 = rotl32(x07 + x3, 4); \
+ s2 = rotl32(x07 + x6, 8); \
+ x23 = x2 + x3; \
+ s5 = rotl32(x04 + x23 + x5, 22); \
+ x56 = x5 + x6; \
+ s6 = rotl32(x17 + x56 + x0, 24); \
+ x26 = x23+x56; \
+ s3 = rotl32(x26 + x7, 13); \
+ s4 = rotl32(x26 + x1, 17); \
+ s7 = rotl32(x26 + x4, 29); \
+}
+
+#define LS1_512(c, x0, x1, x2, x3, x4, x5, x6, x7) \
+{ \
+ uint64_t x04, x17, x23, x56, x07, x26; \
+ x04 = x0+x4, x17 = x1+x7, x07 = x04+x17; \
+ s0 = c + x07 + x2; \
+ s1 = rotl64(x07 + x3, 5); \
+ s2 = rotl64(x07 + x6, 15); \
+ x23 = x2 + x3; \
+ s5 = rotl64(x04 + x23 + x5, 40); \
+ x56 = x5 + x6; \
+ s6 = rotl64(x17 + x56 + x0, 50); \
+ x26 = x23+x56; \
+ s3 = rotl64(x26 + x7, 22); \
+ s4 = rotl64(x26 + x1, 31); \
+ s7 = rotl64(x26 + x4, 59); \
+}
+
+/*
+ * Second Orthogonal Latin Square
+ * 0 4 2 3 1 6 5 7
+ * 7 6 3 2 5 4 1 0
+ * 5 3 1 6 0 2 7 4
+ * 1 0 5 4 3 7 2 6
+ * 2 1 0 7 4 5 6 3
+ * 3 5 7 0 6 1 4 2
+ * 4 7 6 1 2 0 3 5
+ * 6 2 4 5 7 3 0 1
+ */
+#define LS2_256(c, y0, y1, y2, y3, y4, y5, y6, y7) \
+{ \
+ uint32_t y01, y25, y34, y67, y04, y05, y27, y37; \
+ y01 = y0+y1, y25 = y2+y5, y05 = y01+y25; \
+ t0 = ~c + y05 + y7; \
+ t2 = rotl32(y05 + y3, 9); \
+ y34 = y3+y4, y04 = y01+y34; \
+ t1 = rotl32(y04 + y6, 5); \
+ t4 = rotl32(y04 + y5, 15); \
+ y67 = y6+y7, y37 = y34+y67; \
+ t3 = rotl32(y37 + y2, 11); \
+ t7 = rotl32(y37 + y0, 27); \
+ y27 = y25+y67; \
+ t5 = rotl32(y27 + y4, 20); \
+ t6 = rotl32(y27 + y1, 25); \
+}
+
+#define LS2_512(c, y0, y1, y2, y3, y4, y5, y6, y7) \
+{ \
+ uint64_t y01, y25, y34, y67, y04, y05, y27, y37; \
+ y01 = y0+y1, y25 = y2+y5, y05 = y01+y25; \
+ t0 = ~c + y05 + y7; \
+ t2 = rotl64(y05 + y3, 19); \
+ y34 = y3+y4, y04 = y01+y34; \
+ t1 = rotl64(y04 + y6, 10); \
+ t4 = rotl64(y04 + y5, 36); \
+ y67 = y6+y7, y37 = y34+y67; \
+ t3 = rotl64(y37 + y2, 29); \
+ t7 = rotl64(y37 + y0, 55); \
+ y27 = y25+y67; \
+ t5 = rotl64(y27 + y4, 44); \
+ t6 = rotl64(y27 + y1, 48); \
+}
+
+#define quasi_exform256(r0, r1, r2, r3, r4, r5, r6, r7) \
+{ \
+ uint32_t s04, s17, s23, s56, t01, t25, t34, t67; \
+ s04 = s0 ^ s4, t01 = t0 ^ t1; \
+ r0 = (s04 ^ s1) + (t01 ^ t5); \
+ t67 = t6 ^ t7; \
+ r1 = (s04 ^ s7) + (t2 ^ t67); \
+ s23 = s2 ^ s3; \
+ r7 = (s23 ^ s5) + (t4 ^ t67); \
+ t34 = t3 ^ t4; \
+ r3 = (s23 ^ s4) + (t0 ^ t34); \
+ s56 = s5 ^ s6; \
+ r5 = (s3 ^ s56) + (t34 ^ t6); \
+ t25 = t2 ^ t5; \
+ r6 = (s2 ^ s56) + (t25 ^ t7); \
+ s17 = s1 ^ s7; \
+ r4 = (s0 ^ s17) + (t1 ^ t25); \
+ r2 = (s17 ^ s6) + (t01 ^ t3); \
+}
+
+#define quasi_exform512(r0, r1, r2, r3, r4, r5, r6, r7) \
+{ \
+ uint64_t s04, s17, s23, s56, t01, t25, t34, t67; \
+ s04 = s0 ^ s4, t01 = t0 ^ t1; \
+ r0 = (s04 ^ s1) + (t01 ^ t5); \
+ t67 = t6 ^ t7; \
+ r1 = (s04 ^ s7) + (t2 ^ t67); \
+ s23 = s2 ^ s3; \
+ r7 = (s23 ^ s5) + (t4 ^ t67); \
+ t34 = t3 ^ t4; \
+ r3 = (s23 ^ s4) + (t0 ^ t34); \
+ s56 = s5 ^ s6; \
+ r5 = (s3 ^ s56) + (t34 ^ t6); \
+ t25 = t2 ^ t5; \
+ r6 = (s2 ^ s56) + (t25 ^ t7); \
+ s17 = s1 ^ s7; \
+ r4 = (s0 ^ s17) + (t1 ^ t25); \
+ r2 = (s17 ^ s6) + (t01 ^ t3); \
+}
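+
+/*
+ * Editorial note, not part of the original patch: each quasigroup
+ * operation X * Y in Q256/Q512 below is computed as
+ * quasi_exform(LS1(X), LS2(Y)); the two orthogonal Latin squares above
+ * determine how the eight words of each operand are mixed before the
+ * final XOR/addition step.
+ */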
+
+static size_t
+Q256(size_t bitlen, const uint32_t *data, uint32_t *restrict p)
+{
+ size_t bl;
+
+ for (bl = bitlen; bl >= EdonR256_BLOCK_BITSIZE;
+ bl -= EdonR256_BLOCK_BITSIZE, data += 16) {
+ uint32_t s0, s1, s2, s3, s4, s5, s6, s7, t0, t1, t2, t3, t4,
+ t5, t6, t7;
+ uint32_t p0, p1, p2, p3, p4, p5, p6, p7, q0, q1, q2, q3, q4,
+ q5, q6, q7;
+ const uint32_t defix = 0xaaaaaaaa;
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ uint32_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7, swp8,
+ swp9, swp10, swp11, swp12, swp13, swp14, swp15;
+#define d(j) swp ## j
+#define s32(j) ld_swap32((uint32_t *)data + j, swp ## j)
+#else
+#define d(j) data[j]
+#endif
+
+ /* First row of quasigroup e-transformations */
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ s32(8);
+ s32(9);
+ s32(10);
+ s32(11);
+ s32(12);
+ s32(13);
+ s32(14);
+ s32(15);
+#endif
+ LS1_256(defix, d(15), d(14), d(13), d(12), d(11), d(10), d(9),
+ d(8));
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ s32(0);
+ s32(1);
+ s32(2);
+ s32(3);
+ s32(4);
+ s32(5);
+ s32(6);
+ s32(7);
+#undef s32
+#endif
+ LS2_256(defix, d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7));
+ quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7);
+
+ LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_256(defix, d(8), d(9), d(10), d(11), d(12), d(13), d(14),
+ d(15));
+ quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7);
+
+ /* Second row of quasigroup e-transformations */
+ LS1_256(defix, p[8], p[9], p[10], p[11], p[12], p[13], p[14],
+ p[15]);
+ LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7);
+
+ LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_256(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+ quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7);
+
+ /* Third row of quasigroup e-transformations */
+ LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_256(defix, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]);
+ quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7);
+
+ LS1_256(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+ LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7);
+
+ /* Fourth row of quasigroup e-transformations */
+ LS1_256(defix, d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0));
+ LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7);
+
+ LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_256(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+ quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7);
+
+ /* Edon-R tweak on the original SHA-3 Edon-R submission. */
+ p[0] ^= d(8) ^ p0;
+ p[1] ^= d(9) ^ p1;
+ p[2] ^= d(10) ^ p2;
+ p[3] ^= d(11) ^ p3;
+ p[4] ^= d(12) ^ p4;
+ p[5] ^= d(13) ^ p5;
+ p[6] ^= d(14) ^ p6;
+ p[7] ^= d(15) ^ p7;
+ p[8] ^= d(0) ^ q0;
+ p[9] ^= d(1) ^ q1;
+ p[10] ^= d(2) ^ q2;
+ p[11] ^= d(3) ^ q3;
+ p[12] ^= d(4) ^ q4;
+ p[13] ^= d(5) ^ q5;
+ p[14] ^= d(6) ^ q6;
+ p[15] ^= d(7) ^ q7;
+ }
+
+#undef d
+ return (bitlen - bl);
+}
+
+/*
+ * Why is this #pragma here?
+ *
+ * Checksum functions like this one can go over the stack frame size check
+ * Linux imposes on 32-bit platforms (-Wframe-larger-than=1024). We can
+ * safely ignore the compiler warning since we know that in ZoL the
+ * function will be called from a worker thread that won't be using
+ * much stack. The only function that goes over the 1k limit is Q512(),
+ * which only goes over it by a hair (1248 bytes on ARM32).
+ */
+#include <sys/isa_defs.h> /* for _ILP32 */
+#ifdef _ILP32 /* We're 32-bit, assume small stack frames */
+#pragma GCC diagnostic ignored "-Wframe-larger-than="
+#endif
+
+#if defined(__IBMC__) && defined(_AIX) && defined(__64BIT__)
+static inline size_t
+#else
+static size_t
+#endif
+Q512(size_t bitlen, const uint64_t *data, uint64_t *restrict p)
+{
+ size_t bl;
+
+ for (bl = bitlen; bl >= EdonR512_BLOCK_BITSIZE;
+ bl -= EdonR512_BLOCK_BITSIZE, data += 16) {
+ uint64_t s0, s1, s2, s3, s4, s5, s6, s7, t0, t1, t2, t3, t4,
+ t5, t6, t7;
+ uint64_t p0, p1, p2, p3, p4, p5, p6, p7, q0, q1, q2, q3, q4,
+ q5, q6, q7;
+ const uint64_t defix = 0xaaaaaaaaaaaaaaaaull;
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ uint64_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7, swp8,
+ swp9, swp10, swp11, swp12, swp13, swp14, swp15;
+#define d(j) swp##j
+#define s64(j) ld_swap64((uint64_t *)data+j, swp##j)
+#else
+#define d(j) data[j]
+#endif
+
+ /* First row of quasigroup e-transformations */
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ s64(8);
+ s64(9);
+ s64(10);
+ s64(11);
+ s64(12);
+ s64(13);
+ s64(14);
+ s64(15);
+#endif
+ LS1_512(defix, d(15), d(14), d(13), d(12), d(11), d(10), d(9),
+ d(8));
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ s64(0);
+ s64(1);
+ s64(2);
+ s64(3);
+ s64(4);
+ s64(5);
+ s64(6);
+ s64(7);
+#undef s64
+#endif
+ LS2_512(defix, d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7));
+ quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7);
+
+ LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_512(defix, d(8), d(9), d(10), d(11), d(12), d(13), d(14),
+ d(15));
+ quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7);
+
+ /* Second row of quasigroup e-transformations */
+ LS1_512(defix, p[8], p[9], p[10], p[11], p[12], p[13], p[14],
+ p[15]);
+ LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7);
+
+ LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_512(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+ quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7);
+
+ /* Third row of quasigroup e-transformations */
+ LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_512(defix, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]);
+ quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7);
+
+ LS1_512(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+ LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7);
+
+ /* Fourth row of quasigroup e-transformations */
+ LS1_512(defix, d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0));
+ LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7);
+
+ LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7);
+ LS2_512(defix, q0, q1, q2, q3, q4, q5, q6, q7);
+ quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7);
+
+ /* Edon-R tweak on the original SHA-3 Edon-R submission. */
+ p[0] ^= d(8) ^ p0;
+ p[1] ^= d(9) ^ p1;
+ p[2] ^= d(10) ^ p2;
+ p[3] ^= d(11) ^ p3;
+ p[4] ^= d(12) ^ p4;
+ p[5] ^= d(13) ^ p5;
+ p[6] ^= d(14) ^ p6;
+ p[7] ^= d(15) ^ p7;
+ p[8] ^= d(0) ^ q0;
+ p[9] ^= d(1) ^ q1;
+ p[10] ^= d(2) ^ q2;
+ p[11] ^= d(3) ^ q3;
+ p[12] ^= d(4) ^ q4;
+ p[13] ^= d(5) ^ q5;
+ p[14] ^= d(6) ^ q6;
+ p[15] ^= d(7) ^ q7;
+ }
+
+#undef d
+ return (bitlen - bl);
+}
+
+void
+EdonRInit(EdonRState *state, size_t hashbitlen)
+{
+ ASSERT(EDONR_VALID_HASHBITLEN(hashbitlen));
+ switch (hashbitlen) {
+ case 224:
+ state->hashbitlen = 224;
+ state->bits_processed = 0;
+ state->unprocessed_bits = 0;
+ bcopy(i224p2, hashState224(state)->DoublePipe,
+ 16 * sizeof (uint32_t));
+ break;
+
+ case 256:
+ state->hashbitlen = 256;
+ state->bits_processed = 0;
+ state->unprocessed_bits = 0;
+ bcopy(i256p2, hashState256(state)->DoublePipe,
+ 16 * sizeof (uint32_t));
+ break;
+
+ case 384:
+ state->hashbitlen = 384;
+ state->bits_processed = 0;
+ state->unprocessed_bits = 0;
+ bcopy(i384p2, hashState384(state)->DoublePipe,
+ 16 * sizeof (uint64_t));
+ break;
+
+ case 512:
+ state->hashbitlen = 512;
+ state->bits_processed = 0;
+ state->unprocessed_bits = 0;
+		bcopy(i512p2, hashState512(state)->DoublePipe,
+ 16 * sizeof (uint64_t));
+ break;
+ }
+}
+
+
+void
+EdonRUpdate(EdonRState *state, const uint8_t *data, size_t databitlen)
+{
+ uint32_t *data32;
+ uint64_t *data64;
+
+ size_t bits_processed;
+
+ ASSERT(EDONR_VALID_HASHBITLEN(state->hashbitlen));
+ switch (state->hashbitlen) {
+ case 224:
+ case 256:
+ if (state->unprocessed_bits > 0) {
+ /* LastBytes = databitlen / 8 */
+ int LastBytes = (int)databitlen >> 3;
+
+ ASSERT(state->unprocessed_bits + databitlen <=
+ EdonR256_BLOCK_SIZE * 8);
+
+ bcopy(data, hashState256(state)->LastPart
+ + (state->unprocessed_bits >> 3), LastBytes);
+ state->unprocessed_bits += (int)databitlen;
+ databitlen = state->unprocessed_bits;
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ data32 = (uint32_t *)hashState256(state)->LastPart;
+ } else
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ data32 = (uint32_t *)data;
+
+ bits_processed = Q256(databitlen, data32,
+ hashState256(state)->DoublePipe);
+ state->bits_processed += bits_processed;
+ databitlen -= bits_processed;
+ state->unprocessed_bits = (int)databitlen;
+ if (databitlen > 0) {
+ /* LastBytes = Ceil(databitlen / 8) */
+ int LastBytes =
+ ((~(((-(int)databitlen) >> 3) & 0x01ff)) +
+ 1) & 0x01ff;
+
+			data32 += bits_processed >> 5; /* advance by whole 32-bit words */
+ bcopy(data32, hashState256(state)->LastPart, LastBytes);
+ }
+ break;
+
+ case 384:
+ case 512:
+ if (state->unprocessed_bits > 0) {
+ /* LastBytes = databitlen / 8 */
+ int LastBytes = (int)databitlen >> 3;
+
+ ASSERT(state->unprocessed_bits + databitlen <=
+ EdonR512_BLOCK_SIZE * 8);
+
+ bcopy(data, hashState512(state)->LastPart
+ + (state->unprocessed_bits >> 3), LastBytes);
+ state->unprocessed_bits += (int)databitlen;
+ databitlen = state->unprocessed_bits;
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ data64 = (uint64_t *)hashState512(state)->LastPart;
+ } else
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ data64 = (uint64_t *)data;
+
+ bits_processed = Q512(databitlen, data64,
+ hashState512(state)->DoublePipe);
+ state->bits_processed += bits_processed;
+ databitlen -= bits_processed;
+ state->unprocessed_bits = (int)databitlen;
+ if (databitlen > 0) {
+ /* LastBytes = Ceil(databitlen / 8) */
+ int LastBytes =
+ ((~(((-(int)databitlen) >> 3) & 0x03ff)) +
+ 1) & 0x03ff;
+
+			data64 += bits_processed >> 6; /* advance by whole 64-bit words */
+ bcopy(data64, hashState512(state)->LastPart, LastBytes);
+ }
+ break;
+ }
+}
+
+void
+EdonRFinal(EdonRState *state, uint8_t *hashval)
+{
+ uint32_t *data32;
+ uint64_t *data64, num_bits;
+
+ size_t databitlen;
+ int LastByte, PadOnePosition;
+
+ num_bits = state->bits_processed + state->unprocessed_bits;
+ ASSERT(EDONR_VALID_HASHBITLEN(state->hashbitlen));
+ switch (state->hashbitlen) {
+ case 224:
+ case 256:
+ LastByte = (int)state->unprocessed_bits >> 3;
+ PadOnePosition = 7 - (state->unprocessed_bits & 0x07);
+ hashState256(state)->LastPart[LastByte] =
+ (hashState256(state)->LastPart[LastByte]
+ & (0xff << (PadOnePosition + 1))) ^
+ (0x01 << PadOnePosition);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ data64 = (uint64_t *)hashState256(state)->LastPart;
+
+ if (state->unprocessed_bits < 448) {
+ (void) memset((hashState256(state)->LastPart) +
+ LastByte + 1, 0x00,
+ EdonR256_BLOCK_SIZE - LastByte - 9);
+ databitlen = EdonR256_BLOCK_SIZE * 8;
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ st_swap64(num_bits, data64 + 7);
+#else
+ data64[7] = num_bits;
+#endif
+ } else {
+ (void) memset((hashState256(state)->LastPart) +
+ LastByte + 1, 0x00,
+ EdonR256_BLOCK_SIZE * 2 - LastByte - 9);
+ databitlen = EdonR256_BLOCK_SIZE * 16;
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ st_swap64(num_bits, data64 + 15);
+#else
+ data64[15] = num_bits;
+#endif
+ }
+
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ data32 = (uint32_t *)hashState256(state)->LastPart;
+ state->bits_processed += Q256(databitlen, data32,
+ hashState256(state)->DoublePipe);
+ break;
+
+ case 384:
+ case 512:
+ LastByte = (int)state->unprocessed_bits >> 3;
+ PadOnePosition = 7 - (state->unprocessed_bits & 0x07);
+ hashState512(state)->LastPart[LastByte] =
+ (hashState512(state)->LastPart[LastByte]
+ & (0xff << (PadOnePosition + 1))) ^
+ (0x01 << PadOnePosition);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ data64 = (uint64_t *)hashState512(state)->LastPart;
+
+ if (state->unprocessed_bits < 960) {
+ (void) memset((hashState512(state)->LastPart) +
+ LastByte + 1, 0x00,
+ EdonR512_BLOCK_SIZE - LastByte - 9);
+ databitlen = EdonR512_BLOCK_SIZE * 8;
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ st_swap64(num_bits, data64 + 15);
+#else
+ data64[15] = num_bits;
+#endif
+ } else {
+ (void) memset((hashState512(state)->LastPart) +
+ LastByte + 1, 0x00,
+ EdonR512_BLOCK_SIZE * 2 - LastByte - 9);
+ databitlen = EdonR512_BLOCK_SIZE * 16;
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ st_swap64(num_bits, data64 + 31);
+#else
+ data64[31] = num_bits;
+#endif
+ }
+
+ state->bits_processed += Q512(databitlen, data64,
+ hashState512(state)->DoublePipe);
+ break;
+ }
+
+ switch (state->hashbitlen) {
+ case 224: {
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ uint32_t *d32 = (uint32_t *)hashval;
+ uint32_t *s32 = hashState224(state)->DoublePipe + 9;
+ int j;
+
+ for (j = 0; j < EdonR224_DIGEST_SIZE >> 2; j++)
+ st_swap32(s32[j], d32 + j);
+#else
+ bcopy(hashState256(state)->DoublePipe + 9, hashval,
+ EdonR224_DIGEST_SIZE);
+#endif
+ break;
+ }
+ case 256: {
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ uint32_t *d32 = (uint32_t *)hashval;
+ uint32_t *s32 = hashState224(state)->DoublePipe + 8;
+ int j;
+
+ for (j = 0; j < EdonR256_DIGEST_SIZE >> 2; j++)
+ st_swap32(s32[j], d32 + j);
+#else
+ bcopy(hashState256(state)->DoublePipe + 8, hashval,
+ EdonR256_DIGEST_SIZE);
+#endif
+ break;
+ }
+ case 384: {
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ uint64_t *d64 = (uint64_t *)hashval;
+ uint64_t *s64 = hashState384(state)->DoublePipe + 10;
+ int j;
+
+ for (j = 0; j < EdonR384_DIGEST_SIZE >> 3; j++)
+ st_swap64(s64[j], d64 + j);
+#else
+ bcopy(hashState384(state)->DoublePipe + 10, hashval,
+ EdonR384_DIGEST_SIZE);
+#endif
+ break;
+ }
+ case 512: {
+#if defined(MACHINE_IS_BIG_ENDIAN)
+ uint64_t *d64 = (uint64_t *)hashval;
+ uint64_t *s64 = hashState512(state)->DoublePipe + 8;
+ int j;
+
+ for (j = 0; j < EdonR512_DIGEST_SIZE >> 3; j++)
+ st_swap64(s64[j], d64 + j);
+#else
+ bcopy(hashState512(state)->DoublePipe + 8, hashval,
+ EdonR512_DIGEST_SIZE);
+#endif
+ break;
+ }
+ }
+}
+
+
+void
+EdonRHash(size_t hashbitlen, const uint8_t *data, size_t databitlen,
+ uint8_t *hashval)
+{
+ EdonRState state;
+
+ EdonRInit(&state, hashbitlen);
+ EdonRUpdate(&state, data, databitlen);
+ EdonRFinal(&state, hashval);
+}
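+
+/*
+ * Editorial sketch, not part of the original patch: one-shot hashing
+ * with the streaming API above. Note that lengths are bit counts, so a
+ * byte buffer is passed as len * 8 (equivalently, EdonRHash(256, buf,
+ * len * 8, digest) does all three steps at once).
+ */
+#ifdef EDONR_USAGE_EXAMPLE	/* illustrative only; never defined */
+static void
+edonr256_digest(const uint8_t *buf, size_t len,
+    uint8_t digest[EdonR256_DIGEST_SIZE])
+{
+	EdonRState state;
+
+	EdonRInit(&state, 256);
+	EdonRUpdate(&state, buf, len * 8);
+	EdonRFinal(&state, digest);
+}
+#endif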
+
+#ifdef _KERNEL
+EXPORT_SYMBOL(EdonRInit);
+EXPORT_SYMBOL(EdonRUpdate);
+EXPORT_SYMBOL(EdonRHash);
+EXPORT_SYMBOL(EdonRFinal);
+#endif
diff --git a/zfs/module/icp/algs/edonr/edonr_byteorder.h b/zfs/module/icp/algs/edonr/edonr_byteorder.h
new file mode 100644
index 000000000000..d17e8f1fd4be
--- /dev/null
+++ b/zfs/module/icp/algs/edonr/edonr_byteorder.h
@@ -0,0 +1,216 @@
+/*
+ * IDI,NTNU
+ *
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ *
+ * Copyright (C) 2009, 2010, Jorn Amundsen <jorn.amundsen at ntnu.no>
+ *
+ * C header file to determine compile machine byte order. Take care when cross
+ * compiling.
+ *
+ * $Id: byteorder.h 517 2013-02-17 20:34:39Z joern $
+ */
+/*
+ * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved
+ */
+
+#ifndef _CRYPTO_EDONR_BYTEORDER_H
+#define _CRYPTO_EDONR_BYTEORDER_H
+
+
+#include <sys/param.h>
+
+#if defined(__BYTE_ORDER)
+#if (__BYTE_ORDER == __BIG_ENDIAN)
+#define MACHINE_IS_BIG_ENDIAN
+#elif (__BYTE_ORDER == __LITTLE_ENDIAN)
+#define MACHINE_IS_LITTLE_ENDIAN
+#endif
+#elif defined(BYTE_ORDER)
+#if (BYTE_ORDER == BIG_ENDIAN)
+#define MACHINE_IS_BIG_ENDIAN
+#elif (BYTE_ORDER == LITTLE_ENDIAN)
+#define MACHINE_IS_LITTLE_ENDIAN
+#endif
+#endif /* __BYTE_ORDER || BYTE_ORDER */
+
+#if !defined(MACHINE_IS_BIG_ENDIAN) && !defined(MACHINE_IS_LITTLE_ENDIAN)
+#if defined(_BIG_ENDIAN) || defined(_MIPSEB)
+#define MACHINE_IS_BIG_ENDIAN
+#endif
+#if defined(_LITTLE_ENDIAN) || defined(_MIPSEL)
+#define MACHINE_IS_LITTLE_ENDIAN
+#endif
+#endif /* !MACHINE_IS_BIG_ENDIAN && !MACHINE_IS_LITTLE_ENDIAN */
+
+#if !defined(MACHINE_IS_BIG_ENDIAN) && !defined(MACHINE_IS_LITTLE_ENDIAN)
+#error unknown machine byte order
+#endif
+
+#define BYTEORDER_INCLUDED
+
+#if defined(MACHINE_IS_BIG_ENDIAN)
+/*
+ * Byte swapping macros for big endian architectures and compilers,
+ * add as appropriate for other architectures and/or compilers.
+ *
+ * ld_swap64(src,dst) : uint64_t dst = *(src)
+ * st_swap64(src,dst) : *(dst) = uint64_t src
+ */
+
+#if defined(__PPC__) || defined(_ARCH_PPC)
+
+#if defined(__64BIT__)
+#if defined(_ARCH_PWR7)
+#define aix_ld_swap64(s64, d64)\
+ __asm__("ldbrx %0,0,%1" : "=r"(d64) : "r"(s64))
+#define aix_st_swap64(s64, d64)\
+ __asm__ volatile("stdbrx %1,0,%0" : : "r"(d64), "r"(s64))
+#else
+#define aix_ld_swap64(s64, d64) \
+{ \
+ uint64_t *s4 = 0, h; /* initialize to zero for gcc warning */ \
+ \
+ __asm__("addi %0,%3,4;lwbrx %1,0,%3;lwbrx %2,0,%0;rldimi %1,%2,32,0"\
+ : "+r"(s4), "=r"(d64), "=r"(h) : "b"(s64)); \
+}
+
+#define aix_st_swap64(s64, d64) \
+{ \
+ uint64_t *s4 = 0, h; /* initialize to zero for gcc warning */ \
+ h = (s64) >> 32; \
+ __asm__ volatile("addi %0,%3,4;stwbrx %1,0,%3;stwbrx %2,0,%0" \
+ : "+r"(s4) : "r"(s64), "r"(h), "b"(d64)); \
+}
+#endif /* 64BIT && PWR7 */
+#else
+#define aix_ld_swap64(s64, d64) \
+{ \
+ uint32_t *s4 = 0, h, l; /* initialize to zero for gcc warning */\
+ __asm__("addi %0,%3,4;lwbrx %1,0,%3;lwbrx %2,0,%0" \
+ : "+r"(s4), "=r"(l), "=r"(h) : "b"(s64)); \
+ d64 = ((uint64_t)h<<32) | l; \
+}
+
+#define aix_st_swap64(s64, d64) \
+{ \
+ uint32_t *s4 = 0, h, l; /* initialize to zero for gcc warning */\
+ l = (s64) & 0xfffffffful, h = (s64) >> 32; \
+ __asm__ volatile("addi %0,%3,4;stwbrx %1,0,%3;stwbrx %2,0,%0" \
+ : "+r"(s4) : "r"(l), "r"(h), "b"(d64)); \
+}
+#endif /* __64BIT__ */
+#define aix_ld_swap32(s32, d32)\
+ __asm__("lwbrx %0,0,%1" : "=r"(d32) : "r"(s32))
+#define aix_st_swap32(s32, d32)\
+ __asm__ volatile("stwbrx %1,0,%0" : : "r"(d32), "r"(s32))
+#define ld_swap32(s, d) aix_ld_swap32(s, d)
+#define st_swap32(s, d) aix_st_swap32(s, d)
+#define ld_swap64(s, d) aix_ld_swap64(s, d)
+#define st_swap64(s, d) aix_st_swap64(s, d)
+#endif /* __PPC__ || _ARCH_PPC */
+
+#if defined(__sparc)
+#if !defined(__arch64__) && !defined(__sparcv8) && defined(__sparcv9)
+#define __arch64__
+#endif
+#if defined(__GNUC__) || (defined(__SUNPRO_C) && __SUNPRO_C > 0x590)
+/* need Sun Studio C 5.10 and above for GNU inline assembly */
+#if defined(__arch64__)
+#define sparc_ld_swap64(s64, d64) \
+ __asm__("ldxa [%1]0x88,%0" : "=r"(d64) : "r"(s64))
+#define sparc_st_swap64(s64, d64) \
+ __asm__ volatile("stxa %0,[%1]0x88" : : "r"(s64), "r"(d64))
+#define st_swap64(s, d) sparc_st_swap64(s, d)
+#else
+#define sparc_ld_swap64(s64, d64) \
+{ \
+ uint32_t *s4, h, l; \
+ __asm__("add %3,4,%0\n\tlda [%3]0x88,%1\n\tlda [%0]0x88,%2" \
+ : "+r"(s4), "=r"(l), "=r"(h) : "r"(s64)); \
+ d64 = ((uint64_t)h<<32) | l; \
+}
+#define sparc_st_swap64(s64, d64) \
+{ \
+ uint32_t *s4, h, l; \
+ l = (s64) & 0xfffffffful, h = (s64) >> 32; \
+ __asm__ volatile("add %3,4,%0\n\tsta %1,[%3]0x88\n\tsta %2,[%0]0x88"\
+ : "+r"(s4) : "r"(l), "r"(h), "r"(d64)); \
+}
+#endif /* sparc64 */
+#define sparc_ld_swap32(s32, d32)\
+ __asm__("lda [%1]0x88,%0" : "=r"(d32) : "r"(s32))
+#define sparc_st_swap32(s32, d32)\
+ __asm__ volatile("sta %0,[%1]0x88" : : "r"(s32), "r"(d32))
+#define ld_swap32(s, d) sparc_ld_swap32(s, d)
+#define st_swap32(s, d) sparc_st_swap32(s, d)
+#define ld_swap64(s, d) sparc_ld_swap64(s, d)
+#define st_swap64(s, d) sparc_st_swap64(s, d)
+#endif /* GCC || Sun Studio C > 5.9 */
+#endif /* sparc */
+
+/* GCC fallback */
+#if ((__GNUC__ >= 4) || defined(__PGIC__)) && !defined(ld_swap32)
+#define ld_swap32(s, d) (d = __builtin_bswap32(*(s)))
+#define st_swap32(s, d) (*(d) = __builtin_bswap32(s))
+#endif /* GCC4/PGIC && !swap32 */
+#if ((__GNUC__ >= 4) || defined(__PGIC__)) && !defined(ld_swap64)
+#define ld_swap64(s, d) (d = __builtin_bswap64(*(s)))
+#define st_swap64(s, d) (*(d) = __builtin_bswap64(s))
+#endif /* GCC4/PGIC && !swap64 */
+
+/* generic fallback */
+#if !defined(ld_swap32)
+#define ld_swap32(s, d) \
+ (d = (*(s) >> 24) | (*(s) >> 8 & 0xff00) | \
+ (*(s) << 8 & 0xff0000) | (*(s) << 24))
+#define st_swap32(s, d) \
+ (*(d) = ((s) >> 24) | ((s) >> 8 & 0xff00) | \
+ ((s) << 8 & 0xff0000) | ((s) << 24))
+#endif
+#if !defined(ld_swap64)
+#define ld_swap64(s, d) \
+ (d = (*(s) >> 56) | (*(s) >> 40 & 0xff00) | \
+ (*(s) >> 24 & 0xff0000) | (*(s) >> 8 & 0xff000000) | \
+ (*(s) & 0xff000000) << 8 | (*(s) & 0xff0000) << 24 | \
+ (*(s) & 0xff00) << 40 | *(s) << 56)
+#define st_swap64(s, d) \
+ (*(d) = ((s) >> 56) | ((s) >> 40 & 0xff00) | \
+ ((s) >> 24 & 0xff0000) | ((s) >> 8 & 0xff000000) | \
+ ((s) & 0xff000000) << 8 | ((s) & 0xff0000) << 24 | \
+ ((s) & 0xff00) << 40 | (s) << 56)
+#endif
+
+#endif /* MACHINE_IS_BIG_ENDIAN */
+
+
+#if defined(MACHINE_IS_LITTLE_ENDIAN)
+/* replace swaps with simple assignments on little endian systems */
+#undef ld_swap32
+#undef st_swap32
+#define ld_swap32(s, d) (d = *(s))
+#define st_swap32(s, d) (*(d) = s)
+#undef ld_swap64
+#undef st_swap64
+#define ld_swap64(s, d) (d = *(s))
+#define st_swap64(s, d) (*(d) = s)
+#endif /* MACHINE_IS_LITTLE_ENDIAN */
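+
+/*
+ * Editorial sketch, not part of the original patch: with the macros in
+ * place, a word stored little-endian in memory can be loaded portably
+ * on either host byte order, e.g.
+ *
+ *	static inline uint32_t
+ *	le_load32(const uint32_t *p)
+ *	{
+ *		uint32_t v;
+ *
+ *		ld_swap32(p, v);	(byte swap on BE, plain load on LE)
+ *		return (v);
+ *	}
+ */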
+
+#endif /* _CRYPTO_EDONR_BYTEORDER_H */
diff --git a/zfs/module/icp/algs/modes/cbc.c b/zfs/module/icp/algs/modes/cbc.c
new file mode 100644
index 000000000000..2cc94ec72656
--- /dev/null
+++ b/zfs/module/icp/algs/modes/cbc.c
@@ -0,0 +1,305 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <modes/modes.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+
+/*
+ * Algorithm independent CBC functions.
+ */
+int
+cbc_encrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*encrypt)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ size_t remainder = length;
+ size_t need = 0;
+ uint8_t *datap = (uint8_t *)data;
+ uint8_t *blockp;
+ uint8_t *lastp;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
+
+ if (length + ctx->cbc_remainder_len < block_size) {
+ /* accumulate bytes here and return */
+ bcopy(datap,
+ (uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len,
+ length);
+ ctx->cbc_remainder_len += length;
+ ctx->cbc_copy_to = datap;
+ return (CRYPTO_SUCCESS);
+ }
+
+ lastp = (uint8_t *)ctx->cbc_iv;
+ if (out != NULL)
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+
+ do {
+ /* Unprocessed data from last call. */
+ if (ctx->cbc_remainder_len > 0) {
+ need = block_size - ctx->cbc_remainder_len;
+
+ if (need > remainder)
+ return (CRYPTO_DATA_LEN_RANGE);
+
+ bcopy(datap, &((uint8_t *)ctx->cbc_remainder)
+ [ctx->cbc_remainder_len], need);
+
+ blockp = (uint8_t *)ctx->cbc_remainder;
+ } else {
+ blockp = datap;
+ }
+
+ if (out == NULL) {
+ /*
+ * XOR the previous cipher block or IV with the
+ * current clear block.
+ */
+ xor_block(lastp, blockp);
+ encrypt(ctx->cbc_keysched, blockp, blockp);
+
+ ctx->cbc_lastp = blockp;
+ lastp = blockp;
+
+ if (ctx->cbc_remainder_len > 0) {
+ bcopy(blockp, ctx->cbc_copy_to,
+ ctx->cbc_remainder_len);
+ bcopy(blockp + ctx->cbc_remainder_len, datap,
+ need);
+ }
+ } else {
+ /*
+ * XOR the previous cipher block or IV with the
+ * current clear block.
+ */
+ xor_block(blockp, lastp);
+ encrypt(ctx->cbc_keysched, lastp, lastp);
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2, block_size);
+
+ /* copy block to where it belongs */
+ if (out_data_1_len == block_size) {
+ copy_block(lastp, out_data_1);
+ } else {
+ bcopy(lastp, out_data_1, out_data_1_len);
+ if (out_data_2 != NULL) {
+ bcopy(lastp + out_data_1_len,
+ out_data_2,
+ block_size - out_data_1_len);
+ }
+ }
+ /* update offset */
+ out->cd_offset += block_size;
+ }
+
+ /* Update pointer to next block of data to be processed. */
+ if (ctx->cbc_remainder_len != 0) {
+ datap += need;
+ ctx->cbc_remainder_len = 0;
+ } else {
+ datap += block_size;
+ }
+
+ remainder = (size_t)&data[length] - (size_t)datap;
+
+ /* Incomplete last block. */
+ if (remainder > 0 && remainder < block_size) {
+ bcopy(datap, ctx->cbc_remainder, remainder);
+ ctx->cbc_remainder_len = remainder;
+ ctx->cbc_copy_to = datap;
+ goto out;
+ }
+ ctx->cbc_copy_to = NULL;
+
+ } while (remainder > 0);
+
+out:
+ /*
+ * Save the last encrypted block in the context.
+ */
+ if (ctx->cbc_lastp != NULL) {
+ copy_block((uint8_t *)ctx->cbc_lastp, (uint8_t *)ctx->cbc_iv);
+ ctx->cbc_lastp = (uint8_t *)ctx->cbc_iv;
+ }
+
+ return (CRYPTO_SUCCESS);
+}
+
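+/*
+ * CBC decryption must keep the previous ciphertext block around even
+ * when decrypting in place, so it ping-pongs between the two context
+ * buffers cbc_lastblock and cbc_iv: OTHER() yields whichever of the
+ * two buffers lastp does not currently point at.
+ */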
+#define OTHER(a, ctx) \
+ (((a) == (ctx)->cbc_lastblock) ? (ctx)->cbc_iv : (ctx)->cbc_lastblock)
+
+/* ARGSUSED */
+int
+cbc_decrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*decrypt)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ size_t remainder = length;
+ size_t need = 0;
+ uint8_t *datap = (uint8_t *)data;
+ uint8_t *blockp;
+ uint8_t *lastp;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
+
+ if (length + ctx->cbc_remainder_len < block_size) {
+ /* accumulate bytes here and return */
+ bcopy(datap,
+ (uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len,
+ length);
+ ctx->cbc_remainder_len += length;
+ ctx->cbc_copy_to = datap;
+ return (CRYPTO_SUCCESS);
+ }
+
+ lastp = ctx->cbc_lastp;
+ if (out != NULL)
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+
+ do {
+ /* Unprocessed data from last call. */
+ if (ctx->cbc_remainder_len > 0) {
+ need = block_size - ctx->cbc_remainder_len;
+
+ if (need > remainder)
+ return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE);
+
+ bcopy(datap, &((uint8_t *)ctx->cbc_remainder)
+ [ctx->cbc_remainder_len], need);
+
+ blockp = (uint8_t *)ctx->cbc_remainder;
+ } else {
+ blockp = datap;
+ }
+
+ /* LINTED: pointer alignment */
+ copy_block(blockp, (uint8_t *)OTHER((uint64_t *)lastp, ctx));
+
+ if (out != NULL) {
+ decrypt(ctx->cbc_keysched, blockp,
+ (uint8_t *)ctx->cbc_remainder);
+ blockp = (uint8_t *)ctx->cbc_remainder;
+ } else {
+ decrypt(ctx->cbc_keysched, blockp, blockp);
+ }
+
+ /*
+ * XOR the previous cipher block or IV with the
+ * currently decrypted block.
+ */
+ xor_block(lastp, blockp);
+
+ /* LINTED: pointer alignment */
+ lastp = (uint8_t *)OTHER((uint64_t *)lastp, ctx);
+
+ if (out != NULL) {
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2, block_size);
+
+ bcopy(blockp, out_data_1, out_data_1_len);
+ if (out_data_2 != NULL) {
+ bcopy(blockp + out_data_1_len, out_data_2,
+ block_size - out_data_1_len);
+ }
+
+ /* update offset */
+ out->cd_offset += block_size;
+
+ } else if (ctx->cbc_remainder_len > 0) {
+ /* copy temporary block to where it belongs */
+ bcopy(blockp, ctx->cbc_copy_to, ctx->cbc_remainder_len);
+ bcopy(blockp + ctx->cbc_remainder_len, datap, need);
+ }
+
+ /* Update pointer to next block of data to be processed. */
+ if (ctx->cbc_remainder_len != 0) {
+ datap += need;
+ ctx->cbc_remainder_len = 0;
+ } else {
+ datap += block_size;
+ }
+
+ remainder = (size_t)&data[length] - (size_t)datap;
+
+ /* Incomplete last block. */
+ if (remainder > 0 && remainder < block_size) {
+ bcopy(datap, ctx->cbc_remainder, remainder);
+ ctx->cbc_remainder_len = remainder;
+ ctx->cbc_lastp = lastp;
+ ctx->cbc_copy_to = datap;
+ return (CRYPTO_SUCCESS);
+ }
+ ctx->cbc_copy_to = NULL;
+
+ } while (remainder > 0);
+
+ ctx->cbc_lastp = lastp;
+ return (CRYPTO_SUCCESS);
+}
+
+int
+cbc_init_ctx(cbc_ctx_t *cbc_ctx, char *param, size_t param_len,
+ size_t block_size, void (*copy_block)(uint8_t *, uint64_t *))
+{
+ /*
+ * Copy IV into context.
+ *
+ * If param == NULL then the IV comes from the
+ * cd_miscdata field in the crypto_data structure.
+ */
+ if (param != NULL) {
+ ASSERT(param_len == block_size);
+ copy_block((uchar_t *)param, cbc_ctx->cbc_iv);
+ }
+
+ cbc_ctx->cbc_lastp = (uint8_t *)&cbc_ctx->cbc_iv[0];
+ cbc_ctx->cbc_flags |= CBC_MODE;
+ return (CRYPTO_SUCCESS);
+}
+
+/* ARGSUSED */
+void *
+cbc_alloc_ctx(int kmflag)
+{
+ cbc_ctx_t *cbc_ctx;
+
+ if ((cbc_ctx = kmem_zalloc(sizeof (cbc_ctx_t), kmflag)) == NULL)
+ return (NULL);
+
+ cbc_ctx->cbc_flags = CBC_MODE;
+ return (cbc_ctx);
+}
diff --git a/zfs/module/icp/algs/modes/ccm.c b/zfs/module/icp/algs/modes/ccm.c
new file mode 100644
index 000000000000..22aeb0a6aa47
--- /dev/null
+++ b/zfs/module/icp/algs/modes/ccm.c
@@ -0,0 +1,920 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <modes/modes.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+
+#if defined(__i386) || defined(__amd64)
+#include <sys/byteorder.h>
+#define UNALIGNED_POINTERS_PERMITTED
+#endif
+
+/*
+ * Encrypt multiple blocks of data in CCM mode. Decrypt for CCM mode
+ * is done in another function.
+ */
+int
+ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ size_t remainder = length;
+ size_t need = 0;
+ uint8_t *datap = (uint8_t *)data;
+ uint8_t *blockp;
+ uint8_t *lastp;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
+ uint64_t counter;
+ uint8_t *mac_buf;
+
+ if (length + ctx->ccm_remainder_len < block_size) {
+ /* accumulate bytes here and return */
+ bcopy(datap,
+ (uint8_t *)ctx->ccm_remainder + ctx->ccm_remainder_len,
+ length);
+ ctx->ccm_remainder_len += length;
+ ctx->ccm_copy_to = datap;
+ return (CRYPTO_SUCCESS);
+ }
+
+ lastp = (uint8_t *)ctx->ccm_cb;
+ if (out != NULL)
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+
+ mac_buf = (uint8_t *)ctx->ccm_mac_buf;
+
+ do {
+ /* Unprocessed data from last call. */
+ if (ctx->ccm_remainder_len > 0) {
+ need = block_size - ctx->ccm_remainder_len;
+
+ if (need > remainder)
+ return (CRYPTO_DATA_LEN_RANGE);
+
+ bcopy(datap, &((uint8_t *)ctx->ccm_remainder)
+ [ctx->ccm_remainder_len], need);
+
+ blockp = (uint8_t *)ctx->ccm_remainder;
+ } else {
+ blockp = datap;
+ }
+
+ /*
+ * Do the CBC MAC:
+ *
+ * XOR the previous cipher block with the current clear block;
+ * mac_buf always contains the previous cipher block.
+ */
+ xor_block(blockp, mac_buf);
+ encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf);
+
+ /* ccm_cb is the counter block */
+ encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb,
+ (uint8_t *)ctx->ccm_tmp);
+
+ lastp = (uint8_t *)ctx->ccm_tmp;
+
+ /*
+ * Increment counter. Counter bits are confined
+ * to the bottom 64 bits of the counter block.
+ */
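+ /*
+ * The counter block is kept in big-endian (wire) order, so on
+ * little-endian hosts the masked counter is byte-swapped with
+ * ntohll() before the increment and swapped back with htonll().
+ */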
+#ifdef _LITTLE_ENDIAN
+ counter = ntohll(ctx->ccm_cb[1] & ctx->ccm_counter_mask);
+ counter = htonll(counter + 1);
+#else
+ counter = ctx->ccm_cb[1] & ctx->ccm_counter_mask;
+ counter++;
+#endif /* _LITTLE_ENDIAN */
+ counter &= ctx->ccm_counter_mask;
+ ctx->ccm_cb[1] =
+ (ctx->ccm_cb[1] & ~(ctx->ccm_counter_mask)) | counter;
+
+ /*
+ * XOR encrypted counter block with the current clear block.
+ */
+ xor_block(blockp, lastp);
+
+ ctx->ccm_processed_data_len += block_size;
+
+ if (out == NULL) {
+ if (ctx->ccm_remainder_len > 0) {
+ bcopy(blockp, ctx->ccm_copy_to,
+ ctx->ccm_remainder_len);
+ bcopy(blockp + ctx->ccm_remainder_len, datap,
+ need);
+ }
+ } else {
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2, block_size);
+
+ /* copy block to where it belongs */
+ if (out_data_1_len == block_size) {
+ copy_block(lastp, out_data_1);
+ } else {
+ bcopy(lastp, out_data_1, out_data_1_len);
+ if (out_data_2 != NULL) {
+ bcopy(lastp + out_data_1_len,
+ out_data_2,
+ block_size - out_data_1_len);
+ }
+ }
+ /* update offset */
+ out->cd_offset += block_size;
+ }
+
+ /* Update pointer to next block of data to be processed. */
+ if (ctx->ccm_remainder_len != 0) {
+ datap += need;
+ ctx->ccm_remainder_len = 0;
+ } else {
+ datap += block_size;
+ }
+
+ remainder = (size_t)&data[length] - (size_t)datap;
+
+ /* Incomplete last block. */
+ if (remainder > 0 && remainder < block_size) {
+ bcopy(datap, ctx->ccm_remainder, remainder);
+ ctx->ccm_remainder_len = remainder;
+ ctx->ccm_copy_to = datap;
+ goto out;
+ }
+ ctx->ccm_copy_to = NULL;
+
+ } while (remainder > 0);
+
+out:
+ return (CRYPTO_SUCCESS);
+}
+
+void
+calculate_ccm_mac(ccm_ctx_t *ctx, uint8_t *ccm_mac,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *))
+{
+ uint64_t counter;
+ uint8_t *counterp, *mac_buf;
+ int i;
+
+ mac_buf = (uint8_t *)ctx->ccm_mac_buf;
+
+ /* the first counter block starts with index 0 */
+ counter = 0;
+ ctx->ccm_cb[1] = (ctx->ccm_cb[1] & ~(ctx->ccm_counter_mask)) | counter;
+
+ counterp = (uint8_t *)ctx->ccm_tmp;
+ encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, counterp);
+
+ /* calculate XOR of MAC with first counter block */
+ for (i = 0; i < ctx->ccm_mac_len; i++) {
+ ccm_mac[i] = mac_buf[i] ^ counterp[i];
+ }
+}
+
+/* ARGSUSED */
+int
+ccm_encrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ uint8_t *lastp, *mac_buf, *ccm_mac_p, *macp = NULL;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
+ int i;
+
+ if (out->cd_length < (ctx->ccm_remainder_len + ctx->ccm_mac_len)) {
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ /*
+ * By this point, the number of payload bytes processed plus any
+ * remaining data must equal the payload length that was supplied
+ * at init time.
+ */
+ if ((ctx->ccm_processed_data_len + ctx->ccm_remainder_len)
+ != (ctx->ccm_data_len)) {
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ mac_buf = (uint8_t *)ctx->ccm_mac_buf;
+
+ if (ctx->ccm_remainder_len > 0) {
+
+ /* ccm_mac_input_buf is not used for encryption */
+ macp = (uint8_t *)ctx->ccm_mac_input_buf;
+ bzero(macp, block_size);
+
+ /* copy remainder to temporary buffer */
+ bcopy(ctx->ccm_remainder, macp, ctx->ccm_remainder_len);
+
+ /* calculate the CBC MAC */
+ xor_block(macp, mac_buf);
+ encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf);
+
+ /* calculate the counter mode */
+ lastp = (uint8_t *)ctx->ccm_tmp;
+ encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, lastp);
+
+ /* XOR with counter block */
+ for (i = 0; i < ctx->ccm_remainder_len; i++) {
+ macp[i] ^= lastp[i];
+ }
+ ctx->ccm_processed_data_len += ctx->ccm_remainder_len;
+ }
+
+ /* Calculate the CCM MAC */
+ ccm_mac_p = (uint8_t *)ctx->ccm_tmp;
+ calculate_ccm_mac(ctx, ccm_mac_p, encrypt_block);
+
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2,
+ ctx->ccm_remainder_len + ctx->ccm_mac_len);
+
+ if (ctx->ccm_remainder_len > 0) {
+
+ /* copy temporary block to where it belongs */
+ if (out_data_2 == NULL) {
+ /* everything will fit in out_data_1 */
+ bcopy(macp, out_data_1, ctx->ccm_remainder_len);
+ bcopy(ccm_mac_p, out_data_1 + ctx->ccm_remainder_len,
+ ctx->ccm_mac_len);
+ } else {
+
+ if (out_data_1_len < ctx->ccm_remainder_len) {
+
+ size_t data_2_len_used;
+
+ bcopy(macp, out_data_1, out_data_1_len);
+
+ data_2_len_used = ctx->ccm_remainder_len
+ - out_data_1_len;
+
+ bcopy((uint8_t *)macp + out_data_1_len,
+ out_data_2, data_2_len_used);
+ bcopy(ccm_mac_p, out_data_2 + data_2_len_used,
+ ctx->ccm_mac_len);
+ } else {
+ bcopy(macp, out_data_1, out_data_1_len);
+ if (out_data_1_len == ctx->ccm_remainder_len) {
+ /* mac will be in out_data_2 */
+ bcopy(ccm_mac_p, out_data_2,
+ ctx->ccm_mac_len);
+ } else {
+ size_t len_not_used = out_data_1_len -
+ ctx->ccm_remainder_len;
+ /*
+ * part of the mac will be in
+ * out_data_1 and the rest of
+ * the mac in out_data_2
+ */
+ bcopy(ccm_mac_p,
+ out_data_1 + ctx->ccm_remainder_len,
+ len_not_used);
+ bcopy(ccm_mac_p + len_not_used,
+ out_data_2,
+ ctx->ccm_mac_len - len_not_used);
+
+ }
+ }
+ }
+ } else {
+ /* copy block to where it belongs */
+ bcopy(ccm_mac_p, out_data_1, out_data_1_len);
+ if (out_data_2 != NULL) {
+ bcopy(ccm_mac_p + out_data_1_len, out_data_2,
+ block_size - out_data_1_len);
+ }
+ }
+ out->cd_offset += ctx->ccm_remainder_len + ctx->ccm_mac_len;
+ ctx->ccm_remainder_len = 0;
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * This handles only the decryption of the last block of the input,
+ * which might not be a multiple of the block length.
+ */
+void
+ccm_decrypt_incomplete_block(ccm_ctx_t *ctx,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *))
+{
+ uint8_t *datap, *outp, *counterp;
+ int i;
+
+ datap = (uint8_t *)ctx->ccm_remainder;
+ outp = &((ctx->ccm_pt_buf)[ctx->ccm_processed_data_len]);
+
+ counterp = (uint8_t *)ctx->ccm_tmp;
+ encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, counterp);
+
+ /* XOR with counter block */
+ for (i = 0; i < ctx->ccm_remainder_len; i++) {
+ outp[i] = datap[i] ^ counterp[i];
+ }
+}
+
+/*
+ * This decrypts the ciphertext, but the plaintext is not handed back
+ * to the caller here; it is returned by decrypt_final(), and only if
+ * the MAC matches.
+ */
+/* ARGSUSED */
+int
+ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ size_t remainder = length;
+ size_t need = 0;
+ uint8_t *datap = (uint8_t *)data;
+ uint8_t *blockp;
+ uint8_t *cbp;
+ uint64_t counter;
+ size_t pt_len, total_decrypted_len, mac_len, pm_len, pd_len;
+ uint8_t *resultp;
+
+
+ pm_len = ctx->ccm_processed_mac_len;
+
+ if (pm_len > 0) {
+ uint8_t *tmp;
+ /*
+ * all ciphertext has been processed; only the remaining
+ * bytes of the MAC are still expected
+ */
+ if ((pm_len + length) > ctx->ccm_mac_len) {
+ return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE);
+ }
+ tmp = (uint8_t *)ctx->ccm_mac_input_buf;
+
+ bcopy(datap, tmp + pm_len, length);
+
+ ctx->ccm_processed_mac_len += length;
+ return (CRYPTO_SUCCESS);
+ }
+
+ /*
+ * If we decrypt the given data, what total amount of data would
+ * have been decrypted?
+ */
+ pd_len = ctx->ccm_processed_data_len;
+ total_decrypted_len = pd_len + length + ctx->ccm_remainder_len;
+
+ if (total_decrypted_len >
+ (ctx->ccm_data_len + ctx->ccm_mac_len)) {
+ return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE);
+ }
+
+ pt_len = ctx->ccm_data_len;
+
+ if (total_decrypted_len > pt_len) {
+ /*
+ * Part of the input is the MAC; isolate it to be dealt
+ * with later. The left-over data accounted for by
+ * ccm_remainder_len from the previous call cannot be part
+ * of the MAC; otherwise it would already have been taken
+ * out during that call.
+ */
+ size_t pt_part = pt_len - pd_len - ctx->ccm_remainder_len;
+
+ mac_len = length - pt_part;
+
+ ctx->ccm_processed_mac_len = mac_len;
+ bcopy(data + pt_part, ctx->ccm_mac_input_buf, mac_len);
+
+ if (pt_part + ctx->ccm_remainder_len < block_size) {
+ /*
+ * this is the last of the ciphertext, so just
+ * decrypt it here
+ */
+ bcopy(datap, &((uint8_t *)ctx->ccm_remainder)
+ [ctx->ccm_remainder_len], pt_part);
+ ctx->ccm_remainder_len += pt_part;
+ ccm_decrypt_incomplete_block(ctx, encrypt_block);
+ ctx->ccm_processed_data_len += ctx->ccm_remainder_len;
+ ctx->ccm_remainder_len = 0;
+ return (CRYPTO_SUCCESS);
+ } else {
+ /* let rest of the code handle this */
+ length = pt_part;
+ }
+ } else if (length + ctx->ccm_remainder_len < block_size) {
+ /* accumulate bytes here and return */
+ bcopy(datap,
+ (uint8_t *)ctx->ccm_remainder + ctx->ccm_remainder_len,
+ length);
+ ctx->ccm_remainder_len += length;
+ ctx->ccm_copy_to = datap;
+ return (CRYPTO_SUCCESS);
+ }
+
+ do {
+ /* Unprocessed data from last call. */
+ if (ctx->ccm_remainder_len > 0) {
+ need = block_size - ctx->ccm_remainder_len;
+
+ if (need > remainder)
+ return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE);
+
+ bcopy(datap, &((uint8_t *)ctx->ccm_remainder)
+ [ctx->ccm_remainder_len], need);
+
+ blockp = (uint8_t *)ctx->ccm_remainder;
+ } else {
+ blockp = datap;
+ }
+
+ /* Calculate the counter mode, ccm_cb is the counter block */
+ cbp = (uint8_t *)ctx->ccm_tmp;
+ encrypt_block(ctx->ccm_keysched, (uint8_t *)ctx->ccm_cb, cbp);
+
+ /*
+ * Increment counter.
+ * Counter bits are confined to the bottom 64 bits
+ */
+#ifdef _LITTLE_ENDIAN
+ counter = ntohll(ctx->ccm_cb[1] & ctx->ccm_counter_mask);
+ counter = htonll(counter + 1);
+#else
+ counter = ctx->ccm_cb[1] & ctx->ccm_counter_mask;
+ counter++;
+#endif /* _LITTLE_ENDIAN */
+ counter &= ctx->ccm_counter_mask;
+ ctx->ccm_cb[1] =
+ (ctx->ccm_cb[1] & ~(ctx->ccm_counter_mask)) | counter;
+
+ /* XOR with the ciphertext */
+ xor_block(blockp, cbp);
+
+ /* Copy the plaintext to the "holding buffer" */
+ resultp = (uint8_t *)ctx->ccm_pt_buf +
+ ctx->ccm_processed_data_len;
+ copy_block(cbp, resultp);
+
+ ctx->ccm_processed_data_len += block_size;
+
+ ctx->ccm_lastp = blockp;
+
+ /* Update pointer to next block of data to be processed. */
+ if (ctx->ccm_remainder_len != 0) {
+ datap += need;
+ ctx->ccm_remainder_len = 0;
+ } else {
+ datap += block_size;
+ }
+
+ remainder = (size_t)&data[length] - (size_t)datap;
+
+ /* Incomplete last block */
+ if (remainder > 0 && remainder < block_size) {
+ bcopy(datap, ctx->ccm_remainder, remainder);
+ ctx->ccm_remainder_len = remainder;
+ ctx->ccm_copy_to = datap;
+ if (ctx->ccm_processed_mac_len > 0) {
+ /*
+ * no more ciphertext is expected; just compute
+ * the plaintext for the remaining input
+ */
+ ccm_decrypt_incomplete_block(ctx,
+ encrypt_block);
+ ctx->ccm_processed_data_len += remainder;
+ ctx->ccm_remainder_len = 0;
+ }
+ goto out;
+ }
+ ctx->ccm_copy_to = NULL;
+
+ } while (remainder > 0);
+
+out:
+ return (CRYPTO_SUCCESS);
+}
+
+int
+ccm_decrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ size_t mac_remain, pt_len;
+ uint8_t *pt, *mac_buf, *macp, *ccm_mac_p;
+ int rv;
+
+ pt_len = ctx->ccm_data_len;
+
+ /* Make sure output buffer can fit all of the plaintext */
+ if (out->cd_length < pt_len) {
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ pt = ctx->ccm_pt_buf;
+ mac_remain = ctx->ccm_processed_data_len;
+ mac_buf = (uint8_t *)ctx->ccm_mac_buf;
+
+ macp = (uint8_t *)ctx->ccm_tmp;
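+ /*
+ * Recompute the CBC MAC over the buffered plaintext so that it
+ * can be compared against the MAC recovered from the ciphertext.
+ */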
+
+ while (mac_remain > 0) {
+
+ if (mac_remain < block_size) {
+ bzero(macp, block_size);
+ bcopy(pt, macp, mac_remain);
+ mac_remain = 0;
+ } else {
+ copy_block(pt, macp);
+ mac_remain -= block_size;
+ pt += block_size;
+ }
+
+ /* calculate the CBC MAC */
+ xor_block(macp, mac_buf);
+ encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf);
+ }
+
+ /* Calculate the CCM MAC */
+ ccm_mac_p = (uint8_t *)ctx->ccm_tmp;
+ calculate_ccm_mac((ccm_ctx_t *)ctx, ccm_mac_p, encrypt_block);
+
+ /* compare the input CCM MAC value with what we calculated */
+ if (bcmp(ctx->ccm_mac_input_buf, ccm_mac_p, ctx->ccm_mac_len)) {
+ /* They don't match */
+ return (CRYPTO_INVALID_MAC);
+ } else {
+ rv = crypto_put_output_data(ctx->ccm_pt_buf, out, pt_len);
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ out->cd_offset += pt_len;
+ }
+ return (CRYPTO_SUCCESS);
+}
+
+int
+ccm_validate_args(CK_AES_CCM_PARAMS *ccm_param, boolean_t is_encrypt_init)
+{
+ size_t macSize, nonceSize;
+ uint8_t q;
+ uint64_t maxValue;
+
+ /*
+ * Check the length of the MAC. The only valid
+ * lengths for the MAC are: 4, 6, 8, 10, 12, 14, 16
+ */
+ macSize = ccm_param->ulMACSize;
+ if ((macSize < 4) || (macSize > 16) || ((macSize % 2) != 0)) {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+
+ /* Check the nonce length. Valid values are 7, 8, 9, 10, 11, 12, 13 */
+ nonceSize = ccm_param->ulNonceSize;
+ if ((nonceSize < 7) || (nonceSize > 13)) {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+
+ /* q is the length of the field storing the length, in bytes */
+ q = (uint8_t)((15 - nonceSize) & 0xFF);
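+ /*
+ * For example, the largest allowed nonce (13 bytes) leaves q == 2,
+ * which limits the payload length to 2^16 - 1 bytes.
+ */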
+
+
+ /*
+ * For decryption, make sure the ciphertext is at least as
+ * large as the MAC.
+ */
+ if ((!is_encrypt_init) && (ccm_param->ulDataSize < macSize)) {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+
+ /*
+ * Check to make sure the length of the payload is within the
+ * range of values allowed by q
+ */
+ if (q < 8) {
+ maxValue = (1ULL << (q * 8)) - 1;
+ } else {
+ maxValue = ULONG_MAX;
+ }
+
+ if (ccm_param->ulDataSize > maxValue) {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * Format the first block used in CBC-MAC (B0) and the initial counter
+ * block, following the formatting and counter-generation functions
+ * specified in RFC 3610 and NIST publication 800-38C, appendix A.
+ *
+ * b0 is the first block used in CBC-MAC; the first counter block is
+ * written to aes_ctx->ccm_cb.
+ *
+ * It is assumed that b0 points to a preallocated AES block.
+ */
+static void
+ccm_format_initial_blocks(uchar_t *nonce, ulong_t nonceSize,
+ ulong_t authDataSize, uint8_t *b0, ccm_ctx_t *aes_ctx)
+{
+ uint64_t payloadSize;
+ uint8_t t, q, have_adata = 0;
+ size_t limit;
+ int i, j, k;
+ uint64_t mask = 0;
+ uint8_t *cb;
+
+ q = (uint8_t)((15 - nonceSize) & 0xFF);
+ t = (uint8_t)((aes_ctx->ccm_mac_len) & 0xFF);
+
+ /* Construct the first octet of b0 */
+ if (authDataSize > 0) {
+ have_adata = 1;
+ }
+ b0[0] = (have_adata << 6) | (((t - 2) / 2) << 3) | (q - 1);
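+ /*
+ * For example, a 16-byte MAC (t == 16), a 13-byte nonce (q == 2)
+ * and non-empty associated data give
+ * b0[0] = 0x40 | 0x38 | 0x01 = 0x79.
+ */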
+
+ /* copy the nonce value into b0 */
+ bcopy(nonce, &(b0[1]), nonceSize);
+
+ /* store the length of the payload into b0 */
+ bzero(&(b0[1+nonceSize]), q);
+
+ payloadSize = aes_ctx->ccm_data_len;
+ limit = 8 < q ? 8 : q;
+
+ for (i = 0, j = 0, k = 15; i < limit; i++, j += 8, k--) {
+ b0[k] = (uint8_t)((payloadSize >> j) & 0xFF);
+ }
+
+ /* format the counter block */
+
+ cb = (uint8_t *)aes_ctx->ccm_cb;
+
+ cb[0] = 0x07 & (q-1); /* first byte */
+
+ /* copy the nonce value into the counter block */
+ bcopy(nonce, &(cb[1]), nonceSize);
+
+ bzero(&(cb[1+nonceSize]), q);
+
+ /* Create the mask for the counter field based on the size of nonce */
+ q <<= 3;
+ while (q-- > 0) {
+ mask |= (1ULL << q);
+ }
+
+#ifdef _LITTLE_ENDIAN
+ mask = htonll(mask);
+#endif
+ aes_ctx->ccm_counter_mask = mask;
+
+ /*
+ * The calculation starts with counter block 1, so set it up right
+ * here. Setting the last byte to 1 is sufficient because, even
+ * with the biggest nonce of 13 bytes, the last byte of the counter
+ * block is part of the counter value.
+ */
+ cb[15] = 0x01;
+}
+
+/*
+ * Encode the length of the associated data as
+ * specified in RFC 3610 and NIST publication 800-38C, appendix A
+ */
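+/*
+ * For example, a length of 0x1234 is encoded as the two octets
+ * 12 34, while a length of 0x10000 takes the six-octet form
+ * ff fe 00 01 00 00.
+ */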
+static void
+encode_adata_len(ulong_t auth_data_len, uint8_t *encoded, size_t *encoded_len)
+{
+#ifdef UNALIGNED_POINTERS_PERMITTED
+ uint32_t *lencoded_ptr;
+#ifdef _LP64
+ uint64_t *llencoded_ptr;
+#endif
+#endif /* UNALIGNED_POINTERS_PERMITTED */
+
+ if (auth_data_len < ((1ULL<<16) - (1ULL<<8))) {
+ /* 0 < a < (2^16-2^8) */
+ *encoded_len = 2;
+ encoded[0] = (auth_data_len & 0xff00) >> 8;
+ encoded[1] = auth_data_len & 0xff;
+
+ } else if ((auth_data_len >= ((1ULL<<16) - (1ULL<<8))) &&
+ (auth_data_len < (1ULL << 31))) {
+ /* (2^16-2^8) <= a < 2^31 */
+ *encoded_len = 6;
+ encoded[0] = 0xff;
+ encoded[1] = 0xfe;
+#ifdef UNALIGNED_POINTERS_PERMITTED
+ lencoded_ptr = (uint32_t *)&encoded[2];
+ *lencoded_ptr = htonl(auth_data_len);
+#else
+ encoded[2] = (auth_data_len & 0xff000000) >> 24;
+ encoded[3] = (auth_data_len & 0xff0000) >> 16;
+ encoded[4] = (auth_data_len & 0xff00) >> 8;
+ encoded[5] = auth_data_len & 0xff;
+#endif /* UNALIGNED_POINTERS_PERMITTED */
+
+#ifdef _LP64
+ } else {
+ /* 2^31 <= a < 2^64 */
+ *encoded_len = 10;
+ encoded[0] = 0xff;
+ encoded[1] = 0xff;
+#ifdef UNALIGNED_POINTERS_PERMITTED
+ llencoded_ptr = (uint64_t *)&encoded[2];
+ *llencoded_ptr = htonll(auth_data_len);
+#else
+ encoded[2] = (auth_data_len & 0xff00000000000000) >> 56;
+ encoded[3] = (auth_data_len & 0xff000000000000) >> 48;
+ encoded[4] = (auth_data_len & 0xff0000000000) >> 40;
+ encoded[5] = (auth_data_len & 0xff00000000) >> 32;
+ encoded[6] = (auth_data_len & 0xff000000) >> 24;
+ encoded[7] = (auth_data_len & 0xff0000) >> 16;
+ encoded[8] = (auth_data_len & 0xff00) >> 8;
+ encoded[9] = auth_data_len & 0xff;
+#endif /* UNALIGNED_POINTERS_PERMITTED */
+#endif /* _LP64 */
+ }
+}
+
+/*
+ * The following function should be called at encrypt or decrypt init
+ * time for AES CCM mode.
+ */
+int
+ccm_init(ccm_ctx_t *ctx, unsigned char *nonce, size_t nonce_len,
+ unsigned char *auth_data, size_t auth_data_len, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ uint8_t *mac_buf, *datap, *ivp, *authp;
+ size_t remainder, processed;
+ uint8_t encoded_a[10]; /* max encoded auth data length is 10 octets */
+ size_t encoded_a_len = 0;
+
+ mac_buf = (uint8_t *)&(ctx->ccm_mac_buf);
+
+ /*
+ * Format the 1st block for CBC-MAC and construct the
+ * 1st counter block.
+ *
+ * aes_ctx->ccm_iv is used for storing the counter block
+ * mac_buf will store b0 at this time.
+ */
+ ccm_format_initial_blocks(nonce, nonce_len,
+ auth_data_len, mac_buf, ctx);
+
+ /* The IV for CBC MAC for AES CCM mode is always zero */
+ ivp = (uint8_t *)ctx->ccm_tmp;
+ bzero(ivp, block_size);
+
+ xor_block(ivp, mac_buf);
+
+ /* encrypt the nonce */
+ encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf);
+
+ /* take care of the associated data, if any */
+ if (auth_data_len == 0) {
+ return (CRYPTO_SUCCESS);
+ }
+
+ encode_adata_len(auth_data_len, encoded_a, &encoded_a_len);
+
+ remainder = auth_data_len;
+
+ /* 1st block: it contains the encoded adata length, and some of the data */
+ authp = (uint8_t *)ctx->ccm_tmp;
+ bzero(authp, block_size);
+ bcopy(encoded_a, authp, encoded_a_len);
+ processed = block_size - encoded_a_len;
+ if (processed > auth_data_len) {
+ /* in case auth_data is very small */
+ processed = auth_data_len;
+ }
+ bcopy(auth_data, authp+encoded_a_len, processed);
+ /* xor with previous buffer */
+ xor_block(authp, mac_buf);
+ encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf);
+ remainder -= processed;
+ if (remainder == 0) {
+ /* all of the associated data fit in the first block; done */
+ return (CRYPTO_SUCCESS);
+ }
+
+ do {
+ if (remainder < block_size) {
+ /*
+ * Less than a full block of data remains; pad the
+ * rest of the buffer with zeros
+ */
+ bzero(authp, block_size);
+ bcopy(&(auth_data[processed]), authp, remainder);
+ datap = (uint8_t *)authp;
+ remainder = 0;
+ } else {
+ datap = (uint8_t *)(&(auth_data[processed]));
+ processed += block_size;
+ remainder -= block_size;
+ }
+
+ xor_block(datap, mac_buf);
+ encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf);
+
+ } while (remainder > 0);
+
+ return (CRYPTO_SUCCESS);
+}
+
+int
+ccm_init_ctx(ccm_ctx_t *ccm_ctx, char *param, int kmflag,
+ boolean_t is_encrypt_init, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ int rv;
+ CK_AES_CCM_PARAMS *ccm_param;
+
+ if (param != NULL) {
+ ccm_param = (CK_AES_CCM_PARAMS *)param;
+
+ if ((rv = ccm_validate_args(ccm_param,
+ is_encrypt_init)) != 0) {
+ return (rv);
+ }
+
+ ccm_ctx->ccm_mac_len = ccm_param->ulMACSize;
+ if (is_encrypt_init) {
+ ccm_ctx->ccm_data_len = ccm_param->ulDataSize;
+ } else {
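+ /*
+ * On decrypt, ulDataSize covers the ciphertext plus the
+ * appended MAC, so the payload length excludes the MAC.
+ */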
+ ccm_ctx->ccm_data_len =
+ ccm_param->ulDataSize - ccm_ctx->ccm_mac_len;
+ ccm_ctx->ccm_processed_mac_len = 0;
+ }
+ ccm_ctx->ccm_processed_data_len = 0;
+
+ ccm_ctx->ccm_flags |= CCM_MODE;
+ } else {
+ rv = CRYPTO_MECHANISM_PARAM_INVALID;
+ goto out;
+ }
+
+ if (ccm_init(ccm_ctx, ccm_param->nonce, ccm_param->ulNonceSize,
+ ccm_param->authData, ccm_param->ulAuthDataSize, block_size,
+ encrypt_block, xor_block) != 0) {
+ rv = CRYPTO_MECHANISM_PARAM_INVALID;
+ goto out;
+ }
+ if (!is_encrypt_init) {
+ /* allocate buffer for storing decrypted plaintext */
+ ccm_ctx->ccm_pt_buf = vmem_alloc(ccm_ctx->ccm_data_len,
+ kmflag);
+ if (ccm_ctx->ccm_pt_buf == NULL) {
+ rv = CRYPTO_HOST_MEMORY;
+ }
+ }
+out:
+ return (rv);
+}
+
+void *
+ccm_alloc_ctx(int kmflag)
+{
+ ccm_ctx_t *ccm_ctx;
+
+ if ((ccm_ctx = kmem_zalloc(sizeof (ccm_ctx_t), kmflag)) == NULL)
+ return (NULL);
+
+ ccm_ctx->ccm_flags = CCM_MODE;
+ return (ccm_ctx);
+}
diff --git a/zfs/module/icp/algs/modes/ctr.c b/zfs/module/icp/algs/modes/ctr.c
new file mode 100644
index 000000000000..e3b0e1238232
--- /dev/null
+++ b/zfs/module/icp/algs/modes/ctr.c
@@ -0,0 +1,238 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <modes/modes.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+#include <sys/byteorder.h>
+
+/*
+ * Encrypt and decrypt multiple blocks of data in counter mode.
+ */
+int
+ctr_mode_contiguous_blocks(ctr_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*cipher)(const void *ks, const uint8_t *pt, uint8_t *ct),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ size_t remainder = length;
+ size_t need = 0;
+ uint8_t *datap = (uint8_t *)data;
+ uint8_t *blockp;
+ uint8_t *lastp;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
+ uint64_t lower_counter, upper_counter;
+
+ if (length + ctx->ctr_remainder_len < block_size) {
+ /* accumulate bytes here and return */
+ bcopy(datap,
+ (uint8_t *)ctx->ctr_remainder + ctx->ctr_remainder_len,
+ length);
+ ctx->ctr_remainder_len += length;
+ ctx->ctr_copy_to = datap;
+ return (CRYPTO_SUCCESS);
+ }
+
+ lastp = (uint8_t *)ctx->ctr_cb;
+ if (out != NULL)
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+
+ do {
+ /* Unprocessed data from last call. */
+ if (ctx->ctr_remainder_len > 0) {
+ need = block_size - ctx->ctr_remainder_len;
+
+ if (need > remainder)
+ return (CRYPTO_DATA_LEN_RANGE);
+
+ bcopy(datap, &((uint8_t *)ctx->ctr_remainder)
+ [ctx->ctr_remainder_len], need);
+
+ blockp = (uint8_t *)ctx->ctr_remainder;
+ } else {
+ blockp = datap;
+ }
+
+ /* ctr_cb is the counter block */
+ cipher(ctx->ctr_keysched, (uint8_t *)ctx->ctr_cb,
+ (uint8_t *)ctx->ctr_tmp);
+
+ lastp = (uint8_t *)ctx->ctr_tmp;
+
+ /*
+ * Increment the counter: add one to the masked lower 64 bits
+ * and, on wrap-around, carry into the masked upper 64 bits
+ * below.
+ */
+ lower_counter = ntohll(ctx->ctr_cb[1] & ctx->ctr_lower_mask);
+ lower_counter = htonll(lower_counter + 1);
+ lower_counter &= ctx->ctr_lower_mask;
+ ctx->ctr_cb[1] = (ctx->ctr_cb[1] & ~(ctx->ctr_lower_mask)) |
+ lower_counter;
+
+ /* wrap around */
+ if (lower_counter == 0) {
+ upper_counter =
+ ntohll(ctx->ctr_cb[0] & ctx->ctr_upper_mask);
+ upper_counter = htonll(upper_counter + 1);
+ upper_counter &= ctx->ctr_upper_mask;
+ ctx->ctr_cb[0] =
+ (ctx->ctr_cb[0] & ~(ctx->ctr_upper_mask)) |
+ upper_counter;
+ }
+
+ /*
+ * XOR encrypted counter block with the current clear block.
+ */
+ xor_block(blockp, lastp);
+
+ if (out == NULL) {
+ if (ctx->ctr_remainder_len > 0) {
+ bcopy(lastp, ctx->ctr_copy_to,
+ ctx->ctr_remainder_len);
+ bcopy(lastp + ctx->ctr_remainder_len, datap,
+ need);
+ }
+ } else {
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2, block_size);
+
+ /* copy block to where it belongs */
+ bcopy(lastp, out_data_1, out_data_1_len);
+ if (out_data_2 != NULL) {
+ bcopy(lastp + out_data_1_len, out_data_2,
+ block_size - out_data_1_len);
+ }
+ /* update offset */
+ out->cd_offset += block_size;
+ }
+
+ /* Update pointer to next block of data to be processed. */
+ if (ctx->ctr_remainder_len != 0) {
+ datap += need;
+ ctx->ctr_remainder_len = 0;
+ } else {
+ datap += block_size;
+ }
+
+ remainder = (size_t)&data[length] - (size_t)datap;
+
+ /* Incomplete last block. */
+ if (remainder > 0 && remainder < block_size) {
+ bcopy(datap, ctx->ctr_remainder, remainder);
+ ctx->ctr_remainder_len = remainder;
+ ctx->ctr_copy_to = datap;
+ goto out;
+ }
+ ctx->ctr_copy_to = NULL;
+
+ } while (remainder > 0);
+
+out:
+ return (CRYPTO_SUCCESS);
+}
+
+int
+ctr_mode_final(ctr_ctx_t *ctx, crypto_data_t *out,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *))
+{
+ uint8_t *lastp;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
+ uint8_t *p;
+ int i;
+
+ if (out->cd_length < ctx->ctr_remainder_len)
+ return (CRYPTO_DATA_LEN_RANGE);
+
+ encrypt_block(ctx->ctr_keysched, (uint8_t *)ctx->ctr_cb,
+ (uint8_t *)ctx->ctr_tmp);
+
+ lastp = (uint8_t *)ctx->ctr_tmp;
+ p = (uint8_t *)ctx->ctr_remainder;
+ for (i = 0; i < ctx->ctr_remainder_len; i++) {
+ p[i] ^= lastp[i];
+ }
+
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2, ctx->ctr_remainder_len);
+
+ bcopy(p, out_data_1, out_data_1_len);
+ if (out_data_2 != NULL) {
+ bcopy((uint8_t *)p + out_data_1_len,
+ out_data_2, ctx->ctr_remainder_len - out_data_1_len);
+ }
+ out->cd_offset += ctx->ctr_remainder_len;
+ ctx->ctr_remainder_len = 0;
+ return (CRYPTO_SUCCESS);
+}
+
+int
+ctr_init_ctx(ctr_ctx_t *ctr_ctx, ulong_t count, uint8_t *cb,
+ void (*copy_block)(uint8_t *, uint8_t *))
+{
+ uint64_t upper_mask = 0;
+ uint64_t lower_mask = 0;
+
+ if (count == 0 || count > 128) {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+ /* upper 64 bits of the mask */
+ if (count >= 64) {
+ count -= 64;
+ upper_mask = (count == 64) ? UINT64_MAX : (1ULL << count) - 1;
+ lower_mask = UINT64_MAX;
+ } else {
+ /* count < 64: the mask covers only the lower bits */
+ lower_mask = (1ULL << count) - 1;
+ }
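+ /*
+ * For example, the common 32-bit counter (count == 32) yields
+ * upper_mask == 0 and lower_mask == 0x00000000ffffffff, confining
+ * the increment to the last 32 bits of the counter block.
+ */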
+ ctr_ctx->ctr_lower_mask = htonll(lower_mask);
+ ctr_ctx->ctr_upper_mask = htonll(upper_mask);
+
+ copy_block(cb, (uchar_t *)ctr_ctx->ctr_cb);
+ ctr_ctx->ctr_lastp = (uint8_t *)&ctr_ctx->ctr_cb[0];
+ ctr_ctx->ctr_flags |= CTR_MODE;
+ return (CRYPTO_SUCCESS);
+}
+
+/* ARGSUSED */
+void *
+ctr_alloc_ctx(int kmflag)
+{
+ ctr_ctx_t *ctr_ctx;
+
+ if ((ctr_ctx = kmem_zalloc(sizeof (ctr_ctx_t), kmflag)) == NULL)
+ return (NULL);
+
+ ctr_ctx->ctr_flags = CTR_MODE;
+ return (ctr_ctx);
+}
diff --git a/zfs/module/icp/algs/modes/ecb.c b/zfs/module/icp/algs/modes/ecb.c
new file mode 100644
index 000000000000..04e6c5eaa650
--- /dev/null
+++ b/zfs/module/icp/algs/modes/ecb.c
@@ -0,0 +1,143 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <modes/modes.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+
+/*
+ * Algorithm independent ECB functions.
+ */
+int
+ecb_cipher_contiguous_blocks(ecb_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*cipher)(const void *ks, const uint8_t *pt, uint8_t *ct))
+{
+ size_t remainder = length;
+ size_t need = 0;
+ uint8_t *datap = (uint8_t *)data;
+ uint8_t *blockp;
+ uint8_t *lastp;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
+
+ if (length + ctx->ecb_remainder_len < block_size) {
+ /* accumulate bytes here and return */
+ bcopy(datap,
+ (uint8_t *)ctx->ecb_remainder + ctx->ecb_remainder_len,
+ length);
+ ctx->ecb_remainder_len += length;
+ ctx->ecb_copy_to = datap;
+ return (CRYPTO_SUCCESS);
+ }
+
+ lastp = (uint8_t *)ctx->ecb_iv;
+ if (out != NULL)
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+
+ do {
+ /* Unprocessed data from last call. */
+ if (ctx->ecb_remainder_len > 0) {
+ need = block_size - ctx->ecb_remainder_len;
+
+ if (need > remainder)
+ return (CRYPTO_DATA_LEN_RANGE);
+
+ bcopy(datap, &((uint8_t *)ctx->ecb_remainder)
+ [ctx->ecb_remainder_len], need);
+
+ blockp = (uint8_t *)ctx->ecb_remainder;
+ } else {
+ blockp = datap;
+ }
+
+ if (out == NULL) {
+ cipher(ctx->ecb_keysched, blockp, blockp);
+
+ ctx->ecb_lastp = blockp;
+ lastp = blockp;
+
+ if (ctx->ecb_remainder_len > 0) {
+ bcopy(blockp, ctx->ecb_copy_to,
+ ctx->ecb_remainder_len);
+ bcopy(blockp + ctx->ecb_remainder_len, datap,
+ need);
+ }
+ } else {
+ cipher(ctx->ecb_keysched, blockp, lastp);
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2, block_size);
+
+ /* copy block to where it belongs */
+ bcopy(lastp, out_data_1, out_data_1_len);
+ if (out_data_2 != NULL) {
+ bcopy(lastp + out_data_1_len, out_data_2,
+ block_size - out_data_1_len);
+ }
+ /* update offset */
+ out->cd_offset += block_size;
+ }
+
+ /* Update pointer to next block of data to be processed. */
+ if (ctx->ecb_remainder_len != 0) {
+ datap += need;
+ ctx->ecb_remainder_len = 0;
+ } else {
+ datap += block_size;
+ }
+
+ remainder = (size_t)&data[length] - (size_t)datap;
+
+ /* Incomplete last block. */
+ if (remainder > 0 && remainder < block_size) {
+ bcopy(datap, ctx->ecb_remainder, remainder);
+ ctx->ecb_remainder_len = remainder;
+ ctx->ecb_copy_to = datap;
+ goto out;
+ }
+ ctx->ecb_copy_to = NULL;
+
+ } while (remainder > 0);
+
+out:
+ return (CRYPTO_SUCCESS);
+}
+
+/* ARGSUSED */
+void *
+ecb_alloc_ctx(int kmflag)
+{
+ ecb_ctx_t *ecb_ctx;
+
+ if ((ecb_ctx = kmem_zalloc(sizeof (ecb_ctx_t), kmflag)) == NULL)
+ return (NULL);
+
+ ecb_ctx->ecb_flags = ECB_MODE;
+ return (ecb_ctx);
+}
diff --git a/zfs/module/icp/algs/modes/gcm.c b/zfs/module/icp/algs/modes/gcm.c
new file mode 100644
index 000000000000..c0a26f52454b
--- /dev/null
+++ b/zfs/module/icp/algs/modes/gcm.c
@@ -0,0 +1,748 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#if defined(_KERNEL) && defined(__amd64)
+#include <linux/simd_x86.h>
+
+#define KPREEMPT_DISABLE kfpu_begin()
+#define KPREEMPT_ENABLE kfpu_end()
+
+#else
+#define KPREEMPT_DISABLE
+#define KPREEMPT_ENABLE
+#endif /* _KERNEL && __amd64 */
+
+#include <sys/zfs_context.h>
+#include <modes/modes.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+#include <sys/byteorder.h>
+
+#ifdef __amd64
+
+extern void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res);
+static int intel_pclmulqdq_instruction_present(void);
+#endif /* __amd64 */
+
+struct aes_block {
+ uint64_t a;
+ uint64_t b;
+};
+
+
+/*
+ * gcm_mul()
+ * Perform a carry-less multiplication (that is, use XOR instead of the
+ * multiply operator) on *x_in and *y and place the result in *res.
+ *
+ * Byte swap the input (*x_in and *y) and the output (*res).
+ *
+ * Note: x_in, y, and res all point to 16-byte numbers (an array of two
+ * 64-bit integers).
+ */
+void
+gcm_mul(uint64_t *x_in, uint64_t *y, uint64_t *res)
+{
+#ifdef __amd64
+ if (intel_pclmulqdq_instruction_present()) {
+ KPREEMPT_DISABLE;
+ gcm_mul_pclmulqdq(x_in, y, res);
+ KPREEMPT_ENABLE;
+ } else
+#endif /* __amd64 */
+ {
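+ /*
+ * Software fallback: bit-by-bit carry-less multiply in
+ * GF(2^128). R is the GHASH reduction constant, i.e. the low
+ * terms of the field polynomial x^128 + x^7 + x^2 + x + 1 in
+ * the reflected bit order used by GCM.
+ */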
+ static const uint64_t R = 0xe100000000000000ULL;
+ struct aes_block z = {0, 0};
+ struct aes_block v;
+ uint64_t x;
+ int i, j;
+
+ v.a = ntohll(y[0]);
+ v.b = ntohll(y[1]);
+
+ for (j = 0; j < 2; j++) {
+ x = ntohll(x_in[j]);
+ for (i = 0; i < 64; i++, x <<= 1) {
+ if (x & 0x8000000000000000ULL) {
+ z.a ^= v.a;
+ z.b ^= v.b;
+ }
+ if (v.b & 1ULL) {
+ v.b = (v.a << 63)|(v.b >> 1);
+ v.a = (v.a >> 1) ^ R;
+ } else {
+ v.b = (v.a << 63)|(v.b >> 1);
+ v.a = v.a >> 1;
+ }
+ }
+ }
+ res[0] = htonll(z.a);
+ res[1] = htonll(z.b);
+ }
+}
+
+
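+/*
+ * GHASH(c, d, t) folds one 16-byte block d into the running hash of
+ * context c: t = (gcm_ghash XOR d) * H in GF(2^128).
+ */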
+#define GHASH(c, d, t) \
+ xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
+ gcm_mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
+ (uint64_t *)(void *)(t));
+
+
+/*
+ * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode
+ * is done in another function.
+ */
+int
+gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ size_t remainder = length;
+ size_t need = 0;
+ uint8_t *datap = (uint8_t *)data;
+ uint8_t *blockp;
+ uint8_t *lastp;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
+ uint64_t counter;
+ uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
+
+ if (length + ctx->gcm_remainder_len < block_size) {
+ /* accumulate bytes here and return */
+ bcopy(datap,
+ (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
+ length);
+ ctx->gcm_remainder_len += length;
+ ctx->gcm_copy_to = datap;
+ return (CRYPTO_SUCCESS);
+ }
+
+ lastp = (uint8_t *)ctx->gcm_cb;
+ if (out != NULL)
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+
+ do {
+ /* Unprocessed data from last call. */
+ if (ctx->gcm_remainder_len > 0) {
+ need = block_size - ctx->gcm_remainder_len;
+
+ if (need > remainder)
+ return (CRYPTO_DATA_LEN_RANGE);
+
+ bcopy(datap, &((uint8_t *)ctx->gcm_remainder)
+ [ctx->gcm_remainder_len], need);
+
+ blockp = (uint8_t *)ctx->gcm_remainder;
+ } else {
+ blockp = datap;
+ }
+
+ /*
+ * Increment counter. Counter bits are confined
+ * to the bottom 32 bits of the counter block.
+ */
+ counter = ntohll(ctx->gcm_cb[1] & counter_mask);
+ counter = htonll(counter + 1);
+ counter &= counter_mask;
+ ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
+
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
+ (uint8_t *)ctx->gcm_tmp);
+ xor_block(blockp, (uint8_t *)ctx->gcm_tmp);
+
+ lastp = (uint8_t *)ctx->gcm_tmp;
+
+ ctx->gcm_processed_data_len += block_size;
+
+ if (out == NULL) {
+ if (ctx->gcm_remainder_len > 0) {
+ bcopy(blockp, ctx->gcm_copy_to,
+ ctx->gcm_remainder_len);
+ bcopy(blockp + ctx->gcm_remainder_len, datap,
+ need);
+ }
+ } else {
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2, block_size);
+
+ /* copy block to where it belongs */
+ if (out_data_1_len == block_size) {
+ copy_block(lastp, out_data_1);
+ } else {
+ bcopy(lastp, out_data_1, out_data_1_len);
+ if (out_data_2 != NULL) {
+ bcopy(lastp + out_data_1_len,
+ out_data_2,
+ block_size - out_data_1_len);
+ }
+ }
+ /* update offset */
+ out->cd_offset += block_size;
+ }
+
+ /* add ciphertext to the hash */
+ GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);
+
+ /* Update pointer to next block of data to be processed. */
+ if (ctx->gcm_remainder_len != 0) {
+ datap += need;
+ ctx->gcm_remainder_len = 0;
+ } else {
+ datap += block_size;
+ }
+
+ remainder = (size_t)&data[length] - (size_t)datap;
+
+ /* Incomplete last block. */
+ if (remainder > 0 && remainder < block_size) {
+ bcopy(datap, ctx->gcm_remainder, remainder);
+ ctx->gcm_remainder_len = remainder;
+ ctx->gcm_copy_to = datap;
+ goto out;
+ }
+ ctx->gcm_copy_to = NULL;
+
+ } while (remainder > 0);
+out:
+ return (CRYPTO_SUCCESS);
+}
+
+/* ARGSUSED */
+int
+gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
+ uint8_t *ghash, *macp = NULL;
+ int i, rv;
+
+ if (out->cd_length <
+ (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ ghash = (uint8_t *)ctx->gcm_ghash;
+
+ if (ctx->gcm_remainder_len > 0) {
+ uint64_t counter;
+ uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;
+
+ /*
+ * Here is where we deal with data that is not a
+ * multiple of the block size.
+ */
+
+ /*
+ * Increment counter.
+ */
+ counter = ntohll(ctx->gcm_cb[1] & counter_mask);
+ counter = htonll(counter + 1);
+ counter &= counter_mask;
+ ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
+
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
+ (uint8_t *)ctx->gcm_tmp);
+
+ macp = (uint8_t *)ctx->gcm_remainder;
+ bzero(macp + ctx->gcm_remainder_len,
+ block_size - ctx->gcm_remainder_len);
+
+ /* XOR with counter block */
+ for (i = 0; i < ctx->gcm_remainder_len; i++) {
+ macp[i] ^= tmpp[i];
+ }
+
+ /* add ciphertext to the hash */
+ GHASH(ctx, macp, ghash);
+
+ ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
+ }
+
+ ctx->gcm_len_a_len_c[1] =
+ htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
+ GHASH(ctx, ctx->gcm_len_a_len_c, ghash);
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
+ (uint8_t *)ctx->gcm_J0);
+ xor_block((uint8_t *)ctx->gcm_J0, ghash);
+
+ if (ctx->gcm_remainder_len > 0) {
+ rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ }
+ out->cd_offset += ctx->gcm_remainder_len;
+ ctx->gcm_remainder_len = 0;
+ rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ out->cd_offset += ctx->gcm_tag_len;
+
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * This handles only the decryption of the last block of the input,
+ * which might not be a multiple of the block length.
+ */
+static void
+gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ uint8_t *datap, *outp, *counterp;
+ uint64_t counter;
+ uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
+ int i;
+
+ /*
+ * Increment counter.
+ * Counter bits are confined to the bottom 32 bits
+ */
+ counter = ntohll(ctx->gcm_cb[1] & counter_mask);
+ counter = htonll(counter + 1);
+ counter &= counter_mask;
+ ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
+
+ datap = (uint8_t *)ctx->gcm_remainder;
+ outp = &((ctx->gcm_pt_buf)[index]);
+ counterp = (uint8_t *)ctx->gcm_tmp;
+
+ /* pad the incomplete ciphertext block with zeros for the hash */
+ bzero((uint8_t *)ctx->gcm_tmp, block_size);
+ bcopy(datap, (uint8_t *)ctx->gcm_tmp, ctx->gcm_remainder_len);
+
+ /* add ciphertext to the hash */
+ GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);
+
+ /* decrypt remaining ciphertext */
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);
+
+ /* XOR with counter block */
+ for (i = 0; i < ctx->gcm_remainder_len; i++) {
+ outp[i] = datap[i] ^ counterp[i];
+ }
+}
+
+/* ARGSUSED */
+int
+gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ size_t new_len;
+ uint8_t *new;
+
+ /*
+ * Accumulate the contiguous ciphertext into the plaintext buffer;
+ * the last gcm_tag_len bytes of input are the tag, so decryption
+ * and tag verification are deferred to gcm_decrypt_final().
+ */
+ if (length > 0) {
+ new_len = ctx->gcm_pt_buf_len + length;
+ new = vmem_alloc(new_len, ctx->gcm_kmflag);
+ if (new == NULL)
+ return (CRYPTO_HOST_MEMORY);
+ bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len);
+ vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
+
+ ctx->gcm_pt_buf = new;
+ ctx->gcm_pt_buf_len = new_len;
+ bcopy(data, &ctx->gcm_pt_buf[ctx->gcm_processed_data_len],
+ length);
+ ctx->gcm_processed_data_len += length;
+ }
+
+ ctx->gcm_remainder_len = 0;
+ return (CRYPTO_SUCCESS);
+}
+
+int
+gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ size_t pt_len;
+ size_t remainder;
+ uint8_t *ghash;
+ uint8_t *blockp;
+ uint8_t *cbp;
+ uint64_t counter;
+ uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
+ int processed = 0, rv;
+
+ ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);
+
+ pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
+ ghash = (uint8_t *)ctx->gcm_ghash;
+ blockp = ctx->gcm_pt_buf;
+ remainder = pt_len;
+ while (remainder > 0) {
+ /* Incomplete last block */
+ if (remainder < block_size) {
+ bcopy(blockp, ctx->gcm_remainder, remainder);
+ ctx->gcm_remainder_len = remainder;
+ /*
+ * no more ciphertext is expected; just compute
+ * the plaintext for the remaining input
+ */
+ gcm_decrypt_incomplete_block(ctx, block_size,
+ processed, encrypt_block, xor_block);
+ ctx->gcm_remainder_len = 0;
+ goto out;
+ }
+ /* add ciphertext to the hash */
+ GHASH(ctx, blockp, ghash);
+
+ /*
+ * Increment counter.
+ * Counter bits are confined to the bottom 32 bits
+ */
+ counter = ntohll(ctx->gcm_cb[1] & counter_mask);
+ counter = htonll(counter + 1);
+ counter &= counter_mask;
+ ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
+
+ cbp = (uint8_t *)ctx->gcm_tmp;
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);
+
+ /* XOR with ciphertext */
+ xor_block(cbp, blockp);
+
+ processed += block_size;
+ blockp += block_size;
+ remainder -= block_size;
+ }
+out:
+ ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
+ GHASH(ctx, ctx->gcm_len_a_len_c, ghash);
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
+ (uint8_t *)ctx->gcm_J0);
+ xor_block((uint8_t *)ctx->gcm_J0, ghash);
+
+ /* compare the input authentication tag with what we calculated */
+ if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
+ /* They don't match */
+ return (CRYPTO_INVALID_MAC);
+ } else {
+ rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ out->cd_offset += pt_len;
+ }
+ return (CRYPTO_SUCCESS);
+}
+
+static int
+gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
+{
+ size_t tag_len;
+
+ /*
+ * Check the length of the authentication tag (in bits).
+ */
+ tag_len = gcm_param->ulTagBits;
+ switch (tag_len) {
+ case 32:
+ case 64:
+ case 96:
+ case 104:
+ case 112:
+ case 120:
+ case 128:
+ break;
+ default:
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+
+ if (gcm_param->ulIvLen == 0)
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+
+ return (CRYPTO_SUCCESS);
+}
+
+static void
+gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
+ gcm_ctx_t *ctx, size_t block_size,
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ uint8_t *cb;
+ ulong_t remainder = iv_len;
+ ulong_t processed = 0;
+ uint8_t *datap, *ghash;
+ uint64_t len_a_len_c[2];
+
+ ghash = (uint8_t *)ctx->gcm_ghash;
+ cb = (uint8_t *)ctx->gcm_cb;
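+ /*
+ * Per the GCM spec (NIST SP 800-38D): a 96-bit IV forms J0
+ * directly as IV || 0^31 || 1, while any other IV length is
+ * folded through GHASH together with its encoded length.
+ */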
+ if (iv_len == 12) {
+ bcopy(iv, cb, 12);
+ cb[12] = 0;
+ cb[13] = 0;
+ cb[14] = 0;
+ cb[15] = 1;
+ /* J0 will be used again in the final */
+ copy_block(cb, (uint8_t *)ctx->gcm_J0);
+ } else {
+ /* GHASH the IV */
+ do {
+ if (remainder < block_size) {
+ bzero(cb, block_size);
+ bcopy(&(iv[processed]), cb, remainder);
+ datap = (uint8_t *)cb;
+ remainder = 0;
+ } else {
+ datap = (uint8_t *)(&(iv[processed]));
+ processed += block_size;
+ remainder -= block_size;
+ }
+ GHASH(ctx, datap, ghash);
+ } while (remainder > 0);
+
+ len_a_len_c[0] = 0;
+ len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
+ GHASH(ctx, len_a_len_c, ctx->gcm_J0);
+
+ /* J0 will be used again in the final */
+ copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
+ }
+}
+
+/*
+ * The following function is called at encrypt or decrypt init time
+ * for AES GCM mode.
+ */
+int
+gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
+ unsigned char *auth_data, size_t auth_data_len, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ uint8_t *ghash, *datap, *authp;
+ size_t remainder, processed;
+
+ /* encrypt zero block to get subkey H */
+ bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
+ (uint8_t *)ctx->gcm_H);
+
+ gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
+ copy_block, xor_block);
+
+ authp = (uint8_t *)ctx->gcm_tmp;
+ ghash = (uint8_t *)ctx->gcm_ghash;
+ bzero(authp, block_size);
+ bzero(ghash, block_size);
+
+ processed = 0;
+ remainder = auth_data_len;
+ do {
+ if (remainder < block_size) {
+ /*
+ * Less than a full block of data remains; pad the
+ * rest of the buffer with zeros
+ */
+ bzero(authp, block_size);
+ bcopy(&(auth_data[processed]), authp, remainder);
+ datap = (uint8_t *)authp;
+ remainder = 0;
+ } else {
+ datap = (uint8_t *)(&(auth_data[processed]));
+ processed += block_size;
+ remainder -= block_size;
+ }
+
+ /* add auth data to the hash */
+ GHASH(ctx, datap, ghash);
+
+ } while (remainder > 0);
+
+ return (CRYPTO_SUCCESS);
+}
+
+int
+gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ int rv;
+ CK_AES_GCM_PARAMS *gcm_param;
+
+ if (param != NULL) {
+ gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;
+
+ if ((rv = gcm_validate_args(gcm_param)) != 0) {
+ return (rv);
+ }
+
+ gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
+ gcm_ctx->gcm_tag_len >>= 3;
+ gcm_ctx->gcm_processed_data_len = 0;
+
+ /* these values are in bits */
+ gcm_ctx->gcm_len_a_len_c[0]
+ = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));
+
+ rv = CRYPTO_SUCCESS;
+ gcm_ctx->gcm_flags |= GCM_MODE;
+ } else {
+ rv = CRYPTO_MECHANISM_PARAM_INVALID;
+ goto out;
+ }
+
+ if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
+ gcm_param->pAAD, gcm_param->ulAADLen, block_size,
+ encrypt_block, copy_block, xor_block) != 0) {
+ rv = CRYPTO_MECHANISM_PARAM_INVALID;
+ }
+out:
+ return (rv);
+}
+
+int
+gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ int rv;
+ CK_AES_GMAC_PARAMS *gmac_param;
+
+ if (param != NULL) {
+ gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;
+
+ gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
+ gcm_ctx->gcm_processed_data_len = 0;
+
+ /* these values are in bits */
+ gcm_ctx->gcm_len_a_len_c[0]
+ = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));
+
+ rv = CRYPTO_SUCCESS;
+ gcm_ctx->gcm_flags |= GMAC_MODE;
+ } else {
+ rv = CRYPTO_MECHANISM_PARAM_INVALID;
+ goto out;
+ }
+
+ if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
+ gmac_param->pAAD, gmac_param->ulAADLen, block_size,
+ encrypt_block, copy_block, xor_block) != 0) {
+ rv = CRYPTO_MECHANISM_PARAM_INVALID;
+ }
+out:
+ return (rv);
+}
+
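+/*
+ * Editor's note (not part of the original change): GMAC is GCM with an
+ * empty plaintext -- only the AAD is authenticated and nothing is
+ * encrypted -- which is why the IV length and tag width are fixed here
+ * (AES_GMAC_IV_LEN bytes, AES_GMAC_TAG_BITS bits) rather than being
+ * caller-supplied as in gcm_init_ctx() above.
+ */
+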
+void *
+gcm_alloc_ctx(int kmflag)
+{
+ gcm_ctx_t *gcm_ctx;
+
+ if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
+ return (NULL);
+
+ gcm_ctx->gcm_flags = GCM_MODE;
+ return (gcm_ctx);
+}
+
+void *
+gmac_alloc_ctx(int kmflag)
+{
+ gcm_ctx_t *gcm_ctx;
+
+ if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
+ return (NULL);
+
+ gcm_ctx->gcm_flags = GMAC_MODE;
+ return (gcm_ctx);
+}
+
+void
+gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag)
+{
+ ctx->gcm_kmflag = kmflag;
+}
+
+
+#ifdef __amd64
+
+#define INTEL_PCLMULQDQ_FLAG (1 << 1)
+
+/*
+ * Return 1 if executing on Intel with PCLMULQDQ instructions,
+ * otherwise 0 (i.e., Intel without PCLMULQDQ, or a non-Intel vendor
+ * such as AMD).
+ * Cache the result, as the CPU can't change.
+ *
+ * Note: the userland version uses getisax(). The kernel version uses
+ * is_x86_featureset().
+ */
+static int
+intel_pclmulqdq_instruction_present(void)
+{
+ static int cached_result = -1;
+ unsigned eax, ebx, ecx, edx;
+ unsigned func, subfunc;
+
+ if (cached_result == -1) { /* first time */
+ /* check for an intel cpu */
+ func = 0;
+ subfunc = 0;
+
+ __asm__ __volatile__(
+ "cpuid"
+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+ : "a"(func), "c"(subfunc));
+
+ if (memcmp((char *)(&ebx), "Genu", 4) == 0 &&
+ memcmp((char *)(&edx), "ineI", 4) == 0 &&
+ memcmp((char *)(&ecx), "ntel", 4) == 0) {
+ func = 1;
+ subfunc = 0;
+
+ /* check for the pclmulqdq instruction */
+ __asm__ __volatile__(
+ "cpuid"
+ : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
+ : "a"(func), "c"(subfunc));
+
+ cached_result = !!(ecx & INTEL_PCLMULQDQ_FLAG);
+ } else {
+ cached_result = 0;
+ }
+ }
+
+ return (cached_result);
+}
+
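+/*
+ * Editor's note (standard CPUID behaviour, not part of the original
+ * change): leaf 0 returns the vendor string spread across EBX, EDX and
+ * ECX -- "Genu", "ineI", "ntel" for Intel parts, hence the three
+ * memcmp() calls above -- and leaf 1 reports PCLMULQDQ support in
+ * ECX bit 1, the INTEL_PCLMULQDQ_FLAG tested above.
+ */
+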
+#endif /* __amd64 */
diff --git a/zfs/module/icp/algs/modes/modes.c b/zfs/module/icp/algs/modes/modes.c
new file mode 100644
index 000000000000..1d33c4268816
--- /dev/null
+++ b/zfs/module/icp/algs/modes/modes.c
@@ -0,0 +1,159 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <modes/modes.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+
+/*
+ * Initialize by setting iov_or_mp to point to the current iovec or mp,
+ * and by setting current_offset to an offset within the current iovec or mp.
+ */
+void
+crypto_init_ptrs(crypto_data_t *out, void **iov_or_mp, offset_t *current_offset)
+{
+ offset_t offset;
+
+ switch (out->cd_format) {
+ case CRYPTO_DATA_RAW:
+ *current_offset = out->cd_offset;
+ break;
+
+ case CRYPTO_DATA_UIO: {
+ uio_t *uiop = out->cd_uio;
+ uintptr_t vec_idx;
+
+ offset = out->cd_offset;
+ for (vec_idx = 0; vec_idx < uiop->uio_iovcnt &&
+ offset >= uiop->uio_iov[vec_idx].iov_len;
+ offset -= uiop->uio_iov[vec_idx++].iov_len)
+ ;
+
+ *current_offset = offset;
+ *iov_or_mp = (void *)vec_idx;
+ break;
+ }
+ } /* end switch */
+}
+
+/*
+ * Get pointers for where in the output to copy a block of encrypted or
+ * decrypted data. The iov_or_mp argument stores a pointer to the current
+ * iovec or mp, and offset stores an offset into the current iovec or mp.
+ */
+void
+crypto_get_ptrs(crypto_data_t *out, void **iov_or_mp, offset_t *current_offset,
+ uint8_t **out_data_1, size_t *out_data_1_len, uint8_t **out_data_2,
+ size_t amt)
+{
+ offset_t offset;
+
+ switch (out->cd_format) {
+ case CRYPTO_DATA_RAW: {
+ iovec_t *iov;
+
+ offset = *current_offset;
+ iov = &out->cd_raw;
+ if ((offset + amt) <= iov->iov_len) {
+ /* one block fits */
+ *out_data_1 = (uint8_t *)iov->iov_base + offset;
+ *out_data_1_len = amt;
+ *out_data_2 = NULL;
+ *current_offset = offset + amt;
+ }
+ break;
+ }
+
+ case CRYPTO_DATA_UIO: {
+ uio_t *uio = out->cd_uio;
+ iovec_t *iov;
+ offset_t offset;
+ uintptr_t vec_idx;
+ uint8_t *p;
+
+ offset = *current_offset;
+ vec_idx = (uintptr_t)(*iov_or_mp);
+ iov = (iovec_t *)&uio->uio_iov[vec_idx];
+ p = (uint8_t *)iov->iov_base + offset;
+ *out_data_1 = p;
+
+ if (offset + amt <= iov->iov_len) {
+ /* can fit one block into this iov */
+ *out_data_1_len = amt;
+ *out_data_2 = NULL;
+ *current_offset = offset + amt;
+ } else {
+ /* one block spans two iovecs */
+ *out_data_1_len = iov->iov_len - offset;
+ if (vec_idx == uio->uio_iovcnt)
+ return;
+ vec_idx++;
+ iov = (iovec_t *)&uio->uio_iov[vec_idx];
+ *out_data_2 = (uint8_t *)iov->iov_base;
+ *current_offset = amt - *out_data_1_len;
+ }
+ *iov_or_mp = (void *)vec_idx;
+ break;
+ }
+ } /* end switch */
+}
+
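+/*
+ * Worked example (editor's note, not part of the original change):
+ * with a 16-byte block and 10 bytes left in the current iovec N,
+ * crypto_get_ptrs() hands back
+ *
+ *	*out_data_1 = last 10 bytes of iovec N, *out_data_1_len = 10
+ *	*out_data_2 = start of iovec N+1 (the remaining 6 bytes)
+ *	*current_offset = 6
+ *
+ * so the caller writes one output block across the iovec boundary.
+ */
+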
+void
+crypto_free_mode_ctx(void *ctx)
+{
+ common_ctx_t *common_ctx = (common_ctx_t *)ctx;
+
+ switch (common_ctx->cc_flags &
+ (ECB_MODE|CBC_MODE|CTR_MODE|CCM_MODE|GCM_MODE|GMAC_MODE)) {
+ case ECB_MODE:
+ kmem_free(common_ctx, sizeof (ecb_ctx_t));
+ break;
+
+ case CBC_MODE:
+ kmem_free(common_ctx, sizeof (cbc_ctx_t));
+ break;
+
+ case CTR_MODE:
+ kmem_free(common_ctx, sizeof (ctr_ctx_t));
+ break;
+
+ case CCM_MODE:
+ if (((ccm_ctx_t *)ctx)->ccm_pt_buf != NULL)
+ vmem_free(((ccm_ctx_t *)ctx)->ccm_pt_buf,
+ ((ccm_ctx_t *)ctx)->ccm_data_len);
+
+ kmem_free(ctx, sizeof (ccm_ctx_t));
+ break;
+
+ case GCM_MODE:
+ case GMAC_MODE:
+ if (((gcm_ctx_t *)ctx)->gcm_pt_buf != NULL)
+ vmem_free(((gcm_ctx_t *)ctx)->gcm_pt_buf,
+ ((gcm_ctx_t *)ctx)->gcm_pt_buf_len);
+
+ kmem_free(ctx, sizeof (gcm_ctx_t));
+ }
+}
diff --git a/zfs/module/icp/algs/sha1/sha1.c b/zfs/module/icp/algs/sha1/sha1.c
new file mode 100644
index 000000000000..7f28b3796b5d
--- /dev/null
+++ b/zfs/module/icp/algs/sha1/sha1.c
@@ -0,0 +1,838 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * The basic framework for this code came from the reference
+ * implementation for MD5. That implementation is Copyright (C)
+ * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
+ *
+ * License to copy and use this software is granted provided that it
+ * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+ * Algorithm" in all material mentioning or referencing this software
+ * or this function.
+ *
+ * License is also granted to make and use derivative works provided
+ * that such works are identified as "derived from the RSA Data
+ * Security, Inc. MD5 Message-Digest Algorithm" in all material
+ * mentioning or referencing the derived work.
+ *
+ * RSA Data Security, Inc. makes no representations concerning either
+ * the merchantability of this software or the suitability of this
+ * software for any particular purpose. It is provided "as is"
+ * without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this
+ * documentation and/or software.
+ *
+ * NOTE: Cleaned-up and optimized version of SHA1, based on the FIPS 180-1
+ * standard, available at http://www.itl.nist.gov/fipspubs/fip180-1.htm
+ * Not as fast as one would like -- further optimizations are encouraged
+ * and appreciated.
+ */
+
+#include <sys/zfs_context.h>
+#include <sha1/sha1.h>
+#include <sha1/sha1_consts.h>
+
+#ifdef _LITTLE_ENDIAN
+#include <sys/byteorder.h>
+#define HAVE_HTONL
+#endif
+
+#define _RESTRICT_KYWD
+
+static void Encode(uint8_t *, const uint32_t *, size_t);
+
+#if defined(__sparc)
+
+#define SHA1_TRANSFORM(ctx, in) \
+ SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
+ (ctx)->state[3], (ctx)->state[4], (ctx), (in))
+
+static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
+ SHA1_CTX *, const uint8_t *);
+
+#elif defined(__amd64)
+
+#define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
+#define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
+ (in), (num))
+
+void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);
+
+#else
+
+#define SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in))
+
+static void SHA1Transform(SHA1_CTX *, const uint8_t *);
+
+#endif
+
+
+static uint8_t PADDING[64] = { 0x80, /* all zeros */ };
+
+/*
+ * F, G, and H are the basic SHA1 functions.
+ */
+#define F(b, c, d) (((b) & (c)) | ((~b) & (d)))
+#define G(b, c, d) ((b) ^ (c) ^ (d))
+#define H(b, c, d) (((b) & (c)) | (((b)|(c)) & (d)))
+
+/*
+ * ROTATE_LEFT rotates x left n bits.
+ */
+
+#if defined(__GNUC__) && defined(_LP64)
+static __inline__ uint64_t
+ROTATE_LEFT(uint64_t value, uint32_t n)
+{
+ uint32_t t32;
+
+ t32 = (uint32_t)value;
+ return ((t32 << n) | (t32 >> (32 - n)));
+}
+
+#else
+
+#define ROTATE_LEFT(x, n) \
+ (((x) << (n)) | ((x) >> ((sizeof (x) * NBBY)-(n))))
+
+#endif
+
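+/*
+ * Editor's note (not part of the original change): both variants
+ * compute a 32-bit left rotate; the _LP64 inline truncates to 32 bits
+ * first so that, for example, ROTATE_LEFT(0x80000000, 1) == 1.
+ */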
+
+/*
+ * SHA1Init()
+ *
+ * purpose: initializes the sha1 context and begins an sha1 digest operation
+ * input: SHA1_CTX * : the context to initialize.
+ * output: void
+ */
+
+void
+SHA1Init(SHA1_CTX *ctx)
+{
+ ctx->count[0] = ctx->count[1] = 0;
+
+ /*
+ * load magic initialization constants. Tell lint
+ * that these constants are unsigned by using U.
+ */
+
+ ctx->state[0] = 0x67452301U;
+ ctx->state[1] = 0xefcdab89U;
+ ctx->state[2] = 0x98badcfeU;
+ ctx->state[3] = 0x10325476U;
+ ctx->state[4] = 0xc3d2e1f0U;
+}
+
+void
+SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
+{
+ uint32_t i, buf_index, buf_len;
+ const uint8_t *input = inptr;
+#if defined(__amd64)
+ uint32_t block_count;
+#endif /* __amd64 */
+
+ /* check for noop */
+ if (input_len == 0)
+ return;
+
+ /* compute number of bytes mod 64 */
+ buf_index = (ctx->count[1] >> 3) & 0x3F;
+
+ /* update number of bits */
+ if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
+ ctx->count[0]++;
+
+ ctx->count[0] += (input_len >> 29);
+
+ buf_len = 64 - buf_index;
+
+ /* transform as many times as possible */
+ i = 0;
+ if (input_len >= buf_len) {
+
+ /*
+ * general optimization:
+ *
+ * only do initial bcopy() and SHA1Transform() if
+ * buf_index != 0. if buf_index == 0, we're just
+ * wasting our time doing the bcopy() since there
+ * wasn't any data left over from a previous call to
+ * SHA1Update().
+ */
+
+ if (buf_index) {
+ bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
+ SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
+ i = buf_len;
+ }
+
+#if !defined(__amd64)
+ for (; i + 63 < input_len; i += 64)
+ SHA1_TRANSFORM(ctx, &input[i]);
+#else
+ block_count = (input_len - i) >> 6;
+ if (block_count > 0) {
+ SHA1_TRANSFORM_BLOCKS(ctx, &input[i], block_count);
+ i += block_count << 6;
+ }
+#endif /* !__amd64 */
+
+ /*
+ * general optimization:
+ *
+ * if i and input_len are the same, return now instead
+ * of calling bcopy(), since the bcopy() in this case
+ * will be an expensive nop.
+ */
+
+ if (input_len == i)
+ return;
+
+ buf_index = 0;
+ }
+
+ /* buffer remaining input */
+ bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
+}
+
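+/*
+ * Editor's note (not part of the original change): ctx->count is a
+ * 64-bit bit counter kept in two uint32_t words.  The unsigned
+ * wraparound test above -- (count[1] += len << 3) < (len << 3) --
+ * detects carry out of the low word, and count[0] += (len >> 29)
+ * adds the high-order bits of the 8*len increment directly.
+ */
+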
+/*
+ * SHA1Final()
+ *
+ * purpose: ends an sha1 digest operation, finalizing the message digest and
+ * zeroing the context.
+ * input: uchar_t * : A buffer to store the digest.
+ * : The function actually uses void* because many
+ * : callers pass things other than uchar_t here.
+ * SHA1_CTX * : the context to finalize, save, and zero
+ * output: void
+ */
+
+void
+SHA1Final(void *digest, SHA1_CTX *ctx)
+{
+ uint8_t bitcount_be[sizeof (ctx->count)];
+ uint32_t index = (ctx->count[1] >> 3) & 0x3f;
+
+ /* store bit count, big endian */
+ Encode(bitcount_be, ctx->count, sizeof (bitcount_be));
+
+ /* pad out to 56 mod 64 */
+ SHA1Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
+
+ /* append length (before padding) */
+ SHA1Update(ctx, bitcount_be, sizeof (bitcount_be));
+
+ /* store state in digest */
+ Encode(digest, ctx->state, sizeof (ctx->state));
+
+ /* zeroize sensitive information */
+ bzero(ctx, sizeof (*ctx));
+}
+
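+#if 0
+/*
+ * Illustrative usage (editor's sketch, not part of the original
+ * change), using the canonical FIPS 180-1 test vector:
+ * SHA1("abc") = a9993e364706816aba3e25717850c26c9cd0d89d.
+ */
+static void
+example_sha1_abc(void)
+{
+	SHA1_CTX ctx;
+	uint8_t digest[20];
+
+	SHA1Init(&ctx);
+	SHA1Update(&ctx, "abc", 3);
+	SHA1Final(digest, &ctx);
+	/* digest now holds a9993e36 ... 9cd0d89d */
+}
+#endif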
+
+#if !defined(__amd64)
+
+typedef uint32_t sha1word;
+
+/*
+ * sparc optimization:
+ *
+ * on the sparc, we can load big endian 32-bit data easily. note that
+ * special care must be taken to ensure the address is 32-bit aligned.
+ * in the interest of speed, we don't check to make sure, since
+ * careful programming can guarantee this for us.
+ */
+
+#if defined(_BIG_ENDIAN)
+#define LOAD_BIG_32(addr) (*(uint32_t *)(addr))
+
+#elif defined(HAVE_HTONL)
+#define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
+
+#else
+/* little endian -- will work on big endian, but slowly */
+#define LOAD_BIG_32(addr) \
+ (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
+#endif /* _BIG_ENDIAN */
+
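+/*
+ * Editor's note (not part of the original change): all three variants
+ * perform the same big-endian load; for the byte sequence 01 02 03 04,
+ * LOAD_BIG_32() yields 0x01020304 regardless of host endianness.
+ */
+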
+/*
+ * SHA1Transform()
+ */
+#if defined(W_ARRAY)
+#define W(n) w[n]
+#else /* !defined(W_ARRAY) */
+#define W(n) w_ ## n
+#endif /* !defined(W_ARRAY) */
+
+#if defined(__sparc)
+
+
+/*
+ * sparc register window optimization:
+ *
+ * `a', `b', `c', `d', and `e' are passed into SHA1Transform
+ * explicitly since it increases the number of registers available to
+ * the compiler. under this scheme, these variables can be held in
+ * %i0 - %i4, which leaves more local and out registers available.
+ *
+ * purpose: sha1 transformation -- updates the digest based on `block'
+ * input: uint32_t : bytes 1 - 4 of the digest
+ * uint32_t : bytes 5 - 8 of the digest
+ * uint32_t : bytes 9 - 12 of the digest
+ * uint32_t : bytes 13 - 16 of the digest
+ * uint32_t : bytes 17 - 20 of the digest
+ * SHA1_CTX * : the context to update
+ * uint8_t [64]: the block to use to update the digest
+ * output: void
+ */
+
+
+void
+SHA1Transform(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e,
+ SHA1_CTX *ctx, const uint8_t blk[64])
+{
+ /*
+ * sparc optimization:
+ *
+ * while it is somewhat counter-intuitive, on sparc, it is
+ * more efficient to place all the constants used in this
+ * function in an array and load the values out of the array
+ * than to manually load the constants. this is because
+ * setting a register to a 32-bit value takes two ops in most
+ * cases: a `sethi' and an `or', but loading a 32-bit value
+ * from memory only takes one `ld' (or `lduw' on v9). while
+ * this increases memory usage, the compiler can find enough
+ * other things to do while waiting so that the pipeline does
+ * not stall. additionally, it is likely that many of these
+ * constants are cached so that later accesses do not even go
+ * out to the bus.
+ *
+ * this array is declared `static' to keep the compiler from
+ * having to bcopy() this array onto the stack frame of
+ * SHA1Transform() each time it is called -- which is
+ * unacceptably expensive.
+ *
+ * the `const' is to ensure that callers are good citizens and
+ * do not try to munge the array. since these routines are
+ * going to be called from inside multithreaded kernelland,
+ * this is a good safety check. -- `sha1_consts' will end up in
+ * .rodata.
+ *
+ * unfortunately, loading from an array in this manner hurts
+ * performance under Intel. So, there is a macro,
+ * SHA1_CONST(), used in SHA1Transform(), that either expands to
+ * a reference to this array, or to the actual constant,
+ * depending on what platform this code is compiled for.
+ */
+
+
+ static const uint32_t sha1_consts[] = {
+ SHA1_CONST_0, SHA1_CONST_1, SHA1_CONST_2, SHA1_CONST_3
+ };
+
+
+ /*
+ * general optimization:
+ *
+ * use individual integers instead of using an array. this is a
+ * win, although the amount it wins by seems to vary quite a bit.
+ */
+
+
+ uint32_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
+ uint32_t w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
+
+
+ /*
+ * sparc optimization:
+ *
+ * if `block' is already aligned on a 4-byte boundary, use
+ * LOAD_BIG_32() directly. otherwise, bcopy() into a
+ * buffer that *is* aligned on a 4-byte boundary and then do
+ * the LOAD_BIG_32() on that buffer. benchmarks have shown
+ * that using the bcopy() is better than loading the bytes
+ * individually and doing the endian-swap by hand.
+ *
+ * even though it's quite tempting to combine the copy and the assignment:
+ *
+ * blk = bcopy(ctx->buf_un.buf32, blk, sizeof (ctx->buf_un.buf32));
+ *
+ * and only have one set of LOAD_BIG_32()'s, the compiler
+ * *does not* like that, so please resist the urge.
+ */
+
+
+ if ((uintptr_t)blk & 0x3) { /* not 4-byte aligned? */
+ bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));
+ w_15 = LOAD_BIG_32(ctx->buf_un.buf32 + 15);
+ w_14 = LOAD_BIG_32(ctx->buf_un.buf32 + 14);
+ w_13 = LOAD_BIG_32(ctx->buf_un.buf32 + 13);
+ w_12 = LOAD_BIG_32(ctx->buf_un.buf32 + 12);
+ w_11 = LOAD_BIG_32(ctx->buf_un.buf32 + 11);
+ w_10 = LOAD_BIG_32(ctx->buf_un.buf32 + 10);
+ w_9 = LOAD_BIG_32(ctx->buf_un.buf32 + 9);
+ w_8 = LOAD_BIG_32(ctx->buf_un.buf32 + 8);
+ w_7 = LOAD_BIG_32(ctx->buf_un.buf32 + 7);
+ w_6 = LOAD_BIG_32(ctx->buf_un.buf32 + 6);
+ w_5 = LOAD_BIG_32(ctx->buf_un.buf32 + 5);
+ w_4 = LOAD_BIG_32(ctx->buf_un.buf32 + 4);
+ w_3 = LOAD_BIG_32(ctx->buf_un.buf32 + 3);
+ w_2 = LOAD_BIG_32(ctx->buf_un.buf32 + 2);
+ w_1 = LOAD_BIG_32(ctx->buf_un.buf32 + 1);
+ w_0 = LOAD_BIG_32(ctx->buf_un.buf32 + 0);
+ } else {
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_15 = LOAD_BIG_32(blk + 60);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_14 = LOAD_BIG_32(blk + 56);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_13 = LOAD_BIG_32(blk + 52);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_12 = LOAD_BIG_32(blk + 48);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_11 = LOAD_BIG_32(blk + 44);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_10 = LOAD_BIG_32(blk + 40);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_9 = LOAD_BIG_32(blk + 36);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_8 = LOAD_BIG_32(blk + 32);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_7 = LOAD_BIG_32(blk + 28);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_6 = LOAD_BIG_32(blk + 24);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_5 = LOAD_BIG_32(blk + 20);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_4 = LOAD_BIG_32(blk + 16);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_3 = LOAD_BIG_32(blk + 12);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_2 = LOAD_BIG_32(blk + 8);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_1 = LOAD_BIG_32(blk + 4);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w_0 = LOAD_BIG_32(blk + 0);
+ }
+#else /* !defined(__sparc) */
+
+void /* CSTYLED */
+SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
+{
+ /* CSTYLED */
+ sha1word a = ctx->state[0];
+ sha1word b = ctx->state[1];
+ sha1word c = ctx->state[2];
+ sha1word d = ctx->state[3];
+ sha1word e = ctx->state[4];
+
+#if defined(W_ARRAY)
+ sha1word w[16];
+#else /* !defined(W_ARRAY) */
+ sha1word w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7;
+ sha1word w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
+#endif /* !defined(W_ARRAY) */
+
+ W(0) = LOAD_BIG_32((void *)(blk + 0));
+ W(1) = LOAD_BIG_32((void *)(blk + 4));
+ W(2) = LOAD_BIG_32((void *)(blk + 8));
+ W(3) = LOAD_BIG_32((void *)(blk + 12));
+ W(4) = LOAD_BIG_32((void *)(blk + 16));
+ W(5) = LOAD_BIG_32((void *)(blk + 20));
+ W(6) = LOAD_BIG_32((void *)(blk + 24));
+ W(7) = LOAD_BIG_32((void *)(blk + 28));
+ W(8) = LOAD_BIG_32((void *)(blk + 32));
+ W(9) = LOAD_BIG_32((void *)(blk + 36));
+ W(10) = LOAD_BIG_32((void *)(blk + 40));
+ W(11) = LOAD_BIG_32((void *)(blk + 44));
+ W(12) = LOAD_BIG_32((void *)(blk + 48));
+ W(13) = LOAD_BIG_32((void *)(blk + 52));
+ W(14) = LOAD_BIG_32((void *)(blk + 56));
+ W(15) = LOAD_BIG_32((void *)(blk + 60));
+
+#endif /* !defined(__sparc) */
+
+ /*
+ * general optimization:
+ *
+ * even though this approach is described in the standard as
+ * being slower algorithmically, it is 30-40% faster than the
+ * "faster" version under SPARC, because this version has more
+ * of the constraints specified at compile-time and uses fewer
+ * variables (and therefore has better register utilization)
+ * than its "speedier" brother. (i've tried both, trust me)
+ *
+ * for either method given in the spec, there is an "assignment"
+ * phase where the following takes place:
+ *
+ * tmp = (main_computation);
+ * e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
+ *
+ * we can make the algorithm go faster by not doing this work,
+ * but just pretending that `d' is now `e', etc. this works
+ * really well and obviates the need for a temporary variable.
+ * however, we still explicitly perform the rotate action,
+ * since it is cheaper on SPARC to do it once than to have to
+ * do it over and over again.
+ */
+
+ /* round 1 */
+ e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0); /* 0 */
+ b = ROTATE_LEFT(b, 30);
+
+ d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(1) + SHA1_CONST(0); /* 1 */
+ a = ROTATE_LEFT(a, 30);
+
+ c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(2) + SHA1_CONST(0); /* 2 */
+ e = ROTATE_LEFT(e, 30);
+
+ b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(3) + SHA1_CONST(0); /* 3 */
+ d = ROTATE_LEFT(d, 30);
+
+ a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(4) + SHA1_CONST(0); /* 4 */
+ c = ROTATE_LEFT(c, 30);
+
+ e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(5) + SHA1_CONST(0); /* 5 */
+ b = ROTATE_LEFT(b, 30);
+
+ d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(6) + SHA1_CONST(0); /* 6 */
+ a = ROTATE_LEFT(a, 30);
+
+ c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(7) + SHA1_CONST(0); /* 7 */
+ e = ROTATE_LEFT(e, 30);
+
+ b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(8) + SHA1_CONST(0); /* 8 */
+ d = ROTATE_LEFT(d, 30);
+
+ a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(9) + SHA1_CONST(0); /* 9 */
+ c = ROTATE_LEFT(c, 30);
+
+ e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(10) + SHA1_CONST(0); /* 10 */
+ b = ROTATE_LEFT(b, 30);
+
+ d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(11) + SHA1_CONST(0); /* 11 */
+ a = ROTATE_LEFT(a, 30);
+
+ c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(12) + SHA1_CONST(0); /* 12 */
+ e = ROTATE_LEFT(e, 30);
+
+ b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(13) + SHA1_CONST(0); /* 13 */
+ d = ROTATE_LEFT(d, 30);
+
+ a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(14) + SHA1_CONST(0); /* 14 */
+ c = ROTATE_LEFT(c, 30);
+
+ e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(15) + SHA1_CONST(0); /* 15 */
+ b = ROTATE_LEFT(b, 30);
+
+ W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 16 */
+ d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(0) + SHA1_CONST(0);
+ a = ROTATE_LEFT(a, 30);
+
+ W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 17 */
+ c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(1) + SHA1_CONST(0);
+ e = ROTATE_LEFT(e, 30);
+
+ W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 18 */
+ b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(2) + SHA1_CONST(0);
+ d = ROTATE_LEFT(d, 30);
+
+ W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 19 */
+ a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(3) + SHA1_CONST(0);
+ c = ROTATE_LEFT(c, 30);
+
+ /* round 2 */
+ W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 20 */
+ e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(4) + SHA1_CONST(1);
+ b = ROTATE_LEFT(b, 30);
+
+ W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 21 */
+ d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(5) + SHA1_CONST(1);
+ a = ROTATE_LEFT(a, 30);
+
+ W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 22 */
+ c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(6) + SHA1_CONST(1);
+ e = ROTATE_LEFT(e, 30);
+
+ W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 23 */
+ b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(7) + SHA1_CONST(1);
+ d = ROTATE_LEFT(d, 30);
+
+ W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 24 */
+ a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(8) + SHA1_CONST(1);
+ c = ROTATE_LEFT(c, 30);
+
+ W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 25 */
+ e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(9) + SHA1_CONST(1);
+ b = ROTATE_LEFT(b, 30);
+
+ W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 26 */
+ d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(10) + SHA1_CONST(1);
+ a = ROTATE_LEFT(a, 30);
+
+ W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 27 */
+ c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(11) + SHA1_CONST(1);
+ e = ROTATE_LEFT(e, 30);
+
+ W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 28 */
+ b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(12) + SHA1_CONST(1);
+ d = ROTATE_LEFT(d, 30);
+
+ W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 29 */
+ a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(13) + SHA1_CONST(1);
+ c = ROTATE_LEFT(c, 30);
+
+ W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 30 */
+ e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(14) + SHA1_CONST(1);
+ b = ROTATE_LEFT(b, 30);
+
+ W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 31 */
+ d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(15) + SHA1_CONST(1);
+ a = ROTATE_LEFT(a, 30);
+
+ W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 32 */
+ c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(0) + SHA1_CONST(1);
+ e = ROTATE_LEFT(e, 30);
+
+ W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 33 */
+ b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(1) + SHA1_CONST(1);
+ d = ROTATE_LEFT(d, 30);
+
+ W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 34 */
+ a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(2) + SHA1_CONST(1);
+ c = ROTATE_LEFT(c, 30);
+
+ W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 35 */
+ e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(3) + SHA1_CONST(1);
+ b = ROTATE_LEFT(b, 30);
+
+ W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 36 */
+ d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(4) + SHA1_CONST(1);
+ a = ROTATE_LEFT(a, 30);
+
+ W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 37 */
+ c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(5) + SHA1_CONST(1);
+ e = ROTATE_LEFT(e, 30);
+
+ W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 38 */
+ b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(6) + SHA1_CONST(1);
+ d = ROTATE_LEFT(d, 30);
+
+ W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 39 */
+ a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(7) + SHA1_CONST(1);
+ c = ROTATE_LEFT(c, 30);
+
+ /* round 3 */
+ W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 40 */
+ e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(8) + SHA1_CONST(2);
+ b = ROTATE_LEFT(b, 30);
+
+ W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 41 */
+ d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(9) + SHA1_CONST(2);
+ a = ROTATE_LEFT(a, 30);
+
+ W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 42 */
+ c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(10) + SHA1_CONST(2);
+ e = ROTATE_LEFT(e, 30);
+
+ W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 43 */
+ b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(11) + SHA1_CONST(2);
+ d = ROTATE_LEFT(d, 30);
+
+ W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 44 */
+ a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(12) + SHA1_CONST(2);
+ c = ROTATE_LEFT(c, 30);
+
+ W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 45 */
+ e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(13) + SHA1_CONST(2);
+ b = ROTATE_LEFT(b, 30);
+
+ W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 46 */
+ d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(14) + SHA1_CONST(2);
+ a = ROTATE_LEFT(a, 30);
+
+ W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 47 */
+ c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(15) + SHA1_CONST(2);
+ e = ROTATE_LEFT(e, 30);
+
+ W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 48 */
+ b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(0) + SHA1_CONST(2);
+ d = ROTATE_LEFT(d, 30);
+
+ W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 49 */
+ a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(1) + SHA1_CONST(2);
+ c = ROTATE_LEFT(c, 30);
+
+ W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 50 */
+ e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(2) + SHA1_CONST(2);
+ b = ROTATE_LEFT(b, 30);
+
+ W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 51 */
+ d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(3) + SHA1_CONST(2);
+ a = ROTATE_LEFT(a, 30);
+
+ W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 52 */
+ c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(4) + SHA1_CONST(2);
+ e = ROTATE_LEFT(e, 30);
+
+ W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 53 */
+ b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(5) + SHA1_CONST(2);
+ d = ROTATE_LEFT(d, 30);
+
+ W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 54 */
+ a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(6) + SHA1_CONST(2);
+ c = ROTATE_LEFT(c, 30);
+
+ W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 55 */
+ e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(7) + SHA1_CONST(2);
+ b = ROTATE_LEFT(b, 30);
+
+ W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 56 */
+ d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(8) + SHA1_CONST(2);
+ a = ROTATE_LEFT(a, 30);
+
+ W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 57 */
+ c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(9) + SHA1_CONST(2);
+ e = ROTATE_LEFT(e, 30);
+
+ W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 58 */
+ b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(10) + SHA1_CONST(2);
+ d = ROTATE_LEFT(d, 30);
+
+ W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 59 */
+ a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(11) + SHA1_CONST(2);
+ c = ROTATE_LEFT(c, 30);
+
+ /* round 4 (the parity function G is reused, with a new constant) */
+ W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 60 */
+ e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(12) + SHA1_CONST(3);
+ b = ROTATE_LEFT(b, 30);
+
+ W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 61 */
+ d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(13) + SHA1_CONST(3);
+ a = ROTATE_LEFT(a, 30);
+
+ W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 62 */
+ c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(14) + SHA1_CONST(3);
+ e = ROTATE_LEFT(e, 30);
+
+ W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 63 */
+ b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(15) + SHA1_CONST(3);
+ d = ROTATE_LEFT(d, 30);
+
+ W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 64 */
+ a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(0) + SHA1_CONST(3);
+ c = ROTATE_LEFT(c, 30);
+
+ W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 65 */
+ e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(1) + SHA1_CONST(3);
+ b = ROTATE_LEFT(b, 30);
+
+ W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 66 */
+ d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(2) + SHA1_CONST(3);
+ a = ROTATE_LEFT(a, 30);
+
+ W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 67 */
+ c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(3) + SHA1_CONST(3);
+ e = ROTATE_LEFT(e, 30);
+
+ W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 68 */
+ b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(4) + SHA1_CONST(3);
+ d = ROTATE_LEFT(d, 30);
+
+ W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 69 */
+ a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(5) + SHA1_CONST(3);
+ c = ROTATE_LEFT(c, 30);
+
+ W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 70 */
+ e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(6) + SHA1_CONST(3);
+ b = ROTATE_LEFT(b, 30);
+
+ W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 71 */
+ d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(7) + SHA1_CONST(3);
+ a = ROTATE_LEFT(a, 30);
+
+ W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 72 */
+ c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(8) + SHA1_CONST(3);
+ e = ROTATE_LEFT(e, 30);
+
+ W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 73 */
+ b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(9) + SHA1_CONST(3);
+ d = ROTATE_LEFT(d, 30);
+
+ W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 74 */
+ a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(10) + SHA1_CONST(3);
+ c = ROTATE_LEFT(c, 30);
+
+ W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 75 */
+ e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(11) + SHA1_CONST(3);
+ b = ROTATE_LEFT(b, 30);
+
+ W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 76 */
+ d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(12) + SHA1_CONST(3);
+ a = ROTATE_LEFT(a, 30);
+
+ W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 77 */
+ c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(13) + SHA1_CONST(3);
+ e = ROTATE_LEFT(e, 30);
+
+ W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 78 */
+ b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(14) + SHA1_CONST(3);
+ d = ROTATE_LEFT(d, 30);
+
+ W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 79 */
+
+ ctx->state[0] += ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(15) +
+ SHA1_CONST(3);
+ ctx->state[1] += b;
+ ctx->state[2] += ROTATE_LEFT(c, 30);
+ ctx->state[3] += d;
+ ctx->state[4] += e;
+
+ /* zeroize sensitive information */
+ W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0;
+ W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0;
+}
+#endif /* !__amd64 */
+
+
+/*
+ * Encode()
+ *
+ * purpose: to convert a list of numbers from little endian to big endian
+ * input: uint8_t * : place to store the converted big endian numbers
+ * uint32_t * : place to get numbers to convert from
+ * size_t : the length of the input in bytes
+ * output: void
+ */
+
+static void
+Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input,
+ size_t len)
+{
+ size_t i, j;
+
+#if defined(__sparc)
+ if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
+ for (i = 0, j = 0; j < len; i++, j += 4) {
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ *((uint32_t *)(output + j)) = input[i];
+ }
+ } else {
+#endif /* little endian -- will work on big endian, but slowly */
+
+ for (i = 0, j = 0; j < len; i++, j += 4) {
+ output[j] = (input[i] >> 24) & 0xff;
+ output[j + 1] = (input[i] >> 16) & 0xff;
+ output[j + 2] = (input[i] >> 8) & 0xff;
+ output[j + 3] = input[i] & 0xff;
+ }
+#if defined(__sparc)
+ }
+#endif
+}
diff --git a/zfs/module/icp/algs/sha2/sha2.c b/zfs/module/icp/algs/sha2/sha2.c
new file mode 100644
index 000000000000..dbe008190688
--- /dev/null
+++ b/zfs/module/icp/algs/sha2/sha2.c
@@ -0,0 +1,960 @@
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
+
+/*
+ * The basic framework for this code came from the reference
+ * implementation for MD5. That implementation is Copyright (C)
+ * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
+ *
+ * License to copy and use this software is granted provided that it
+ * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
+ * Algorithm" in all material mentioning or referencing this software
+ * or this function.
+ *
+ * License is also granted to make and use derivative works provided
+ * that such works are identified as "derived from the RSA Data
+ * Security, Inc. MD5 Message-Digest Algorithm" in all material
+ * mentioning or referencing the derived work.
+ *
+ * RSA Data Security, Inc. makes no representations concerning either
+ * the merchantability of this software or the suitability of this
+ * software for any particular purpose. It is provided "as is"
+ * without express or implied warranty of any kind.
+ *
+ * These notices must be retained in any copies of any part of this
+ * documentation and/or software.
+ *
+ * NOTE: Cleaned-up and optimized version of SHA2, based on the FIPS 180-2
+ * standard, available at
+ * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf
+ * Not as fast as one would like -- further optimizations are encouraged
+ * and appreciated.
+ */
+
+#include <sys/zfs_context.h>
+#define _SHA2_IMPL
+#include <sys/sha2.h>
+#include <sha2/sha2_consts.h>
+
+#define _RESTRICT_KYWD
+
+#ifdef _LITTLE_ENDIAN
+#include <sys/byteorder.h>
+#define HAVE_HTONL
+#endif
+#include <sys/isa_defs.h> /* for _ILP32 */
+
+static void Encode(uint8_t *, uint32_t *, size_t);
+static void Encode64(uint8_t *, uint64_t *, size_t);
+
+#if defined(__amd64)
+#define SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1)
+#define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1)
+
+void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
+void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num);
+
+#else
+static void SHA256Transform(SHA2_CTX *, const uint8_t *);
+static void SHA512Transform(SHA2_CTX *, const uint8_t *);
+#endif /* __amd64 */
+
+static uint8_t PADDING[128] = { 0x80, /* all zeros */ };
+
+/*
+ * The low-level checksum routines use a lot of stack space. On systems where
+ * small stacks are enforced (like 32-bit kernel builds), insert compiler memory
+ * barriers to reduce stack frame size. This can reduce the SHA512Transform()
+ * stack frame usage from 3k to <1k on ARM32, for example.
+ */
+#if defined(_ILP32) || defined(__powerpc) /* small stack */
+#define SMALL_STACK_MEMORY_BARRIER asm volatile("": : :"memory");
+#else
+#define SMALL_STACK_MEMORY_BARRIER
+#endif
+
+/* Ch and Maj are the basic SHA2 functions. */
+#define Ch(b, c, d) (((b) & (c)) ^ ((~b) & (d)))
+#define Maj(b, c, d) (((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d)))
+
+/* Rotates x right n bits. */
+#define ROTR(x, n) \
+ (((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n))))
+
+/* Shift x right n bits */
+#define SHR(x, n) ((x) >> (n))
+
+/* SHA256 Functions */
+#define BIGSIGMA0_256(x) (ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22))
+#define BIGSIGMA1_256(x) (ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25))
+#define SIGMA0_256(x) (ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3))
+#define SIGMA1_256(x) (ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10))
+
+#define SHA256ROUND(a, b, c, d, e, f, g, h, i, w) \
+ T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) + SHA256_CONST(i) + w; \
+ d += T1; \
+ T2 = BIGSIGMA0_256(a) + Maj(a, b, c); \
+ h = T1 + T2
+
+/* SHA384/512 Functions */
+#define BIGSIGMA0(x) (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39))
+#define BIGSIGMA1(x) (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41))
+#define SIGMA0(x) (ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7))
+#define SIGMA1(x) (ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6))
+#define SHA512ROUND(a, b, c, d, e, f, g, h, i, w) \
+ T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + SHA512_CONST(i) + w; \
+ d += T1; \
+ T2 = BIGSIGMA0(a) + Maj(a, b, c); \
+ h = T1 + T2; \
+ SMALL_STACK_MEMORY_BARRIER;
+
+/*
+ * sparc optimization:
+ *
+ * on the sparc, we can load big endian 32-bit data easily. note that
+ * special care must be taken to ensure the address is 32-bit aligned.
+ * in the interest of speed, we don't check to make sure, since
+ * careful programming can guarantee this for us.
+ */
+
+#if defined(_BIG_ENDIAN)
+#define LOAD_BIG_32(addr) (*(uint32_t *)(addr))
+#define LOAD_BIG_64(addr) (*(uint64_t *)(addr))
+
+#elif defined(HAVE_HTONL)
+#define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
+#define LOAD_BIG_64(addr) htonll(*((uint64_t *)(addr)))
+
+#else
+/* little endian -- will work on big endian, but slowly */
+#define LOAD_BIG_32(addr) \
+ (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
+#define LOAD_BIG_64(addr) \
+ (((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) | \
+ ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) | \
+ ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) | \
+ ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7])
+#endif /* _BIG_ENDIAN */
+
+
+#if !defined(__amd64)
+/* SHA256 Transform */
+
+static void
+SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk)
+{
+ uint32_t a = ctx->state.s32[0];
+ uint32_t b = ctx->state.s32[1];
+ uint32_t c = ctx->state.s32[2];
+ uint32_t d = ctx->state.s32[3];
+ uint32_t e = ctx->state.s32[4];
+ uint32_t f = ctx->state.s32[5];
+ uint32_t g = ctx->state.s32[6];
+ uint32_t h = ctx->state.s32[7];
+
+ uint32_t w0, w1, w2, w3, w4, w5, w6, w7;
+ uint32_t w8, w9, w10, w11, w12, w13, w14, w15;
+ uint32_t T1, T2;
+
+#if defined(__sparc)
+ static const uint32_t sha256_consts[] = {
+ SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2,
+ SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5,
+ SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8,
+ SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11,
+ SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14,
+ SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17,
+ SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20,
+ SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23,
+ SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26,
+ SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29,
+ SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32,
+ SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35,
+ SHA256_CONST_36, SHA256_CONST_37, SHA256_CONST_38,
+ SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41,
+ SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44,
+ SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47,
+ SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50,
+ SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53,
+ SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56,
+ SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59,
+ SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62,
+ SHA256_CONST_63
+ };
+#endif /* __sparc */
+
+ if ((uintptr_t)blk & 0x3) { /* not 4-byte aligned? */
+ bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32));
+ blk = (uint8_t *)ctx->buf_un.buf32;
+ }
+
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w0 = LOAD_BIG_32(blk + 4 * 0);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w1 = LOAD_BIG_32(blk + 4 * 1);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w2 = LOAD_BIG_32(blk + 4 * 2);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w3 = LOAD_BIG_32(blk + 4 * 3);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w4 = LOAD_BIG_32(blk + 4 * 4);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w5 = LOAD_BIG_32(blk + 4 * 5);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w6 = LOAD_BIG_32(blk + 4 * 6);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w7 = LOAD_BIG_32(blk + 4 * 7);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w8 = LOAD_BIG_32(blk + 4 * 8);
+ SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w9 = LOAD_BIG_32(blk + 4 * 9);
+ SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w10 = LOAD_BIG_32(blk + 4 * 10);
+ SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w11 = LOAD_BIG_32(blk + 4 * 11);
+ SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w12 = LOAD_BIG_32(blk + 4 * 12);
+ SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w13 = LOAD_BIG_32(blk + 4 * 13);
+ SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w14 = LOAD_BIG_32(blk + 4 * 14);
+ SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w15 = LOAD_BIG_32(blk + 4 * 15);
+ SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15);
+
+ w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
+ SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0);
+ w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
+ SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1);
+ w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
+ SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2);
+ w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
+ SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3);
+ w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
+ SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4);
+ w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
+ SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5);
+ w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
+ SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6);
+ w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
+ SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7);
+ w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
+ SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8);
+ w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
+ SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9);
+ w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
+ SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10);
+ w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
+ SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11);
+ w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
+ SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12);
+ w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
+ SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13);
+ w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
+ SHA256ROUND(c, d, e, f, g, h, a, b, 30, w14);
+ w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
+ SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15);
+
+ w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
+ SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0);
+ w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
+ SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1);
+ w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
+ SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2);
+ w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
+ SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3);
+ w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
+ SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4);
+ w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
+ SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5);
+ w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
+ SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6);
+ w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
+ SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7);
+ w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
+ SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8);
+ w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
+ SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9);
+ w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
+ SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10);
+ w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
+ SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11);
+ w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
+ SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12);
+ w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
+ SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13);
+ w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
+ SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14);
+ w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
+ SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15);
+
+ w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0;
+ SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0);
+ w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1;
+ SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1);
+ w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2;
+ SHA256ROUND(g, h, a, b, c, d, e, f, 50, w2);
+ w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3;
+ SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3);
+ w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4;
+ SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4);
+ w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5;
+ SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5);
+ w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6;
+ SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6);
+ w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7;
+ SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7);
+ w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8;
+ SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8);
+ w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9;
+ SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9);
+ w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10;
+ SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10);
+ w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11;
+ SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11);
+ w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12;
+ SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12);
+ w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13;
+ SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13);
+ w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14;
+ SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14);
+ w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15;
+ SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15);
+
+ ctx->state.s32[0] += a;
+ ctx->state.s32[1] += b;
+ ctx->state.s32[2] += c;
+ ctx->state.s32[3] += d;
+ ctx->state.s32[4] += e;
+ ctx->state.s32[5] += f;
+ ctx->state.s32[6] += g;
+ ctx->state.s32[7] += h;
+}
+
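+/*
+ * Editor's note (not part of the original change): the 64 rounds above
+ * keep only a sliding 16-word message schedule, updated in place with
+ * the FIPS 180-2 recurrence
+ *
+ *	w[t] = SIGMA1_256(w[t-2]) + w[t-7] + SIGMA0_256(w[t-15]) + w[t-16]
+ *
+ * so rounds 16..63 recycle w0..w15 instead of storing all 64 words.
+ */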
+
+/* SHA384 and SHA512 Transform */
+
+static void
+SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk)
+{
+
+ uint64_t a = ctx->state.s64[0];
+ uint64_t b = ctx->state.s64[1];
+ uint64_t c = ctx->state.s64[2];
+ uint64_t d = ctx->state.s64[3];
+ uint64_t e = ctx->state.s64[4];
+ uint64_t f = ctx->state.s64[5];
+ uint64_t g = ctx->state.s64[6];
+ uint64_t h = ctx->state.s64[7];
+
+ uint64_t w0, w1, w2, w3, w4, w5, w6, w7;
+ uint64_t w8, w9, w10, w11, w12, w13, w14, w15;
+ uint64_t T1, T2;
+
+#if defined(__sparc)
+ static const uint64_t sha512_consts[] = {
+ SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2,
+ SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5,
+ SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8,
+ SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11,
+ SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14,
+ SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17,
+ SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20,
+ SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23,
+ SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26,
+ SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29,
+ SHA512_CONST_30, SHA512_CONST_31, SHA512_CONST_32,
+ SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35,
+ SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38,
+ SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41,
+ SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44,
+ SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47,
+ SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50,
+ SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53,
+ SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56,
+ SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59,
+ SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62,
+ SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65,
+ SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68,
+ SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71,
+ SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74,
+ SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77,
+ SHA512_CONST_78, SHA512_CONST_79
+ };
+#endif /* __sparc */
+
+
+ if ((uintptr_t)blk & 0x7) { /* not 8-byte aligned? */
+ bcopy(blk, ctx->buf_un.buf64, sizeof (ctx->buf_un.buf64));
+ blk = (uint8_t *)ctx->buf_un.buf64;
+ }
+
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w0 = LOAD_BIG_64(blk + 8 * 0);
+ SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w1 = LOAD_BIG_64(blk + 8 * 1);
+ SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w2 = LOAD_BIG_64(blk + 8 * 2);
+ SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w3 = LOAD_BIG_64(blk + 8 * 3);
+ SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w4 = LOAD_BIG_64(blk + 8 * 4);
+ SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w5 = LOAD_BIG_64(blk + 8 * 5);
+ SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w6 = LOAD_BIG_64(blk + 8 * 6);
+ SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w7 = LOAD_BIG_64(blk + 8 * 7);
+ SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w8 = LOAD_BIG_64(blk + 8 * 8);
+ SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w9 = LOAD_BIG_64(blk + 8 * 9);
+ SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w10 = LOAD_BIG_64(blk + 8 * 10);
+ SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w11 = LOAD_BIG_64(blk + 8 * 11);
+ SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w12 = LOAD_BIG_64(blk + 8 * 12);
+ SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w13 = LOAD_BIG_64(blk + 8 * 13);
+ SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w14 = LOAD_BIG_64(blk + 8 * 14);
+ SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14);
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ w15 = LOAD_BIG_64(blk + 8 * 15);
+ SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15);
+
+ w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
+ SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0);
+ w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
+ SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1);
+ w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
+ SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2);
+ w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
+ SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3);
+ w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
+ SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4);
+ w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
+ SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5);
+ w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
+ SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6);
+ w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
+ SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7);
+ w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
+ SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8);
+ w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
+ SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9);
+ w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
+ SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10);
+ w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
+ SHA512ROUND(f, g, h, a, b, c, d, e, 27, w11);
+ w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
+ SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12);
+ w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
+ SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13);
+ w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
+ SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14);
+ w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
+ SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15);
+
+ w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
+ SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0);
+ w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
+ SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1);
+ w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
+ SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2);
+ w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
+ SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3);
+ w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
+ SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4);
+ w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
+ SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5);
+ w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
+ SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6);
+ w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
+ SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7);
+ w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
+ SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8);
+ w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
+ SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9);
+ w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
+ SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10);
+ w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
+ SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11);
+ w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
+ SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12);
+ w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
+ SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13);
+ w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
+ SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14);
+ w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
+ SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15);
+
+ w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
+ SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0);
+ w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
+ SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1);
+ w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
+ SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2);
+ w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
+ SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3);
+ w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
+ SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4);
+ w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
+ SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5);
+ w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
+ SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6);
+ w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
+ SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7);
+ w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
+ SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8);
+ w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
+ SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9);
+ w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
+ SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10);
+ w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
+ SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11);
+ w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
+ SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12);
+ w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
+ SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13);
+ w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
+ SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14);
+ w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
+ SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15);
+
+ w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0;
+ SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0);
+ w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1;
+ SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1);
+ w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2;
+ SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2);
+ w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3;
+ SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3);
+ w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4;
+ SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4);
+ w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5;
+ SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5);
+ w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6;
+ SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6);
+ w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7;
+ SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7);
+ w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8;
+ SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8);
+ w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9;
+ SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9);
+ w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10;
+ SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10);
+ w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11;
+ SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11);
+ w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12;
+ SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12);
+ w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13;
+ SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13);
+ w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14;
+ SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14);
+ w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15;
+ SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15);
+
+ ctx->state.s64[0] += a;
+ ctx->state.s64[1] += b;
+ ctx->state.s64[2] += c;
+ ctx->state.s64[3] += d;
+ ctx->state.s64[4] += e;
+ ctx->state.s64[5] += f;
+ ctx->state.s64[6] += g;
+ ctx->state.s64[7] += h;
+
+}
+#endif /* !__amd64 */
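+
+/*
+ * For reference, the unrolled w0..w15 updates above implement the
+ * standard SHA-512 message schedule, with the sixteen words used as a
+ * circular buffer.  An equivalent (but slower) rolled form, assuming a
+ * flat eighty-word schedule array, would be:
+ *
+ * for (t = 16; t < 80; t++)
+ * w[t] = SIGMA1(w[t - 2]) + w[t - 7] +
+ * SIGMA0(w[t - 15]) + w[t - 16];
+ */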
+
+/*
+ * Encode()
+ *
+ * purpose: to convert an array of host-order 32-bit words into a
+ * big-endian byte stream
+ * input: uint8_t * : place to store the converted big-endian bytes
+ * uint32_t * : place to get the words to convert from
+ * size_t : the number of output bytes to produce (a multiple of 4)
+ * output: void
+ */
+
+static void
+Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input,
+ size_t len)
+{
+ size_t i, j;
+
+#if defined(__sparc)
+ if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
+ for (i = 0, j = 0; j < len; i++, j += 4) {
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ *((uint32_t *)(output + j)) = input[i];
+ }
+ } else {
+#endif /* little endian -- will work on big endian, but slowly */
+ for (i = 0, j = 0; j < len; i++, j += 4) {
+ output[j] = (input[i] >> 24) & 0xff;
+ output[j + 1] = (input[i] >> 16) & 0xff;
+ output[j + 2] = (input[i] >> 8) & 0xff;
+ output[j + 3] = input[i] & 0xff;
+ }
+#if defined(__sparc)
+ }
+#endif
+}
+
+static void
+Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input,
+ size_t len)
+{
+ size_t i, j;
+
+#if defined(__sparc)
+ if (IS_P2ALIGNED(output, sizeof (uint64_t))) {
+ for (i = 0, j = 0; j < len; i++, j += 8) {
+ /* LINTED E_BAD_PTR_CAST_ALIGN */
+ *((uint64_t *)(output + j)) = input[i];
+ }
+ } else {
+#endif /* little endian -- will work on big endian, but slowly */
+ for (i = 0, j = 0; j < len; i++, j += 8) {
+
+ output[j] = (input[i] >> 56) & 0xff;
+ output[j + 1] = (input[i] >> 48) & 0xff;
+ output[j + 2] = (input[i] >> 40) & 0xff;
+ output[j + 3] = (input[i] >> 32) & 0xff;
+ output[j + 4] = (input[i] >> 24) & 0xff;
+ output[j + 5] = (input[i] >> 16) & 0xff;
+ output[j + 6] = (input[i] >> 8) & 0xff;
+ output[j + 7] = input[i] & 0xff;
+ }
+#if defined(__sparc)
+ }
+#endif
+}
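+
+/*
+ * A minimal sketch (illustrative, compiled out) of the byte order
+ * Encode64() produces: the 64-bit value below is stored most-significant
+ * byte first, regardless of host endianness.
+ */
+#if 0
+static void
+encode64_example(void)
+{
+ uint64_t in = 0x0102030405060708ULL;
+ uint8_t out[8];
+
+ Encode64(out, &in, sizeof (out));
+ /* out[] now holds { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 } */
+}
+#endif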
+
+void
+SHA2Init(uint64_t mech, SHA2_CTX *ctx)
+{
+
+ switch (mech) {
+ case SHA256_MECH_INFO_TYPE:
+ case SHA256_HMAC_MECH_INFO_TYPE:
+ case SHA256_HMAC_GEN_MECH_INFO_TYPE:
+ ctx->state.s32[0] = 0x6a09e667U;
+ ctx->state.s32[1] = 0xbb67ae85U;
+ ctx->state.s32[2] = 0x3c6ef372U;
+ ctx->state.s32[3] = 0xa54ff53aU;
+ ctx->state.s32[4] = 0x510e527fU;
+ ctx->state.s32[5] = 0x9b05688cU;
+ ctx->state.s32[6] = 0x1f83d9abU;
+ ctx->state.s32[7] = 0x5be0cd19U;
+ break;
+ case SHA384_MECH_INFO_TYPE:
+ case SHA384_HMAC_MECH_INFO_TYPE:
+ case SHA384_HMAC_GEN_MECH_INFO_TYPE:
+ ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL;
+ ctx->state.s64[1] = 0x629a292a367cd507ULL;
+ ctx->state.s64[2] = 0x9159015a3070dd17ULL;
+ ctx->state.s64[3] = 0x152fecd8f70e5939ULL;
+ ctx->state.s64[4] = 0x67332667ffc00b31ULL;
+ ctx->state.s64[5] = 0x8eb44a8768581511ULL;
+ ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL;
+ ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL;
+ break;
+ case SHA512_MECH_INFO_TYPE:
+ case SHA512_HMAC_MECH_INFO_TYPE:
+ case SHA512_HMAC_GEN_MECH_INFO_TYPE:
+ ctx->state.s64[0] = 0x6a09e667f3bcc908ULL;
+ ctx->state.s64[1] = 0xbb67ae8584caa73bULL;
+ ctx->state.s64[2] = 0x3c6ef372fe94f82bULL;
+ ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL;
+ ctx->state.s64[4] = 0x510e527fade682d1ULL;
+ ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL;
+ ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL;
+ ctx->state.s64[7] = 0x5be0cd19137e2179ULL;
+ break;
+ case SHA512_224_MECH_INFO_TYPE:
+ ctx->state.s64[0] = 0x8C3D37C819544DA2ULL;
+ ctx->state.s64[1] = 0x73E1996689DCD4D6ULL;
+ ctx->state.s64[2] = 0x1DFAB7AE32FF9C82ULL;
+ ctx->state.s64[3] = 0x679DD514582F9FCFULL;
+ ctx->state.s64[4] = 0x0F6D2B697BD44DA8ULL;
+ ctx->state.s64[5] = 0x77E36F7304C48942ULL;
+ ctx->state.s64[6] = 0x3F9D85A86A1D36C8ULL;
+ ctx->state.s64[7] = 0x1112E6AD91D692A1ULL;
+ break;
+ case SHA512_256_MECH_INFO_TYPE:
+ ctx->state.s64[0] = 0x22312194FC2BF72CULL;
+ ctx->state.s64[1] = 0x9F555FA3C84C64C2ULL;
+ ctx->state.s64[2] = 0x2393B86B6F53B151ULL;
+ ctx->state.s64[3] = 0x963877195940EABDULL;
+ ctx->state.s64[4] = 0x96283EE2A88EFFE3ULL;
+ ctx->state.s64[5] = 0xBE5E1E2553863992ULL;
+ ctx->state.s64[6] = 0x2B0199FC2C85B8AAULL;
+ ctx->state.s64[7] = 0x0EB72DDC81C52CA2ULL;
+ break;
+#ifdef _KERNEL
+ default:
+ cmn_err(CE_PANIC,
+ "sha2_init: failed to find a supported algorithm: 0x%x",
+ (uint32_t)mech);
+
+#endif /* _KERNEL */
+ }
+
+ ctx->algotype = (uint32_t)mech;
+ ctx->count.c64[0] = ctx->count.c64[1] = 0;
+}
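+
+/*
+ * Note that SHA-384, SHA-512/224 and SHA-512/256 differ from SHA-512 only
+ * in the initialization vectors above and in how SHA2Final() truncates the
+ * final state; the compression function and 128-byte block size are shared.
+ */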
+
+#ifndef _KERNEL
+
+/* #pragma inline(SHA256Init, SHA384Init, SHA512Init) */
+void
+SHA256Init(SHA256_CTX *ctx)
+{
+ SHA2Init(SHA256, ctx);
+}
+
+void
+SHA384Init(SHA384_CTX *ctx)
+{
+ SHA2Init(SHA384, ctx);
+}
+
+void
+SHA512Init(SHA512_CTX *ctx)
+{
+ SHA2Init(SHA512, ctx);
+}
+
+#endif /* _KERNEL */
+
+/*
+ * SHA2Update()
+ *
+ * purpose: continues a SHA2 digest operation, using the message block
+ * to update the context.
+ * input: SHA2_CTX * : the context to update
+ * void * : the message block
+ * size_t : the length of the message block, in bytes
+ * output: void
+ */
+
+void
+SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len)
+{
+ uint32_t i, buf_index, buf_len, buf_limit;
+ const uint8_t *input = inptr;
+ uint32_t algotype = ctx->algotype;
+#if defined(__amd64)
+ uint32_t block_count;
+#endif /* __amd64 */
+
+ /* check for noop */
+ if (input_len == 0)
+ return;
+
+ if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
+ buf_limit = 64;
+
+ /* compute number of bytes mod 64 */
+ buf_index = (ctx->count.c32[1] >> 3) & 0x3F;
+
+ /* update number of bits */
+ if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3))
+ ctx->count.c32[0]++;
+
+ ctx->count.c32[0] += (input_len >> 29);
+
+ } else {
+ buf_limit = 128;
+
+ /* compute number of bytes mod 128 */
+ buf_index = (ctx->count.c64[1] >> 3) & 0x7F;
+
+ /* update number of bits */
+ if ((ctx->count.c64[1] += (input_len << 3)) < (input_len << 3))
+ ctx->count.c64[0]++;
+
+		/* high word gets the bits that overflow c64[1] */
+		ctx->count.c64[0] += (input_len >> 61);
+ }
+
+ buf_len = buf_limit - buf_index;
+
+ /* transform as many times as possible */
+ i = 0;
+ if (input_len >= buf_len) {
+
+ /*
+ * general optimization:
+ *
+ * only do initial bcopy() and SHA2Transform() if
+ * buf_index != 0. if buf_index == 0, we're just
+ * wasting our time doing the bcopy() since there
+ * wasn't any data left over from a previous call to
+ * SHA2Update().
+ */
+ if (buf_index) {
+ bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
+ if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
+ SHA256Transform(ctx, ctx->buf_un.buf8);
+ else
+ SHA512Transform(ctx, ctx->buf_un.buf8);
+
+ i = buf_len;
+ }
+
+#if !defined(__amd64)
+ if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
+ for (; i + buf_limit - 1 < input_len; i += buf_limit) {
+ SHA256Transform(ctx, &input[i]);
+ }
+ } else {
+ for (; i + buf_limit - 1 < input_len; i += buf_limit) {
+ SHA512Transform(ctx, &input[i]);
+ }
+ }
+
+#else
+ if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
+ block_count = (input_len - i) >> 6;
+ if (block_count > 0) {
+ SHA256TransformBlocks(ctx, &input[i],
+ block_count);
+ i += block_count << 6;
+ }
+ } else {
+ block_count = (input_len - i) >> 7;
+ if (block_count > 0) {
+ SHA512TransformBlocks(ctx, &input[i],
+ block_count);
+ i += block_count << 7;
+ }
+ }
+#endif /* !__amd64 */
+
+ /*
+ * general optimization:
+ *
+ * if i and input_len are the same, return now instead
+ * of calling bcopy(), since the bcopy() in this case
+ * will be an expensive noop.
+ */
+
+ if (input_len == i)
+ return;
+
+ buf_index = 0;
+ }
+
+ /* buffer remaining input */
+ bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
+}
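+
+/*
+ * A minimal sketch (illustrative, compiled out): splitting the input
+ * across several SHA2Update() calls yields the same digest as a single
+ * call over the whole buffer, because the context buffers partial blocks
+ * internally.
+ */
+#if 0
+static void
+sha2_update_split_example(SHA2_CTX *ctx, const uint8_t *buf, size_t len)
+{
+ size_t half = len / 2;
+
+ SHA2Update(ctx, buf, half);
+ SHA2Update(ctx, buf + half, len - half);
+}
+#endif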
+
+/*
+ * SHA2Final()
+ *
+ * purpose: ends a SHA2 digest operation, finalizing the message digest and
+ * zeroing the context.
+ * input: uchar_t * : a buffer to store the digest
+ * : The function actually uses void* because many
+ * : callers pass things other than uchar_t here.
+ * SHA2_CTX * : the context to finalize, save, and zero
+ * output: void
+ */
+
+void
+SHA2Final(void *digest, SHA2_CTX *ctx)
+{
+ uint8_t bitcount_be[sizeof (ctx->count.c32)];
+ uint8_t bitcount_be64[sizeof (ctx->count.c64)];
+ uint32_t index;
+ uint32_t algotype = ctx->algotype;
+
+ if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
+ index = (ctx->count.c32[1] >> 3) & 0x3f;
+ Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be));
+ SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
+ SHA2Update(ctx, bitcount_be, sizeof (bitcount_be));
+ Encode(digest, ctx->state.s32, sizeof (ctx->state.s32));
+ } else {
+ index = (ctx->count.c64[1] >> 3) & 0x7f;
+ Encode64(bitcount_be64, ctx->count.c64,
+ sizeof (bitcount_be64));
+ SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index);
+ SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64));
+ if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) {
+ ctx->state.s64[6] = ctx->state.s64[7] = 0;
+ Encode64(digest, ctx->state.s64,
+ sizeof (uint64_t) * 6);
+ } else if (algotype == SHA512_224_MECH_INFO_TYPE) {
+ uint8_t last[sizeof (uint64_t)];
+ /*
+ * Since the 224-bit SHA-512/224 digest doesn't align to
+ * 64-bit boundaries, we must do the encoding in three steps:
+ * 1) encode the three 64-bit words that fit neatly
+ * 2) encode the fourth 64-bit word to a temp buffer
+ * 3) take the upper 32 bits (the first four bytes) of the
+ * temp buffer and append them to the digest
+ */
+ Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 3);
+ Encode64(last, &ctx->state.s64[3], sizeof (uint64_t));
+ bcopy(last, (uint8_t *)digest + 24, 4);
+ } else if (algotype == SHA512_256_MECH_INFO_TYPE) {
+ Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 4);
+ } else {
+ Encode64(digest, ctx->state.s64,
+ sizeof (ctx->state.s64));
+ }
+ }
+
+ /* zeroize sensitive information */
+ bzero(ctx, sizeof (*ctx));
+}
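+
+/*
+ * A minimal one-shot sketch (illustrative, compiled out), assuming the
+ * SHA256 mechanism constant used by the wrappers above: Init/Update/Final
+ * over a single buffer, producing a 32-byte digest.
+ */
+#if 0
+static void
+sha256_oneshot_example(const void *buf, size_t len, uint8_t digest[32])
+{
+ SHA2_CTX ctx;
+
+ SHA2Init(SHA256, &ctx);
+ SHA2Update(&ctx, buf, len);
+ SHA2Final(digest, &ctx); /* also zeroizes the context */
+}
+#endif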
+
+#ifdef _KERNEL
+EXPORT_SYMBOL(SHA2Init);
+EXPORT_SYMBOL(SHA2Update);
+EXPORT_SYMBOL(SHA2Final);
+#endif
diff --git a/zfs/module/icp/algs/skein/skein.c b/zfs/module/icp/algs/skein/skein.c
new file mode 100644
index 000000000000..0981eee08929
--- /dev/null
+++ b/zfs/module/icp/algs/skein/skein.c
@@ -0,0 +1,921 @@
+/*
+ * Implementation of the Skein hash function.
+ * Source code author: Doug Whiting, 2008.
+ * This algorithm and source code is released to the public domain.
+ */
+/* Copyright 2013 Doug Whiting. This code is released to the public domain. */
+
+#define SKEIN_PORT_CODE /* instantiate any code in skein_port.h */
+
+#include <sys/types.h>
+#include <sys/note.h>
+#include <sys/skein.h> /* get the Skein API definitions */
+#include "skein_impl.h" /* get internal definitions */
+
+/* External functions to process blkCnt (nonzero) full block(s) of data. */
+void Skein_256_Process_Block(Skein_256_Ctxt_t *ctx, const uint8_t *blkPtr,
+ size_t blkCnt, size_t byteCntAdd);
+void Skein_512_Process_Block(Skein_512_Ctxt_t *ctx, const uint8_t *blkPtr,
+ size_t blkCnt, size_t byteCntAdd);
+void Skein1024_Process_Block(Skein1024_Ctxt_t *ctx, const uint8_t *blkPtr,
+ size_t blkCnt, size_t byteCntAdd);
+
+/* 256-bit Skein */
+/* init the context for a straight hashing operation */
+int
+Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen)
+{
+ union {
+ uint8_t b[SKEIN_256_STATE_BYTES];
+ uint64_t w[SKEIN_256_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+
+ switch (hashBitLen) { /* use pre-computed values, where available */
+#ifndef SKEIN_NO_PRECOMP
+ case 256:
+ bcopy(SKEIN_256_IV_256, ctx->X, sizeof (ctx->X));
+ break;
+ case 224:
+ bcopy(SKEIN_256_IV_224, ctx->X, sizeof (ctx->X));
+ break;
+ case 160:
+ bcopy(SKEIN_256_IV_160, ctx->X, sizeof (ctx->X));
+ break;
+ case 128:
+ bcopy(SKEIN_256_IV_128, ctx->X, sizeof (ctx->X));
+ break;
+#endif
+ default:
+ /* here if there is no precomputed IV value available */
+ /*
+ * build/process the config block, type == CONFIG (could be
+ * precomputed)
+ */
+ /* set tweaks: T0=0; T1=CFG | FINAL */
+ Skein_Start_New_Type(ctx, CFG_FINAL);
+
+ /* set the schema, version */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ /* hash result length in bits */
+ cfg.w[1] = Skein_Swap64(hashBitLen);
+ cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+ /* zero pad config block */
+ bzero(&cfg.w[3], sizeof (cfg) - 3 * sizeof (cfg.w[0]));
+
+ /* compute the initial chaining values from config block */
+ /* zero the chaining variables */
+ bzero(ctx->X, sizeof (ctx->X));
+ Skein_256_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+ break;
+ }
+ /*
+ * The chaining vars ctx->X are now initialized for the given
+ * hashBitLen.
+ * Set up to process the data message portion of the hash (default)
+ */
+ Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */
+
+ return (SKEIN_SUCCESS);
+}
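+
+/*
+ * A minimal usage sketch (illustrative, compiled out): straight Skein-256
+ * hashing of a buffer into a 32-byte digest via Init/Update/Final.
+ */
+#if 0
+static int
+skein256_example(const uint8_t *buf, size_t len, uint8_t digest[32])
+{
+ Skein_256_Ctxt_t ctx;
+ int err;
+
+ if ((err = Skein_256_Init(&ctx, 256)) != SKEIN_SUCCESS)
+ return (err);
+ if ((err = Skein_256_Update(&ctx, buf, len)) != SKEIN_SUCCESS)
+ return (err);
+ return (Skein_256_Final(&ctx, digest));
+}
+#endif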
+
+/* init the context for a MAC and/or tree hash operation */
+/*
+ * [identical to Skein_256_Init() when keyBytes == 0 &&
+ * treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL]
+ */
+int
+Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
+ const uint8_t *key, size_t keyBytes)
+{
+ union {
+ uint8_t b[SKEIN_256_STATE_BYTES];
+ uint64_t w[SKEIN_256_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+ Skein_Assert(keyBytes == 0 || key != NULL, SKEIN_FAIL);
+
+ /* compute the initial chaining values ctx->X[], based on key */
+ if (keyBytes == 0) { /* is there a key? */
+ /* no key: use all zeroes as key for config block */
+ bzero(ctx->X, sizeof (ctx->X));
+ } else { /* here to pre-process a key */
+
+ Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X));
+ /* do a mini-Init right here */
+ /* set output hash bit count = state size */
+ ctx->h.hashBitLen = 8 * sizeof (ctx->X);
+ /* set tweaks: T0 = 0; T1 = KEY type */
+ Skein_Start_New_Type(ctx, KEY);
+ /* zero the initial chaining variables */
+ bzero(ctx->X, sizeof (ctx->X));
+ /* hash the key */
+ (void) Skein_256_Update(ctx, key, keyBytes);
+ /* put result into cfg.b[] */
+ (void) Skein_256_Final_Pad(ctx, cfg.b);
+ /* copy over into ctx->X[] */
+ bcopy(cfg.b, ctx->X, sizeof (cfg.b));
+#if SKEIN_NEED_SWAP
+ {
+ uint_t i;
+ /* convert key bytes to context words */
+ for (i = 0; i < SKEIN_256_STATE_WORDS; i++)
+ ctx->X[i] = Skein_Swap64(ctx->X[i]);
+ }
+#endif
+ }
+ /*
+ * build/process the config block, type == CONFIG (could be
+ * precomputed for each key)
+ */
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+ Skein_Start_New_Type(ctx, CFG_FINAL);
+
+ bzero(&cfg.w, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
+ cfg.w[2] = Skein_Swap64(treeInfo);
+
+ Skein_Show_Key(256, &ctx->h, key, keyBytes);
+
+ /* compute the initial chaining values from config block */
+ Skein_256_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+
+ /* The chaining vars ctx->X are now initialized */
+ /* Set up to process the data message portion of the hash (default) */
+ ctx->h.bCnt = 0; /* buffer b[] starts out empty */
+ Skein_Start_New_Type(ctx, MSG);
+
+ return (SKEIN_SUCCESS);
+}
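+
+/*
+ * A minimal MAC sketch (illustrative, compiled out): key the hash with
+ * Skein_256_InitExt() and sequential (non-tree) processing, then hash the
+ * message as usual.
+ */
+#if 0
+static int
+skein256_mac_example(const uint8_t *key, size_t keylen,
+ const uint8_t *msg, size_t msglen, uint8_t mac[32])
+{
+ Skein_256_Ctxt_t ctx;
+ int err;
+
+ err = Skein_256_InitExt(&ctx, 256, SKEIN_CFG_TREE_INFO_SEQUENTIAL,
+ key, keylen);
+ if (err != SKEIN_SUCCESS)
+ return (err);
+ (void) Skein_256_Update(&ctx, msg, msglen);
+ return (Skein_256_Final(&ctx, mac));
+}
+#endif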
+
+/* process the input bytes */
+int
+Skein_256_Update(Skein_256_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt)
+{
+ size_t n;
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL);
+
+ /* process full blocks, if any */
+ if (msgByteCnt + ctx->h.bCnt > SKEIN_256_BLOCK_BYTES) {
+ /* finish up any buffered message data */
+ if (ctx->h.bCnt) {
+ /* # bytes free in buffer b[] */
+ n = SKEIN_256_BLOCK_BYTES - ctx->h.bCnt;
+ if (n) {
+ /* check on our logic here */
+ Skein_assert(n < msgByteCnt);
+ bcopy(msg, &ctx->b[ctx->h.bCnt], n);
+ msgByteCnt -= n;
+ msg += n;
+ ctx->h.bCnt += n;
+ }
+ Skein_assert(ctx->h.bCnt == SKEIN_256_BLOCK_BYTES);
+ Skein_256_Process_Block(ctx, ctx->b, 1,
+ SKEIN_256_BLOCK_BYTES);
+ ctx->h.bCnt = 0;
+ }
+ /*
+ * now process any remaining full blocks, directly from input
+ * message data
+ */
+ if (msgByteCnt > SKEIN_256_BLOCK_BYTES) {
+ /* number of full blocks to process */
+ n = (msgByteCnt - 1) / SKEIN_256_BLOCK_BYTES;
+ Skein_256_Process_Block(ctx, msg, n,
+ SKEIN_256_BLOCK_BYTES);
+ msgByteCnt -= n * SKEIN_256_BLOCK_BYTES;
+ msg += n * SKEIN_256_BLOCK_BYTES;
+ }
+ Skein_assert(ctx->h.bCnt == 0);
+ }
+
+ /* copy any remaining source message data bytes into b[] */
+ if (msgByteCnt) {
+ Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES);
+ bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt);
+ ctx->h.bCnt += msgByteCnt;
+ }
+
+ return (SKEIN_SUCCESS);
+}
+
+/* finalize the hash computation and output the result */
+int
+Skein_256_Final(Skein_256_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ size_t i, n, byteCnt;
+ uint64_t X[SKEIN_256_STATE_WORDS];
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL);
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ /* zero pad b[] if necessary */
+ if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES)
+ bzero(&ctx->b[ctx->h.bCnt],
+ SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
+
+ /* process the final block */
+ Skein_256_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+ /* now output the result */
+ /* total number of output bytes */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3;
+
+ /* run Threefish in "counter mode" to generate output */
+ /* zero out b[], so it can hold the counter */
+ bzero(ctx->b, sizeof (ctx->b));
+ /* keep a local copy of counter mode "key" */
+ bcopy(ctx->X, X, sizeof (X));
+ for (i = 0; i * SKEIN_256_BLOCK_BYTES < byteCnt; i++) {
+ /* build the counter block */
+ uint64_t tmp = Skein_Swap64((uint64_t)i);
+ bcopy(&tmp, ctx->b, sizeof (tmp));
+ Skein_Start_New_Type(ctx, OUT_FINAL);
+ /* run "counter mode" */
+ Skein_256_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+ /* number of output bytes left to go */
+ n = byteCnt - i * SKEIN_256_BLOCK_BYTES;
+ if (n >= SKEIN_256_BLOCK_BYTES)
+ n = SKEIN_256_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal + i * SKEIN_256_BLOCK_BYTES,
+ ctx->X, n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(256, &ctx->h, n,
+ hashVal + i * SKEIN_256_BLOCK_BYTES);
+ /* restore the counter mode key for next time */
+ bcopy(X, ctx->X, sizeof (X));
+ }
+ return (SKEIN_SUCCESS);
+}
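+
+/*
+ * Because the output stage runs Threefish in counter mode, hashBitLen is
+ * not limited by the state size: e.g. a context set up with
+ * Skein_256_Init(&ctx, 512) makes the loop above emit two 32-byte counter
+ * blocks (i = 0 and i = 1) for a 64-byte digest.
+ */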
+
+/* 512-bit Skein */
+
+/* init the context for a straight hashing operation */
+int
+Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen)
+{
+ union {
+ uint8_t b[SKEIN_512_STATE_BYTES];
+ uint64_t w[SKEIN_512_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+
+ switch (hashBitLen) { /* use pre-computed values, where available */
+#ifndef SKEIN_NO_PRECOMP
+ case 512:
+ bcopy(SKEIN_512_IV_512, ctx->X, sizeof (ctx->X));
+ break;
+ case 384:
+ bcopy(SKEIN_512_IV_384, ctx->X, sizeof (ctx->X));
+ break;
+ case 256:
+ bcopy(SKEIN_512_IV_256, ctx->X, sizeof (ctx->X));
+ break;
+ case 224:
+ bcopy(SKEIN_512_IV_224, ctx->X, sizeof (ctx->X));
+ break;
+#endif
+ default:
+ /*
+ * here if there is no precomputed IV value available
+ * build/process the config block, type == CONFIG (could be
+ * precomputed)
+ */
+ /* set tweaks: T0=0; T1=CFG | FINAL */
+ Skein_Start_New_Type(ctx, CFG_FINAL);
+
+ /* set the schema, version */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ /* hash result length in bits */
+ cfg.w[1] = Skein_Swap64(hashBitLen);
+ cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+ /* zero pad config block */
+ bzero(&cfg.w[3], sizeof (cfg) - 3 * sizeof (cfg.w[0]));
+
+ /* compute the initial chaining values from config block */
+ /* zero the chaining variables */
+ bzero(ctx->X, sizeof (ctx->X));
+ Skein_512_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+ break;
+ }
+
+ /*
+ * The chaining vars ctx->X are now initialized for the given
+ * hashBitLen. Set up to process the data message portion of the
+ * hash (default)
+ */
+ Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */
+
+ return (SKEIN_SUCCESS);
+}
+
+/* init the context for a MAC and/or tree hash operation */
+/*
+ * [identical to Skein_512_Init() when keyBytes == 0 &&
+ * treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL]
+ */
+int
+Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
+ const uint8_t *key, size_t keyBytes)
+{
+ union {
+ uint8_t b[SKEIN_512_STATE_BYTES];
+ uint64_t w[SKEIN_512_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+ Skein_Assert(keyBytes == 0 || key != NULL, SKEIN_FAIL);
+
+ /* compute the initial chaining values ctx->X[], based on key */
+ if (keyBytes == 0) { /* is there a key? */
+ /* no key: use all zeroes as key for config block */
+ bzero(ctx->X, sizeof (ctx->X));
+ } else { /* here to pre-process a key */
+
+ Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X));
+ /* do a mini-Init right here */
+ /* set output hash bit count = state size */
+ ctx->h.hashBitLen = 8 * sizeof (ctx->X);
+ /* set tweaks: T0 = 0; T1 = KEY type */
+ Skein_Start_New_Type(ctx, KEY);
+ /* zero the initial chaining variables */
+ bzero(ctx->X, sizeof (ctx->X));
+ (void) Skein_512_Update(ctx, key, keyBytes); /* hash the key */
+ /* put result into cfg.b[] */
+ (void) Skein_512_Final_Pad(ctx, cfg.b);
+ /* copy over into ctx->X[] */
+ bcopy(cfg.b, ctx->X, sizeof (cfg.b));
+#if SKEIN_NEED_SWAP
+ {
+ uint_t i;
+ /* convert key bytes to context words */
+ for (i = 0; i < SKEIN_512_STATE_WORDS; i++)
+ ctx->X[i] = Skein_Swap64(ctx->X[i]);
+ }
+#endif
+ }
+ /*
+ * build/process the config block, type == CONFIG (could be
+ * precomputed for each key)
+ */
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+ Skein_Start_New_Type(ctx, CFG_FINAL);
+
+ bzero(&cfg.w, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */
+ /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
+ cfg.w[2] = Skein_Swap64(treeInfo);
+
+ Skein_Show_Key(512, &ctx->h, key, keyBytes);
+
+ /* compute the initial chaining values from config block */
+ Skein_512_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+
+ /* The chaining vars ctx->X are now initialized */
+ /* Set up to process the data message portion of the hash (default) */
+ ctx->h.bCnt = 0; /* buffer b[] starts out empty */
+ Skein_Start_New_Type(ctx, MSG);
+
+ return (SKEIN_SUCCESS);
+}
+
+/* process the input bytes */
+int
+Skein_512_Update(Skein_512_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt)
+{
+ size_t n;
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);
+
+ /* process full blocks, if any */
+ if (msgByteCnt + ctx->h.bCnt > SKEIN_512_BLOCK_BYTES) {
+ /* finish up any buffered message data */
+ if (ctx->h.bCnt) {
+ /* # bytes free in buffer b[] */
+ n = SKEIN_512_BLOCK_BYTES - ctx->h.bCnt;
+ if (n) {
+ /* check on our logic here */
+ Skein_assert(n < msgByteCnt);
+ bcopy(msg, &ctx->b[ctx->h.bCnt], n);
+ msgByteCnt -= n;
+ msg += n;
+ ctx->h.bCnt += n;
+ }
+ Skein_assert(ctx->h.bCnt == SKEIN_512_BLOCK_BYTES);
+ Skein_512_Process_Block(ctx, ctx->b, 1,
+ SKEIN_512_BLOCK_BYTES);
+ ctx->h.bCnt = 0;
+ }
+ /*
+ * now process any remaining full blocks, directly from input
+ * message data
+ */
+ if (msgByteCnt > SKEIN_512_BLOCK_BYTES) {
+ /* number of full blocks to process */
+ n = (msgByteCnt - 1) / SKEIN_512_BLOCK_BYTES;
+ Skein_512_Process_Block(ctx, msg, n,
+ SKEIN_512_BLOCK_BYTES);
+ msgByteCnt -= n * SKEIN_512_BLOCK_BYTES;
+ msg += n * SKEIN_512_BLOCK_BYTES;
+ }
+ Skein_assert(ctx->h.bCnt == 0);
+ }
+
+ /* copy any remaining source message data bytes into b[] */
+ if (msgByteCnt) {
+ Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES);
+ bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt);
+ ctx->h.bCnt += msgByteCnt;
+ }
+
+ return (SKEIN_SUCCESS);
+}
+
+/* finalize the hash computation and output the result */
+int
+Skein_512_Final(Skein_512_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ size_t i, n, byteCnt;
+ uint64_t X[SKEIN_512_STATE_WORDS];
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ /* zero pad b[] if necessary */
+ if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES)
+ bzero(&ctx->b[ctx->h.bCnt],
+ SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
+
+ /* process the final block */
+ Skein_512_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+ /* now output the result */
+ /* total number of output bytes */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3;
+
+ /* run Threefish in "counter mode" to generate output */
+ /* zero out b[], so it can hold the counter */
+ bzero(ctx->b, sizeof (ctx->b));
+ /* keep a local copy of counter mode "key" */
+ bcopy(ctx->X, X, sizeof (X));
+ for (i = 0; i * SKEIN_512_BLOCK_BYTES < byteCnt; i++) {
+ /* build the counter block */
+ uint64_t tmp = Skein_Swap64((uint64_t)i);
+ bcopy(&tmp, ctx->b, sizeof (tmp));
+ Skein_Start_New_Type(ctx, OUT_FINAL);
+ /* run "counter mode" */
+ Skein_512_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+ /* number of output bytes left to go */
+ n = byteCnt - i * SKEIN_512_BLOCK_BYTES;
+ if (n >= SKEIN_512_BLOCK_BYTES)
+ n = SKEIN_512_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal + i * SKEIN_512_BLOCK_BYTES,
+ ctx->X, n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(512, &ctx->h, n,
+ hashVal + i * SKEIN_512_BLOCK_BYTES);
+ /* restore the counter mode key for next time */
+ bcopy(X, ctx->X, sizeof (X));
+ }
+ return (SKEIN_SUCCESS);
+}
+
+/* 1024-bit Skein */
+
+/* init the context for a straight hashing operation */
+int
+Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen)
+{
+ union {
+ uint8_t b[SKEIN1024_STATE_BYTES];
+ uint64_t w[SKEIN1024_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+
+ switch (hashBitLen) { /* use pre-computed values, where available */
+#ifndef SKEIN_NO_PRECOMP
+ case 512:
+ bcopy(SKEIN1024_IV_512, ctx->X, sizeof (ctx->X));
+ break;
+ case 384:
+ bcopy(SKEIN1024_IV_384, ctx->X, sizeof (ctx->X));
+ break;
+ case 1024:
+ bcopy(SKEIN1024_IV_1024, ctx->X, sizeof (ctx->X));
+ break;
+#endif
+ default:
+ /* here if there is no precomputed IV value available */
+ /*
+ * build/process the config block, type == CONFIG (could be
+ * precomputed)
+ */
+ /* set tweaks: T0=0; T1=CFG | FINAL */
+ Skein_Start_New_Type(ctx, CFG_FINAL);
+
+ /* set the schema, version */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ /* hash result length in bits */
+ cfg.w[1] = Skein_Swap64(hashBitLen);
+ cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL);
+ /* zero pad config block */
+ bzero(&cfg.w[3], sizeof (cfg) - 3 * sizeof (cfg.w[0]));
+
+ /* compute the initial chaining values from config block */
+ /* zero the chaining variables */
+ bzero(ctx->X, sizeof (ctx->X));
+ Skein1024_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+ break;
+ }
+
+ /*
+ * The chaining vars ctx->X are now initialized for the given
+ * hashBitLen. Set up to process the data message portion of the hash
+ * (default)
+ */
+ Skein_Start_New_Type(ctx, MSG); /* T0=0, T1= MSG type */
+
+ return (SKEIN_SUCCESS);
+}
+
+/* init the context for a MAC and/or tree hash operation */
+/*
+ * [identical to Skein1024_Init() when keyBytes == 0 &&
+ * treeInfo == SKEIN_CFG_TREE_INFO_SEQUENTIAL]
+ */
+int
+Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo,
+ const uint8_t *key, size_t keyBytes)
+{
+ union {
+ uint8_t b[SKEIN1024_STATE_BYTES];
+ uint64_t w[SKEIN1024_STATE_WORDS];
+ } cfg; /* config block */
+
+ Skein_Assert(hashBitLen > 0, SKEIN_BAD_HASHLEN);
+ Skein_Assert(keyBytes == 0 || key != NULL, SKEIN_FAIL);
+
+ /* compute the initial chaining values ctx->X[], based on key */
+ if (keyBytes == 0) { /* is there a key? */
+ /* no key: use all zeroes as key for config block */
+ bzero(ctx->X, sizeof (ctx->X));
+ } else { /* here to pre-process a key */
+ Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X));
+ /* do a mini-Init right here */
+ /* set output hash bit count = state size */
+ ctx->h.hashBitLen = 8 * sizeof (ctx->X);
+ /* set tweaks: T0 = 0; T1 = KEY type */
+ Skein_Start_New_Type(ctx, KEY);
+ /* zero the initial chaining variables */
+ bzero(ctx->X, sizeof (ctx->X));
+ (void) Skein1024_Update(ctx, key, keyBytes); /* hash the key */
+ /* put result into cfg.b[] */
+ (void) Skein1024_Final_Pad(ctx, cfg.b);
+ /* copy over into ctx->X[] */
+ bcopy(cfg.b, ctx->X, sizeof (cfg.b));
+#if SKEIN_NEED_SWAP
+ {
+ uint_t i;
+ /* convert key bytes to context words */
+ for (i = 0; i < SKEIN1024_STATE_WORDS; i++)
+ ctx->X[i] = Skein_Swap64(ctx->X[i]);
+ }
+#endif
+ }
+ /*
+ * build/process the config block, type == CONFIG (could be
+ * precomputed for each key)
+ */
+ ctx->h.hashBitLen = hashBitLen; /* output hash bit count */
+ Skein_Start_New_Type(ctx, CFG_FINAL);
+
+ bzero(&cfg.w, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */
+ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER);
+ /* hash result length in bits */
+ cfg.w[1] = Skein_Swap64(hashBitLen);
+ /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */
+ cfg.w[2] = Skein_Swap64(treeInfo);
+
+ Skein_Show_Key(1024, &ctx->h, key, keyBytes);
+
+ /* compute the initial chaining values from config block */
+ Skein1024_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN);
+
+ /* The chaining vars ctx->X are now initialized */
+ /* Set up to process the data message portion of the hash (default) */
+ ctx->h.bCnt = 0; /* buffer b[] starts out empty */
+ Skein_Start_New_Type(ctx, MSG);
+
+ return (SKEIN_SUCCESS);
+}
+
+/* process the input bytes */
+int
+Skein1024_Update(Skein1024_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt)
+{
+ size_t n;
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL);
+
+ /* process full blocks, if any */
+ if (msgByteCnt + ctx->h.bCnt > SKEIN1024_BLOCK_BYTES) {
+ /* finish up any buffered message data */
+ if (ctx->h.bCnt) {
+ /* # bytes free in buffer b[] */
+ n = SKEIN1024_BLOCK_BYTES - ctx->h.bCnt;
+ if (n) {
+ /* check on our logic here */
+ Skein_assert(n < msgByteCnt);
+ bcopy(msg, &ctx->b[ctx->h.bCnt], n);
+ msgByteCnt -= n;
+ msg += n;
+ ctx->h.bCnt += n;
+ }
+ Skein_assert(ctx->h.bCnt == SKEIN1024_BLOCK_BYTES);
+ Skein1024_Process_Block(ctx, ctx->b, 1,
+ SKEIN1024_BLOCK_BYTES);
+ ctx->h.bCnt = 0;
+ }
+ /*
+ * now process any remaining full blocks, directly from
+ * input message data
+ */
+ if (msgByteCnt > SKEIN1024_BLOCK_BYTES) {
+ /* number of full blocks to process */
+ n = (msgByteCnt - 1) / SKEIN1024_BLOCK_BYTES;
+ Skein1024_Process_Block(ctx, msg, n,
+ SKEIN1024_BLOCK_BYTES);
+ msgByteCnt -= n * SKEIN1024_BLOCK_BYTES;
+ msg += n * SKEIN1024_BLOCK_BYTES;
+ }
+ Skein_assert(ctx->h.bCnt == 0);
+ }
+
+ /* copy any remaining source message data bytes into b[] */
+ if (msgByteCnt) {
+ Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES);
+ bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt);
+ ctx->h.bCnt += msgByteCnt;
+ }
+
+ return (SKEIN_SUCCESS);
+}
+
+/* finalize the hash computation and output the result */
+int
+Skein1024_Final(Skein1024_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ size_t i, n, byteCnt;
+ uint64_t X[SKEIN1024_STATE_WORDS];
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL);
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ /* zero pad b[] if necessary */
+ if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES)
+ bzero(&ctx->b[ctx->h.bCnt],
+ SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
+
+ /* process the final block */
+ Skein1024_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+ /* now output the result */
+ /* total number of output bytes */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3;
+
+ /* run Threefish in "counter mode" to generate output */
+ /* zero out b[], so it can hold the counter */
+ bzero(ctx->b, sizeof (ctx->b));
+ /* keep a local copy of counter mode "key" */
+ bcopy(ctx->X, X, sizeof (X));
+ for (i = 0; i * SKEIN1024_BLOCK_BYTES < byteCnt; i++) {
+ /* build the counter block */
+ uint64_t tmp = Skein_Swap64((uint64_t)i);
+ bcopy(&tmp, ctx->b, sizeof (tmp));
+ Skein_Start_New_Type(ctx, OUT_FINAL);
+ /* run "counter mode" */
+ Skein1024_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+ /* number of output bytes left to go */
+ n = byteCnt - i * SKEIN1024_BLOCK_BYTES;
+ if (n >= SKEIN1024_BLOCK_BYTES)
+ n = SKEIN1024_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal + i * SKEIN1024_BLOCK_BYTES,
+ ctx->X, n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(1024, &ctx->h, n,
+ hashVal + i * SKEIN1024_BLOCK_BYTES);
+ /* restore the counter mode key for next time */
+ bcopy(X, ctx->X, sizeof (X));
+ }
+ return (SKEIN_SUCCESS);
+}
+
+/* Functions to support MAC/tree hashing */
+/* (this code is identical for Optimized and Reference versions) */
+
+/* finalize the hash computation and output the block, no OUTPUT stage */
+int
+Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL);
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ /* zero pad b[] if necessary */
+ if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES)
+ bzero(&ctx->b[ctx->h.bCnt],
+ SKEIN_256_BLOCK_BYTES - ctx->h.bCnt);
+ /* process the final block */
+ Skein_256_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+ /* "output" the state bytes */
+ Skein_Put64_LSB_First(hashVal, ctx->X, SKEIN_256_BLOCK_BYTES);
+
+ return (SKEIN_SUCCESS);
+}
+
+/* finalize the hash computation and output the block, no OUTPUT stage */
+int
+Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);
+
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */
+ /* zero pad b[] if necessary */
+ if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES)
+ bzero(&ctx->b[ctx->h.bCnt],
+ SKEIN_512_BLOCK_BYTES - ctx->h.bCnt);
+ /* process the final block */
+ Skein_512_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+ /* "output" the state bytes */
+ Skein_Put64_LSB_First(hashVal, ctx->X, SKEIN_512_BLOCK_BYTES);
+
+ return (SKEIN_SUCCESS);
+}
+
+/* finalize the hash computation and output the block, no OUTPUT stage */
+int
+Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL);
+
+ /* tag as the final block */
+ ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL;
+ /* zero pad b[] if necessary */
+ if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES)
+ bzero(&ctx->b[ctx->h.bCnt],
+ SKEIN1024_BLOCK_BYTES - ctx->h.bCnt);
+ /* process the final block */
+ Skein1024_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt);
+
+ /* "output" the state bytes */
+ Skein_Put64_LSB_First(hashVal, ctx->X, SKEIN1024_BLOCK_BYTES);
+
+ return (SKEIN_SUCCESS);
+}
+
+#if SKEIN_TREE_HASH
+/* just do the OUTPUT stage */
+int
+Skein_256_Output(Skein_256_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ size_t i, n, byteCnt;
+ uint64_t X[SKEIN_256_STATE_WORDS];
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES, SKEIN_FAIL);
+
+ /* now output the result */
+ /* total number of output bytes */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3;
+
+ /* run Threefish in "counter mode" to generate output */
+ /* zero out b[], so it can hold the counter */
+ bzero(ctx->b, sizeof (ctx->b));
+ /* keep a local copy of counter mode "key" */
+ bcopy(ctx->X, X, sizeof (X));
+ for (i = 0; i * SKEIN_256_BLOCK_BYTES < byteCnt; i++) {
+ /* build the counter block */
+ uint64_t tmp = Skein_Swap64((uint64_t)i);
+ bcopy(&tmp, ctx->b, sizeof (tmp));
+ Skein_Start_New_Type(ctx, OUT_FINAL);
+ /* run "counter mode" */
+ Skein_256_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+ /* number of output bytes left to go */
+ n = byteCnt - i * SKEIN_256_BLOCK_BYTES;
+ if (n >= SKEIN_256_BLOCK_BYTES)
+ n = SKEIN_256_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal + i * SKEIN_256_BLOCK_BYTES,
+ ctx->X, n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(256, &ctx->h, n,
+ hashVal + i * SKEIN_256_BLOCK_BYTES);
+ /* restore the counter mode key for next time */
+ bcopy(X, ctx->X, sizeof (X));
+ }
+ return (SKEIN_SUCCESS);
+}
+
+/* just do the OUTPUT stage */
+int
+Skein_512_Output(Skein_512_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ size_t i, n, byteCnt;
+ uint64_t X[SKEIN_512_STATE_WORDS];
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES, SKEIN_FAIL);
+
+ /* now output the result */
+ /* total number of output bytes */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3;
+
+ /* run Threefish in "counter mode" to generate output */
+ /* zero out b[], so it can hold the counter */
+ bzero(ctx->b, sizeof (ctx->b));
+ /* keep a local copy of counter mode "key" */
+ bcopy(ctx->X, X, sizeof (X));
+ for (i = 0; i * SKEIN_512_BLOCK_BYTES < byteCnt; i++) {
+ /* build the counter block */
+ uint64_t tmp = Skein_Swap64((uint64_t)i);
+ bcopy(&tmp, ctx->b, sizeof (tmp));
+ Skein_Start_New_Type(ctx, OUT_FINAL);
+ /* run "counter mode" */
+ Skein_512_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+ /* number of output bytes left to go */
+ n = byteCnt - i * SKEIN_512_BLOCK_BYTES;
+ if (n >= SKEIN_512_BLOCK_BYTES)
+ n = SKEIN_512_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal + i * SKEIN_512_BLOCK_BYTES,
+ ctx->X, n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(512, &ctx->h, n,
+ hashVal + i * SKEIN_512_BLOCK_BYTES);
+ /* restore the counter mode key for next time */
+ bcopy(X, ctx->X, sizeof (X));
+ }
+ return (SKEIN_SUCCESS);
+}
+
+/* just do the OUTPUT stage */
+int
+Skein1024_Output(Skein1024_Ctxt_t *ctx, uint8_t *hashVal)
+{
+ size_t i, n, byteCnt;
+ uint64_t X[SKEIN1024_STATE_WORDS];
+
+ /* catch uninitialized context */
+ Skein_Assert(ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES, SKEIN_FAIL);
+
+ /* now output the result */
+ /* total number of output bytes */
+ byteCnt = (ctx->h.hashBitLen + 7) >> 3;
+
+ /* run Threefish in "counter mode" to generate output */
+ /* zero out b[], so it can hold the counter */
+ bzero(ctx->b, sizeof (ctx->b));
+ /* keep a local copy of counter mode "key" */
+ bcopy(ctx->X, X, sizeof (X));
+ for (i = 0; i * SKEIN1024_BLOCK_BYTES < byteCnt; i++) {
+ /* build the counter block */
+ uint64_t tmp = Skein_Swap64((uint64_t)i);
+ bcopy(&tmp, ctx->b, sizeof (tmp));
+ Skein_Start_New_Type(ctx, OUT_FINAL);
+ /* run "counter mode" */
+ Skein1024_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t));
+ /* number of output bytes left to go */
+ n = byteCnt - i * SKEIN1024_BLOCK_BYTES;
+ if (n >= SKEIN1024_BLOCK_BYTES)
+ n = SKEIN1024_BLOCK_BYTES;
+ Skein_Put64_LSB_First(hashVal + i * SKEIN1024_BLOCK_BYTES,
+ ctx->X, n); /* "output" the ctr mode bytes */
+ Skein_Show_Final(1024, &ctx->h, n,
+ hashVal + i * SKEIN1024_BLOCK_BYTES);
+ /* restore the counter mode key for next time */
+ bcopy(X, ctx->X, sizeof (X));
+ }
+ return (SKEIN_SUCCESS);
+}
+#endif
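+
+/*
+ * For MAC/tree composition, finalization can be split in two: a sketch
+ * (illustrative, compiled out) of the two-stage form, which is roughly
+ * equivalent to a single Skein_256_Final() call.  Skein_256_Final_Pad()
+ * processes the padded final block and emits the raw chaining state (for
+ * feeding a parent tree node), and Skein_256_Output() then runs just the
+ * counter-mode output stage.
+ */
+#if 0
+static int
+skein256_two_stage_example(Skein_256_Ctxt_t *ctx, uint8_t digest[32])
+{
+ uint8_t state[SKEIN_256_BLOCK_BYTES]; /* raw chaining value */
+ int err;
+
+ if ((err = Skein_256_Final_Pad(ctx, state)) != SKEIN_SUCCESS)
+ return (err);
+ return (Skein_256_Output(ctx, digest));
+}
+#endif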
+
+#ifdef _KERNEL
+EXPORT_SYMBOL(Skein_512_Init);
+EXPORT_SYMBOL(Skein_512_InitExt);
+EXPORT_SYMBOL(Skein_512_Update);
+EXPORT_SYMBOL(Skein_512_Final);
+#endif
diff --git a/zfs/module/icp/algs/skein/skein_block.c b/zfs/module/icp/algs/skein/skein_block.c
new file mode 100644
index 000000000000..6d85cb7d9e98
--- /dev/null
+++ b/zfs/module/icp/algs/skein/skein_block.c
@@ -0,0 +1,790 @@
+/*
+ * Implementation of the Skein block functions.
+ * Source code author: Doug Whiting, 2008.
+ * This algorithm and source code is released to the public domain.
+ * Compile-time switches:
+ * SKEIN_USE_ASM -- set bits (256/512/1024) to select which
+ * versions use ASM code for block processing
+ * [default: use C for all block sizes]
+ */
+/* Copyright 2013 Doug Whiting. This code is released to the public domain. */
+
+#include <sys/skein.h>
+#include "skein_impl.h"
+#include <sys/isa_defs.h> /* for _ILP32 */
+
+#ifndef SKEIN_USE_ASM
+#define SKEIN_USE_ASM (0) /* default is all C code (no ASM) */
+#endif
+
+#ifndef SKEIN_LOOP
+/*
+ * The low-level checksum routines use a lot of stack space. On systems
+ * where small stack frames are enforced (like 32-bit kernel builds), do
+ * not unroll the checksum calculations, in order to save stack space.
+ *
+ * Even with no loops unrolled, we can still exceed the 1k stack frame
+ * limit in Skein1024_Process_Block() (it hits 1272 bytes on ARM32). We
+ * can safely ignore that, since the checksum functions will be called
+ * from a worker thread that won't be using much stack. That's why we
+ * have the #pragma here to ignore the warning.
+ */
+#if defined(_ILP32) || defined(__powerpc) /* Assume small stack */
+#pragma GCC diagnostic ignored "-Wframe-larger-than="
+/*
+ * We're running on 32-bit; don't unroll loops, to save stack frame space.
+ *
+ * Due to the way SKEIN_LOOP is decoded in Skein_*_Process_Block(),
+ * a value of 111 disables loop unrolling in all of those functions.
+ */
+#define SKEIN_LOOP 111
+#else
+/* We're compiling with large stacks */
+#define SKEIN_LOOP 001 /* default: unroll 256 and 512, but not 1024 */
+#endif
+#endif
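+
+/*
+ * The three decimal digits of SKEIN_LOOP select, in order, the unroll
+ * factor for the 256-, 512- and 1024-bit block functions, where a digit
+ * of 0 means "fully unrolled".  The default of 001 therefore fully
+ * unrolls Skein-256 and Skein-512 but compiles Skein-1024 as a plain
+ * loop, while 111 compiles all three as plain loops:
+ *
+ * SKEIN_UNROLL_256  = (SKEIN_LOOP / 100) % 10
+ * SKEIN_UNROLL_512  = (SKEIN_LOOP / 10) % 10
+ * SKEIN_UNROLL_1024 = SKEIN_LOOP % 10
+ */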
+
+/* some useful definitions for code here */
+#define BLK_BITS (WCNT*64)
+#define KW_TWK_BASE (0)
+#define KW_KEY_BASE (3)
+#define ks (kw + KW_KEY_BASE)
+#define ts (kw + KW_TWK_BASE)
+
+/* no debugging in Illumos version */
+#define DebugSaveTweak(ctx)
+
+/* Skein_256 */
+#if !(SKEIN_USE_ASM & 256)
+void
+Skein_256_Process_Block(Skein_256_Ctxt_t *ctx, const uint8_t *blkPtr,
+ size_t blkCnt, size_t byteCntAdd)
+{
+ enum {
+ WCNT = SKEIN_256_STATE_WORDS
+ };
+#undef RCNT
+#define RCNT (SKEIN_256_ROUNDS_TOTAL / 8)
+
+#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
+#define SKEIN_UNROLL_256 (((SKEIN_LOOP) / 100) % 10)
+#else
+#define SKEIN_UNROLL_256 (0)
+#endif
+
+#if SKEIN_UNROLL_256
+#if (RCNT % SKEIN_UNROLL_256)
+#error "Invalid SKEIN_UNROLL_256" /* sanity check on unroll count */
+#endif
+ size_t r;
+ /* key schedule words : chaining vars + tweak + "rotation" */
+ uint64_t kw[WCNT + 4 + RCNT * 2];
+#else
+ uint64_t kw[WCNT + 4]; /* key schedule words : chaining vars + tweak */
+#endif
+ /* local copy of context vars, for speed */
+ uint64_t X0, X1, X2, X3;
+ uint64_t w[WCNT]; /* local copy of input block */
+#ifdef SKEIN_DEBUG
+ /* use for debugging (help compiler put Xn in registers) */
+ const uint64_t *Xptr[4];
+ Xptr[0] = &X0;
+ Xptr[1] = &X1;
+ Xptr[2] = &X2;
+ Xptr[3] = &X3;
+#endif
+ Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
+ ts[0] = ctx->h.T[0];
+ ts[1] = ctx->h.T[1];
+ do {
+ /*
+ * this implementation only supports 2**64 input bytes
+ * (no carry out here)
+ */
+ ts[0] += byteCntAdd; /* update processed length */
+
+ /* precompute the key schedule for this block */
+ ks[0] = ctx->X[0];
+ ks[1] = ctx->X[1];
+ ks[2] = ctx->X[2];
+ ks[3] = ctx->X[3];
+ ks[4] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^ SKEIN_KS_PARITY;
+
+ ts[2] = ts[0] ^ ts[1];
+
+ /* get input block in little-endian format */
+ Skein_Get64_LSB_First(w, blkPtr, WCNT);
+ DebugSaveTweak(ctx);
+ Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
+
+ X0 = w[0] + ks[0]; /* do the first full key injection */
+ X1 = w[1] + ks[1] + ts[0];
+ X2 = w[2] + ks[2] + ts[1];
+ X3 = w[3] + ks[3];
+
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
+ Xptr); /* show starting state values */
+
+ blkPtr += SKEIN_256_BLOCK_BYTES;
+
+ /* run the rounds */
+
+#define Round256(p0, p1, p2, p3, ROT, rNum) \
+ X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0; \
+ X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2;
+
+#if SKEIN_UNROLL_256 == 0
+#define R256(p0, p1, p2, p3, ROT, rNum) /* fully unrolled */ \
+ Round256(p0, p1, p2, p3, ROT, rNum) \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr);
+
+#define I256(R) \
+ X0 += ks[((R) + 1) % 5]; /* inject the key schedule value */ \
+ X1 += ks[((R) + 2) % 5] + ts[((R) + 1) % 3]; \
+ X2 += ks[((R) + 3) % 5] + ts[((R) + 2) % 3]; \
+ X3 += ks[((R) + 4) % 5] + (R) + 1; \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+#else /* looping version */
+#define R256(p0, p1, p2, p3, ROT, rNum) \
+ Round256(p0, p1, p2, p3, ROT, rNum) \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr);
+
+#define I256(R) \
+ X0 += ks[r + (R) + 0]; /* inject the key schedule value */ \
+ X1 += ks[r + (R) + 1] + ts[r + (R) + 0]; \
+ X2 += ks[r + (R) + 2] + ts[r + (R) + 1]; \
+ X3 += ks[r + (R) + 3] + r + (R); \
+ ks[r + (R) + 4] = ks[r + (R) - 1]; /* rotate key schedule */ \
+ ts[r + (R) + 2] = ts[r + (R) - 1]; \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+
+ /* loop thru it */
+ for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_256)
+#endif
+ {
+#define R256_8_rounds(R) \
+ R256(0, 1, 2, 3, R_256_0, 8 * (R) + 1); \
+ R256(0, 3, 2, 1, R_256_1, 8 * (R) + 2); \
+ R256(0, 1, 2, 3, R_256_2, 8 * (R) + 3); \
+ R256(0, 3, 2, 1, R_256_3, 8 * (R) + 4); \
+ I256(2 * (R)); \
+ R256(0, 1, 2, 3, R_256_4, 8 * (R) + 5); \
+ R256(0, 3, 2, 1, R_256_5, 8 * (R) + 6); \
+ R256(0, 1, 2, 3, R_256_6, 8 * (R) + 7); \
+ R256(0, 3, 2, 1, R_256_7, 8 * (R) + 8); \
+ I256(2 * (R) + 1);
+
+ R256_8_rounds(0);
+
+#define R256_Unroll_R(NN) \
+ ((SKEIN_UNROLL_256 == 0 && SKEIN_256_ROUNDS_TOTAL / 8 > (NN)) || \
+ (SKEIN_UNROLL_256 > (NN)))
+
+#if R256_Unroll_R(1)
+ R256_8_rounds(1);
+#endif
+#if R256_Unroll_R(2)
+ R256_8_rounds(2);
+#endif
+#if R256_Unroll_R(3)
+ R256_8_rounds(3);
+#endif
+#if R256_Unroll_R(4)
+ R256_8_rounds(4);
+#endif
+#if R256_Unroll_R(5)
+ R256_8_rounds(5);
+#endif
+#if R256_Unroll_R(6)
+ R256_8_rounds(6);
+#endif
+#if R256_Unroll_R(7)
+ R256_8_rounds(7);
+#endif
+#if R256_Unroll_R(8)
+ R256_8_rounds(8);
+#endif
+#if R256_Unroll_R(9)
+ R256_8_rounds(9);
+#endif
+#if R256_Unroll_R(10)
+ R256_8_rounds(10);
+#endif
+#if R256_Unroll_R(11)
+ R256_8_rounds(11);
+#endif
+#if R256_Unroll_R(12)
+ R256_8_rounds(12);
+#endif
+#if R256_Unroll_R(13)
+ R256_8_rounds(13);
+#endif
+#if R256_Unroll_R(14)
+ R256_8_rounds(14);
+#endif
+#if (SKEIN_UNROLL_256 > 14)
+#error "need more unrolling in Skein_256_Process_Block"
+#endif
+ }
+ /*
+ * do the final "feedforward" xor, update context chaining vars
+ */
+ ctx->X[0] = X0 ^ w[0];
+ ctx->X[1] = X1 ^ w[1];
+ ctx->X[2] = X2 ^ w[2];
+ ctx->X[3] = X3 ^ w[3];
+
+ Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
+
+ ts[1] &= ~SKEIN_T1_FLAG_FIRST;
+ } while (--blkCnt);
+ ctx->h.T[0] = ts[0];
+ ctx->h.T[1] = ts[1];
+}
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t
+Skein_256_Process_Block_CodeSize(void)
+{
+ return ((uint8_t *)Skein_256_Process_Block_CodeSize) -
+ ((uint8_t *)Skein_256_Process_Block);
+}
+
+uint_t
+Skein_256_Unroll_Cnt(void)
+{
+ return (SKEIN_UNROLL_256);
+}
+#endif
+#endif
+
+/* Skein_512 */
+#if !(SKEIN_USE_ASM & 512)
+void
+Skein_512_Process_Block(Skein_512_Ctxt_t *ctx, const uint8_t *blkPtr,
+ size_t blkCnt, size_t byteCntAdd)
+{
+ enum {
+ WCNT = SKEIN_512_STATE_WORDS
+ };
+#undef RCNT
+#define RCNT (SKEIN_512_ROUNDS_TOTAL / 8)
+
+#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
+#define SKEIN_UNROLL_512 (((SKEIN_LOOP) / 10) % 10)
+#else
+#define SKEIN_UNROLL_512 (0)
+#endif
+
+#if SKEIN_UNROLL_512
+#if (RCNT % SKEIN_UNROLL_512)
+#error "Invalid SKEIN_UNROLL_512" /* sanity check on unroll count */
+#endif
+ size_t r;
+ /* key schedule words : chaining vars + tweak + "rotation" */
+ uint64_t kw[WCNT + 4 + RCNT * 2];
+#else
+ uint64_t kw[WCNT + 4]; /* key schedule words : chaining vars + tweak */
+#endif
+ /* local copy of vars, for speed */
+ uint64_t X0, X1, X2, X3, X4, X5, X6, X7;
+ uint64_t w[WCNT]; /* local copy of input block */
+#ifdef SKEIN_DEBUG
+ /* use for debugging (help compiler put Xn in registers) */
+ const uint64_t *Xptr[8];
+ Xptr[0] = &X0;
+ Xptr[1] = &X1;
+ Xptr[2] = &X2;
+ Xptr[3] = &X3;
+ Xptr[4] = &X4;
+ Xptr[5] = &X5;
+ Xptr[6] = &X6;
+ Xptr[7] = &X7;
+#endif
+
+ Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
+ ts[0] = ctx->h.T[0];
+ ts[1] = ctx->h.T[1];
+ do {
+ /*
+ * this implementation only supports 2**64 input bytes
+ * (no carry out here)
+ */
+ ts[0] += byteCntAdd; /* update processed length */
+
+ /* precompute the key schedule for this block */
+ ks[0] = ctx->X[0];
+ ks[1] = ctx->X[1];
+ ks[2] = ctx->X[2];
+ ks[3] = ctx->X[3];
+ ks[4] = ctx->X[4];
+ ks[5] = ctx->X[5];
+ ks[6] = ctx->X[6];
+ ks[7] = ctx->X[7];
+ ks[8] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
+ ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^ SKEIN_KS_PARITY;
+
+ ts[2] = ts[0] ^ ts[1];
+
+ /* get input block in little-endian format */
+ Skein_Get64_LSB_First(w, blkPtr, WCNT);
+ DebugSaveTweak(ctx);
+ Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
+
+ X0 = w[0] + ks[0]; /* do the first full key injection */
+ X1 = w[1] + ks[1];
+ X2 = w[2] + ks[2];
+ X3 = w[3] + ks[3];
+ X4 = w[4] + ks[4];
+ X5 = w[5] + ks[5] + ts[0];
+ X6 = w[6] + ks[6] + ts[1];
+ X7 = w[7] + ks[7];
+
+ blkPtr += SKEIN_512_BLOCK_BYTES;
+
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
+ Xptr);
+ /* run the rounds */
+#define Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \
+ X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0;\
+ X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2;\
+ X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4;\
+ X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6;
+
+#if SKEIN_UNROLL_512 == 0
+#define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) /* unrolled */ \
+ Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rNum, Xptr);
+
+#define I512(R) \
+ X0 += ks[((R) + 1) % 9]; /* inject the key schedule value */\
+ X1 += ks[((R) + 2) % 9]; \
+ X2 += ks[((R) + 3) % 9]; \
+ X3 += ks[((R) + 4) % 9]; \
+ X4 += ks[((R) + 5) % 9]; \
+ X5 += ks[((R) + 6) % 9] + ts[((R) + 1) % 3]; \
+ X6 += ks[((R) + 7) % 9] + ts[((R) + 2) % 3]; \
+ X7 += ks[((R) + 8) % 9] + (R) + 1; \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+#else /* looping version */
+#define R512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \
+ Round512(p0, p1, p2, p3, p4, p5, p6, p7, ROT, rNum) \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rNum, Xptr);
+
+#define I512(R) \
+ X0 += ks[r + (R) + 0]; /* inject the key schedule value */ \
+ X1 += ks[r + (R) + 1]; \
+ X2 += ks[r + (R) + 2]; \
+ X3 += ks[r + (R) + 3]; \
+ X4 += ks[r + (R) + 4]; \
+ X5 += ks[r + (R) + 5] + ts[r + (R) + 0]; \
+ X6 += ks[r + (R) + 6] + ts[r + (R) + 1]; \
+ X7 += ks[r + (R) + 7] + r + (R); \
+ ks[r + (R) + 8] = ks[r + (R) - 1]; /* rotate key schedule */ \
+ ts[r + (R) + 2] = ts[r + (R) - 1]; \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+
+ /* loop thru it */
+ for (r = 1; r < 2 * RCNT; r += 2 * SKEIN_UNROLL_512)
+#endif /* end of looped code definitions */
+ {
+#define R512_8_rounds(R) /* do 8 full rounds */ \
+ R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_0, 8 * (R) + 1); \
+ R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_1, 8 * (R) + 2); \
+ R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_2, 8 * (R) + 3); \
+ R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_3, 8 * (R) + 4); \
+ I512(2 * (R)); \
+ R512(0, 1, 2, 3, 4, 5, 6, 7, R_512_4, 8 * (R) + 5); \
+ R512(2, 1, 4, 7, 6, 5, 0, 3, R_512_5, 8 * (R) + 6); \
+ R512(4, 1, 6, 3, 0, 5, 2, 7, R_512_6, 8 * (R) + 7); \
+ R512(6, 1, 0, 7, 2, 5, 4, 3, R_512_7, 8 * (R) + 8); \
+ I512(2 * (R) + 1); /* and key injection */
+
+ R512_8_rounds(0);
+
+#define R512_Unroll_R(NN) \
+ ((SKEIN_UNROLL_512 == 0 && SKEIN_512_ROUNDS_TOTAL / 8 > (NN)) || \
+ (SKEIN_UNROLL_512 > (NN)))
+
+#if R512_Unroll_R(1)
+ R512_8_rounds(1);
+#endif
+#if R512_Unroll_R(2)
+ R512_8_rounds(2);
+#endif
+#if R512_Unroll_R(3)
+ R512_8_rounds(3);
+#endif
+#if R512_Unroll_R(4)
+ R512_8_rounds(4);
+#endif
+#if R512_Unroll_R(5)
+ R512_8_rounds(5);
+#endif
+#if R512_Unroll_R(6)
+ R512_8_rounds(6);
+#endif
+#if R512_Unroll_R(7)
+ R512_8_rounds(7);
+#endif
+#if R512_Unroll_R(8)
+ R512_8_rounds(8);
+#endif
+#if R512_Unroll_R(9)
+ R512_8_rounds(9);
+#endif
+#if R512_Unroll_R(10)
+ R512_8_rounds(10);
+#endif
+#if R512_Unroll_R(11)
+ R512_8_rounds(11);
+#endif
+#if R512_Unroll_R(12)
+ R512_8_rounds(12);
+#endif
+#if R512_Unroll_R(13)
+ R512_8_rounds(13);
+#endif
+#if R512_Unroll_R(14)
+ R512_8_rounds(14);
+#endif
+#if (SKEIN_UNROLL_512 > 14)
+#error "need more unrolling in Skein_512_Process_Block"
+#endif
+ }
+
+ /*
+ * do the final "feedforward" xor, update context chaining vars
+ */
+ ctx->X[0] = X0 ^ w[0];
+ ctx->X[1] = X1 ^ w[1];
+ ctx->X[2] = X2 ^ w[2];
+ ctx->X[3] = X3 ^ w[3];
+ ctx->X[4] = X4 ^ w[4];
+ ctx->X[5] = X5 ^ w[5];
+ ctx->X[6] = X6 ^ w[6];
+ ctx->X[7] = X7 ^ w[7];
+ Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
+
+ ts[1] &= ~SKEIN_T1_FLAG_FIRST;
+ } while (--blkCnt);
+ ctx->h.T[0] = ts[0];
+ ctx->h.T[1] = ts[1];
+}
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t
+Skein_512_Process_Block_CodeSize(void)
+{
+ return ((uint8_t *)Skein_512_Process_Block_CodeSize) -
+ ((uint8_t *)Skein_512_Process_Block);
+}
+
+uint_t
+Skein_512_Unroll_Cnt(void)
+{
+ return (SKEIN_UNROLL_512);
+}
+#endif
+#endif
+
+/* Skein1024 */
+#if !(SKEIN_USE_ASM & 1024)
+void
+Skein1024_Process_Block(Skein1024_Ctxt_t *ctx, const uint8_t *blkPtr,
+ size_t blkCnt, size_t byteCntAdd)
+{
+ /* do it in C, always looping (unrolled is bigger AND slower!) */
+ enum {
+ WCNT = SKEIN1024_STATE_WORDS
+ };
+#undef RCNT
+#define RCNT (SKEIN1024_ROUNDS_TOTAL / 8)
+
+#ifdef SKEIN_LOOP /* configure how much to unroll the loop */
+#define SKEIN_UNROLL_1024 ((SKEIN_LOOP) % 10)
+#else
+#define SKEIN_UNROLL_1024 (0)
+#endif
+
+#if (SKEIN_UNROLL_1024 != 0)
+#if (RCNT % SKEIN_UNROLL_1024)
+#error "Invalid SKEIN_UNROLL_1024" /* sanity check on unroll count */
+#endif
+ size_t r;
+ /* key schedule words : chaining vars + tweak + "rotation" */
+ uint64_t kw[WCNT + 4 + RCNT * 2];
+#else
+ uint64_t kw[WCNT + 4]; /* key schedule words : chaining vars + tweak */
+#endif
+
+ /* local copy of vars, for speed */
+ uint64_t X00, X01, X02, X03, X04, X05, X06, X07, X08, X09, X10, X11,
+ X12, X13, X14, X15;
+ uint64_t w[WCNT]; /* local copy of input block */
+#ifdef SKEIN_DEBUG
+ /* use for debugging (help compiler put Xn in registers) */
+ const uint64_t *Xptr[16];
+ Xptr[0] = &X00;
+ Xptr[1] = &X01;
+ Xptr[2] = &X02;
+ Xptr[3] = &X03;
+ Xptr[4] = &X04;
+ Xptr[5] = &X05;
+ Xptr[6] = &X06;
+ Xptr[7] = &X07;
+ Xptr[8] = &X08;
+ Xptr[9] = &X09;
+ Xptr[10] = &X10;
+ Xptr[11] = &X11;
+ Xptr[12] = &X12;
+ Xptr[13] = &X13;
+ Xptr[14] = &X14;
+ Xptr[15] = &X15;
+#endif
+
+ Skein_assert(blkCnt != 0); /* never call with blkCnt == 0! */
+ ts[0] = ctx->h.T[0];
+ ts[1] = ctx->h.T[1];
+ do {
+ /*
+ * this implementation only supports 2**64 input bytes
+ * (no carry out here)
+ */
+ ts[0] += byteCntAdd; /* update processed length */
+
+ /* precompute the key schedule for this block */
+ ks[0] = ctx->X[0];
+ ks[1] = ctx->X[1];
+ ks[2] = ctx->X[2];
+ ks[3] = ctx->X[3];
+ ks[4] = ctx->X[4];
+ ks[5] = ctx->X[5];
+ ks[6] = ctx->X[6];
+ ks[7] = ctx->X[7];
+ ks[8] = ctx->X[8];
+ ks[9] = ctx->X[9];
+ ks[10] = ctx->X[10];
+ ks[11] = ctx->X[11];
+ ks[12] = ctx->X[12];
+ ks[13] = ctx->X[13];
+ ks[14] = ctx->X[14];
+ ks[15] = ctx->X[15];
+ ks[16] = ks[0] ^ ks[1] ^ ks[2] ^ ks[3] ^
+ ks[4] ^ ks[5] ^ ks[6] ^ ks[7] ^
+ ks[8] ^ ks[9] ^ ks[10] ^ ks[11] ^
+ ks[12] ^ ks[13] ^ ks[14] ^ ks[15] ^ SKEIN_KS_PARITY;
+
+ ts[2] = ts[0] ^ ts[1];
+
+ /* get input block in little-endian format */
+ Skein_Get64_LSB_First(w, blkPtr, WCNT);
+ DebugSaveTweak(ctx);
+ Skein_Show_Block(BLK_BITS, &ctx->h, ctx->X, blkPtr, w, ks, ts);
+
+ X00 = w[0] + ks[0]; /* do the first full key injection */
+ X01 = w[1] + ks[1];
+ X02 = w[2] + ks[2];
+ X03 = w[3] + ks[3];
+ X04 = w[4] + ks[4];
+ X05 = w[5] + ks[5];
+ X06 = w[6] + ks[6];
+ X07 = w[7] + ks[7];
+ X08 = w[8] + ks[8];
+ X09 = w[9] + ks[9];
+ X10 = w[10] + ks[10];
+ X11 = w[11] + ks[11];
+ X12 = w[12] + ks[12];
+ X13 = w[13] + ks[13] + ts[0];
+ X14 = w[14] + ks[14] + ts[1];
+ X15 = w[15] + ks[15];
+
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INITIAL,
+ Xptr);
+
+#define Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, \
+ pD, pE, pF, ROT, rNum) \
+ X##p0 += X##p1; X##p1 = RotL_64(X##p1, ROT##_0); X##p1 ^= X##p0;\
+ X##p2 += X##p3; X##p3 = RotL_64(X##p3, ROT##_1); X##p3 ^= X##p2;\
+ X##p4 += X##p5; X##p5 = RotL_64(X##p5, ROT##_2); X##p5 ^= X##p4;\
+ X##p6 += X##p7; X##p7 = RotL_64(X##p7, ROT##_3); X##p7 ^= X##p6;\
+ X##p8 += X##p9; X##p9 = RotL_64(X##p9, ROT##_4); X##p9 ^= X##p8;\
+ X##pA += X##pB; X##pB = RotL_64(X##pB, ROT##_5); X##pB ^= X##pA;\
+ X##pC += X##pD; X##pD = RotL_64(X##pD, ROT##_6); X##pD ^= X##pC;\
+ X##pE += X##pF; X##pF = RotL_64(X##pF, ROT##_7); X##pF ^= X##pE;
+
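+/*
+ * Editorial note: each line of Round1024 is one Threefish MIX step on a
+ * word pair, (x0, x1) -> (x0 + x1, RotL_64(x1, R) ^ (x0 + x1)); the
+ * inter-round word permutation is expressed by the argument order of
+ * the R1024 invocations below.
+ */
+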
+#if SKEIN_UNROLL_1024 == 0
+#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, \
+ pE, pF, ROT, rn) \
+ Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, \
+ pD, pE, pF, ROT, rn) \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, rn, Xptr);
+
+#define I1024(R) \
+ X00 += ks[((R) + 1) % 17]; /* inject the key schedule value */\
+ X01 += ks[((R) + 2) % 17]; \
+ X02 += ks[((R) + 3) % 17]; \
+ X03 += ks[((R) + 4) % 17]; \
+ X04 += ks[((R) + 5) % 17]; \
+ X05 += ks[((R) + 6) % 17]; \
+ X06 += ks[((R) + 7) % 17]; \
+ X07 += ks[((R) + 8) % 17]; \
+ X08 += ks[((R) + 9) % 17]; \
+ X09 += ks[((R) + 10) % 17]; \
+ X10 += ks[((R) + 11) % 17]; \
+ X11 += ks[((R) + 12) % 17]; \
+ X12 += ks[((R) + 13) % 17]; \
+ X13 += ks[((R) + 14) % 17] + ts[((R) + 1) % 3]; \
+ X14 += ks[((R) + 15) % 17] + ts[((R) + 2) % 3]; \
+	X15 += ks[((R) + 16) % 17] + (R) + 1; \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+#else /* looping version */
+#define R1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, pD, \
+ pE, pF, ROT, rn) \
+ Round1024(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, pA, pB, pC, \
+ pD, pE, pF, ROT, rn) \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, 4 * (r - 1) + rn, Xptr);
+
+#define I1024(R) \
+ X00 += ks[r + (R) + 0]; /* inject the key schedule value */ \
+ X01 += ks[r + (R) + 1]; \
+ X02 += ks[r + (R) + 2]; \
+ X03 += ks[r + (R) + 3]; \
+ X04 += ks[r + (R) + 4]; \
+ X05 += ks[r + (R) + 5]; \
+ X06 += ks[r + (R) + 6]; \
+ X07 += ks[r + (R) + 7]; \
+ X08 += ks[r + (R) + 8]; \
+ X09 += ks[r + (R) + 9]; \
+ X10 += ks[r + (R) + 10]; \
+ X11 += ks[r + (R) + 11]; \
+ X12 += ks[r + (R) + 12]; \
+ X13 += ks[r + (R) + 13] + ts[r + (R) + 0]; \
+ X14 += ks[r + (R) + 14] + ts[r + (R) + 1]; \
+ X15 += ks[r + (R) + 15] + r + (R); \
+ ks[r + (R) + 16] = ks[r + (R) - 1]; /* rotate key schedule */\
+ ts[r + (R) + 2] = ts[r + (R) - 1]; \
+ Skein_Show_R_Ptr(BLK_BITS, &ctx->h, SKEIN_RND_KEY_INJECT, Xptr);
+
+ /* loop thru it */
+ for (r = 1; r <= 2 * RCNT; r += 2 * SKEIN_UNROLL_1024)
+#endif
+ {
+#define R1024_8_rounds(R) /* do 8 full rounds */ \
+ R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, \
+ 14, 15, R1024_0, 8 * (R) + 1); \
+ R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, \
+ 08, 01, R1024_1, 8 * (R) + 2); \
+ R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, \
+ 10, 09, R1024_2, 8 * (R) + 3); \
+ R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, \
+ 12, 07, R1024_3, 8 * (R) + 4); \
+ I1024(2 * (R)); \
+ R1024(00, 01, 02, 03, 04, 05, 06, 07, 08, 09, 10, 11, 12, 13, \
+ 14, 15, R1024_4, 8 * (R) + 5); \
+ R1024(00, 09, 02, 13, 06, 11, 04, 15, 10, 07, 12, 03, 14, 05, \
+ 08, 01, R1024_5, 8 * (R) + 6); \
+ R1024(00, 07, 02, 05, 04, 03, 06, 01, 12, 15, 14, 13, 08, 11, \
+ 10, 09, R1024_6, 8 * (R) + 7); \
+ R1024(00, 15, 02, 11, 06, 13, 04, 09, 14, 01, 08, 05, 10, 03, \
+ 12, 07, R1024_7, 8 * (R) + 8); \
+ I1024(2 * (R) + 1);
+
+ R1024_8_rounds(0);
+
+#define R1024_Unroll_R(NN) \
+ ((SKEIN_UNROLL_1024 == 0 && SKEIN1024_ROUNDS_TOTAL/8 > (NN)) || \
+ (SKEIN_UNROLL_1024 > (NN)))
+
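+/*
+ * Editorial note: R1024_Unroll_R(NN) decides at compile time whether
+ * 8-round block NN is emitted: with no unrolling (SKEIN_UNROLL_1024 == 0)
+ * all RCNT = 80/8 = 10 blocks appear; otherwise only the first
+ * SKEIN_UNROLL_1024 blocks appear and the enclosing for-loop repeats them.
+ */
+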
+#if R1024_Unroll_R(1)
+ R1024_8_rounds(1);
+#endif
+#if R1024_Unroll_R(2)
+ R1024_8_rounds(2);
+#endif
+#if R1024_Unroll_R(3)
+ R1024_8_rounds(3);
+#endif
+#if R1024_Unroll_R(4)
+ R1024_8_rounds(4);
+#endif
+#if R1024_Unroll_R(5)
+ R1024_8_rounds(5);
+#endif
+#if R1024_Unroll_R(6)
+ R1024_8_rounds(6);
+#endif
+#if R1024_Unroll_R(7)
+ R1024_8_rounds(7);
+#endif
+#if R1024_Unroll_R(8)
+ R1024_8_rounds(8);
+#endif
+#if R1024_Unroll_R(9)
+ R1024_8_rounds(9);
+#endif
+#if R1024_Unroll_R(10)
+ R1024_8_rounds(10);
+#endif
+#if R1024_Unroll_R(11)
+ R1024_8_rounds(11);
+#endif
+#if R1024_Unroll_R(12)
+ R1024_8_rounds(12);
+#endif
+#if R1024_Unroll_R(13)
+ R1024_8_rounds(13);
+#endif
+#if R1024_Unroll_R(14)
+ R1024_8_rounds(14);
+#endif
+#if (SKEIN_UNROLL_1024 > 14)
+#error "need more unrolling in Skein1024_Process_Block"
+#endif
+ }
+ /*
+ * do the final "feedforward" xor, update context chaining vars
+ */
+
+ ctx->X[0] = X00 ^ w[0];
+ ctx->X[1] = X01 ^ w[1];
+ ctx->X[2] = X02 ^ w[2];
+ ctx->X[3] = X03 ^ w[3];
+ ctx->X[4] = X04 ^ w[4];
+ ctx->X[5] = X05 ^ w[5];
+ ctx->X[6] = X06 ^ w[6];
+ ctx->X[7] = X07 ^ w[7];
+ ctx->X[8] = X08 ^ w[8];
+ ctx->X[9] = X09 ^ w[9];
+ ctx->X[10] = X10 ^ w[10];
+ ctx->X[11] = X11 ^ w[11];
+ ctx->X[12] = X12 ^ w[12];
+ ctx->X[13] = X13 ^ w[13];
+ ctx->X[14] = X14 ^ w[14];
+ ctx->X[15] = X15 ^ w[15];
+
+ Skein_Show_Round(BLK_BITS, &ctx->h, SKEIN_RND_FEED_FWD, ctx->X);
+
+ ts[1] &= ~SKEIN_T1_FLAG_FIRST;
+ blkPtr += SKEIN1024_BLOCK_BYTES;
+ } while (--blkCnt);
+ ctx->h.T[0] = ts[0];
+ ctx->h.T[1] = ts[1];
+}
+
+#if defined(SKEIN_CODE_SIZE) || defined(SKEIN_PERF)
+size_t
+Skein1024_Process_Block_CodeSize(void)
+{
+ return ((uint8_t *)Skein1024_Process_Block_CodeSize) -
+ ((uint8_t *)Skein1024_Process_Block);
+}
+
+uint_t
+Skein1024_Unroll_Cnt(void)
+{
+ return (SKEIN_UNROLL_1024);
+}
+#endif
+#endif
diff --git a/zfs/module/icp/algs/skein/skein_impl.h b/zfs/module/icp/algs/skein/skein_impl.h
new file mode 100644
index 000000000000..e83a06971bb1
--- /dev/null
+++ b/zfs/module/icp/algs/skein/skein_impl.h
@@ -0,0 +1,289 @@
+/*
+ * Internal definitions for Skein hashing.
+ * Source code author: Doug Whiting, 2008.
+ * This algorithm and source code are released to the public domain.
+ *
+ * The following compile-time switches may be defined to control some
+ * tradeoffs between speed, code size, error checking, and security.
+ *
+ * The "default" note explains what happens when the switch is not defined.
+ *
+ * SKEIN_DEBUG -- make callouts from inside Skein code
+ * to examine/display intermediate values.
+ * [default: no callouts (no overhead)]
+ *
+ * SKEIN_ERR_CHECK -- how error checking is handled inside Skein
+ * code. If not defined, most error checking
+ * is disabled (for performance). Otherwise,
+ * the switch value is interpreted as:
+ * 0: use assert() to flag errors
+ * 1: return SKEIN_FAIL to flag errors
+ */
+/* Copyright 2013 Doug Whiting. This code is released to the public domain. */
+
+#ifndef _SKEIN_IMPL_H_
+#define _SKEIN_IMPL_H_
+
+#include <sys/skein.h>
+#include "skein_impl.h"
+#include "skein_port.h"
+
+/* determine where we can get bcopy/bzero declarations */
+#ifdef _KERNEL
+#include <sys/systm.h>
+#else
+#include <strings.h>
+#endif
+
+/*
+ * "Internal" Skein definitions
+ * -- not needed for sequential hashing API, but will be
+ * helpful for other uses of Skein (e.g., tree hash mode).
+ * -- included here so that they can be shared between
+ * reference and optimized code.
+ */
+
+/* tweak word T[1]: bit field starting positions */
+/* offset 64 because it's the second word */
+#define SKEIN_T1_BIT(BIT) ((BIT) - 64)
+
+/* bits 112..118: level in hash tree */
+#define SKEIN_T1_POS_TREE_LVL SKEIN_T1_BIT(112)
+/* bit 119: partial final input byte */
+#define SKEIN_T1_POS_BIT_PAD SKEIN_T1_BIT(119)
+/* bits 120..125: type field */
+#define SKEIN_T1_POS_BLK_TYPE SKEIN_T1_BIT(120)
+/* bit 126: first block flag */
+#define SKEIN_T1_POS_FIRST SKEIN_T1_BIT(126)
+/* bit 127: final block flag */
+#define SKEIN_T1_POS_FINAL SKEIN_T1_BIT(127)
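+
+/*
+ * Editorial example: SKEIN_T1_BIT(120) == 56, so the block-type field
+ * occupies bits 56..61 of the tweak word T[1].
+ */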
+
+/* tweak word T[1]: flag bit definition(s) */
+#define SKEIN_T1_FLAG_FIRST (((uint64_t)1) << SKEIN_T1_POS_FIRST)
+#define SKEIN_T1_FLAG_FINAL (((uint64_t)1) << SKEIN_T1_POS_FINAL)
+#define SKEIN_T1_FLAG_BIT_PAD (((uint64_t)1) << SKEIN_T1_POS_BIT_PAD)
+
+/* tweak word T[1]: tree level bit field mask */
+#define SKEIN_T1_TREE_LVL_MASK (((uint64_t)0x7F) << SKEIN_T1_POS_TREE_LVL)
+#define SKEIN_T1_TREE_LEVEL(n) (((uint64_t)(n)) << SKEIN_T1_POS_TREE_LVL)
+
+/* tweak word T[1]: block type field */
+#define SKEIN_BLK_TYPE_KEY (0) /* key, for MAC and KDF */
+#define SKEIN_BLK_TYPE_CFG (4) /* configuration block */
+#define SKEIN_BLK_TYPE_PERS (8) /* personalization string */
+#define SKEIN_BLK_TYPE_PK (12) /* public key (for signature hashing) */
+#define SKEIN_BLK_TYPE_KDF (16) /* key identifier for KDF */
+#define SKEIN_BLK_TYPE_NONCE (20) /* nonce for PRNG */
+#define SKEIN_BLK_TYPE_MSG (48) /* message processing */
+#define SKEIN_BLK_TYPE_OUT (63) /* output stage */
+#define SKEIN_BLK_TYPE_MASK (63) /* bit field mask */
+
+#define SKEIN_T1_BLK_TYPE(T) \
+ (((uint64_t)(SKEIN_BLK_TYPE_##T)) << SKEIN_T1_POS_BLK_TYPE)
+/* key, for MAC and KDF */
+#define SKEIN_T1_BLK_TYPE_KEY SKEIN_T1_BLK_TYPE(KEY)
+/* configuration block */
+#define SKEIN_T1_BLK_TYPE_CFG SKEIN_T1_BLK_TYPE(CFG)
+/* personalization string */
+#define SKEIN_T1_BLK_TYPE_PERS SKEIN_T1_BLK_TYPE(PERS)
+/* public key (for digital signature hashing) */
+#define SKEIN_T1_BLK_TYPE_PK SKEIN_T1_BLK_TYPE(PK)
+/* key identifier for KDF */
+#define SKEIN_T1_BLK_TYPE_KDF SKEIN_T1_BLK_TYPE(KDF)
+/* nonce for PRNG */
+#define SKEIN_T1_BLK_TYPE_NONCE SKEIN_T1_BLK_TYPE(NONCE)
+/* message processing */
+#define SKEIN_T1_BLK_TYPE_MSG SKEIN_T1_BLK_TYPE(MSG)
+/* output stage */
+#define SKEIN_T1_BLK_TYPE_OUT SKEIN_T1_BLK_TYPE(OUT)
+/* field bit mask */
+#define SKEIN_T1_BLK_TYPE_MASK SKEIN_T1_BLK_TYPE(MASK)
+
+#define SKEIN_T1_BLK_TYPE_CFG_FINAL \
+ (SKEIN_T1_BLK_TYPE_CFG | SKEIN_T1_FLAG_FINAL)
+#define SKEIN_T1_BLK_TYPE_OUT_FINAL \
+ (SKEIN_T1_BLK_TYPE_OUT | SKEIN_T1_FLAG_FINAL)
+
+#define SKEIN_VERSION (1)
+
+#ifndef SKEIN_ID_STRING_LE /* allow compile-time personalization */
+#define SKEIN_ID_STRING_LE (0x33414853) /* "SHA3" (little-endian) */
+#endif
+
+#define SKEIN_MK_64(hi32, lo32) ((lo32) + (((uint64_t)(hi32)) << 32))
+#define SKEIN_SCHEMA_VER SKEIN_MK_64(SKEIN_VERSION, SKEIN_ID_STRING_LE)
+#define SKEIN_KS_PARITY SKEIN_MK_64(0x1BD11BDA, 0xA9FC1A22)
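+
+/*
+ * Editorial note: SKEIN_MK_64(0x1BD11BDA, 0xA9FC1A22) evaluates to
+ * 0x1BD11BDAA9FC1A22, the Threefish key schedule parity constant C240
+ * from the Skein 1.3 specification.
+ */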
+
+#define SKEIN_CFG_STR_LEN (4*8)
+
+/* bit field definitions in config block treeInfo word */
+#define SKEIN_CFG_TREE_LEAF_SIZE_POS (0)
+#define SKEIN_CFG_TREE_NODE_SIZE_POS (8)
+#define SKEIN_CFG_TREE_MAX_LEVEL_POS (16)
+
+#define SKEIN_CFG_TREE_LEAF_SIZE_MSK \
+ (((uint64_t)0xFF) << SKEIN_CFG_TREE_LEAF_SIZE_POS)
+#define SKEIN_CFG_TREE_NODE_SIZE_MSK \
+ (((uint64_t)0xFF) << SKEIN_CFG_TREE_NODE_SIZE_POS)
+#define SKEIN_CFG_TREE_MAX_LEVEL_MSK \
+ (((uint64_t)0xFF) << SKEIN_CFG_TREE_MAX_LEVEL_POS)
+
+#define SKEIN_CFG_TREE_INFO(leaf, node, maxLvl) \
+ ((((uint64_t)(leaf)) << SKEIN_CFG_TREE_LEAF_SIZE_POS) | \
+ (((uint64_t)(node)) << SKEIN_CFG_TREE_NODE_SIZE_POS) | \
+ (((uint64_t)(maxLvl)) << SKEIN_CFG_TREE_MAX_LEVEL_POS))
+
+/* use as treeInfo in InitExt() call for sequential processing */
+#define SKEIN_CFG_TREE_INFO_SEQUENTIAL SKEIN_CFG_TREE_INFO(0, 0, 0)
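+
+/*
+ * Editorial example: SKEIN_CFG_TREE_INFO(2, 3, 255) packs to 0xFF0302,
+ * i.e. leaf size in bits 0..7, node size in bits 8..15, and max level
+ * in bits 16..23; the all-zero SEQUENTIAL value disables tree hashing.
+ */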
+
+/*
+ * Skein macros for getting/setting tweak words, etc.
+ * These are useful for partial input bytes, hash tree init/update, etc.
+ */
+#define Skein_Get_Tweak(ctxPtr, TWK_NUM) ((ctxPtr)->h.T[TWK_NUM])
+#define Skein_Set_Tweak(ctxPtr, TWK_NUM, tVal) \
+ do { \
+ (ctxPtr)->h.T[TWK_NUM] = (tVal); \
+ _NOTE(CONSTCOND) \
+ } while (0)
+
+#define Skein_Get_T0(ctxPtr) Skein_Get_Tweak(ctxPtr, 0)
+#define Skein_Get_T1(ctxPtr) Skein_Get_Tweak(ctxPtr, 1)
+#define Skein_Set_T0(ctxPtr, T0) Skein_Set_Tweak(ctxPtr, 0, T0)
+#define Skein_Set_T1(ctxPtr, T1) Skein_Set_Tweak(ctxPtr, 1, T1)
+
+/* set both tweak words at once */
+#define Skein_Set_T0_T1(ctxPtr, T0, T1) \
+ do { \
+ Skein_Set_T0(ctxPtr, (T0)); \
+ Skein_Set_T1(ctxPtr, (T1)); \
+ _NOTE(CONSTCOND) \
+ } while (0)
+
+#define Skein_Set_Type(ctxPtr, BLK_TYPE) \
+ Skein_Set_T1(ctxPtr, SKEIN_T1_BLK_TYPE_##BLK_TYPE)
+
+/*
+ * set up for starting with a new type: h.T[0]=0; h.T[1] = NEW_TYPE; h.bCnt=0;
+ */
+#define Skein_Start_New_Type(ctxPtr, BLK_TYPE) \
+ do { \
+ Skein_Set_T0_T1(ctxPtr, 0, SKEIN_T1_FLAG_FIRST | \
+ SKEIN_T1_BLK_TYPE_ ## BLK_TYPE); \
+ (ctxPtr)->h.bCnt = 0; \
+ _NOTE(CONSTCOND) \
+ } while (0)
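+
+/*
+ * Editorial example: Skein_Start_New_Type(ctx, MSG) zeroes the byte
+ * count and sets T[0] = 0, T[1] = SKEIN_T1_FLAG_FIRST |
+ * SKEIN_T1_BLK_TYPE_MSG, i.e. "first block of the message stream".
+ */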
+
+#define Skein_Clear_First_Flag(hdr) \
+ do { \
+ (hdr).T[1] &= ~SKEIN_T1_FLAG_FIRST; \
+ _NOTE(CONSTCOND) \
+ } while (0)
+#define Skein_Set_Bit_Pad_Flag(hdr) \
+ do { \
+ (hdr).T[1] |= SKEIN_T1_FLAG_BIT_PAD; \
+ _NOTE(CONSTCOND) \
+ } while (0)
+
+#define Skein_Set_Tree_Level(hdr, height) \
+ do { \
+ (hdr).T[1] |= SKEIN_T1_TREE_LEVEL(height); \
+ _NOTE(CONSTCOND) \
+ } while (0)
+
+/*
+ * "Internal" Skein definitions for debugging and error checking
+ * Note: in Illumos we always disable debugging features.
+ */
+#define Skein_Show_Block(bits, ctx, X, blkPtr, wPtr, ksEvenPtr, ksOddPtr)
+#define Skein_Show_Round(bits, ctx, r, X)
+#define Skein_Show_R_Ptr(bits, ctx, r, X_ptr)
+#define Skein_Show_Final(bits, ctx, cnt, outPtr)
+#define Skein_Show_Key(bits, ctx, key, keyBytes)
+
+/* run-time checks (e.g., bad params, uninitialized context)? */
+#ifndef SKEIN_ERR_CHECK
+/* default: ignore all Asserts, for performance */
+#define Skein_Assert(x, retCode)
+#define Skein_assert(x)
+#elif defined(SKEIN_ASSERT)
+#include <sys/debug.h>
+#define Skein_Assert(x, retCode) ASSERT(x)
+#define Skein_assert(x) ASSERT(x)
+#else
+#include <sys/debug.h>
+/* caller error */
+#define Skein_Assert(x, retCode) \
+ do { \
+ if (!(x)) \
+ return (retCode); \
+ _NOTE(CONSTCOND) \
+ } while (0)
+/* internal error */
+#define Skein_assert(x) ASSERT(x)
+#endif
+
+/*
+ * Skein block function constants (shared across Ref and Opt code)
+ */
+enum {
+ /* Skein_256 round rotation constants */
+ R_256_0_0 = 14, R_256_0_1 = 16,
+ R_256_1_0 = 52, R_256_1_1 = 57,
+ R_256_2_0 = 23, R_256_2_1 = 40,
+ R_256_3_0 = 5, R_256_3_1 = 37,
+ R_256_4_0 = 25, R_256_4_1 = 33,
+ R_256_5_0 = 46, R_256_5_1 = 12,
+ R_256_6_0 = 58, R_256_6_1 = 22,
+ R_256_7_0 = 32, R_256_7_1 = 32,
+
+ /* Skein_512 round rotation constants */
+ R_512_0_0 = 46, R_512_0_1 = 36, R_512_0_2 = 19, R_512_0_3 = 37,
+ R_512_1_0 = 33, R_512_1_1 = 27, R_512_1_2 = 14, R_512_1_3 = 42,
+ R_512_2_0 = 17, R_512_2_1 = 49, R_512_2_2 = 36, R_512_2_3 = 39,
+ R_512_3_0 = 44, R_512_3_1 = 9, R_512_3_2 = 54, R_512_3_3 = 56,
+ R_512_4_0 = 39, R_512_4_1 = 30, R_512_4_2 = 34, R_512_4_3 = 24,
+ R_512_5_0 = 13, R_512_5_1 = 50, R_512_5_2 = 10, R_512_5_3 = 17,
+ R_512_6_0 = 25, R_512_6_1 = 29, R_512_6_2 = 39, R_512_6_3 = 43,
+ R_512_7_0 = 8, R_512_7_1 = 35, R_512_7_2 = 56, R_512_7_3 = 22,
+
+ /* Skein1024 round rotation constants */
+	R1024_0_0 = 24, R1024_0_1 = 13, R1024_0_2 = 8, R1024_0_3 = 47,
+	R1024_0_4 = 8, R1024_0_5 = 17, R1024_0_6 = 22, R1024_0_7 = 37,
+	R1024_1_0 = 38, R1024_1_1 = 19, R1024_1_2 = 10, R1024_1_3 = 55,
+	R1024_1_4 = 49, R1024_1_5 = 18, R1024_1_6 = 23, R1024_1_7 = 52,
+	R1024_2_0 = 33, R1024_2_1 = 4, R1024_2_2 = 51, R1024_2_3 = 13,
+	R1024_2_4 = 34, R1024_2_5 = 41, R1024_2_6 = 59, R1024_2_7 = 17,
+	R1024_3_0 = 5, R1024_3_1 = 20, R1024_3_2 = 48, R1024_3_3 = 41,
+	R1024_3_4 = 47, R1024_3_5 = 28, R1024_3_6 = 16, R1024_3_7 = 25,
+	R1024_4_0 = 41, R1024_4_1 = 9, R1024_4_2 = 37, R1024_4_3 = 31,
+	R1024_4_4 = 12, R1024_4_5 = 47, R1024_4_6 = 44, R1024_4_7 = 30,
+	R1024_5_0 = 16, R1024_5_1 = 34, R1024_5_2 = 56, R1024_5_3 = 51,
+	R1024_5_4 = 4, R1024_5_5 = 53, R1024_5_6 = 42, R1024_5_7 = 41,
+	R1024_6_0 = 31, R1024_6_1 = 44, R1024_6_2 = 47, R1024_6_3 = 46,
+	R1024_6_4 = 19, R1024_6_5 = 42, R1024_6_6 = 44, R1024_6_7 = 25,
+	R1024_7_0 = 9, R1024_7_1 = 48, R1024_7_2 = 35, R1024_7_3 = 52,
+	R1024_7_4 = 23, R1024_7_5 = 31, R1024_7_6 = 37, R1024_7_7 = 20
+};
+
+/* number of rounds for the different block sizes */
+#define SKEIN_256_ROUNDS_TOTAL (72)
+#define SKEIN_512_ROUNDS_TOTAL (72)
+#define SKEIN1024_ROUNDS_TOTAL (80)
+
+
+extern const uint64_t SKEIN_256_IV_128[];
+extern const uint64_t SKEIN_256_IV_160[];
+extern const uint64_t SKEIN_256_IV_224[];
+extern const uint64_t SKEIN_256_IV_256[];
+extern const uint64_t SKEIN_512_IV_128[];
+extern const uint64_t SKEIN_512_IV_160[];
+extern const uint64_t SKEIN_512_IV_224[];
+extern const uint64_t SKEIN_512_IV_256[];
+extern const uint64_t SKEIN_512_IV_384[];
+extern const uint64_t SKEIN_512_IV_512[];
+extern const uint64_t SKEIN1024_IV_384[];
+extern const uint64_t SKEIN1024_IV_512[];
+extern const uint64_t SKEIN1024_IV_1024[];
+
+#endif /* _SKEIN_IMPL_H_ */
diff --git a/zfs/module/icp/algs/skein/skein_iv.c b/zfs/module/icp/algs/skein/skein_iv.c
new file mode 100644
index 000000000000..140d38f76547
--- /dev/null
+++ b/zfs/module/icp/algs/skein/skein_iv.c
@@ -0,0 +1,185 @@
+/*
+ * Pre-computed Skein IVs
+ *
+ * NOTE: these values are not "magic" constants, but
+ * are generated using the Threefish block function.
+ * They are pre-computed here only for speed; i.e., to
+ * avoid the need for a Threefish call during Init().
+ *
+ * The IV for any fixed hash length may be pre-computed.
+ * Only the most common values are included here.
+ */
+/* Copyright 2013 Doug Whiting. This code is released to the public domain. */
+/*
+ * Illumos implementation note: these constants are for Skein v1.3 as per:
+ * http://www.skein-hash.info/sites/default/files/skein1.3.pdf
+ */
+
+#include <sys/skein.h> /* get Skein macros and types */
+#include "skein_impl.h" /* get internal definitions */
+
+#define MK_64 SKEIN_MK_64
+
+/* blkSize = 256 bits. hashSize = 128 bits */
+const uint64_t SKEIN_256_IV_128[] = {
+ MK_64(0xE1111906, 0x964D7260),
+ MK_64(0x883DAAA7, 0x7C8D811C),
+ MK_64(0x10080DF4, 0x91960F7A),
+ MK_64(0xCCF7DDE5, 0xB45BC1C2)
+};
+
+/* blkSize = 256 bits. hashSize = 160 bits */
+const uint64_t SKEIN_256_IV_160[] = {
+ MK_64(0x14202314, 0x72825E98),
+ MK_64(0x2AC4E9A2, 0x5A77E590),
+ MK_64(0xD47A5856, 0x8838D63E),
+ MK_64(0x2DD2E496, 0x8586AB7D)
+};
+
+/* blkSize = 256 bits. hashSize = 224 bits */
+const uint64_t SKEIN_256_IV_224[] = {
+ MK_64(0xC6098A8C, 0x9AE5EA0B),
+ MK_64(0x876D5686, 0x08C5191C),
+ MK_64(0x99CB88D7, 0xD7F53884),
+ MK_64(0x384BDDB1, 0xAEDDB5DE)
+};
+
+/* blkSize = 256 bits. hashSize = 256 bits */
+const uint64_t SKEIN_256_IV_256[] = {
+ MK_64(0xFC9DA860, 0xD048B449),
+ MK_64(0x2FCA6647, 0x9FA7D833),
+ MK_64(0xB33BC389, 0x6656840F),
+ MK_64(0x6A54E920, 0xFDE8DA69)
+};
+
+/* blkSize = 512 bits. hashSize = 128 bits */
+const uint64_t SKEIN_512_IV_128[] = {
+ MK_64(0xA8BC7BF3, 0x6FBF9F52),
+ MK_64(0x1E9872CE, 0xBD1AF0AA),
+ MK_64(0x309B1790, 0xB32190D3),
+ MK_64(0xBCFBB854, 0x3F94805C),
+ MK_64(0x0DA61BCD, 0x6E31B11B),
+ MK_64(0x1A18EBEA, 0xD46A32E3),
+ MK_64(0xA2CC5B18, 0xCE84AA82),
+ MK_64(0x6982AB28, 0x9D46982D)
+};
+
+/* blkSize = 512 bits. hashSize = 160 bits */
+const uint64_t SKEIN_512_IV_160[] = {
+ MK_64(0x28B81A2A, 0xE013BD91),
+ MK_64(0xC2F11668, 0xB5BDF78F),
+ MK_64(0x1760D8F3, 0xF6A56F12),
+ MK_64(0x4FB74758, 0x8239904F),
+ MK_64(0x21EDE07F, 0x7EAF5056),
+ MK_64(0xD908922E, 0x63ED70B8),
+ MK_64(0xB8EC76FF, 0xECCB52FA),
+ MK_64(0x01A47BB8, 0xA3F27A6E)
+};
+
+/* blkSize = 512 bits. hashSize = 224 bits */
+const uint64_t SKEIN_512_IV_224[] = {
+ MK_64(0xCCD06162, 0x48677224),
+ MK_64(0xCBA65CF3, 0xA92339EF),
+ MK_64(0x8CCD69D6, 0x52FF4B64),
+ MK_64(0x398AED7B, 0x3AB890B4),
+ MK_64(0x0F59D1B1, 0x457D2BD0),
+ MK_64(0x6776FE65, 0x75D4EB3D),
+ MK_64(0x99FBC70E, 0x997413E9),
+ MK_64(0x9E2CFCCF, 0xE1C41EF7)
+};
+
+/* blkSize = 512 bits. hashSize = 256 bits */
+const uint64_t SKEIN_512_IV_256[] = {
+ MK_64(0xCCD044A1, 0x2FDB3E13),
+ MK_64(0xE8359030, 0x1A79A9EB),
+ MK_64(0x55AEA061, 0x4F816E6F),
+ MK_64(0x2A2767A4, 0xAE9B94DB),
+ MK_64(0xEC06025E, 0x74DD7683),
+ MK_64(0xE7A436CD, 0xC4746251),
+ MK_64(0xC36FBAF9, 0x393AD185),
+ MK_64(0x3EEDBA18, 0x33EDFC13)
+};
+
+/* blkSize = 512 bits. hashSize = 384 bits */
+const uint64_t SKEIN_512_IV_384[] = {
+ MK_64(0xA3F6C6BF, 0x3A75EF5F),
+ MK_64(0xB0FEF9CC, 0xFD84FAA4),
+ MK_64(0x9D77DD66, 0x3D770CFE),
+ MK_64(0xD798CBF3, 0xB468FDDA),
+ MK_64(0x1BC4A666, 0x8A0E4465),
+ MK_64(0x7ED7D434, 0xE5807407),
+ MK_64(0x548FC1AC, 0xD4EC44D6),
+ MK_64(0x266E1754, 0x6AA18FF8)
+};
+
+/* blkSize = 512 bits. hashSize = 512 bits */
+const uint64_t SKEIN_512_IV_512[] = {
+ MK_64(0x4903ADFF, 0x749C51CE),
+ MK_64(0x0D95DE39, 0x9746DF03),
+ MK_64(0x8FD19341, 0x27C79BCE),
+ MK_64(0x9A255629, 0xFF352CB1),
+ MK_64(0x5DB62599, 0xDF6CA7B0),
+ MK_64(0xEABE394C, 0xA9D5C3F4),
+ MK_64(0x991112C7, 0x1A75B523),
+ MK_64(0xAE18A40B, 0x660FCC33)
+};
+
+/* blkSize = 1024 bits. hashSize = 384 bits */
+const uint64_t SKEIN1024_IV_384[] = {
+ MK_64(0x5102B6B8, 0xC1894A35),
+ MK_64(0xFEEBC9E3, 0xFE8AF11A),
+ MK_64(0x0C807F06, 0xE32BED71),
+ MK_64(0x60C13A52, 0xB41A91F6),
+ MK_64(0x9716D35D, 0xD4917C38),
+ MK_64(0xE780DF12, 0x6FD31D3A),
+ MK_64(0x797846B6, 0xC898303A),
+ MK_64(0xB172C2A8, 0xB3572A3B),
+ MK_64(0xC9BC8203, 0xA6104A6C),
+ MK_64(0x65909338, 0xD75624F4),
+ MK_64(0x94BCC568, 0x4B3F81A0),
+ MK_64(0x3EBBF51E, 0x10ECFD46),
+ MK_64(0x2DF50F0B, 0xEEB08542),
+ MK_64(0x3B5A6530, 0x0DBC6516),
+ MK_64(0x484B9CD2, 0x167BBCE1),
+ MK_64(0x2D136947, 0xD4CBAFEA)
+};
+
+/* blkSize = 1024 bits. hashSize = 512 bits */
+const uint64_t SKEIN1024_IV_512[] = {
+ MK_64(0xCAEC0E5D, 0x7C1B1B18),
+ MK_64(0xA01B0E04, 0x5F03E802),
+ MK_64(0x33840451, 0xED912885),
+ MK_64(0x374AFB04, 0xEAEC2E1C),
+ MK_64(0xDF25A0E2, 0x813581F7),
+ MK_64(0xE4004093, 0x8B12F9D2),
+ MK_64(0xA662D539, 0xC2ED39B6),
+ MK_64(0xFA8B85CF, 0x45D8C75A),
+ MK_64(0x8316ED8E, 0x29EDE796),
+ MK_64(0x053289C0, 0x2E9F91B8),
+ MK_64(0xC3F8EF1D, 0x6D518B73),
+ MK_64(0xBDCEC3C4, 0xD5EF332E),
+ MK_64(0x549A7E52, 0x22974487),
+ MK_64(0x67070872, 0x5B749816),
+ MK_64(0xB9CD28FB, 0xF0581BD1),
+ MK_64(0x0E2940B8, 0x15804974)
+};
+
+/* blkSize = 1024 bits. hashSize = 1024 bits */
+const uint64_t SKEIN1024_IV_1024[] = {
+ MK_64(0xD593DA07, 0x41E72355),
+ MK_64(0x15B5E511, 0xAC73E00C),
+ MK_64(0x5180E5AE, 0xBAF2C4F0),
+ MK_64(0x03BD41D3, 0xFCBCAFAF),
+ MK_64(0x1CAEC6FD, 0x1983A898),
+ MK_64(0x6E510B8B, 0xCDD0589F),
+ MK_64(0x77E2BDFD, 0xC6394ADA),
+ MK_64(0xC11E1DB5, 0x24DCB0A3),
+ MK_64(0xD6D14AF9, 0xC6329AB5),
+ MK_64(0x6A9B0BFC, 0x6EB67E0D),
+ MK_64(0x9243C60D, 0xCCFF1332),
+ MK_64(0x1A1F1DDE, 0x743F02D4),
+ MK_64(0x0996753C, 0x10ED0BB8),
+ MK_64(0x6572DD22, 0xF2B4969A),
+ MK_64(0x61FD3062, 0xD00A579A),
+ MK_64(0x1DE0536E, 0x8682E539)
+};
diff --git a/zfs/module/icp/algs/skein/skein_port.h b/zfs/module/icp/algs/skein/skein_port.h
new file mode 100644
index 000000000000..1b0225236993
--- /dev/null
+++ b/zfs/module/icp/algs/skein/skein_port.h
@@ -0,0 +1,128 @@
+/*
+ * Platform-specific definitions for Skein hash function.
+ *
+ * Source code author: Doug Whiting, 2008.
+ *
+ * This algorithm and source code are released to the public domain.
+ *
+ * Many thanks to Brian Gladman for his portable header files.
+ *
+ * To port Skein to an "unsupported" platform, change the definitions
+ * in this file appropriately.
+ */
+/* Copyright 2013 Doug Whiting. This code is released to the public domain. */
+
+#ifndef _SKEIN_PORT_H_
+#define _SKEIN_PORT_H_
+
+#include <sys/types.h> /* get integer type definitions */
+#include <sys/systm.h> /* for bcopy() */
+
+#ifndef RotL_64
+#define RotL_64(x, N) (((x) << (N)) | ((x) >> (64 - (N))))
+#endif
+
+/*
+ * Skein is "natively" little-endian (unlike SHA-xxx), for optimal
+ * performance on x86 CPUs. The Skein code requires the following
+ * definitions for dealing with endianness:
+ *
+ * SKEIN_NEED_SWAP: 0 for little-endian, 1 for big-endian
+ * Skein_Put64_LSB_First
+ * Skein_Get64_LSB_First
+ * Skein_Swap64
+ *
+ * If SKEIN_NEED_SWAP is defined at compile time, it is used here
+ * along with the portable versions of Put64/Get64/Swap64, which
+ * are slow in general.
+ *
+ * Otherwise, an "auto-detect" of endianness is attempted below.
+ * If the default handling doesn't work well, the user may insert
+ * platform-specific code instead (e.g., for big-endian CPUs).
+ *
+ */
+#ifndef SKEIN_NEED_SWAP /* compile-time "override" for endianness? */
+
+#include <sys/isa_defs.h> /* get endianness selection */
+
+#define PLATFORM_MUST_ALIGN _ALIGNMENT_REQUIRED
+#if defined(_BIG_ENDIAN)
+/* here for big-endian CPUs */
+#define SKEIN_NEED_SWAP (1)
+#else
+/* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */
+#define SKEIN_NEED_SWAP (0)
+#if PLATFORM_MUST_ALIGN == 0 /* ok to use "fast" versions? */
+#define Skein_Put64_LSB_First(dst08, src64, bCnt) bcopy(src64, dst08, bCnt)
+#define Skein_Get64_LSB_First(dst64, src08, wCnt) \
+ bcopy(src08, dst64, 8 * (wCnt))
+#endif
+#endif
+
+#endif /* ifndef SKEIN_NEED_SWAP */
+
+/*
+ * Provide any definitions still needed.
+ */
+#ifndef Skein_Swap64 /* swap for big-endian, nop for little-endian */
+#if SKEIN_NEED_SWAP
+#define Skein_Swap64(w64) \
+ (((((uint64_t)(w64)) & 0xFF) << 56) | \
+ (((((uint64_t)(w64)) >> 8) & 0xFF) << 48) | \
+ (((((uint64_t)(w64)) >> 16) & 0xFF) << 40) | \
+ (((((uint64_t)(w64)) >> 24) & 0xFF) << 32) | \
+ (((((uint64_t)(w64)) >> 32) & 0xFF) << 24) | \
+ (((((uint64_t)(w64)) >> 40) & 0xFF) << 16) | \
+ (((((uint64_t)(w64)) >> 48) & 0xFF) << 8) | \
+ (((((uint64_t)(w64)) >> 56) & 0xFF)))
+#else
+#define Skein_Swap64(w64) (w64)
+#endif
+#endif /* ifndef Skein_Swap64 */
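+
+/*
+ * Editorial example: on a big-endian CPU, Skein_Swap64(0x0123456789ABCDEF)
+ * yields 0xEFCDAB8967452301; on little-endian CPUs the macro is an
+ * identity and the Get64/Put64 helpers above may reduce to bcopy() when
+ * the platform permits unaligned access.
+ */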
+
+#ifndef Skein_Put64_LSB_First
+void
+Skein_Put64_LSB_First(uint8_t *dst, const uint64_t *src, size_t bCnt)
+#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */
+{
+ /*
+ * this version is fully portable (big-endian or little-endian),
+ * but slow
+ */
+ size_t n;
+
+ for (n = 0; n < bCnt; n++)
+ dst[n] = (uint8_t)(src[n >> 3] >> (8 * (n & 7)));
+}
+#else
+; /* output only the function prototype */
+#endif
+#endif /* ifndef Skein_Put64_LSB_First */
+
+#ifndef Skein_Get64_LSB_First
+void
+Skein_Get64_LSB_First(uint64_t *dst, const uint8_t *src, size_t wCnt)
+#ifdef SKEIN_PORT_CODE /* instantiate the function code here? */
+{
+ /*
+ * this version is fully portable (big-endian or little-endian),
+ * but slow
+ */
+ size_t n;
+
+ for (n = 0; n < 8 * wCnt; n += 8)
+ dst[n / 8] = (((uint64_t)src[n])) +
+ (((uint64_t)src[n + 1]) << 8) +
+ (((uint64_t)src[n + 2]) << 16) +
+ (((uint64_t)src[n + 3]) << 24) +
+ (((uint64_t)src[n + 4]) << 32) +
+ (((uint64_t)src[n + 5]) << 40) +
+ (((uint64_t)src[n + 6]) << 48) +
+ (((uint64_t)src[n + 7]) << 56);
+}
+#else
+; /* output only the function prototype */
+#endif
+#endif /* ifndef Skein_Get64_LSB_First */
+
+#endif /* _SKEIN_PORT_H_ */
diff --git a/zfs/module/icp/api/kcf_cipher.c b/zfs/module/icp/api/kcf_cipher.c
new file mode 100644
index 000000000000..2585b7fedae7
--- /dev/null
+++ b/zfs/module/icp/api/kcf_cipher.c
@@ -0,0 +1,935 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/api.h>
+#include <sys/crypto/spi.h>
+#include <sys/crypto/sched_impl.h>
+
+#define CRYPTO_OPS_OFFSET(f) offsetof(crypto_ops_t, co_##f)
+#define CRYPTO_CIPHER_OFFSET(f) offsetof(crypto_cipher_ops_t, f)
+
+/*
+ * Encryption and decryption routines.
+ */
+
+/*
+ * The following are the possible returned values common to all the routines
+ * The following are the possible return values common to all the routines
+ * below. Which of these values may be returned depends on the arguments
+ * that are present.
+ * CRYPTO_SUCCESS: The operation completed successfully.
+ * CRYPTO_QUEUED: A request was submitted successfully. The callback
+ * routine will be called when the operation is done.
+ * CRYPTO_INVALID_MECH_NUMBER, CRYPTO_INVALID_MECH_PARAM, or
+ * CRYPTO_INVALID_MECH for problems with the 'mech'.
+ * CRYPTO_INVALID_DATA for bogus 'data'
+ * CRYPTO_HOST_MEMORY for failure to allocate memory to handle this work.
+ * CRYPTO_INVALID_CONTEXT: Not a valid context.
+ * CRYPTO_BUSY: Cannot process the request now. Schedule a
+ * crypto_bufcall(), or try later.
+ * CRYPTO_NOT_SUPPORTED and CRYPTO_MECH_NOT_SUPPORTED: No provider is
+ * capable of a function or a mechanism.
+ * CRYPTO_INVALID_KEY: bogus 'key' argument.
+ * CRYPTO_INVALID_PLAINTEXT: bogus 'plaintext' argument.
+ * CRYPTO_INVALID_CIPHERTEXT: bogus 'ciphertext' argument.
+ */
+
+/*
+ * crypto_cipher_init_prov()
+ *
+ * Arguments:
+ *
+ * pd: provider descriptor
+ * sid: session id
+ * mech: crypto_mechanism_t pointer.
+ * mech_type is a valid value previously returned by
+ * crypto_mech2id();
+ * When the mech's parameter is not NULL, its definition depends
+ * on the standard definition of the mechanism.
+ * key: pointer to a crypto_key_t structure.
+ * tmpl: a crypto_ctx_template_t, opaque template of a context of an
+ * encryption or decryption with the 'mech' using 'key'.
+ * 'tmpl' is created by a previous call to
+ * crypto_create_ctx_template().
+ * ctxp: Pointer to a crypto_context_t.
+ * func: CRYPTO_FG_ENCRYPT or CRYPTO_FG_DECRYPT.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * This is a common function invoked internally by both
+ * crypto_encrypt_init() and crypto_decrypt_init().
+ * Asynchronously submits a request for, or synchronously performs the
+ * initialization of an encryption or a decryption operation.
+ * When possible and applicable, will internally use the pre-expanded key
+ * schedule from the context template, tmpl.
+ * When complete and successful, 'ctxp' will contain a crypto_context_t
+ * valid for later calls to encrypt_update() and encrypt_final(), or
+ * decrypt_update() and decrypt_final().
+ * The caller should hold a reference on the specified provider
+ * descriptor before calling this function.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+static int
+crypto_cipher_init_prov(crypto_provider_t provider, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_spi_ctx_template_t tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *crq, crypto_func_group_t func)
+{
+ int error;
+ crypto_ctx_t *ctx;
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd = provider;
+ kcf_provider_desc_t *real_provider = pd;
+
+ ASSERT(KCF_PROV_REFHELD(pd));
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ if (func == CRYPTO_FG_ENCRYPT) {
+ error = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
+ &real_provider, CRYPTO_FG_ENCRYPT);
+ } else {
+ error = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
+ &real_provider, CRYPTO_FG_DECRYPT);
+ }
+
+ if (error != CRYPTO_SUCCESS)
+ return (error);
+ }
+
+ /* Allocate and initialize the canonical context */
+ if ((ctx = kcf_new_ctx(crq, real_provider, sid)) == NULL) {
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+ return (CRYPTO_HOST_MEMORY);
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(crq, pd)) {
+ crypto_mechanism_t lmech;
+
+ lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, real_provider, &lmech);
+
+ if (func == CRYPTO_FG_ENCRYPT)
+ error = KCF_PROV_ENCRYPT_INIT(real_provider, ctx,
+ &lmech, key, tmpl, KCF_SWFP_RHNDL(crq));
+ else {
+ ASSERT(func == CRYPTO_FG_DECRYPT);
+
+ error = KCF_PROV_DECRYPT_INIT(real_provider, ctx,
+ &lmech, key, tmpl, KCF_SWFP_RHNDL(crq));
+ }
+ KCF_PROV_INCRSTATS(pd, error);
+
+ goto done;
+ }
+
+ /* Check if context sharing is possible */
+ if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
+ key->ck_format == CRYPTO_KEY_RAW &&
+ KCF_CAN_SHARE_OPSTATE(pd, mech->cm_type)) {
+ kcf_context_t *tctxp = (kcf_context_t *)ctx;
+ kcf_provider_desc_t *tpd = NULL;
+ crypto_mech_info_t *sinfo;
+
+ if ((kcf_get_sw_prov(mech->cm_type, &tpd, &tctxp->kc_mech,
+ B_FALSE) == CRYPTO_SUCCESS)) {
+ int tlen;
+
+ sinfo = &(KCF_TO_PROV_MECHINFO(tpd, mech->cm_type));
+ /*
+ * key->ck_length from the consumer is always in bits.
+ * We convert it to be in the same unit registered by
+ * the provider in order to do a comparison.
+ */
+ if (sinfo->cm_mech_flags & CRYPTO_KEYSIZE_UNIT_IN_BYTES)
+ tlen = key->ck_length >> 3;
+ else
+ tlen = key->ck_length;
+ /*
+ * Check if the software provider can support context
+ * sharing and support this key length.
+ */
+ if ((sinfo->cm_mech_flags & CRYPTO_CAN_SHARE_OPSTATE) &&
+ (tlen >= sinfo->cm_min_key_length) &&
+ (tlen <= sinfo->cm_max_key_length)) {
+ ctx->cc_flags = CRYPTO_INIT_OPSTATE;
+ tctxp->kc_sw_prov_desc = tpd;
+ } else
+ KCF_PROV_REFRELE(tpd);
+ }
+ }
+
+	if (func == CRYPTO_FG_ENCRYPT) {
+		KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_INIT, sid,
+		    mech, key, NULL, NULL, tmpl);
+	} else {
+		ASSERT(func == CRYPTO_FG_DECRYPT);
+		KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_INIT, sid,
+		    mech, key, NULL, NULL, tmpl);
+	}
+
+	error = kcf_submit_request(real_provider, ctx, crq, &params,
+	    B_FALSE);
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+
+done:
+ if ((error == CRYPTO_SUCCESS) || (error == CRYPTO_QUEUED))
+ *ctxp = (crypto_context_t)ctx;
+ else {
+ /* Release the hold done in kcf_new_ctx(). */
+ KCF_CONTEXT_REFRELE((kcf_context_t *)ctx->cc_framework_private);
+ }
+
+ return (error);
+}
+
+/*
+ * Same as crypto_cipher_init_prov(), but relies on the scheduler to pick
+ * an appropriate provider. See crypto_cipher_init_prov() comments for more
+ * details.
+ */
+static int
+crypto_cipher_init(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *crq, crypto_func_group_t func)
+{
+ int error;
+ kcf_mech_entry_t *me;
+ kcf_provider_desc_t *pd;
+ kcf_ctx_template_t *ctx_tmpl;
+ crypto_spi_ctx_template_t spi_ctx_tmpl = NULL;
+ kcf_prov_tried_t *list = NULL;
+
+retry:
+ /* pd is returned held */
+ if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
+ list, func, CHECK_RESTRICT(crq), 0)) == NULL) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ return (error);
+ }
+
+ /*
+	 * For SW providers, check the validity of the context template.
+	 * It is very rare that the generation number mismatches, so it
+	 * is acceptable to fail here and let the consumer recover by
+	 * freeing this tmpl and creating a new one for the key and the
+	 * new SW provider.
+ */
+ if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
+ ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
+ if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ KCF_PROV_REFRELE(pd);
+ return (CRYPTO_OLD_CTX_TEMPLATE);
+ } else {
+ spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
+ }
+ }
+
+ error = crypto_cipher_init_prov(pd, pd->pd_sid, mech, key,
+ spi_ctx_tmpl, ctxp, crq, func);
+ if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
+ IS_RECOVERABLE(error)) {
+ /* Add pd to the linked list of providers tried. */
+ if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ goto retry;
+ }
+
+ if (list != NULL)
+ kcf_free_triedlist(list);
+
+ KCF_PROV_REFRELE(pd);
+ return (error);
+}
+
+/*
+ * crypto_encrypt_prov()
+ *
+ * Arguments:
+ * pd: provider descriptor
+ * sid: session id
+ * mech: crypto_mechanism_t pointer.
+ * mech_type is a valid value previously returned by
+ * crypto_mech2id();
+ * When the mech's parameter is not NULL, its definition depends
+ * on the standard definition of the mechanism.
+ * key: pointer to a crypto_key_t structure.
+ * plaintext: The message to be encrypted
+ * ciphertext: Storage for the encrypted message. The length needed
+ * depends on the mechanism, and the plaintext's size.
+ * tmpl: a crypto_ctx_template_t, opaque template of a context of an
+ * encryption with the 'mech' using 'key'. 'tmpl' is created by
+ * a previous call to crypto_create_ctx_template().
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs a
+ * single-part encryption of 'plaintext' with the mechanism 'mech', using
+ * the key 'key'.
+ * When complete and successful, 'ciphertext' will contain the encrypted
+ * message.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_encrypt_prov(crypto_provider_t provider, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_data_t *plaintext, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_data_t *ciphertext,
+ crypto_call_req_t *crq)
+{
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd = provider;
+ kcf_provider_desc_t *real_provider = pd;
+ int error;
+
+ ASSERT(KCF_PROV_REFHELD(pd));
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ error = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
+ &real_provider, CRYPTO_FG_ENCRYPT_ATOMIC);
+
+ if (error != CRYPTO_SUCCESS)
+ return (error);
+ }
+
+	KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_ATOMIC, sid, mech, key,
+	    plaintext, ciphertext, tmpl);
+
+	error = kcf_submit_request(real_provider, NULL, crq, &params, B_FALSE);
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+
+ return (error);
+}
+
+/*
+ * Same as crypto_encrypt_prov(), but relies on the scheduler to pick
+ * a provider. See crypto_encrypt_prov() for more details.
+ */
+int
+crypto_encrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext,
+ crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *ciphertext,
+ crypto_call_req_t *crq)
+{
+ int error;
+ kcf_mech_entry_t *me;
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd;
+ kcf_ctx_template_t *ctx_tmpl;
+ crypto_spi_ctx_template_t spi_ctx_tmpl = NULL;
+ kcf_prov_tried_t *list = NULL;
+
+retry:
+ /* pd is returned held */
+ if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
+ list, CRYPTO_FG_ENCRYPT_ATOMIC, CHECK_RESTRICT(crq),
+ plaintext->cd_length)) == NULL) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ return (error);
+ }
+
+ /*
+	 * For SW providers, check the validity of the context template.
+	 * It is very rare that the generation number mismatches, so it
+	 * is acceptable to fail here and let the consumer recover by
+	 * freeing this tmpl and creating a new one for the key and the
+	 * new SW provider.
+ */
+ if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
+ ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
+ if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ KCF_PROV_REFRELE(pd);
+ return (CRYPTO_OLD_CTX_TEMPLATE);
+ } else {
+ spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
+ }
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(crq, pd)) {
+ crypto_mechanism_t lmech;
+
+ lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
+
+ error = KCF_PROV_ENCRYPT_ATOMIC(pd, pd->pd_sid, &lmech, key,
+ plaintext, ciphertext, spi_ctx_tmpl, KCF_SWFP_RHNDL(crq));
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+		KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_ATOMIC, pd->pd_sid,
+		    mech, key, plaintext, ciphertext, spi_ctx_tmpl);
+		error = kcf_submit_request(pd, NULL, crq, &params, B_FALSE);
+ }
+
+ if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
+ IS_RECOVERABLE(error)) {
+ /* Add pd to the linked list of providers tried. */
+ if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ goto retry;
+ }
+
+ if (list != NULL)
+ kcf_free_triedlist(list);
+
+ KCF_PROV_REFRELE(pd);
+ return (error);
+}
+
+/*
+ * crypto_encrypt_init_prov()
+ *
+ * Calls crypto_cipher_init_prov() to initialize an encryption operation.
+ */
+int
+crypto_encrypt_init_prov(crypto_provider_t pd, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *crq)
+{
+ return (crypto_cipher_init_prov(pd, sid, mech, key, tmpl, ctxp, crq,
+ CRYPTO_FG_ENCRYPT));
+}
+
+/*
+ * crypto_encrypt_init()
+ *
+ * Calls crypto_cipher_init() to initialize an encryption operation
+ */
+int
+crypto_encrypt_init(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *crq)
+{
+ return (crypto_cipher_init(mech, key, tmpl, ctxp, crq,
+ CRYPTO_FG_ENCRYPT));
+}
+
+/*
+ * crypto_encrypt_update()
+ *
+ * Arguments:
+ * context: A crypto_context_t initialized by encrypt_init().
+ * plaintext: The message part to be encrypted
+ * ciphertext: Storage for the encrypted message part.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs a
+ * part of an encryption operation.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_encrypt_update(crypto_context_t context, crypto_data_t *plaintext,
+ crypto_data_t *ciphertext, crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_ENCRYPT_UPDATE(pd, ctx, plaintext,
+ ciphertext, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ return (error);
+ }
+
+ /* Check if we should use a software provider for small jobs */
+ if ((ctx->cc_flags & CRYPTO_USE_OPSTATE) && cr == NULL) {
+ if (plaintext->cd_length < kcf_ctx->kc_mech->me_threshold &&
+ kcf_ctx->kc_sw_prov_desc != NULL &&
+ KCF_IS_PROV_USABLE(kcf_ctx->kc_sw_prov_desc)) {
+ pd = kcf_ctx->kc_sw_prov_desc;
+ }
+ }
+
+	KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_UPDATE,
+	    ctx->cc_session, NULL, NULL, plaintext, ciphertext, NULL);
+	error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+
+ return (error);
+}
+
+/*
+ * crypto_encrypt_final()
+ *
+ * Arguments:
+ * context: A crypto_context_t initialized by encrypt_init().
+ * ciphertext: Storage for the last part of encrypted message
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs the
+ * final part of an encryption operation.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_encrypt_final(crypto_context_t context, crypto_data_t *ciphertext,
+ crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_ENCRYPT_FINAL(pd, ctx, ciphertext, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+		KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_FINAL,
+		    ctx->cc_session, NULL, NULL, NULL, ciphertext, NULL);
+		error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ /* Release the hold done in kcf_new_ctx() during init step. */
+ KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
+ return (error);
+}
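+
+/*
+ * Editorial sketch of the multi-part flow implemented by the three
+ * routines above (not part of the original source; the mechanism, key
+ * and crypto_data_t buffers are assumed to be set up by the caller):
+ *
+ *	crypto_context_t cc;
+ *	if (crypto_encrypt_init(&mech, &key, NULL, &cc, NULL) ==
+ *	    CRYPTO_SUCCESS) {
+ *		(void) crypto_encrypt_update(cc, &pt_part, &ct_part, NULL);
+ *		(void) crypto_encrypt_final(cc, &ct_last, NULL);
+ *	}
+ *
+ * A NULL crypto_call_req_t makes each step synchronous; the context is
+ * released by the final step.
+ */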
+
+/*
+ * crypto_decrypt_prov()
+ *
+ * Arguments:
+ * pd: provider descriptor
+ * sid: session id
+ * mech: crypto_mechanism_t pointer.
+ * mech_type is a valid value previously returned by
+ * crypto_mech2id();
+ * When the mech's parameter is not NULL, its definition depends
+ * on the standard definition of the mechanism.
+ * key: pointer to a crypto_key_t structure.
+ *	ciphertext: The message to be decrypted
+ *	plaintext: Storage for the decrypted message. The length needed
+ *		depends on the mechanism, and the ciphertext's size.
+ *	tmpl:	a crypto_ctx_template_t, opaque template of a context of a
+ *		decryption with the 'mech' using 'key'. 'tmpl' is created by
+ *		a previous call to crypto_create_ctx_template().
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs a
+ * single-part decryption of 'ciphertext' with the mechanism 'mech', using
+ * the key 'key'.
+ * When complete and successful, 'plaintext' will contain the decrypted
+ * message.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_decrypt_prov(crypto_provider_t provider, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_data_t *ciphertext, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_data_t *plaintext,
+ crypto_call_req_t *crq)
+{
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd = provider;
+ kcf_provider_desc_t *real_provider = pd;
+ int rv;
+
+ ASSERT(KCF_PROV_REFHELD(pd));
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ rv = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
+ &real_provider, CRYPTO_FG_DECRYPT_ATOMIC);
+
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ }
+
+	KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_ATOMIC, sid, mech, key,
+	    ciphertext, plaintext, tmpl);
+
+	rv = kcf_submit_request(real_provider, NULL, crq, &params, B_FALSE);
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+
+ return (rv);
+}
+
+/*
+ * Same as crypto_decrypt_prov(), but relies on the KCF scheduler to
+ * choose a provider. See crypto_decrypt_prov() comments for more
+ * information.
+ */
+int
+crypto_decrypt(crypto_mechanism_t *mech, crypto_data_t *ciphertext,
+ crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *plaintext,
+ crypto_call_req_t *crq)
+{
+ int error;
+ kcf_mech_entry_t *me;
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd;
+ kcf_ctx_template_t *ctx_tmpl;
+ crypto_spi_ctx_template_t spi_ctx_tmpl = NULL;
+ kcf_prov_tried_t *list = NULL;
+
+retry:
+ /* pd is returned held */
+ if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
+ list, CRYPTO_FG_DECRYPT_ATOMIC, CHECK_RESTRICT(crq),
+ ciphertext->cd_length)) == NULL) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ return (error);
+ }
+
+ /*
+	 * For SW providers, check the validity of the context template.
+	 * It is very rare that the generation number mismatches, so it
+	 * is acceptable to fail here and let the consumer recover by
+	 * freeing this tmpl and creating a new one for the key and the
+	 * new SW provider.
+ */
+ if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
+ ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
+ if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ KCF_PROV_REFRELE(pd);
+ return (CRYPTO_OLD_CTX_TEMPLATE);
+ } else {
+ spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
+ }
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(crq, pd)) {
+ crypto_mechanism_t lmech;
+
+ lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
+
+ error = KCF_PROV_DECRYPT_ATOMIC(pd, pd->pd_sid, &lmech, key,
+ ciphertext, plaintext, spi_ctx_tmpl, KCF_SWFP_RHNDL(crq));
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+		KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_ATOMIC, pd->pd_sid,
+		    mech, key, ciphertext, plaintext, spi_ctx_tmpl);
+		error = kcf_submit_request(pd, NULL, crq, &params, B_FALSE);
+ }
+
+ if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
+ IS_RECOVERABLE(error)) {
+ /* Add pd to the linked list of providers tried. */
+ if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ goto retry;
+ }
+
+ if (list != NULL)
+ kcf_free_triedlist(list);
+
+ KCF_PROV_REFRELE(pd);
+ return (error);
+}
+
+/*
+ * crypto_decrypt_init_prov()
+ *
+ * Calls crypto_cipher_init_prov() to initialize a decryption operation
+ */
+int
+crypto_decrypt_init_prov(crypto_provider_t pd, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *crq)
+{
+ return (crypto_cipher_init_prov(pd, sid, mech, key, tmpl, ctxp, crq,
+ CRYPTO_FG_DECRYPT));
+}
+
+/*
+ * crypto_decrypt_init()
+ *
+ * Calls crypto_cipher_init() to initialize a decryption operation
+ */
+int
+crypto_decrypt_init(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *crq)
+{
+ return (crypto_cipher_init(mech, key, tmpl, ctxp, crq,
+ CRYPTO_FG_DECRYPT));
+}
+
+/*
+ * crypto_decrypt_update()
+ *
+ * Arguments:
+ * context: A crypto_context_t initialized by decrypt_init().
+ * ciphertext: The message part to be decrypted
+ * plaintext: Storage for the decrypted message part.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs a
+ * part of a decryption operation.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_decrypt_update(crypto_context_t context, crypto_data_t *ciphertext,
+ crypto_data_t *plaintext, crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_DECRYPT_UPDATE(pd, ctx, ciphertext,
+ plaintext, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ return (error);
+ }
+
+ /* Check if we should use a software provider for small jobs */
+ if ((ctx->cc_flags & CRYPTO_USE_OPSTATE) && cr == NULL) {
+ if (ciphertext->cd_length < kcf_ctx->kc_mech->me_threshold &&
+ kcf_ctx->kc_sw_prov_desc != NULL &&
+ KCF_IS_PROV_USABLE(kcf_ctx->kc_sw_prov_desc)) {
+ pd = kcf_ctx->kc_sw_prov_desc;
+ }
+ }
+
+	KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_UPDATE,
+	    ctx->cc_session, NULL, NULL, ciphertext, plaintext, NULL);
+	error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+
+ return (error);
+}
+
+/*
+ * crypto_decrypt_final()
+ *
+ * Arguments:
+ * context: A crypto_context_t initialized by decrypt_init().
+ * plaintext: Storage for the last part of the decrypted message
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs the
+ * final part of a decryption operation.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_decrypt_final(crypto_context_t context, crypto_data_t *plaintext,
+ crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_DECRYPT_FINAL(pd, ctx, plaintext,
+ NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+		KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_FINAL,
+		    ctx->cc_session, NULL, NULL, NULL, plaintext, NULL);
+		error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ /* Release the hold done in kcf_new_ctx() during init step. */
+ KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
+ return (error);
+}
+
+/*
+ * See comments for crypto_encrypt_update().
+ */
+int
+crypto_encrypt_single(crypto_context_t context, crypto_data_t *plaintext,
+ crypto_data_t *ciphertext, crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_ENCRYPT(pd, ctx, plaintext,
+ ciphertext, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+		KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_SINGLE, pd->pd_sid,
+		    NULL, NULL, plaintext, ciphertext, NULL);
+		error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ /* Release the hold done in kcf_new_ctx() during init step. */
+ KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
+ return (error);
+}
+
+/*
+ * See comments for crypto_decrypt_update().
+ */
+int
+crypto_decrypt_single(crypto_context_t context, crypto_data_t *ciphertext,
+ crypto_data_t *plaintext, crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_DECRYPT(pd, ctx, ciphertext,
+ plaintext, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+		KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_SINGLE, pd->pd_sid,
+		    NULL, NULL, ciphertext, plaintext, NULL);
+		error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ /* Release the hold done in kcf_new_ctx() during init step. */
+ KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
+ return (error);
+}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(crypto_cipher_init_prov);
+EXPORT_SYMBOL(crypto_cipher_init);
+EXPORT_SYMBOL(crypto_encrypt_prov);
+EXPORT_SYMBOL(crypto_encrypt);
+EXPORT_SYMBOL(crypto_encrypt_init_prov);
+EXPORT_SYMBOL(crypto_encrypt_init);
+EXPORT_SYMBOL(crypto_encrypt_update);
+EXPORT_SYMBOL(crypto_encrypt_final);
+EXPORT_SYMBOL(crypto_decrypt_prov);
+EXPORT_SYMBOL(crypto_decrypt);
+EXPORT_SYMBOL(crypto_decrypt_init_prov);
+EXPORT_SYMBOL(crypto_decrypt_init);
+EXPORT_SYMBOL(crypto_decrypt_update);
+EXPORT_SYMBOL(crypto_decrypt_final);
+EXPORT_SYMBOL(crypto_encrypt_single);
+EXPORT_SYMBOL(crypto_decrypt_single);
+#endif
diff --git a/zfs/module/icp/api/kcf_ctxops.c b/zfs/module/icp/api/kcf_ctxops.c
new file mode 100644
index 000000000000..3f90674b0a33
--- /dev/null
+++ b/zfs/module/icp/api/kcf_ctxops.c
@@ -0,0 +1,151 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/api.h>
+#include <sys/crypto/spi.h>
+#include <sys/crypto/sched_impl.h>
+
+/*
+ * Crypto contexts manipulation routines
+ */
+
+/*
+ * crypto_create_ctx_template()
+ *
+ * Arguments:
+ *
+ * mech: crypto_mechanism_t pointer.
+ * mech_type is a valid value previously returned by
+ * crypto_mech2id();
+ * When the mech's parameter is not NULL, its definition depends
+ * on the standard definition of the mechanism.
+ * key: pointer to a crypto_key_t structure.
+ * ptmpl: a storage for the opaque crypto_ctx_template_t, allocated and
+ * initialized by the software provider this routine is
+ * dispatched to.
+ * kmflag: KM_SLEEP/KM_NOSLEEP mem. alloc. flag.
+ *
+ * Description:
+ * Redirects the call to the software provider of the specified
+ * mechanism. That provider will allocate and pre-compute/pre-expand
+ * the context template, reusable by later calls to crypto_xxx_init().
+ * The size and address of that provider context template are stored
+ * in an internal structure, kcf_ctx_template_t. The address of that
+ * structure is given back to the caller in *ptmpl.
+ *
+ * Context:
+ * Process or interrupt.
+ *
+ * Returns:
+ * CRYPTO_SUCCESS when the context template is successfully created.
+ * CRYPTO_HOST_MEMORY: memory allocation failure.
+ * CRYPTO_ARGUMENTS_BAD: NULL storage for the ctx template.
+ * CRYPTO_MECHANISM_INVALID: invalid mechanism 'mech'.
+ */
+int
+crypto_create_ctx_template(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t *ptmpl, int kmflag)
+{
+ int error;
+ kcf_mech_entry_t *me;
+ kcf_provider_desc_t *pd;
+ kcf_ctx_template_t *ctx_tmpl;
+ crypto_mechanism_t prov_mech;
+
+ /* Validate the arguments. */
+
+ if (ptmpl == NULL)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ if (mech == NULL)
+ return (CRYPTO_MECHANISM_INVALID);
+
+ error = kcf_get_sw_prov(mech->cm_type, &pd, &me, B_TRUE);
+ if (error != CRYPTO_SUCCESS)
+ return (error);
+
+ if ((ctx_tmpl = (kcf_ctx_template_t *)kmem_alloc(
+ sizeof (kcf_ctx_template_t), kmflag)) == NULL) {
+ KCF_PROV_REFRELE(pd);
+ return (CRYPTO_HOST_MEMORY);
+ }
+
+ /* Pass a mechtype that the provider understands */
+ prov_mech.cm_type = KCF_TO_PROV_MECHNUM(pd, mech->cm_type);
+ prov_mech.cm_param = mech->cm_param;
+ prov_mech.cm_param_len = mech->cm_param_len;
+
+ error = KCF_PROV_CREATE_CTX_TEMPLATE(pd, &prov_mech, key,
+ &(ctx_tmpl->ct_prov_tmpl), &(ctx_tmpl->ct_size), KCF_RHNDL(kmflag));
+
+ if (error == CRYPTO_SUCCESS) {
+ ctx_tmpl->ct_generation = me->me_gen_swprov;
+ *ptmpl = ctx_tmpl;
+ } else {
+ kmem_free(ctx_tmpl, sizeof (kcf_ctx_template_t));
+ }
+ KCF_PROV_REFRELE(pd);
+
+ return (error);
+}
+
+/*
+ * crypto_destroy_ctx_template()
+ *
+ * Arguments:
+ *
+ * tmpl: an opaque crypto_ctx_template_t previously created by
+ * crypto_create_ctx_template()
+ *
+ * Description:
+ * Frees the embedded crypto_spi_ctx_template_t, then the
+ * kcf_ctx_template_t.
+ *
+ * Context:
+ * Process or interrupt.
+ *
+ */
+void
+crypto_destroy_ctx_template(crypto_ctx_template_t tmpl)
+{
+ kcf_ctx_template_t *ctx_tmpl = (kcf_ctx_template_t *)tmpl;
+
+ if (ctx_tmpl == NULL)
+ return;
+
+ ASSERT(ctx_tmpl->ct_prov_tmpl != NULL);
+
+ bzero(ctx_tmpl->ct_prov_tmpl, ctx_tmpl->ct_size);
+ kmem_free(ctx_tmpl->ct_prov_tmpl, ctx_tmpl->ct_size);
+ kmem_free(ctx_tmpl, sizeof (kcf_ctx_template_t));
+}
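+
+/*
+ * A hypothetical caller sketch (not part of this file): a template is
+ * pre-computed once, reused across crypto_mac()/crypto_mac_init() calls,
+ * and destroyed when done. The SUN_CKM_* name is assumed from
+ * sys/crypto/common.h; the key setup is elided.
+ *
+ *	crypto_mechanism_t mech = { 0 };
+ *	crypto_key_t key = { 0 };
+ *	crypto_ctx_template_t tmpl = NULL;
+ *
+ *	mech.cm_type = crypto_mech2id(SUN_CKM_SHA256_HMAC);
+ *	... fill in 'key' with raw key material ...
+ *	if (crypto_create_ctx_template(&mech, &key, &tmpl,
+ *	    KM_SLEEP) == CRYPTO_SUCCESS) {
+ *		... use tmpl ...
+ *		crypto_destroy_ctx_template(tmpl);
+ *	}
+ */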
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(crypto_create_ctx_template);
+EXPORT_SYMBOL(crypto_destroy_ctx_template);
+#endif
diff --git a/zfs/module/icp/api/kcf_digest.c b/zfs/module/icp/api/kcf_digest.c
new file mode 100644
index 000000000000..b58d3b452829
--- /dev/null
+++ b/zfs/module/icp/api/kcf_digest.c
@@ -0,0 +1,494 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/api.h>
+#include <sys/crypto/spi.h>
+#include <sys/crypto/sched_impl.h>
+
+#define CRYPTO_OPS_OFFSET(f) offsetof(crypto_ops_t, co_##f)
+#define CRYPTO_DIGEST_OFFSET(f) offsetof(crypto_digest_ops_t, f)
+
+/*
+ * Message digest routines
+ */
+
+/*
+ * The following are the possible returned values common to all the routines
+ * below. The applicability of some of these return values depends on the
+ * presence of the arguments.
+ *
+ * CRYPTO_SUCCESS: The operation completed successfully.
+ * CRYPTO_QUEUED: A request was submitted successfully. The callback
+ * routine will be called when the operation is done.
+ * CRYPTO_MECHANISM_INVALID or CRYPTO_INVALID_MECH_PARAM
+ * for problems with the 'mech'.
+ * CRYPTO_INVALID_DATA for bogus 'data'
+ * CRYPTO_HOST_MEMORY for failure to allocate memory to handle this work.
+ * CRYPTO_INVALID_CONTEXT: Not a valid context.
+ * CRYPTO_BUSY: Cannot process the request now. Schedule a
+ * crypto_bufcall(), or try later.
+ * CRYPTO_NOT_SUPPORTED and CRYPTO_MECH_NOT_SUPPORTED:
+ * No provider is capable of a function or a mechanism.
+ */
+
+
+/*
+ * crypto_digest_prov()
+ *
+ * Arguments:
+ * pd: pointer to the descriptor of the provider to use for this
+ * operation.
+ * sid: provider session id.
+ * mech: crypto_mechanism_t pointer.
+ * mech_type is a valid value previously returned by
+ * crypto_mech2id();
+ * When the mech's parameter is not NULL, its definition depends
+ * on the standard definition of the mechanism.
+ * data: The message to be digested.
+ * digest: Storage for the digest. The length needed depends on the
+ * mechanism.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs the
+ * digesting operation of 'data' on the specified
+ * provider with the specified session.
+ * When complete and successful, 'digest' will contain the digest value.
+ * The caller should hold a reference on the specified provider
+ * descriptor before calling this function.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_digest_prov(crypto_provider_t provider, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_data_t *data, crypto_data_t *digest,
+ crypto_call_req_t *crq)
+{
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd = provider;
+ kcf_provider_desc_t *real_provider = pd;
+ int rv;
+
+ ASSERT(KCF_PROV_REFHELD(pd));
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ rv = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq),
+ pd, &real_provider, CRYPTO_FG_DIGEST_ATOMIC);
+
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ }
+ KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_ATOMIC, sid, mech, NULL,
+ data, digest);
+
+ /* no crypto context to carry between multiple parts. */
+ rv = kcf_submit_request(real_provider, NULL, crq, &params, B_FALSE);
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+
+ return (rv);
+}
+
+
+/*
+ * Same as crypto_digest_prov(), but relies on the KCF scheduler to
+ * choose a provider. See crypto_digest_prov() comments for more information.
+ */
+int
+crypto_digest(crypto_mechanism_t *mech, crypto_data_t *data,
+ crypto_data_t *digest, crypto_call_req_t *crq)
+{
+ int error;
+ kcf_provider_desc_t *pd;
+ kcf_req_params_t params;
+ kcf_prov_tried_t *list = NULL;
+
+retry:
+ /* The pd is returned held */
+ if ((pd = kcf_get_mech_provider(mech->cm_type, NULL, &error, list,
+ CRYPTO_FG_DIGEST_ATOMIC, CHECK_RESTRICT(crq),
+ data->cd_length)) == NULL) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ return (error);
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(crq, pd)) {
+ crypto_mechanism_t lmech;
+
+ lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
+ error = KCF_PROV_DIGEST_ATOMIC(pd, pd->pd_sid, &lmech, data,
+ digest, KCF_SWFP_RHNDL(crq));
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
+ (pd->pd_flags & CRYPTO_HASH_NO_UPDATE) &&
+ (data->cd_length > pd->pd_hash_limit)) {
+ error = CRYPTO_BUFFER_TOO_BIG;
+ } else {
+ KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_ATOMIC,
+ pd->pd_sid, mech, NULL, data, digest);
+
+ /* no crypto context to carry between multiple parts. */
+ error = kcf_submit_request(pd, NULL, crq, &params,
+ B_FALSE);
+ }
+ }
+
+ if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
+ IS_RECOVERABLE(error)) {
+ /* Add pd to the linked list of providers tried. */
+ if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ goto retry;
+ }
+
+ if (list != NULL)
+ kcf_free_triedlist(list);
+
+ KCF_PROV_REFRELE(pd);
+ return (error);
+}
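+
+/*
+ * A hypothetical caller sketch (not part of this file) of a single-part
+ * digest over a raw buffer; 'buf' and 'md' are placeholder names. Passing
+ * a NULL crypto_call_req_t makes the call synchronous.
+ *
+ *	crypto_mechanism_t mech = { 0 };
+ *	crypto_data_t in = { 0 }, out = { 0 };
+ *	char buf[128], md[32];
+ *
+ *	mech.cm_type = crypto_mech2id(SUN_CKM_SHA256);
+ *	in.cd_format = CRYPTO_DATA_RAW;
+ *	in.cd_length = sizeof (buf);
+ *	in.cd_raw.iov_base = buf;
+ *	in.cd_raw.iov_len = sizeof (buf);
+ *	... set up 'out' the same way over 'md' ...
+ *	(void) crypto_digest(&mech, &in, &out, NULL);
+ */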
+
+/*
+ * crypto_digest_init_prov()
+ *
+ * pd: pointer to the descriptor of the provider to use for this
+ * operation.
+ * sid: provider session id.
+ * mech: crypto_mechanism_t pointer.
+ * mech_type is a valid value previously returned by
+ * crypto_mech2id();
+ * When the mech's parameter is not NULL, its definition depends
+ * on the standard definition of the mechanism.
+ * ctxp: Pointer to a crypto_context_t.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs the
+ * initialization of a message digest operation on the specified
+ * provider with the specified session.
+ * When complete and successful, 'ctxp' will contain a crypto_context_t
+ * valid for later calls to digest_update() and digest_final().
+ * The caller should hold a reference on the specified provider
+ * descriptor before calling this function.
+ */
+int
+crypto_digest_init_prov(crypto_provider_t provider, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_context_t *ctxp, crypto_call_req_t *crq)
+{
+ int error;
+ crypto_ctx_t *ctx;
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd = provider;
+ kcf_provider_desc_t *real_provider = pd;
+
+ ASSERT(KCF_PROV_REFHELD(pd));
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ error = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
+ &real_provider, CRYPTO_FG_DIGEST);
+
+ if (error != CRYPTO_SUCCESS)
+ return (error);
+ }
+
+ /* Allocate and initialize the canonical context */
+ if ((ctx = kcf_new_ctx(crq, real_provider, sid)) == NULL) {
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+ return (CRYPTO_HOST_MEMORY);
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(crq, pd)) {
+ crypto_mechanism_t lmech;
+
+ lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, real_provider, &lmech);
+ error = KCF_PROV_DIGEST_INIT(real_provider, ctx, &lmech,
+ KCF_SWFP_RHNDL(crq));
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_INIT, sid,
+ mech, NULL, NULL, NULL);
+ error = kcf_submit_request(real_provider, ctx, crq, &params,
+ B_FALSE);
+ }
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+
+ if ((error == CRYPTO_SUCCESS) || (error == CRYPTO_QUEUED))
+ *ctxp = (crypto_context_t)ctx;
+ else {
+ /* Release the hold done in kcf_new_ctx(). */
+ KCF_CONTEXT_REFRELE((kcf_context_t *)ctx->cc_framework_private);
+ }
+
+ return (error);
+}
+
+/*
+ * Same as crypto_digest_init_prov(), but relies on the KCF scheduler
+ * to choose a provider. See crypto_digest_init_prov() comments for
+ * more information.
+ */
+int
+crypto_digest_init(crypto_mechanism_t *mech, crypto_context_t *ctxp,
+ crypto_call_req_t *crq)
+{
+ int error;
+ kcf_provider_desc_t *pd;
+ kcf_prov_tried_t *list = NULL;
+
+retry:
+ /* The pd is returned held */
+ if ((pd = kcf_get_mech_provider(mech->cm_type, NULL, &error,
+ list, CRYPTO_FG_DIGEST, CHECK_RESTRICT(crq), 0)) == NULL) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ return (error);
+ }
+
+ if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
+ (pd->pd_flags & CRYPTO_HASH_NO_UPDATE)) {
+ /*
+ * The hardware provider has limited digest support.
+ * So, we fall back early here to using a software provider.
+ *
+ * XXX - need to enhance this to do the fallback later in
+ * crypto_digest_update() if the size of the accumulated input data
+ * exceeds the maximum size digestible by the hardware provider.
+ */
+ error = CRYPTO_BUFFER_TOO_BIG;
+ } else {
+ error = crypto_digest_init_prov(pd, pd->pd_sid,
+ mech, ctxp, crq);
+ }
+
+ if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
+ IS_RECOVERABLE(error)) {
+ /* Add pd to the linked list of providers tried. */
+ if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ goto retry;
+ }
+
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ KCF_PROV_REFRELE(pd);
+ return (error);
+}
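+
+/*
+ * A hypothetical multi-part sketch (not part of this file): the context
+ * returned by crypto_digest_init() is threaded through the update and
+ * final steps; the crypto_data_t setup is elided.
+ *
+ *	crypto_context_t ctx;
+ *
+ *	if (crypto_digest_init(&mech, &ctx, NULL) != CRYPTO_SUCCESS)
+ *		return;
+ *	(void) crypto_digest_update(ctx, &part1, NULL);
+ *	(void) crypto_digest_update(ctx, &part2, NULL);
+ *	(void) crypto_digest_final(ctx, &digest, NULL);
+ */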
+
+/*
+ * crypto_digest_update()
+ *
+ * Arguments:
+ * context: A crypto_context_t initialized by digest_init().
+ * data: The part of message to be digested.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs a
+ * part of a message digest operation.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_digest_update(crypto_context_t context, crypto_data_t *data,
+ crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_DIGEST_UPDATE(pd, ctx, data, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_UPDATE,
+ ctx->cc_session, NULL, NULL, data, NULL);
+ error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ return (error);
+}
+
+/*
+ * crypto_digest_final()
+ *
+ * Arguments:
+ * context: A crypto_context_t initialized by digest_init().
+ * digest: The storage for the digest.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs the
+ * final part of a message digest operation.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_digest_final(crypto_context_t context, crypto_data_t *digest,
+ crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_DIGEST_FINAL(pd, ctx, digest, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_FINAL,
+ ctx->cc_session, NULL, NULL, NULL, digest);
+ error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ /* Release the hold done in kcf_new_ctx() during init step. */
+ KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
+ return (error);
+}
+
+/*
+ * Performs a digest update on the specified key. Note that there is
+ * no k-API crypto_digest_key() equivalent of this function.
+ */
+int
+crypto_digest_key_prov(crypto_context_t context, crypto_key_t *key,
+ crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_DIGEST_KEY(pd, ctx, key, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_DIGEST_KEY,
+ ctx->cc_session, NULL, key, NULL, NULL);
+ error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ return (error);
+}
+
+/*
+ * See comments for crypto_digest_update() and crypto_digest_final().
+ */
+int
+crypto_digest_single(crypto_context_t context, crypto_data_t *data,
+ crypto_data_t *digest, crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_DIGEST(pd, ctx, data, digest, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_SINGLE, pd->pd_sid,
+ NULL, NULL, data, digest);
+ error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ /* Release the hold done in kcf_new_ctx() during init step. */
+ KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
+ return (error);
+}
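+
+/*
+ * A hypothetical sketch (not part of this file): completing an init'ed
+ * context with a single call instead of an update/final pair.
+ *
+ *	crypto_context_t ctx;
+ *
+ *	if (crypto_digest_init(&mech, &ctx, NULL) == CRYPTO_SUCCESS)
+ *		(void) crypto_digest_single(ctx, &data, &digest, NULL);
+ */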
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(crypto_digest_prov);
+EXPORT_SYMBOL(crypto_digest);
+EXPORT_SYMBOL(crypto_digest_init_prov);
+EXPORT_SYMBOL(crypto_digest_init);
+EXPORT_SYMBOL(crypto_digest_update);
+EXPORT_SYMBOL(crypto_digest_final);
+EXPORT_SYMBOL(crypto_digest_key_prov);
+EXPORT_SYMBOL(crypto_digest_single);
+#endif
diff --git a/zfs/module/icp/api/kcf_mac.c b/zfs/module/icp/api/kcf_mac.c
new file mode 100644
index 000000000000..2b4691c0371e
--- /dev/null
+++ b/zfs/module/icp/api/kcf_mac.c
@@ -0,0 +1,648 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/api.h>
+#include <sys/crypto/spi.h>
+#include <sys/crypto/sched_impl.h>
+
+#define CRYPTO_OPS_OFFSET(f) offsetof(crypto_ops_t, co_##f)
+#define CRYPTO_MAC_OFFSET(f) offsetof(crypto_mac_ops_t, f)
+
+/*
+ * Message authentication codes routines.
+ */
+
+/*
+ * The following are the possible returned values common to all the routines
+ * below. The applicability of some of these return values depends on the
+ * presence of the arguments.
+ *
+ * CRYPTO_SUCCESS: The operation completed successfully.
+ * CRYPTO_QUEUED: A request was submitted successfully. The callback
+ * routine will be called when the operation is done.
+ * CRYPTO_INVALID_MECH_NUMBER, CRYPTO_INVALID_MECH_PARAM, or
+ * CRYPTO_INVALID_MECH for problems with the 'mech'.
+ * CRYPTO_INVALID_DATA for bogus 'data'
+ * CRYPTO_HOST_MEMORY for failure to allocate memory to handle this work.
+ * CRYPTO_INVALID_CONTEXT: Not a valid context.
+ * CRYPTO_BUSY: Cannot process the request now. Schedule a
+ * crypto_bufcall(), or try later.
+ * CRYPTO_NOT_SUPPORTED and CRYPTO_MECH_NOT_SUPPORTED: No provider is
+ * capable of a function or a mechanism.
+ * CRYPTO_INVALID_KEY: bogus 'key' argument.
+ * CRYPTO_INVALID_MAC: bogus 'mac' argument.
+ */
+
+/*
+ * crypto_mac_prov()
+ *
+ * Arguments:
+ * mech: crypto_mechanism_t pointer.
+ * mech_type is a valid value previously returned by
+ * crypto_mech2id();
+ * When the mech's parameter is not NULL, its definition depends
+ * on the standard definition of the mechanism.
+ * key: pointer to a crypto_key_t structure.
+ * data: The message to compute the MAC for.
+ * mac: Storage for the MAC. The length needed depends on the mechanism.
+ * tmpl: a crypto_ctx_template_t, opaque template of a context of a
+ * MAC with the 'mech' using 'key'. 'tmpl' is created by
+ * a previous call to crypto_create_ctx_template().
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs a
+ * single-part message authentication of 'data' with the mechanism
+ * 'mech', using the key 'key', on the specified provider with
+ * the specified session id.
+ * When complete and successful, 'mac' will contain the message
+ * authentication code.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'crq'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_mac_prov(crypto_provider_t provider, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_data_t *data, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_data_t *mac, crypto_call_req_t *crq)
+{
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd = provider;
+ kcf_provider_desc_t *real_provider = pd;
+ int rv;
+
+ ASSERT(KCF_PROV_REFHELD(pd));
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ rv = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
+ &real_provider, CRYPTO_FG_MAC_ATOMIC);
+
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ }
+
+ KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_ATOMIC, sid, mech, key,
+ data, mac, tmpl);
+ rv = kcf_submit_request(real_provider, NULL, crq, &params, B_FALSE);
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+
+ return (rv);
+}
+
+/*
+ * Same as crypto_mac_prov(), but relies on the KCF scheduler to choose
+ * a provider. See crypto_mac_prov() comments for more information.
+ */
+int
+crypto_mac(crypto_mechanism_t *mech, crypto_data_t *data,
+ crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *mac,
+ crypto_call_req_t *crq)
+{
+ int error;
+ kcf_mech_entry_t *me;
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd;
+ kcf_ctx_template_t *ctx_tmpl;
+ crypto_spi_ctx_template_t spi_ctx_tmpl = NULL;
+ kcf_prov_tried_t *list = NULL;
+
+retry:
+ /* The pd is returned held */
+ if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
+ list, CRYPTO_FG_MAC_ATOMIC, CHECK_RESTRICT(crq),
+ data->cd_length)) == NULL) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ return (error);
+ }
+
+ /*
+ * For SW providers, check the validity of the context template.
+ * It is very rare that the generation number mismatches, so it
+ * is acceptable to fail here and let the consumer recover by
+ * freeing this tmpl and creating a new one for the key and the
+ * new SW provider.
+ */
+ if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
+ ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
+ if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ KCF_PROV_REFRELE(pd);
+ return (CRYPTO_OLD_CTX_TEMPLATE);
+ } else {
+ spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
+ }
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(crq, pd)) {
+ crypto_mechanism_t lmech;
+
+ lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
+
+ error = KCF_PROV_MAC_ATOMIC(pd, pd->pd_sid, &lmech, key, data,
+ mac, spi_ctx_tmpl, KCF_SWFP_RHNDL(crq));
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
+ (pd->pd_flags & CRYPTO_HASH_NO_UPDATE) &&
+ (data->cd_length > pd->pd_hash_limit)) {
+ /*
+ * XXX - We need a check to see if this is indeed
+ * a HMAC. So far, all kernel clients use
+ * this interface only for HMAC. So, this is fine
+ * for now.
+ */
+ error = CRYPTO_BUFFER_TOO_BIG;
+ } else {
+ KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_ATOMIC,
+ pd->pd_sid, mech, key, data, mac, spi_ctx_tmpl);
+
+ error = kcf_submit_request(pd, NULL, crq, &params,
+ KCF_ISDUALREQ(crq));
+ }
+ }
+
+ if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
+ IS_RECOVERABLE(error)) {
+ /* Add pd to the linked list of providers tried. */
+ if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ goto retry;
+ }
+
+ if (list != NULL)
+ kcf_free_triedlist(list);
+
+ KCF_PROV_REFRELE(pd);
+ return (error);
+}
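+
+/*
+ * A hypothetical caller sketch (not part of this file) of a synchronous
+ * single-part HMAC; 'key', 'data' and 'mac' are placeholder crypto_key_t
+ * and crypto_data_t objects, and tmpl may be NULL or a template from
+ * crypto_create_ctx_template().
+ *
+ *	crypto_mechanism_t mech = { 0 };
+ *
+ *	mech.cm_type = crypto_mech2id(SUN_CKM_SHA256_HMAC);
+ *	if (mech.cm_type == CRYPTO_MECH_INVALID)
+ *		return (CRYPTO_MECHANISM_INVALID);
+ *	(void) crypto_mac(&mech, &data, &key, tmpl, &mac, NULL);
+ */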
+
+/*
+ * Single part operation to compute the MAC corresponding to the specified
+ * 'data' and to verify that it matches the MAC specified by 'mac'.
+ * The other arguments are the same as the function crypto_mac_prov().
+ */
+int
+crypto_mac_verify_prov(crypto_provider_t provider, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_data_t *data, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_data_t *mac, crypto_call_req_t *crq)
+{
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd = provider;
+ kcf_provider_desc_t *real_provider = pd;
+ int rv;
+
+ ASSERT(KCF_PROV_REFHELD(pd));
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ rv = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
+ &real_provider, CRYPTO_FG_MAC_ATOMIC);
+
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ }
+
+ KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_MAC_VERIFY_ATOMIC, sid, mech,
+ key, data, mac, tmpl);
+ rv = kcf_submit_request(real_provider, NULL, crq, &params, B_FALSE);
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+
+ return (rv);
+}
+
+/*
+ * Same as crypto_mac_verify_prov(), but relies on the KCF scheduler to choose
+ * a provider. See crypto_mac_verify_prov() comments for more information.
+ */
+int
+crypto_mac_verify(crypto_mechanism_t *mech, crypto_data_t *data,
+ crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *mac,
+ crypto_call_req_t *crq)
+{
+ int error;
+ kcf_mech_entry_t *me;
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd;
+ kcf_ctx_template_t *ctx_tmpl;
+ crypto_spi_ctx_template_t spi_ctx_tmpl = NULL;
+ kcf_prov_tried_t *list = NULL;
+
+retry:
+ /* The pd is returned held */
+ if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
+ list, CRYPTO_FG_MAC_ATOMIC, CHECK_RESTRICT(crq),
+ data->cd_length)) == NULL) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ return (error);
+ }
+
+ /*
+ * For SW providers, check the validity of the context template.
+ * It is very rare that the generation number mismatches, so it
+ * is acceptable to fail here and let the consumer recover by
+ * freeing this tmpl and creating a new one for the key and the
+ * new SW provider.
+ */
+ if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
+ ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
+ if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ KCF_PROV_REFRELE(pd);
+ return (CRYPTO_OLD_CTX_TEMPLATE);
+ } else {
+ spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
+ }
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(crq, pd)) {
+ crypto_mechanism_t lmech;
+
+ lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech);
+
+ error = KCF_PROV_MAC_VERIFY_ATOMIC(pd, pd->pd_sid, &lmech, key,
+ data, mac, spi_ctx_tmpl, KCF_SWFP_RHNDL(crq));
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
+ (pd->pd_flags & CRYPTO_HASH_NO_UPDATE) &&
+ (data->cd_length > pd->pd_hash_limit)) {
+ /* see comments in crypto_mac() */
+ error = CRYPTO_BUFFER_TOO_BIG;
+ } else {
+ KCF_WRAP_MAC_OPS_PARAMS(&params,
+ KCF_OP_MAC_VERIFY_ATOMIC, pd->pd_sid, mech,
+ key, data, mac, spi_ctx_tmpl);
+
+ error = kcf_submit_request(pd, NULL, crq, &params,
+ KCF_ISDUALREQ(crq));
+ }
+ }
+
+ if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
+ IS_RECOVERABLE(error)) {
+ /* Add pd to the linked list of providers tried. */
+ if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ goto retry;
+ }
+
+ if (list != NULL)
+ kcf_free_triedlist(list);
+
+ KCF_PROV_REFRELE(pd);
+ return (error);
+}
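+
+/*
+ * A hypothetical sketch (not part of this file): verifying a received MAC
+ * in one call. Here 'mac' carries the expected value to compare against
+ * rather than storage for a newly computed one.
+ *
+ *	if (crypto_mac_verify(&mech, &data, &key, tmpl, &mac,
+ *	    NULL) != CRYPTO_SUCCESS)
+ *		... reject the message ...
+ */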
+
+/*
+ * crypto_mac_init_prov()
+ *
+ * Arguments:
+ * pd: pointer to the descriptor of the provider to use for this
+ * operation.
+ * sid: provider session id.
+ * mech: crypto_mechanism_t pointer.
+ * mech_type is a valid value previously returned by
+ * crypto_mech2id();
+ * When the mech's parameter is not NULL, its definition depends
+ * on the standard definition of the mechanism.
+ * key: pointer to a crypto_key_t structure.
+ * tmpl: a crypto_ctx_template_t, opaque template of a context of a
+ * MAC with the 'mech' using 'key'. 'tmpl' is created by
+ * a previous call to crypto_create_ctx_template().
+ * ctxp: Pointer to a crypto_context_t.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs the
+ * initialization of a MAC operation on the specified provider with
+ * the specified session.
+ * When possible and applicable, will internally use the pre-computed MAC
+ * context from the context template, tmpl.
+ * When complete and successful, 'ctxp' will contain a crypto_context_t
+ * valid for later calls to mac_update() and mac_final().
+ * The caller should hold a reference on the specified provider
+ * descriptor before calling this function.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_mac_init_prov(crypto_provider_t provider, crypto_session_id_t sid,
+ crypto_mechanism_t *mech, crypto_key_t *key, crypto_spi_ctx_template_t tmpl,
+ crypto_context_t *ctxp, crypto_call_req_t *crq)
+{
+ int rv;
+ crypto_ctx_t *ctx;
+ kcf_req_params_t params;
+ kcf_provider_desc_t *pd = provider;
+ kcf_provider_desc_t *real_provider = pd;
+
+ ASSERT(KCF_PROV_REFHELD(pd));
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ rv = kcf_get_hardware_provider(mech->cm_type,
+ CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd,
+ &real_provider, CRYPTO_FG_MAC);
+
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ }
+
+ /* Allocate and initialize the canonical context */
+ if ((ctx = kcf_new_ctx(crq, real_provider, sid)) == NULL) {
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+ return (CRYPTO_HOST_MEMORY);
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(crq, pd)) {
+ crypto_mechanism_t lmech;
+
+ lmech = *mech;
+ KCF_SET_PROVIDER_MECHNUM(mech->cm_type, real_provider, &lmech);
+ rv = KCF_PROV_MAC_INIT(real_provider, ctx, &lmech, key, tmpl,
+ KCF_SWFP_RHNDL(crq));
+ KCF_PROV_INCRSTATS(pd, rv);
+ } else {
+ KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_INIT, sid, mech, key,
+ NULL, NULL, tmpl);
+ rv = kcf_submit_request(real_provider, ctx, crq, &params,
+ B_FALSE);
+ }
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)
+ KCF_PROV_REFRELE(real_provider);
+
+ if ((rv == CRYPTO_SUCCESS) || (rv == CRYPTO_QUEUED))
+ *ctxp = (crypto_context_t)ctx;
+ else {
+ /* Release the hold done in kcf_new_ctx(). */
+ KCF_CONTEXT_REFRELE((kcf_context_t *)ctx->cc_framework_private);
+ }
+
+ return (rv);
+}
+
+/*
+ * Same as crypto_mac_init_prov(), but relies on the KCF scheduler to
+ * choose a provider. See crypto_mac_init_prov() comments for more
+ * information.
+ */
+int
+crypto_mac_init(crypto_mechanism_t *mech, crypto_key_t *key,
+ crypto_ctx_template_t tmpl, crypto_context_t *ctxp,
+ crypto_call_req_t *crq)
+{
+ int error;
+ kcf_mech_entry_t *me;
+ kcf_provider_desc_t *pd;
+ kcf_ctx_template_t *ctx_tmpl;
+ crypto_spi_ctx_template_t spi_ctx_tmpl = NULL;
+ kcf_prov_tried_t *list = NULL;
+
+retry:
+ /* The pd is returned held */
+ if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error,
+ list, CRYPTO_FG_MAC, CHECK_RESTRICT(crq), 0)) == NULL) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ return (error);
+ }
+
+ /*
+ * For SW providers, check the validity of the context template.
+ * It is very rare that the generation number mismatches, so it
+ * is acceptable to fail here and let the consumer recover by
+ * freeing this tmpl and creating a new one for the key and the
+ * new SW provider.
+ */
+
+ if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
+ ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) {
+ if (ctx_tmpl->ct_generation != me->me_gen_swprov) {
+ if (list != NULL)
+ kcf_free_triedlist(list);
+ KCF_PROV_REFRELE(pd);
+ return (CRYPTO_OLD_CTX_TEMPLATE);
+ } else {
+ spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl;
+ }
+ }
+
+ if (pd->pd_prov_type == CRYPTO_HW_PROVIDER &&
+ (pd->pd_flags & CRYPTO_HASH_NO_UPDATE)) {
+ /*
+ * The hardware provider has limited HMAC support.
+ * So, we fall back early here to using a software provider.
+ *
+ * XXX - need to enhance this to do the fallback later in
+ * crypto_mac_update() if the size of the accumulated input data
+ * exceeds the maximum size digestible by the hardware provider.
+ */
+ error = CRYPTO_BUFFER_TOO_BIG;
+ } else {
+ error = crypto_mac_init_prov(pd, pd->pd_sid, mech, key,
+ spi_ctx_tmpl, ctxp, crq);
+ }
+ if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED &&
+ IS_RECOVERABLE(error)) {
+ /* Add pd to the linked list of providers tried. */
+ if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL)
+ goto retry;
+ }
+
+ if (list != NULL)
+ kcf_free_triedlist(list);
+
+ KCF_PROV_REFRELE(pd);
+ return (error);
+}
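+
+/*
+ * A hypothetical multi-part sketch (not part of this file), mirroring the
+ * digest case: init, any number of updates, then final, which also
+ * releases the context.
+ *
+ *	crypto_context_t ctx;
+ *
+ *	if (crypto_mac_init(&mech, &key, tmpl, &ctx, NULL) != CRYPTO_SUCCESS)
+ *		return;
+ *	(void) crypto_mac_update(ctx, &part1, NULL);
+ *	(void) crypto_mac_update(ctx, &part2, NULL);
+ *	(void) crypto_mac_final(ctx, &mac, NULL);
+ */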
+
+/*
+ * crypto_mac_update()
+ *
+ * Arguments:
+ * context: A crypto_context_t initialized by mac_init().
+ * data: The message part to be MAC'ed
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs a
+ * part of a MAC operation.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_mac_update(crypto_context_t context, crypto_data_t *data,
+ crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ kcf_req_params_t params;
+ int rv;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ rv = KCF_PROV_MAC_UPDATE(pd, ctx, data, NULL);
+ KCF_PROV_INCRSTATS(pd, rv);
+ } else {
+ KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_UPDATE,
+ ctx->cc_session, NULL, NULL, data, NULL, NULL);
+ rv = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ return (rv);
+}
+
+/*
+ * crypto_mac_final()
+ *
+ * Arguments:
+ * context: A crypto_context_t initialized by mac_init().
+ * mac: Storage for the message authentication code.
+ * cr: crypto_call_req_t calling conditions and call back info.
+ *
+ * Description:
+ * Asynchronously submits a request for, or synchronously performs a
+ * part of a message authentication operation.
+ *
+ * Context:
+ * Process or interrupt, according to the semantics dictated by the 'cr'.
+ *
+ * Returns:
+ * See comment in the beginning of the file.
+ */
+int
+crypto_mac_final(crypto_context_t context, crypto_data_t *mac,
+ crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ kcf_req_params_t params;
+ int rv;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ rv = KCF_PROV_MAC_FINAL(pd, ctx, mac, NULL);
+ KCF_PROV_INCRSTATS(pd, rv);
+ } else {
+ KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_FINAL,
+ ctx->cc_session, NULL, NULL, NULL, mac, NULL);
+ rv = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ /* Release the hold done in kcf_new_ctx() during init step. */
+ KCF_CONTEXT_COND_RELEASE(rv, kcf_ctx);
+ return (rv);
+}
+
+/*
+ * See comments for crypto_mac_update() and crypto_mac_final().
+ */
+int
+crypto_mac_single(crypto_context_t context, crypto_data_t *data,
+ crypto_data_t *mac, crypto_call_req_t *cr)
+{
+ crypto_ctx_t *ctx = (crypto_ctx_t *)context;
+ kcf_context_t *kcf_ctx;
+ kcf_provider_desc_t *pd;
+ int error;
+ kcf_req_params_t params;
+
+ if ((ctx == NULL) ||
+ ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) ||
+ ((pd = kcf_ctx->kc_prov_desc) == NULL)) {
+ return (CRYPTO_INVALID_CONTEXT);
+ }
+
+ /* The fast path for SW providers. */
+ if (CHECK_FASTPATH(cr, pd)) {
+ error = KCF_PROV_MAC(pd, ctx, data, mac, NULL);
+ KCF_PROV_INCRSTATS(pd, error);
+ } else {
+ KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_SINGLE, pd->pd_sid,
+ NULL, NULL, data, mac, NULL);
+ error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE);
+ }
+
+ /* Release the hold done in kcf_new_ctx() during init step. */
+ KCF_CONTEXT_COND_RELEASE(error, kcf_ctx);
+ return (error);
+}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(crypto_mac_prov);
+EXPORT_SYMBOL(crypto_mac);
+EXPORT_SYMBOL(crypto_mac_verify_prov);
+EXPORT_SYMBOL(crypto_mac_verify);
+EXPORT_SYMBOL(crypto_mac_init_prov);
+EXPORT_SYMBOL(crypto_mac_init);
+EXPORT_SYMBOL(crypto_mac_update);
+EXPORT_SYMBOL(crypto_mac_final);
+EXPORT_SYMBOL(crypto_mac_single);
+#endif
diff --git a/zfs/module/icp/api/kcf_miscapi.c b/zfs/module/icp/api/kcf_miscapi.c
new file mode 100644
index 000000000000..09d50f7be176
--- /dev/null
+++ b/zfs/module/icp/api/kcf_miscapi.c
@@ -0,0 +1,127 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/api.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/sched_impl.h>
+
+/*
+ * All event subscribers are put on a list. ntfy_list_lock
+ * protects changes to this list.
+ *
+ * The following locking order is maintained in the code - the
+ * global ntfy_list_lock, followed by the individual lock
+ * in a kcf_ntfy_elem structure (kn_lock).
+ */
+kmutex_t ntfy_list_lock;
+kcondvar_t ntfy_list_cv; /* cv the service thread waits on */
+static kcf_ntfy_elem_t *ntfy_list_head;
+
+/*
+ * crypto_mech2id()
+ *
+ * Arguments:
+ * . mechname: A null-terminated string identifying the mechanism name.
+ *
+ * Description:
+ * Walks the mechanism tables, looking for an entry that matches the
+ * mechname. Once it finds it, it builds the 64-bit mech_type and returns
+ * it. If there are no hardware or software providers for the mechanism,
+ * but there is an unloaded software provider, this routine will attempt
+ * to load it.
+ *
+ * Context:
+ * Process or interrupt.
+ *
+ * Returns:
+ * The unique mechanism identified by 'mechname', if found.
+ * CRYPTO_MECH_INVALID otherwise.
+ */
+crypto_mech_type_t
+crypto_mech2id(char *mechname)
+{
+ return (crypto_mech2id_common(mechname, B_TRUE));
+}
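+
+/*
+ * A hypothetical sketch (not part of this file): mapping a PKCS#11-style
+ * mechanism name to its kernel identifier before making any other call.
+ *
+ *	crypto_mech_type_t t = crypto_mech2id(SUN_CKM_AES_CCM);
+ *	if (t == CRYPTO_MECH_INVALID)
+ *		... the mechanism is unavailable ...
+ */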
+
+/*
+ * We walk the notification list and do the callbacks.
+ */
+void
+kcf_walk_ntfylist(uint32_t event, void *event_arg)
+{
+ kcf_ntfy_elem_t *nep;
+ int nelem = 0;
+
+ mutex_enter(&ntfy_list_lock);
+
+ /*
+ * Count how many clients are on the notification list. We need
+ * this count to ensure that clients that joined the list after we
+ * started this walk are not wrongly notified.
+ */
+ for (nep = ntfy_list_head; nep != NULL; nep = nep->kn_next)
+ nelem++;
+
+ for (nep = ntfy_list_head; (nep != NULL && nelem); nep = nep->kn_next) {
+ nelem--;
+
+ /*
+ * Check if this client is interested in the
+ * event.
+ */
+ if (!(nep->kn_event_mask & event))
+ continue;
+
+ mutex_enter(&nep->kn_lock);
+ nep->kn_state = NTFY_RUNNING;
+ mutex_exit(&nep->kn_lock);
+ mutex_exit(&ntfy_list_lock);
+
+ /*
+ * We invoke the callback routine with no locks held. Another
+ * client could have joined the list meanwhile. This is fine
+ * as we maintain nelem as stated above. The NULL check in the
+ * for loop guards against shrinkage. Also, any callers of
+ * crypto_unnotify_events() at this point cv_wait till kn_state
+ * changes to NTFY_WAITING. Hence, nep is assured to be valid.
+ */
+ (*nep->kn_func)(event, event_arg);
+
+ mutex_enter(&nep->kn_lock);
+ nep->kn_state = NTFY_WAITING;
+ cv_broadcast(&nep->kn_cv);
+ mutex_exit(&nep->kn_lock);
+
+ mutex_enter(&ntfy_list_lock);
+ }
+
+ mutex_exit(&ntfy_list_lock);
+}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(crypto_mech2id);
+#endif
diff --git a/zfs/module/icp/asm-x86_64/aes/aes_amd64.S b/zfs/module/icp/asm-x86_64/aes/aes_amd64.S
new file mode 100644
index 000000000000..9db3a3179230
--- /dev/null
+++ b/zfs/module/icp/asm-x86_64/aes/aes_amd64.S
@@ -0,0 +1,906 @@
+/*
+ * ---------------------------------------------------------------------------
+ * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
+ *
+ * LICENSE TERMS
+ *
+ * The free distribution and use of this software is allowed (with or without
+ * changes) provided that:
+ *
+ * 1. source code distributions include the above copyright notice, this
+ * list of conditions and the following disclaimer;
+ *
+ * 2. binary distributions include the above copyright notice, this list
+ * of conditions and the following disclaimer in their documentation;
+ *
+ * 3. the name of the copyright holder is not used to endorse products
+ * built using this software without specific written permission.
+ *
+ * DISCLAIMER
+ *
+ * This software is provided 'as is' with no explicit or implied warranties
+ * in respect of its properties, including, but not limited to, correctness
+ * and/or fitness for purpose.
+ * ---------------------------------------------------------------------------
+ * Issue 20/12/2007
+ *
+ * I am grateful to Dag Arne Osvik for many discussions of the techniques that
+ * can be used to optimise AES assembler code on AMD64/EM64T architectures.
+ * Some of the techniques used in this implementation are the result of
+ * suggestions made by him for which I am most grateful.
+ *
+ * An AES implementation for AMD64 processors using the YASM assembler. This
+ * implementation provides only encryption and decryption, and hence requires
+ * key scheduling support in C. It uses 8k bytes of tables, but its encryption
+ * and decryption performance is very close to that obtained using large tables.
+ * It can use either MS Windows or Gnu/Linux/OpenSolaris OS calling conventions,
+ * which are as follows:
+ *                   ms windows   gnu/linux/opensolaris os
+ *
+ *   in_blk          rcx          rdi
+ *   out_blk         rdx          rsi
+ *   context (cx)    r8           rdx
+ *
+ *   preserved       rsi       -  + rbx, rbp, rsp, r12, r13, r14 & r15
+ *   registers       rdi       -  on both
+ *
+ *   destroyed       -        rsi + rax, rcx, rdx, r8, r9, r10 & r11
+ *   registers       -        rdi on both
+ *
+ * The convention used here is that for gnu/linux/opensolaris os.
+ *
+ * This code provides the standard AES block size (128 bits, 16 bytes) and the
+ * three standard AES key sizes (128, 192 and 256 bits). It has the same call
+ * interface as my C implementation. It uses the Microsoft C AMD64 calling
+ * conventions in which the three parameters are placed in rcx, rdx and r8
+ * respectively. The rbx, rsi, rdi, rbp and r12..r15 registers are preserved.
+ *
+ * OpenSolaris Note:
+ * Modified to use GNU/Linux/Solaris calling conventions.
+ * That is parameters are placed in rdi, rsi, rdx, and rcx, respectively.
+ *
+ * AES_RETURN aes_encrypt(const unsigned char in_blk[],
+ * unsigned char out_blk[], const aes_encrypt_ctx cx[1])/
+ *
+ * AES_RETURN aes_decrypt(const unsigned char in_blk[],
+ * unsigned char out_blk[], const aes_decrypt_ctx cx[1])/
+ *
+ * AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
+ * const aes_encrypt_ctx cx[1])/
+ *
+ * AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
+ * const aes_decrypt_ctx cx[1])/
+ *
+ * AES_RETURN aes_encrypt_key(const unsigned char key[],
+ * unsigned int len, const aes_decrypt_ctx cx[1])/
+ *
+ * AES_RETURN aes_decrypt_key(const unsigned char key[],
+ * unsigned int len, const aes_decrypt_ctx cx[1])/
+ *
+ * where <NNN> is 128, 192 or 256. In the last two calls the length can be in
+ * either bits or bytes.
+ *
+ * Comment in/out the following lines to obtain the desired subroutines. These
+ * selections MUST match those in the C header file aesopt.h
+ */
+#define AES_REV_DKS /* define if key decryption schedule is reversed */
+
+#define LAST_ROUND_TABLES /* define for the faster version using extra tables */
+
+/*
+ * The encryption key schedule has the following in memory layout where N is the
+ * number of rounds (10, 12 or 14):
+ *
+ * lo: | input key (round 0) | / each round is four 32-bit words
+ * | encryption round 1 |
+ * | encryption round 2 |
+ * ....
+ * | encryption round N-1 |
+ * hi: | encryption round N |
+ *
+ * The decryption key schedule is normally set up so that it has the same
+ * layout as above by actually reversing the order of the encryption key
+ * schedule in memory (this happens when AES_REV_DKS is set):
+ *
+ * lo: | decryption round 0 | = | encryption round N |
+ * | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ]
+ * | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ]
+ * .... ....
+ * | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ]
+ * hi: | decryption round N | = | input key (round 0) |
+ *
+ * with rounds except the first and last modified using inv_mix_column()
+ * But if AES_REV_DKS is NOT set the order of keys is left as it is for
+ * encryption so that it has to be accessed in reverse when used for
+ * decryption (although the inverse mix column modifications are done)
+ *
+ * lo: | decryption round 0 | = | input key (round 0) |
+ * | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ]
+ * | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ]
+ * .... ....
+ * | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
+ * hi: | decryption round N | = | encryption round N |
+ *
+ * This layout is faster when the assembler key scheduling provided here
+ * is used.
+ *
+ * End of user defines
+ */
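+
+/*
+ * A rough C sketch of the AES_REV_DKS layout described above (illustrative
+ * only; inv_mix_columns() stands in for the inverse MixColumns transform
+ * applied to one four-word round key, and ek/dk are the encryption and
+ * decryption schedules):
+ *
+ *	dk[0] = ek[N];
+ *	for (r = 1; r < N; r++)
+ *		dk[r] = inv_mix_columns(ek[N - r]);
+ *	dk[N] = ek[0];
+ */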
+
+/*
+ * ---------------------------------------------------------------------------
+ * OpenSolaris OS modifications
+ *
+ * This source originates from Brian Gladman file aes_amd64.asm
+ * in http://fp.gladman.plus.com/AES/aes-src-04-03-08.zip
+ * with these changes:
+ *
+ * 1. Removed MS Windows-specific code within DLL_EXPORT, _SEH_, and
+ * !__GNUC__ ifdefs. Also removed ENCRYPTION, DECRYPTION,
+ * AES_128, AES_192, AES_256, AES_VAR ifdefs.
+ *
+ * 2. Translate yasm/nasm %define and .macro definitions to cpp(1) #define
+ *
+ * 3. Translate yasm/nasm %ifdef/%ifndef to cpp(1) #ifdef
+ *
+ * 4. Translate Intel/yasm/nasm syntax to ATT/OpenSolaris as(1) syntax
+ * (operands reversed, literals prefixed with "$", registers prefixed with "%",
+ * and "[register+offset]", addressing changed to "offset(register)",
+ * parenthesis in constant expressions "()" changed to square brackets "[]",
+ * "." removed from local (numeric) labels, and other changes.
+ * Examples:
+ * Intel/yasm/nasm Syntax ATT/OpenSolaris Syntax
+ * mov rax,(4*20h) mov $[4*0x20],%rax
+ * mov rax,[ebx+20h] mov 0x20(%ebx),%rax
+ * lea rax,[ebx+ecx] lea (%ebx,%ecx),%rax
+ * sub rax,[ebx+ecx*4-20h] sub -0x20(%ebx,%ecx,4),%rax
+ *
+ * 5. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
+ * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
+ * definitions for lint.
+ *
+ * 6. Renamed functions and reordered parameters to match OpenSolaris:
+ * Original Gladman interface:
+ * int aes_encrypt(const unsigned char *in,
+ * unsigned char *out, const aes_encrypt_ctx cx[1])/
+ * int aes_decrypt(const unsigned char *in,
+ * unsigned char *out, const aes_encrypt_ctx cx[1])/
+ * Note: aes_encrypt_ctx contains ks, a 60-element array of uint32_t,
+ * and a union type, inf, containing inf.l, a uint32_t, and
+ * inf.b, a 4-element array of uint32_t. Only b[0] in the array (aka "l") is
+ * used and contains the key schedule length * 16, where the key schedule
+ * length is 10, 12, or 14 rounds.
+ *
+ * OpenSolaris OS interface:
+ * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr,
+ * const uint32_t pt[4], uint32_t ct[4])/
+ * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr,
+ * const uint32_t pt[4], uint32_t ct[4])/
+ * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4]/
+ * uint32_t ks32[(MAX_AES_NR + 1) * 4]/ } aes_ks_t/
+ * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
+ * ct is crypto text, and MAX_AES_NR is 14.
+ * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
+ */
+
+#if defined(lint) || defined(__lint)
+
+#include <sys/types.h>
+/* ARGSUSED */
+void
+aes_encrypt_amd64(const uint32_t rk[], int Nr, const uint32_t pt[4],
+ uint32_t ct[4]) {
+}
+/* ARGSUSED */
+void
+aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4],
+ uint32_t pt[4]) {
+}
+
+
+#else
+
+#define _ASM
+#include <sys/asm_linkage.h>
+
+#define KS_LENGTH 60
+
+#define raxd eax
+#define rdxd edx
+#define rcxd ecx
+#define rbxd ebx
+#define rsid esi
+#define rdid edi
+
+#define raxb al
+#define rdxb dl
+#define rcxb cl
+#define rbxb bl
+#define rsib sil
+#define rdib dil
+
+// finite field multiplies by {02}, {04} and {08}
+
+#define f2(x) [[x<<1]^[[[x>>7]&1]*0x11b]]
+#define f4(x) [[x<<2]^[[[x>>6]&1]*0x11b]^[[[x>>6]&2]*0x11b]]
+#define f8(x) [[x<<3]^[[[x>>5]&1]*0x11b]^[[[x>>5]&2]*0x11b]^[[[x>>5]&4]*0x11b]]
+
+// finite field multiplies required in table generation
+
+#define f3(x) [[f2(x)] ^ [x]]
+#define f9(x) [[f8(x)] ^ [x]]
+#define fb(x) [[f8(x)] ^ [f2(x)] ^ [x]]
+#define fd(x) [[f8(x)] ^ [f4(x)] ^ [x]]
+#define fe(x) [[f8(x)] ^ [f4(x)] ^ [f2(x)]]
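+
+// A C rendering of f2() above for reference (illustrative only, not
+// assembled): multiplication by {02} in GF(2^8), reduced modulo the AES
+// polynomial x^8 + x^4 + x^3 + x + 1 (0x11b). For x < 0x100 the XOR
+// already clears bit 8, so the result fits in a byte.
+//
+//	static inline unsigned int gf_mul2(unsigned int x)
+//	{
+//		return ((x << 1) ^ (((x >> 7) & 1) * 0x11b));
+//	}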
+
+// macros for expanding S-box data
+
+#define u8(x) [f2(x)], [x], [x], [f3(x)], [f2(x)], [x], [x], [f3(x)]
+#define v8(x) [fe(x)], [f9(x)], [fd(x)], [fb(x)], [fe(x)], [f9(x)], [fd(x)], [x]
+#define w8(x) [x], 0, 0, 0, [x], 0, 0, 0
+
+#define enc_vals(x) \
+ .byte x(0x63),x(0x7c),x(0x77),x(0x7b),x(0xf2),x(0x6b),x(0x6f),x(0xc5); \
+ .byte x(0x30),x(0x01),x(0x67),x(0x2b),x(0xfe),x(0xd7),x(0xab),x(0x76); \
+ .byte x(0xca),x(0x82),x(0xc9),x(0x7d),x(0xfa),x(0x59),x(0x47),x(0xf0); \
+ .byte x(0xad),x(0xd4),x(0xa2),x(0xaf),x(0x9c),x(0xa4),x(0x72),x(0xc0); \
+ .byte x(0xb7),x(0xfd),x(0x93),x(0x26),x(0x36),x(0x3f),x(0xf7),x(0xcc); \
+ .byte x(0x34),x(0xa5),x(0xe5),x(0xf1),x(0x71),x(0xd8),x(0x31),x(0x15); \
+ .byte x(0x04),x(0xc7),x(0x23),x(0xc3),x(0x18),x(0x96),x(0x05),x(0x9a); \
+ .byte x(0x07),x(0x12),x(0x80),x(0xe2),x(0xeb),x(0x27),x(0xb2),x(0x75); \
+ .byte x(0x09),x(0x83),x(0x2c),x(0x1a),x(0x1b),x(0x6e),x(0x5a),x(0xa0); \
+ .byte x(0x52),x(0x3b),x(0xd6),x(0xb3),x(0x29),x(0xe3),x(0x2f),x(0x84); \
+ .byte x(0x53),x(0xd1),x(0x00),x(0xed),x(0x20),x(0xfc),x(0xb1),x(0x5b); \
+ .byte x(0x6a),x(0xcb),x(0xbe),x(0x39),x(0x4a),x(0x4c),x(0x58),x(0xcf); \
+ .byte x(0xd0),x(0xef),x(0xaa),x(0xfb),x(0x43),x(0x4d),x(0x33),x(0x85); \
+ .byte x(0x45),x(0xf9),x(0x02),x(0x7f),x(0x50),x(0x3c),x(0x9f),x(0xa8); \
+ .byte x(0x51),x(0xa3),x(0x40),x(0x8f),x(0x92),x(0x9d),x(0x38),x(0xf5); \
+ .byte x(0xbc),x(0xb6),x(0xda),x(0x21),x(0x10),x(0xff),x(0xf3),x(0xd2); \
+ .byte x(0xcd),x(0x0c),x(0x13),x(0xec),x(0x5f),x(0x97),x(0x44),x(0x17); \
+ .byte x(0xc4),x(0xa7),x(0x7e),x(0x3d),x(0x64),x(0x5d),x(0x19),x(0x73); \
+ .byte x(0x60),x(0x81),x(0x4f),x(0xdc),x(0x22),x(0x2a),x(0x90),x(0x88); \
+ .byte x(0x46),x(0xee),x(0xb8),x(0x14),x(0xde),x(0x5e),x(0x0b),x(0xdb); \
+ .byte x(0xe0),x(0x32),x(0x3a),x(0x0a),x(0x49),x(0x06),x(0x24),x(0x5c); \
+ .byte x(0xc2),x(0xd3),x(0xac),x(0x62),x(0x91),x(0x95),x(0xe4),x(0x79); \
+ .byte x(0xe7),x(0xc8),x(0x37),x(0x6d),x(0x8d),x(0xd5),x(0x4e),x(0xa9); \
+ .byte x(0x6c),x(0x56),x(0xf4),x(0xea),x(0x65),x(0x7a),x(0xae),x(0x08); \
+ .byte x(0xba),x(0x78),x(0x25),x(0x2e),x(0x1c),x(0xa6),x(0xb4),x(0xc6); \
+ .byte x(0xe8),x(0xdd),x(0x74),x(0x1f),x(0x4b),x(0xbd),x(0x8b),x(0x8a); \
+ .byte x(0x70),x(0x3e),x(0xb5),x(0x66),x(0x48),x(0x03),x(0xf6),x(0x0e); \
+ .byte x(0x61),x(0x35),x(0x57),x(0xb9),x(0x86),x(0xc1),x(0x1d),x(0x9e); \
+ .byte x(0xe1),x(0xf8),x(0x98),x(0x11),x(0x69),x(0xd9),x(0x8e),x(0x94); \
+ .byte x(0x9b),x(0x1e),x(0x87),x(0xe9),x(0xce),x(0x55),x(0x28),x(0xdf); \
+ .byte x(0x8c),x(0xa1),x(0x89),x(0x0d),x(0xbf),x(0xe6),x(0x42),x(0x68); \
+ .byte x(0x41),x(0x99),x(0x2d),x(0x0f),x(0xb0),x(0x54),x(0xbb),x(0x16)
+
+#define dec_vals(x) \
+ .byte x(0x52),x(0x09),x(0x6a),x(0xd5),x(0x30),x(0x36),x(0xa5),x(0x38); \
+ .byte x(0xbf),x(0x40),x(0xa3),x(0x9e),x(0x81),x(0xf3),x(0xd7),x(0xfb); \
+ .byte x(0x7c),x(0xe3),x(0x39),x(0x82),x(0x9b),x(0x2f),x(0xff),x(0x87); \
+ .byte x(0x34),x(0x8e),x(0x43),x(0x44),x(0xc4),x(0xde),x(0xe9),x(0xcb); \
+ .byte x(0x54),x(0x7b),x(0x94),x(0x32),x(0xa6),x(0xc2),x(0x23),x(0x3d); \
+ .byte x(0xee),x(0x4c),x(0x95),x(0x0b),x(0x42),x(0xfa),x(0xc3),x(0x4e); \
+ .byte x(0x08),x(0x2e),x(0xa1),x(0x66),x(0x28),x(0xd9),x(0x24),x(0xb2); \
+ .byte x(0x76),x(0x5b),x(0xa2),x(0x49),x(0x6d),x(0x8b),x(0xd1),x(0x25); \
+ .byte x(0x72),x(0xf8),x(0xf6),x(0x64),x(0x86),x(0x68),x(0x98),x(0x16); \
+ .byte x(0xd4),x(0xa4),x(0x5c),x(0xcc),x(0x5d),x(0x65),x(0xb6),x(0x92); \
+ .byte x(0x6c),x(0x70),x(0x48),x(0x50),x(0xfd),x(0xed),x(0xb9),x(0xda); \
+ .byte x(0x5e),x(0x15),x(0x46),x(0x57),x(0xa7),x(0x8d),x(0x9d),x(0x84); \
+ .byte x(0x90),x(0xd8),x(0xab),x(0x00),x(0x8c),x(0xbc),x(0xd3),x(0x0a); \
+ .byte x(0xf7),x(0xe4),x(0x58),x(0x05),x(0xb8),x(0xb3),x(0x45),x(0x06); \
+ .byte x(0xd0),x(0x2c),x(0x1e),x(0x8f),x(0xca),x(0x3f),x(0x0f),x(0x02); \
+ .byte x(0xc1),x(0xaf),x(0xbd),x(0x03),x(0x01),x(0x13),x(0x8a),x(0x6b); \
+ .byte x(0x3a),x(0x91),x(0x11),x(0x41),x(0x4f),x(0x67),x(0xdc),x(0xea); \
+ .byte x(0x97),x(0xf2),x(0xcf),x(0xce),x(0xf0),x(0xb4),x(0xe6),x(0x73); \
+ .byte x(0x96),x(0xac),x(0x74),x(0x22),x(0xe7),x(0xad),x(0x35),x(0x85); \
+ .byte x(0xe2),x(0xf9),x(0x37),x(0xe8),x(0x1c),x(0x75),x(0xdf),x(0x6e); \
+ .byte x(0x47),x(0xf1),x(0x1a),x(0x71),x(0x1d),x(0x29),x(0xc5),x(0x89); \
+ .byte x(0x6f),x(0xb7),x(0x62),x(0x0e),x(0xaa),x(0x18),x(0xbe),x(0x1b); \
+ .byte x(0xfc),x(0x56),x(0x3e),x(0x4b),x(0xc6),x(0xd2),x(0x79),x(0x20); \
+ .byte x(0x9a),x(0xdb),x(0xc0),x(0xfe),x(0x78),x(0xcd),x(0x5a),x(0xf4); \
+ .byte x(0x1f),x(0xdd),x(0xa8),x(0x33),x(0x88),x(0x07),x(0xc7),x(0x31); \
+ .byte x(0xb1),x(0x12),x(0x10),x(0x59),x(0x27),x(0x80),x(0xec),x(0x5f); \
+ .byte x(0x60),x(0x51),x(0x7f),x(0xa9),x(0x19),x(0xb5),x(0x4a),x(0x0d); \
+ .byte x(0x2d),x(0xe5),x(0x7a),x(0x9f),x(0x93),x(0xc9),x(0x9c),x(0xef); \
+ .byte x(0xa0),x(0xe0),x(0x3b),x(0x4d),x(0xae),x(0x2a),x(0xf5),x(0xb0); \
+ .byte x(0xc8),x(0xeb),x(0xbb),x(0x3c),x(0x83),x(0x53),x(0x99),x(0x61); \
+ .byte x(0x17),x(0x2b),x(0x04),x(0x7e),x(0xba),x(0x77),x(0xd6),x(0x26); \
+ .byte x(0xe1),x(0x69),x(0x14),x(0x63),x(0x55),x(0x21),x(0x0c),x(0x7d)
+
+#define tptr %rbp /* table pointer */
+#define kptr %r8 /* key schedule pointer */
+#define fofs 128 /* adjust offset in key schedule to keep |disp| < 128 */
+#define fk_ref(x, y) -16*x+fofs+4*y(kptr)
+
+#ifdef AES_REV_DKS
+#define rofs 128
+#define ik_ref(x, y) -16*x+rofs+4*y(kptr)
+
+#else
+#define rofs -128
+#define ik_ref(x, y) 16*x+rofs+4*y(kptr)
+#endif /* AES_REV_DKS */
+
+#define tab_0(x) (tptr,x,8)
+#define tab_1(x) 3(tptr,x,8)
+#define tab_2(x) 2(tptr,x,8)
+#define tab_3(x) 1(tptr,x,8)
+#define tab_f(x) 1(tptr,x,8)
+#define tab_i(x) 7(tptr,x,8)
+
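+/*
+ * Layout note (inferred from the addressing above, not stated in the source):
+ * the ,8 scale implies 8-byte table entries. Each entry evidently repeats the
+ * 4-byte column value so that the dword reads at byte offsets 0..3 yield its
+ * four byte-rotations without explicit rotate instructions, while the tab_f
+ * and tab_i byte reads pick out a plain (inverse) S-box byte for last rounds.
+ */
+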
+#define ff_rnd(p1, p2, p3, p4, round) /* normal forward round */ \
+ mov fk_ref(round,0), p1; \
+ mov fk_ref(round,1), p2; \
+ mov fk_ref(round,2), p3; \
+ mov fk_ref(round,3), p4; \
+ \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ shr $16, %eax; \
+ xor tab_0(%rsi), p1; \
+ xor tab_1(%rdi), p4; \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ xor tab_2(%rsi), p3; \
+ xor tab_3(%rdi), p2; \
+ \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ shr $16, %ebx; \
+ xor tab_0(%rsi), p2; \
+ xor tab_1(%rdi), p1; \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ xor tab_2(%rsi), p4; \
+ xor tab_3(%rdi), p3; \
+ \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ shr $16, %ecx; \
+ xor tab_0(%rsi), p3; \
+ xor tab_1(%rdi), p2; \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ xor tab_2(%rsi), p1; \
+ xor tab_3(%rdi), p4; \
+ \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ shr $16, %edx; \
+ xor tab_0(%rsi), p4; \
+ xor tab_1(%rdi), p3; \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ xor tab_2(%rsi), p2; \
+ xor tab_3(%rdi), p1; \
+ \
+ mov p1, %eax; \
+ mov p2, %ebx; \
+ mov p3, %ecx; \
+ mov p4, %edx
+
+#ifdef LAST_ROUND_TABLES
+
+#define fl_rnd(p1, p2, p3, p4, round) /* last forward round */ \
+ add $2048, tptr; \
+ mov fk_ref(round,0), p1; \
+ mov fk_ref(round,1), p2; \
+ mov fk_ref(round,2), p3; \
+ mov fk_ref(round,3), p4; \
+ \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ shr $16, %eax; \
+ xor tab_0(%rsi), p1; \
+ xor tab_1(%rdi), p4; \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ xor tab_2(%rsi), p3; \
+ xor tab_3(%rdi), p2; \
+ \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ shr $16, %ebx; \
+ xor tab_0(%rsi), p2; \
+ xor tab_1(%rdi), p1; \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ xor tab_2(%rsi), p4; \
+ xor tab_3(%rdi), p3; \
+ \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ shr $16, %ecx; \
+ xor tab_0(%rsi), p3; \
+ xor tab_1(%rdi), p2; \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ xor tab_2(%rsi), p1; \
+ xor tab_3(%rdi), p4; \
+ \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ shr $16, %edx; \
+ xor tab_0(%rsi), p4; \
+ xor tab_1(%rdi), p3; \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ xor tab_2(%rsi), p2; \
+ xor tab_3(%rdi), p1
+
+#else
+
+#define fl_rnd(p1, p2, p3, p4, round) /* last forward round */ \
+ mov fk_ref(round,0), p1; \
+ mov fk_ref(round,1), p2; \
+ mov fk_ref(round,2), p3; \
+ mov fk_ref(round,3), p4; \
+ \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ shr $16, %eax; \
+ movzx tab_f(%rsi), %esi; \
+ movzx tab_f(%rdi), %edi; \
+ xor %esi, p1; \
+ rol $8, %edi; \
+ xor %edi, p4; \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ movzx tab_f(%rsi), %esi; \
+ movzx tab_f(%rdi), %edi; \
+ rol $16, %esi; \
+ rol $24, %edi; \
+ xor %esi, p3; \
+ xor %edi, p2; \
+ \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ shr $16, %ebx; \
+ movzx tab_f(%rsi), %esi; \
+ movzx tab_f(%rdi), %edi; \
+ xor %esi, p2; \
+ rol $8, %edi; \
+ xor %edi, p1; \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ movzx tab_f(%rsi), %esi; \
+ movzx tab_f(%rdi), %edi; \
+ rol $16, %esi; \
+ rol $24, %edi; \
+ xor %esi, p4; \
+ xor %edi, p3; \
+ \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ movzx tab_f(%rsi), %esi; \
+ movzx tab_f(%rdi), %edi; \
+ shr $16, %ecx; \
+ xor %esi, p3; \
+ rol $8, %edi; \
+ xor %edi, p2; \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ movzx tab_f(%rsi), %esi; \
+ movzx tab_f(%rdi), %edi; \
+ rol $16, %esi; \
+ rol $24, %edi; \
+ xor %esi, p1; \
+ xor %edi, p4; \
+ \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ movzx tab_f(%rsi), %esi; \
+ movzx tab_f(%rdi), %edi; \
+ shr $16, %edx; \
+ xor %esi, p4; \
+ rol $8, %edi; \
+ xor %edi, p3; \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ movzx tab_f(%rsi), %esi; \
+ movzx tab_f(%rdi), %edi; \
+ rol $16, %esi; \
+ rol $24, %edi; \
+ xor %esi, p2; \
+ xor %edi, p1
+
+#endif /* LAST_ROUND_TABLES */
+
+#define ii_rnd(p1, p2, p3, p4, round) /* normal inverse round */ \
+ mov ik_ref(round,0), p1; \
+ mov ik_ref(round,1), p2; \
+ mov ik_ref(round,2), p3; \
+ mov ik_ref(round,3), p4; \
+ \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ shr $16, %eax; \
+ xor tab_0(%rsi), p1; \
+ xor tab_1(%rdi), p2; \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ xor tab_2(%rsi), p3; \
+ xor tab_3(%rdi), p4; \
+ \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ shr $16, %ebx; \
+ xor tab_0(%rsi), p2; \
+ xor tab_1(%rdi), p3; \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ xor tab_2(%rsi), p4; \
+ xor tab_3(%rdi), p1; \
+ \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ shr $16, %ecx; \
+ xor tab_0(%rsi), p3; \
+ xor tab_1(%rdi), p4; \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ xor tab_2(%rsi), p1; \
+ xor tab_3(%rdi), p2; \
+ \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ shr $16, %edx; \
+ xor tab_0(%rsi), p4; \
+ xor tab_1(%rdi), p1; \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ xor tab_2(%rsi), p2; \
+ xor tab_3(%rdi), p3; \
+ \
+ mov p1, %eax; \
+ mov p2, %ebx; \
+ mov p3, %ecx; \
+ mov p4, %edx
+
+#ifdef LAST_ROUND_TABLES
+
+#define il_rnd(p1, p2, p3, p4, round) /* last inverse round */ \
+ add $2048, tptr; \
+ mov ik_ref(round,0), p1; \
+ mov ik_ref(round,1), p2; \
+ mov ik_ref(round,2), p3; \
+ mov ik_ref(round,3), p4; \
+ \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ shr $16, %eax; \
+ xor tab_0(%rsi), p1; \
+ xor tab_1(%rdi), p2; \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ xor tab_2(%rsi), p3; \
+ xor tab_3(%rdi), p4; \
+ \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ shr $16, %ebx; \
+ xor tab_0(%rsi), p2; \
+ xor tab_1(%rdi), p3; \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ xor tab_2(%rsi), p4; \
+ xor tab_3(%rdi), p1; \
+ \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ shr $16, %ecx; \
+ xor tab_0(%rsi), p3; \
+ xor tab_1(%rdi), p4; \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ xor tab_2(%rsi), p1; \
+ xor tab_3(%rdi), p2; \
+ \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ shr $16, %edx; \
+ xor tab_0(%rsi), p4; \
+ xor tab_1(%rdi), p1; \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ xor tab_2(%rsi), p2; \
+ xor tab_3(%rdi), p3
+
+#else
+
+#define il_rnd(p1, p2, p3, p4, round) /* last inverse round */ \
+ mov ik_ref(round,0), p1; \
+ mov ik_ref(round,1), p2; \
+ mov ik_ref(round,2), p3; \
+ mov ik_ref(round,3), p4; \
+ \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ movzx tab_i(%rsi), %esi; \
+ movzx tab_i(%rdi), %edi; \
+ shr $16, %eax; \
+ xor %esi, p1; \
+ rol $8, %edi; \
+ xor %edi, p2; \
+ movzx %al, %esi; \
+ movzx %ah, %edi; \
+ movzx tab_i(%rsi), %esi; \
+ movzx tab_i(%rdi), %edi; \
+ rol $16, %esi; \
+ rol $24, %edi; \
+ xor %esi, p3; \
+ xor %edi, p4; \
+ \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ movzx tab_i(%rsi), %esi; \
+ movzx tab_i(%rdi), %edi; \
+ shr $16, %ebx; \
+ xor %esi, p2; \
+ rol $8, %edi; \
+ xor %edi, p3; \
+ movzx %bl, %esi; \
+ movzx %bh, %edi; \
+ movzx tab_i(%rsi), %esi; \
+ movzx tab_i(%rdi), %edi; \
+ rol $16, %esi; \
+ rol $24, %edi; \
+ xor %esi, p4; \
+ xor %edi, p1; \
+ \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ movzx tab_i(%rsi), %esi; \
+ movzx tab_i(%rdi), %edi; \
+ shr $16, %ecx; \
+ xor %esi, p3; \
+ rol $8, %edi; \
+ xor %edi, p4; \
+ movzx %cl, %esi; \
+ movzx %ch, %edi; \
+ movzx tab_i(%rsi), %esi; \
+ movzx tab_i(%rdi), %edi; \
+ rol $16, %esi; \
+ rol $24, %edi; \
+ xor %esi, p1; \
+ xor %edi, p2; \
+ \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ movzx tab_i(%rsi), %esi; \
+ movzx tab_i(%rdi), %edi; \
+ shr $16, %edx; \
+ xor %esi, p4; \
+ rol $8, %edi; \
+ xor %edi, p1; \
+ movzx %dl, %esi; \
+ movzx %dh, %edi; \
+ movzx tab_i(%rsi), %esi; \
+ movzx tab_i(%rdi), %edi; \
+ rol $16, %esi; \
+ rol $24, %edi; \
+ xor %esi, p2; \
+ xor %edi, p3
+
+#endif /* LAST_ROUND_TABLES */
+
+/*
+ * OpenSolaris OS:
+ * void aes_encrypt_amd64(const aes_ks_t *ks, int Nr,
+ * const uint32_t pt[4], uint32_t ct[4]);
+ *
+ * Original interface:
+ * int aes_encrypt(const unsigned char *in,
+ * unsigned char *out, const aes_encrypt_ctx cx[1]);
+ */
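+/*
+ * Illustrative (hypothetical) OpenSolaris-style caller, assuming a key
+ * schedule ks and round count nr produced beforehand by
+ * rijndael_key_setup_enc_amd64():
+ *
+ *	uint32_t pt[4], ct[4];
+ *	aes_encrypt_amd64(ks, nr, pt, ct);	// nr = 10, 12, or 14
+ */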
+.data
+.align 64
+enc_tab:
+ enc_vals(u8)
+#ifdef LAST_ROUND_TABLES
+ // Last Round Tables:
+ enc_vals(w8)
+#endif
+
+
+ENTRY_NP(aes_encrypt_amd64)
+#ifdef GLADMAN_INTERFACE
+ // Original interface
+ sub $[4*8], %rsp // gnu/linux/opensolaris binary interface
+ mov %rsi, (%rsp) // output pointer (P2)
+ mov %rdx, %r8 // context (P3)
+
+ mov %rbx, 1*8(%rsp) // P1: input pointer in rdi
+ mov %rbp, 2*8(%rsp) // P2: output pointer in (rsp)
+ mov %r12, 3*8(%rsp) // P3: context in r8
+ movzx 4*KS_LENGTH(kptr), %esi // Get byte key length * 16
+
+#else
+ // OpenSolaris OS interface
+ sub $[4*8], %rsp // Make room on stack to save registers
+ mov %rcx, (%rsp) // Save output pointer (P4) on stack
+ mov %rdi, %r8 // context (P1)
+ mov %rdx, %rdi // P3: save input pointer
+ shl $4, %esi // P2: esi byte key length * 16
+
+ mov %rbx, 1*8(%rsp) // Save registers
+ mov %rbp, 2*8(%rsp)
+ mov %r12, 3*8(%rsp)
+ // P1: context in r8
+ // P2: byte key length * 16 in esi
+ // P3: input pointer in rdi
+ // P4: output pointer in (rsp)
+#endif /* GLADMAN_INTERFACE */
+
+ lea enc_tab(%rip), tptr
+ sub $fofs, kptr
+
+ // Load input block into registers
+ mov (%rdi), %eax
+ mov 1*4(%rdi), %ebx
+ mov 2*4(%rdi), %ecx
+ mov 3*4(%rdi), %edx
+
+ xor fofs(kptr), %eax
+ xor fofs+4(kptr), %ebx
+ xor fofs+8(kptr), %ecx
+ xor fofs+12(kptr), %edx
+
+ lea (kptr,%rsi), kptr
+ // Jump based on byte key length * 16:
+ cmp $[10*16], %esi
+ je 3f
+ cmp $[12*16], %esi
+ je 2f
+ cmp $[14*16], %esi
+ je 1f
+ mov $-1, %rax // error
+ jmp 4f
+
+ // Perform normal forward rounds
+1: ff_rnd(%r9d, %r10d, %r11d, %r12d, 13)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 12)
+2: ff_rnd(%r9d, %r10d, %r11d, %r12d, 11)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 10)
+3: ff_rnd(%r9d, %r10d, %r11d, %r12d, 9)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 8)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 7)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 6)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 5)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 4)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 3)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 2)
+ ff_rnd(%r9d, %r10d, %r11d, %r12d, 1)
+ fl_rnd(%r9d, %r10d, %r11d, %r12d, 0)
+
+ // Copy results
+ mov (%rsp), %rbx
+ mov %r9d, (%rbx)
+ mov %r10d, 4(%rbx)
+ mov %r11d, 8(%rbx)
+ mov %r12d, 12(%rbx)
+ xor %rax, %rax
+4: // Restore registers
+ mov 1*8(%rsp), %rbx
+ mov 2*8(%rsp), %rbp
+ mov 3*8(%rsp), %r12
+ add $[4*8], %rsp
+ ret
+
+ SET_SIZE(aes_encrypt_amd64)
+
+/*
+ * OpenSolaris OS:
+ * void aes_decrypt_amd64(const aes_ks_t *ks, int Nr,
+ * const uint32_t pt[4], uint32_t ct[4]);
+ *
+ * Original interface:
+ * int aes_decrypt(const unsigned char *in,
+ * unsigned char *out, const aes_encrypt_ctx cx[1]);
+ */
+.data
+.align 64
+dec_tab:
+ dec_vals(v8)
+#ifdef LAST_ROUND_TABLES
+ // Last Round Tables:
+ dec_vals(w8)
+#endif
+
+
+ENTRY_NP(aes_decrypt_amd64)
+#ifdef GLADMAN_INTERFACE
+ // Original interface
+ sub $[4*8], %rsp // gnu/linux/opensolaris binary interface
+ mov %rsi, (%rsp) // output pointer (P2)
+ mov %rdx, %r8 // context (P3)
+
+ mov %rbx, 1*8(%rsp) // P1: input pointer in rdi
+ mov %rbp, 2*8(%rsp) // P2: output pointer in (rsp)
+ mov %r12, 3*8(%rsp) // P3: context in r8
+ movzx 4*KS_LENGTH(kptr), %esi // Get byte key length * 16
+
+#else
+ // OpenSolaris OS interface
+ sub $[4*8], %rsp // Make room on stack to save registers
+ mov %rcx, (%rsp) // Save output pointer (P4) on stack
+ mov %rdi, %r8 // context (P1)
+ mov %rdx, %rdi // P3: save input pointer
+ shl $4, %esi // P2: esi byte key length * 16
+
+ mov %rbx, 1*8(%rsp) // Save registers
+ mov %rbp, 2*8(%rsp)
+ mov %r12, 3*8(%rsp)
+ // P1: context in r8
+ // P2: byte key length * 16 in esi
+ // P3: input pointer in rdi
+ // P4: output pointer in (rsp)
+#endif /* GLADMAN_INTERFACE */
+
+ lea dec_tab(%rip), tptr
+ sub $rofs, kptr
+
+ // Load input block into registers
+ mov (%rdi), %eax
+ mov 1*4(%rdi), %ebx
+ mov 2*4(%rdi), %ecx
+ mov 3*4(%rdi), %edx
+
+#ifdef AES_REV_DKS
+ mov kptr, %rdi
+ lea (kptr,%rsi), kptr
+#else
+ lea (kptr,%rsi), %rdi
+#endif
+
+ xor rofs(%rdi), %eax
+ xor rofs+4(%rdi), %ebx
+ xor rofs+8(%rdi), %ecx
+ xor rofs+12(%rdi), %edx
+
+ // Jump based on byte key length * 16:
+ cmp $[10*16], %esi
+ je 3f
+ cmp $[12*16], %esi
+ je 2f
+ cmp $[14*16], %esi
+ je 1f
+ mov $-1, %rax // error
+ jmp 4f
+
+ // Perform normal inverse rounds
+1: ii_rnd(%r9d, %r10d, %r11d, %r12d, 13)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 12)
+2: ii_rnd(%r9d, %r10d, %r11d, %r12d, 11)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 10)
+3: ii_rnd(%r9d, %r10d, %r11d, %r12d, 9)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 8)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 7)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 6)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 5)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 4)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 3)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 2)
+ ii_rnd(%r9d, %r10d, %r11d, %r12d, 1)
+ il_rnd(%r9d, %r10d, %r11d, %r12d, 0)
+
+ // Copy results
+ mov (%rsp), %rbx
+ mov %r9d, (%rbx)
+ mov %r10d, 4(%rbx)
+ mov %r11d, 8(%rbx)
+ mov %r12d, 12(%rbx)
+ xor %rax, %rax
+4: // Restore registers
+ mov 1*8(%rsp), %rbx
+ mov 2*8(%rsp), %rbp
+ mov 3*8(%rsp), %r12
+ add $[4*8], %rsp
+ ret
+
+ SET_SIZE(aes_decrypt_amd64)
+#endif /* lint || __lint */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/zfs/module/icp/asm-x86_64/aes/aes_intel.S b/zfs/module/icp/asm-x86_64/aes/aes_intel.S
new file mode 100644
index 000000000000..ed0df75c5513
--- /dev/null
+++ b/zfs/module/icp/asm-x86_64/aes/aes_intel.S
@@ -0,0 +1,749 @@
+/*
+ * ====================================================================
+ * Written by Intel Corporation for the OpenSSL project to add support
+ * for Intel AES-NI instructions. Rights for redistribution and usage
+ * in source and binary forms are granted according to the OpenSSL
+ * license.
+ *
+ * Author: Huang Ying <ying.huang at intel dot com>
+ * Vinodh Gopal <vinodh.gopal at intel dot com>
+ * Kahraman Akdemir
+ *
+ * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
+ * instructions that are going to be introduced in the next generation
+ * of Intel processors, as of 2009. These instructions enable fast and
+ * secure data encryption and decryption, using the Advanced Encryption
+ * Standard (AES), defined by FIPS Publication number 197. The
+ * architecture introduces six instructions that offer full hardware
+ * support for AES. Four of them support high performance data
+ * encryption and decryption, and the other two instructions support
+ * the AES key expansion procedure.
+ * ====================================================================
+ */
+
+/*
+ * ====================================================================
+ * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core at openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ */
+
+/*
+ * ====================================================================
+ * OpenSolaris OS modifications
+ *
+ * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
+ * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
+ * Huang Ying of Intel to the openssl-dev mailing list under the subject
+ * of "Add support to Intel AES-NI instruction set for x86_64 platform".
+ *
+ * This OpenSolaris version has these major changes from the original source:
+ *
+ * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
+ * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
+ * definitions for lint.
+ *
+ * 2. Formatted code, added comments, and added #includes and #defines.
+ *
+ * 3. If bit CR0.TS is set, clear it after calling kpreempt_disable() and
+ * set it again before calling kpreempt_enable().
+ * If the TS bit is not set, save and restore the %xmm registers at the
+ * beginning and end of function calls (the %xmm* registers are not saved
+ * and restored during kernel thread preemption).
+ *
+ * 4. Renamed functions, reordered parameters, and changed return value
+ * to match OpenSolaris:
+ *
+ * OpenSSL interface:
+ * int intel_AES_set_encrypt_key(const unsigned char *userKey,
+ * const int bits, AES_KEY *key);
+ * int intel_AES_set_decrypt_key(const unsigned char *userKey,
+ * const int bits, AES_KEY *key);
+ * Return values for above are non-zero on error, 0 on success.
+ *
+ * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
+ * const AES_KEY *key);
+ * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
+ * const AES_KEY *key);
+ * typedef struct aes_key_st {
+ * unsigned int rd_key[4 *(AES_MAXNR + 1)];
+ * int rounds;
+ * unsigned int pad[3];
+ * } AES_KEY;
+ * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
+ * (ks32) instead of 64-bit (ks64)).
+ * Number of rounds (aka round count) is at offset 240 of AES_KEY.
+ *
+ * OpenSolaris OS interface (#ifdefs removed for readability):
+ * int rijndael_key_setup_dec_intel(uint32_t rk[],
+ * const uint32_t cipherKey[], uint64_t keyBits);
+ * int rijndael_key_setup_enc_intel(uint32_t rk[],
+ * const uint32_t cipherKey[], uint64_t keyBits);
+ * Return values for above are 0 on error, number of rounds on success.
+ *
+ * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
+ * const uint32_t pt[4], uint32_t ct[4]);
+ * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
+ * const uint32_t pt[4], uint32_t ct[4]);
+ * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
+ * uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
+ *
+ * typedef union {
+ * uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
+ * } aes_ks_t;
+ * typedef struct aes_key {
+ * aes_ks_t encr_ks, decr_ks;
+ * long double align128;
+ * int flags, nr, type;
+ * } aes_key_t;
+ *
+ * Note: ks is the AES key schedule, Nr is the number of rounds, pt is
+ * plaintext, ct is ciphertext, and MAX_AES_NR is 14.
+ * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
+ *
+ * Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary.
+ *
+ * ====================================================================
+ */
+
+
+#if defined(lint) || defined(__lint)
+
+#include <sys/types.h>
+
+/* ARGSUSED */
+void
+aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
+ uint32_t ct[4]) {
+}
+/* ARGSUSED */
+void
+aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
+ uint32_t pt[4]) {
+}
+/* ARGSUSED */
+int
+rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
+ uint64_t keyBits) {
+ return (0);
+}
+/* ARGSUSED */
+int
+rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
+ uint64_t keyBits) {
+ return (0);
+}
+
+
+#else /* lint */
+
+#define _ASM
+#include <sys/asm_linkage.h>
+
+
+/*
+ * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
+ * _key_expansion_256a(), _key_expansion_256b()
+ *
+ * Helper functions called by rijndael_key_setup_enc_intel().
+ * Also used indirectly by rijndael_key_setup_dec_intel().
+ *
+ * Input:
+ * %xmm0 User-provided cipher key
+ * %xmm1 Round constant
+ * Output:
+ * (%rcx) AES key
+ */
+
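+/*
+ * Background (FIPS-197): at each word index i with i congruent to 0 mod Nk,
+ * AES key expansion computes
+ *	temp = SubWord(RotWord(W[i-1])) ^ Rcon[i/Nk]
+ *	W[i] = W[i-Nk] ^ temp
+ * The aeskeygenassist instruction supplies the SubWord/RotWord/Rcon part in
+ * %xmm1; the shufps/pxor sequences below fold in the chained XORs with the
+ * previous schedule words held in %xmm0 (and %xmm2 for the 192/256 cases).
+ */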
+ENTRY_NP2(_key_expansion_128, _key_expansion_256a)
+_key_expansion_128_local:
+_key_expansion_256a_local:
+ pshufd $0b11111111, %xmm1, %xmm1
+ shufps $0b00010000, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ shufps $0b10001100, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ pxor %xmm1, %xmm0
+ movaps %xmm0, (%rcx)
+ add $0x10, %rcx
+ ret
+ nop
+SET_SIZE(_key_expansion_128)
+SET_SIZE(_key_expansion_256a)
+
+
+ENTRY_NP(_key_expansion_192a)
+_key_expansion_192a_local:
+ pshufd $0b01010101, %xmm1, %xmm1
+ shufps $0b00010000, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ shufps $0b10001100, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ pxor %xmm1, %xmm0
+
+ movaps %xmm2, %xmm5
+ movaps %xmm2, %xmm6
+ pslldq $4, %xmm5
+ pshufd $0b11111111, %xmm0, %xmm3
+ pxor %xmm3, %xmm2
+ pxor %xmm5, %xmm2
+
+ movaps %xmm0, %xmm1
+ shufps $0b01000100, %xmm0, %xmm6
+ movaps %xmm6, (%rcx)
+ shufps $0b01001110, %xmm2, %xmm1
+ movaps %xmm1, 0x10(%rcx)
+ add $0x20, %rcx
+ ret
+SET_SIZE(_key_expansion_192a)
+
+
+ENTRY_NP(_key_expansion_192b)
+_key_expansion_192b_local:
+ pshufd $0b01010101, %xmm1, %xmm1
+ shufps $0b00010000, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ shufps $0b10001100, %xmm0, %xmm4
+ pxor %xmm4, %xmm0
+ pxor %xmm1, %xmm0
+
+ movaps %xmm2, %xmm5
+ pslldq $4, %xmm5
+ pshufd $0b11111111, %xmm0, %xmm3
+ pxor %xmm3, %xmm2
+ pxor %xmm5, %xmm2
+
+ movaps %xmm0, (%rcx)
+ add $0x10, %rcx
+ ret
+SET_SIZE(_key_expansion_192b)
+
+
+ENTRY_NP(_key_expansion_256b)
+_key_expansion_256b_local:
+ pshufd $0b10101010, %xmm1, %xmm1
+ shufps $0b00010000, %xmm2, %xmm4
+ pxor %xmm4, %xmm2
+ shufps $0b10001100, %xmm2, %xmm4
+ pxor %xmm4, %xmm2
+ pxor %xmm1, %xmm2
+ movaps %xmm2, (%rcx)
+ add $0x10, %rcx
+ ret
+SET_SIZE(_key_expansion_256b)
+
+
+/*
+ * rijndael_key_setup_enc_intel()
+ * Expand the cipher key into the encryption key schedule.
+ *
+ * For kernel code, caller is responsible for ensuring kpreempt_disable()
+ * has been called. This is because %xmm registers are not saved/restored.
+ * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
+ * on entry. Otherwise, if TS is not set, save and restore %xmm registers
+ * on the stack.
+ *
+ * OpenSolaris interface:
+ * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
+ * uint64_t keyBits);
+ * Return value is 0 on error, number of rounds on success.
+ *
+ * Original Intel OpenSSL interface:
+ * int intel_AES_set_encrypt_key(const unsigned char *userKey,
+ * const int bits, AES_KEY *key);
+ * Return value is non-zero on error, 0 on success.
+ */
+
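+/*
+ * Illustrative (hypothetical) OpenSolaris-style call, assuming the caller
+ * has already handled kpreempt_disable()/CR0.TS as described above:
+ *
+ *	uint32_t rk[4 * (14 + 1)];	// large enough for AES-256
+ *	int nr = rijndael_key_setup_enc_intel(rk, cipherKey, 256);
+ *	if (nr == 0)
+ *		return (error);		// NULL pointer or invalid key size
+ */
+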
+#ifdef OPENSSL_INTERFACE
+#define rijndael_key_setup_enc_intel intel_AES_set_encrypt_key
+#define rijndael_key_setup_dec_intel intel_AES_set_decrypt_key
+
+#define USERCIPHERKEY rdi /* P1, 64 bits */
+#define KEYSIZE32 esi /* P2, 32 bits */
+#define KEYSIZE64 rsi /* P2, 64 bits */
+#define AESKEY rdx /* P3, 64 bits */
+
+#else /* OpenSolaris Interface */
+#define AESKEY rdi /* P1, 64 bits */
+#define USERCIPHERKEY rsi /* P2, 64 bits */
+#define KEYSIZE32 edx /* P3, 32 bits */
+#define KEYSIZE64 rdx /* P3, 64 bits */
+#endif /* OPENSSL_INTERFACE */
+
+#define ROUNDS32 KEYSIZE32 /* temp */
+#define ROUNDS64 KEYSIZE64 /* temp */
+#define ENDAESKEY USERCIPHERKEY /* temp */
+
+ENTRY_NP(rijndael_key_setup_enc_intel)
+rijndael_key_setup_enc_intel_local:
+ FRAME_BEGIN
+ // NULL pointer sanity check
+ test %USERCIPHERKEY, %USERCIPHERKEY
+ jz .Lenc_key_invalid_param
+ test %AESKEY, %AESKEY
+ jz .Lenc_key_invalid_param
+
+ movups (%USERCIPHERKEY), %xmm0 // user key (first 16 bytes)
+ movaps %xmm0, (%AESKEY)
+ lea 0x10(%AESKEY), %rcx // key addr
+ pxor %xmm4, %xmm4 // xmm4 is assumed 0 in _key_expansion_x
+
+ cmp $256, %KEYSIZE32
+ jnz .Lenc_key192
+
+ // AES 256: 14 rounds in encryption key schedule
+#ifdef OPENSSL_INTERFACE
+ mov $14, %ROUNDS32
+ movl %ROUNDS32, 240(%AESKEY) // key.rounds = 14
+#endif /* OPENSSL_INTERFACE */
+
+ movups 0x10(%USERCIPHERKEY), %xmm2 // other user key (2nd 16 bytes)
+ movaps %xmm2, (%rcx)
+ add $0x10, %rcx
+
+ aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key
+ call _key_expansion_256a_local
+ aeskeygenassist $0x1, %xmm0, %xmm1
+ call _key_expansion_256b_local
+ aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key
+ call _key_expansion_256a_local
+ aeskeygenassist $0x2, %xmm0, %xmm1
+ call _key_expansion_256b_local
+ aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key
+ call _key_expansion_256a_local
+ aeskeygenassist $0x4, %xmm0, %xmm1
+ call _key_expansion_256b_local
+ aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key
+ call _key_expansion_256a_local
+ aeskeygenassist $0x8, %xmm0, %xmm1
+ call _key_expansion_256b_local
+ aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key
+ call _key_expansion_256a_local
+ aeskeygenassist $0x10, %xmm0, %xmm1
+ call _key_expansion_256b_local
+ aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key
+ call _key_expansion_256a_local
+ aeskeygenassist $0x20, %xmm0, %xmm1
+ call _key_expansion_256b_local
+ aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key
+ call _key_expansion_256a_local
+
+#ifdef OPENSSL_INTERFACE
+ xor %rax, %rax // return 0 (OK)
+#else /* OpenSolaris Interface */
+ mov $14, %rax // return # rounds = 14
+#endif
+ FRAME_END
+ ret
+
+.align 4
+.Lenc_key192:
+ cmp $192, %KEYSIZE32
+ jnz .Lenc_key128
+
+ // AES 192: 12 rounds in encryption key schedule
+#ifdef OPENSSL_INTERFACE
+ mov $12, %ROUNDS32
+ movl %ROUNDS32, 240(%AESKEY) // key.rounds = 12
+#endif /* OPENSSL_INTERFACE */
+
+ movq 0x10(%USERCIPHERKEY), %xmm2 // other user key
+ aeskeygenassist $0x1, %xmm2, %xmm1 // expand the key
+ call _key_expansion_192a_local
+ aeskeygenassist $0x2, %xmm2, %xmm1 // expand the key
+ call _key_expansion_192b_local
+ aeskeygenassist $0x4, %xmm2, %xmm1 // expand the key
+ call _key_expansion_192a_local
+ aeskeygenassist $0x8, %xmm2, %xmm1 // expand the key
+ call _key_expansion_192b_local
+ aeskeygenassist $0x10, %xmm2, %xmm1 // expand the key
+ call _key_expansion_192a_local
+ aeskeygenassist $0x20, %xmm2, %xmm1 // expand the key
+ call _key_expansion_192b_local
+ aeskeygenassist $0x40, %xmm2, %xmm1 // expand the key
+ call _key_expansion_192a_local
+ aeskeygenassist $0x80, %xmm2, %xmm1 // expand the key
+ call _key_expansion_192b_local
+
+#ifdef OPENSSL_INTERFACE
+ xor %rax, %rax // return 0 (OK)
+#else /* OpenSolaris Interface */
+ mov $12, %rax // return # rounds = 12
+#endif
+ FRAME_END
+ ret
+
+.align 4
+.Lenc_key128:
+ cmp $128, %KEYSIZE32
+ jnz .Lenc_key_invalid_key_bits
+
+ // AES 128: 10 rounds in encryption key schedule
+#ifdef OPENSSL_INTERFACE
+ mov $10, %ROUNDS32
+ movl %ROUNDS32, 240(%AESKEY) // key.rounds = 10
+#endif /* OPENSSL_INTERFACE */
+
+ aeskeygenassist $0x1, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x2, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x4, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x8, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x10, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x20, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x40, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x80, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x1b, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+ aeskeygenassist $0x36, %xmm0, %xmm1 // expand the key
+ call _key_expansion_128_local
+
+#ifdef OPENSSL_INTERFACE
+ xor %rax, %rax // return 0 (OK)
+#else /* OpenSolaris Interface */
+ mov $10, %rax // return # rounds = 10
+#endif
+ FRAME_END
+ ret
+
+.Lenc_key_invalid_param:
+#ifdef OPENSSL_INTERFACE
+ mov $-1, %rax // user key or AES key pointer is NULL
+ FRAME_END
+ ret
+#else
+ /* FALLTHROUGH */
+#endif /* OPENSSL_INTERFACE */
+
+.Lenc_key_invalid_key_bits:
+#ifdef OPENSSL_INTERFACE
+ mov $-2, %rax // keysize is invalid
+#else /* OpenSolaris Interface */
+ xor %rax, %rax // a key pointer is NULL or invalid keysize
+#endif /* OPENSSL_INTERFACE */
+ FRAME_END
+ ret
+ SET_SIZE(rijndael_key_setup_enc_intel)
+
+
+/*
+ * rijndael_key_setup_dec_intel()
+ * Expand the cipher key into the decryption key schedule.
+ *
+ * For kernel code, caller is responsible for ensuring kpreempt_disable()
+ * has been called. This is because %xmm registers are not saved/restored.
+ * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
+ * on entry. Otherwise, if TS is not set, save and restore %xmm registers
+ * on the stack.
+ *
+ * OpenSolaris interface:
+ * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
+ * uint64_t keyBits);
+ * Return value is 0 on error, number of rounds on success.
+ * P1->P2, P2->P3, P3->P1
+ *
+ * Original Intel OpenSSL interface:
+ * int intel_AES_set_decrypt_key(const unsigned char *userKey,
+ * const int bits, AES_KEY *key);
+ * Return value is non-zero on error, 0 on success.
+ */
+
+ENTRY_NP(rijndael_key_setup_dec_intel)
+ FRAME_BEGIN
+ // Generate round keys used for encryption
+ call rijndael_key_setup_enc_intel_local
+ test %rax, %rax
+#ifdef OPENSSL_INTERFACE
+ jnz .Ldec_key_exit // Failed if returned non-0
+#else /* OpenSolaris Interface */
+ jz .Ldec_key_exit // Failed if returned 0
+#endif /* OPENSSL_INTERFACE */
+
+ /*
+ * Convert round keys used for encryption
+ * to a form usable for decryption
+ */
+#ifndef OPENSSL_INTERFACE /* OpenSolaris Interface */
+ mov %rax, %ROUNDS64 // set # rounds (10, 12, or 14)
+ // (already set for OpenSSL)
+#endif
+
+ lea 0x10(%AESKEY), %rcx // key addr
+ shl $4, %ROUNDS32
+ add %AESKEY, %ROUNDS64
+ mov %ROUNDS64, %ENDAESKEY
+
+.align 4
+.Ldec_key_reorder_loop:
+ movaps (%AESKEY), %xmm0
+ movaps (%ROUNDS64), %xmm1
+ movaps %xmm0, (%ROUNDS64)
+ movaps %xmm1, (%AESKEY)
+ lea 0x10(%AESKEY), %AESKEY
+ lea -0x10(%ROUNDS64), %ROUNDS64
+ cmp %AESKEY, %ROUNDS64
+ ja .Ldec_key_reorder_loop
+
+.align 4
+.Ldec_key_inv_loop:
+ movaps (%rcx), %xmm0
+ // Convert an encryption round key to a form usable for decryption
+ // with the "AES Inverse Mix Columns" instruction
+ aesimc %xmm0, %xmm1
+ movaps %xmm1, (%rcx)
+ lea 0x10(%rcx), %rcx
+ cmp %ENDAESKEY, %rcx
+ jnz .Ldec_key_inv_loop
+
+.Ldec_key_exit:
+ // OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
+ // OpenSSL: rax = 0 for OK, or non-zero for error
+ FRAME_END
+ ret
+ SET_SIZE(rijndael_key_setup_dec_intel)
+
+
+/*
+ * aes_encrypt_intel()
+ * Encrypt a single block (in and out can overlap).
+ *
+ * For kernel code, caller is responsible for ensuring kpreempt_disable()
+ * has been called. This is because %xmm registers are not saved/restored.
+ * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
+ * on entry. Otherwise, if TS is not set, save and restore %xmm registers
+ * on the stack.
+ *
+ * Temporary register usage:
+ * %xmm0 State
+ * %xmm1 Key
+ *
+ * Original OpenSolaris Interface:
+ * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
+ * const uint32_t pt[4], uint32_t ct[4])
+ *
+ * Original Intel OpenSSL Interface:
+ * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
+ * const AES_KEY *key)
+ */
+
+#ifdef OPENSSL_INTERFACE
+#define aes_encrypt_intel intel_AES_encrypt
+#define aes_decrypt_intel intel_AES_decrypt
+
+#define INP rdi /* P1, 64 bits */
+#define OUTP rsi /* P2, 64 bits */
+#define KEYP rdx /* P3, 64 bits */
+
+/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx: */
+#define NROUNDS32 ecx /* temporary, 32 bits */
+#define NROUNDS cl /* temporary, 8 bits */
+
+#else /* OpenSolaris Interface */
+#define KEYP rdi /* P1, 64 bits */
+#define NROUNDS esi /* P2, 32 bits */
+#define INP rdx /* P3, 64 bits */
+#define OUTP rcx /* P4, 64 bits */
+#endif /* OPENSSL_INTERFACE */
+
+#define STATE xmm0 /* temporary, 128 bits */
+#define KEY xmm1 /* temporary, 128 bits */
+
+
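+/*
+ * Note on the lea-based addressing below: %KEYP is advanced past round key 0
+ * by a round-count-dependent bias so that the final round key always lands
+ * at offset 0x70(%KEYP), letting the three key sizes share one code tail.
+ */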
+ENTRY_NP(aes_encrypt_intel)
+
+ movups (%INP), %STATE // input
+ movaps (%KEYP), %KEY // key
+#ifdef OPENSSL_INTERFACE
+ mov 240(%KEYP), %NROUNDS32 // round count
+#else /* OpenSolaris Interface */
+ /* Round count is already present as P2 in %rsi/%esi */
+#endif /* OPENSSL_INTERFACE */
+
+ pxor %KEY, %STATE // round 0
+ lea 0x30(%KEYP), %KEYP
+ cmp $12, %NROUNDS
+ jb .Lenc128
+ lea 0x20(%KEYP), %KEYP
+ je .Lenc192
+
+ // AES 256
+ lea 0x20(%KEYP), %KEYP
+ movaps -0x60(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movaps -0x50(%KEYP), %KEY
+ aesenc %KEY, %STATE
+
+.align 4
+.Lenc192:
+ // AES 192 and 256
+ movaps -0x40(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movaps -0x30(%KEYP), %KEY
+ aesenc %KEY, %STATE
+
+.align 4
+.Lenc128:
+ // AES 128, 192, and 256
+ movaps -0x20(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movaps -0x10(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movaps (%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movaps 0x10(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movaps 0x20(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movaps 0x30(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movaps 0x40(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movaps 0x50(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movaps 0x60(%KEYP), %KEY
+ aesenc %KEY, %STATE
+ movaps 0x70(%KEYP), %KEY
+ aesenclast %KEY, %STATE // last round
+ movups %STATE, (%OUTP) // output
+
+ ret
+ SET_SIZE(aes_encrypt_intel)
+
+
+/*
+ * aes_decrypt_intel()
+ * Decrypt a single block (in and out can overlap).
+ *
+ * For kernel code, caller is responsible for ensuring kpreempt_disable()
+ * has been called. This is because %xmm registers are not saved/restored.
+ * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
+ * on entry. Otherwise, if TS is not set, save and restore %xmm registers
+ * on the stack.
+ *
+ * Temporary register usage:
+ * %xmm0 State
+ * %xmm1 Key
+ *
+ * Original OpenSolaris Interface:
+ * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
+ * const uint32_t pt[4], uint32_t ct[4]);
+ *
+ * Original Intel OpenSSL Interface:
+ * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
+ * const AES_KEY *key);
+ */
+ENTRY_NP(aes_decrypt_intel)
+
+ movups (%INP), %STATE // input
+ movaps (%KEYP), %KEY // key
+#ifdef OPENSSL_INTERFACE
+ mov 240(%KEYP), %NROUNDS32 // round count
+#else /* OpenSolaris Interface */
+ /* Round count is already present as P2 in %rsi/%esi */
+#endif /* OPENSSL_INTERFACE */
+
+ pxor %KEY, %STATE // round 0
+ lea 0x30(%KEYP), %KEYP
+ cmp $12, %NROUNDS
+ jb .Ldec128
+ lea 0x20(%KEYP), %KEYP
+ je .Ldec192
+
+ // AES 256
+ lea 0x20(%KEYP), %KEYP
+ movaps -0x60(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movaps -0x50(%KEYP), %KEY
+ aesdec %KEY, %STATE
+
+.align 4
+.Ldec192:
+ // AES 192 and 256
+ movaps -0x40(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movaps -0x30(%KEYP), %KEY
+ aesdec %KEY, %STATE
+
+.align 4
+.Ldec128:
+ // AES 128, 192, and 256
+ movaps -0x20(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movaps -0x10(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movaps (%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movaps 0x10(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movaps 0x20(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movaps 0x30(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movaps 0x40(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movaps 0x50(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movaps 0x60(%KEYP), %KEY
+ aesdec %KEY, %STATE
+ movaps 0x70(%KEYP), %KEY
+ aesdeclast %KEY, %STATE // last round
+ movups %STATE, (%OUTP) // output
+
+ ret
+ SET_SIZE(aes_decrypt_intel)
+
+#endif /* lint || __lint */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/zfs/module/icp/asm-x86_64/aes/aeskey.c b/zfs/module/icp/asm-x86_64/aes/aeskey.c
new file mode 100644
index 000000000000..c3d1f2990874
--- /dev/null
+++ b/zfs/module/icp/asm-x86_64/aes/aeskey.c
@@ -0,0 +1,580 @@
+/*
+ * ---------------------------------------------------------------------------
+ * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
+ *
+ * LICENSE TERMS
+ *
+ * The free distribution and use of this software is allowed (with or without
+ * changes) provided that:
+ *
+ * 1. source code distributions include the above copyright notice, this
+ * list of conditions and the following disclaimer;
+ *
+ * 2. binary distributions include the above copyright notice, this list
+ * of conditions and the following disclaimer in their documentation;
+ *
+ * 3. the name of the copyright holder is not used to endorse products
+ * built using this software without specific written permission.
+ *
+ * DISCLAIMER
+ *
+ * This software is provided 'as is' with no explicit or implied warranties
+ * in respect of its properties, including, but not limited to, correctness
+ * and/or fitness for purpose.
+ * ---------------------------------------------------------------------------
+ * Issue Date: 20/12/2007
+ */
+
+#include <aes/aes_impl.h>
+#include "aesopt.h"
+#include "aestab.h"
+#include "aestab2.h"
+
+/*
+ * Initialise the key schedule from the user supplied key. The key
+ * length can be specified in bytes, with legal values of 16, 24
+ * and 32, or in bits, with legal values of 128, 192 and 256. These
+ * values correspond with Nk values of 4, 6 and 8 respectively.
+ *
+ * The following macros implement a single cycle in the key
+ * schedule generation process. The number of cycles needed
+ * for each cx->n_col and nk value is:
+ *
+ * nk =            4    5    6    7    8
+ * -------------------------------------
+ * cx->n_col = 4   10   9    8    7    7
+ * cx->n_col = 5   14   11   10   9    9
+ * cx->n_col = 6   19   15   12   11   11
+ * cx->n_col = 7   21   19   16   13   14
+ * cx->n_col = 8   29   23   19   17   14
+ */
+
+/*
+ * OpenSolaris changes
+ * 1. Added header files aes_impl.h and aestab2.h
+ * 2. Changed uint_8t and uint_32t to uint8_t and uint32_t
+ * 3. Remove code under ifdef USE_VIA_ACE_IF_PRESENT (always undefined)
+ * 4. Removed always-defined ifdefs FUNCS_IN_C, ENC_KEYING_IN_C,
+ * AES_128, AES_192, AES_256, AES_VAR defines
+ * 5. Changed aes_encrypt_key* aes_decrypt_key* functions to "static void"
+ * 6. Changed N_COLS to MAX_AES_NB
+ * 7. Replaced functions aes_encrypt_key and aes_decrypt_key with
+ * OpenSolaris-compatible functions rijndael_key_setup_enc_amd64 and
+ * rijndael_key_setup_dec_amd64
+ * 8. cstyled code and removed lint warnings
+ */
+
+#if defined(REDUCE_CODE_SIZE)
+#define ls_box ls_sub
+ uint32_t ls_sub(const uint32_t t, const uint32_t n);
+#define inv_mcol im_sub
+ uint32_t im_sub(const uint32_t x);
+#ifdef ENC_KS_UNROLL
+#undef ENC_KS_UNROLL
+#endif
+#ifdef DEC_KS_UNROLL
+#undef DEC_KS_UNROLL
+#endif
+#endif /* REDUCE_CODE_SIZE */
+
+
+#define ke4(k, i) \
+{ k[4 * (i) + 4] = ss[0] ^= ls_box(ss[3], 3) ^ t_use(r, c)[i]; \
+ k[4 * (i) + 5] = ss[1] ^= ss[0]; \
+ k[4 * (i) + 6] = ss[2] ^= ss[1]; \
+ k[4 * (i) + 7] = ss[3] ^= ss[2]; \
+}
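+
+/*
+ * In FIPS-197 terms, ke4() is one AES-128 key-schedule cycle: it derives
+ * W[4i+4] = W[4i] ^ SubWord(RotWord(W[4i+3])) ^ Rcon[i] (ls_box() supplies
+ * the combined rotate-and-substitute step) and then W[4i+5..4i+7] by
+ * chained XORs, with ss[] carrying the last four schedule words between
+ * cycles.
+ */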
+
+static void
+aes_encrypt_key128(const unsigned char *key, uint32_t rk[])
+{
+ uint32_t ss[4];
+
+ rk[0] = ss[0] = word_in(key, 0);
+ rk[1] = ss[1] = word_in(key, 1);
+ rk[2] = ss[2] = word_in(key, 2);
+ rk[3] = ss[3] = word_in(key, 3);
+
+#ifdef ENC_KS_UNROLL
+ ke4(rk, 0); ke4(rk, 1);
+ ke4(rk, 2); ke4(rk, 3);
+ ke4(rk, 4); ke4(rk, 5);
+ ke4(rk, 6); ke4(rk, 7);
+ ke4(rk, 8);
+#else
+ {
+ uint32_t i;
+ for (i = 0; i < 9; ++i)
+ ke4(rk, i);
+ }
+#endif /* ENC_KS_UNROLL */
+ ke4(rk, 9);
+}
+
+
+#define kef6(k, i) \
+{ k[6 * (i) + 6] = ss[0] ^= ls_box(ss[5], 3) ^ t_use(r, c)[i]; \
+ k[6 * (i) + 7] = ss[1] ^= ss[0]; \
+ k[6 * (i) + 8] = ss[2] ^= ss[1]; \
+ k[6 * (i) + 9] = ss[3] ^= ss[2]; \
+}
+
+#define ke6(k, i) \
+{ kef6(k, i); \
+ k[6 * (i) + 10] = ss[4] ^= ss[3]; \
+ k[6 * (i) + 11] = ss[5] ^= ss[4]; \
+}
+
+static void
+aes_encrypt_key192(const unsigned char *key, uint32_t rk[])
+{
+ uint32_t ss[6];
+
+ rk[0] = ss[0] = word_in(key, 0);
+ rk[1] = ss[1] = word_in(key, 1);
+ rk[2] = ss[2] = word_in(key, 2);
+ rk[3] = ss[3] = word_in(key, 3);
+ rk[4] = ss[4] = word_in(key, 4);
+ rk[5] = ss[5] = word_in(key, 5);
+
+#ifdef ENC_KS_UNROLL
+ ke6(rk, 0); ke6(rk, 1);
+ ke6(rk, 2); ke6(rk, 3);
+ ke6(rk, 4); ke6(rk, 5);
+ ke6(rk, 6);
+#else
+ {
+ uint32_t i;
+ for (i = 0; i < 7; ++i)
+ ke6(rk, i);
+ }
+#endif /* ENC_KS_UNROLL */
+ kef6(rk, 7);
+}
+
+
+
+#define kef8(k, i) \
+{ k[8 * (i) + 8] = ss[0] ^= ls_box(ss[7], 3) ^ t_use(r, c)[i]; \
+ k[8 * (i) + 9] = ss[1] ^= ss[0]; \
+ k[8 * (i) + 10] = ss[2] ^= ss[1]; \
+ k[8 * (i) + 11] = ss[3] ^= ss[2]; \
+}
+
+#define ke8(k, i) \
+{ kef8(k, i); \
+ k[8 * (i) + 12] = ss[4] ^= ls_box(ss[3], 0); \
+ k[8 * (i) + 13] = ss[5] ^= ss[4]; \
+ k[8 * (i) + 14] = ss[6] ^= ss[5]; \
+ k[8 * (i) + 15] = ss[7] ^= ss[6]; \
+}
+
+static void
+aes_encrypt_key256(const unsigned char *key, uint32_t rk[])
+{
+ uint32_t ss[8];
+
+ rk[0] = ss[0] = word_in(key, 0);
+ rk[1] = ss[1] = word_in(key, 1);
+ rk[2] = ss[2] = word_in(key, 2);
+ rk[3] = ss[3] = word_in(key, 3);
+ rk[4] = ss[4] = word_in(key, 4);
+ rk[5] = ss[5] = word_in(key, 5);
+ rk[6] = ss[6] = word_in(key, 6);
+ rk[7] = ss[7] = word_in(key, 7);
+
+#ifdef ENC_KS_UNROLL
+ ke8(rk, 0); ke8(rk, 1);
+ ke8(rk, 2); ke8(rk, 3);
+ ke8(rk, 4); ke8(rk, 5);
+#else
+ {
+ uint32_t i;
+ for (i = 0; i < 6; ++i)
+ ke8(rk, i);
+ }
+#endif /* ENC_KS_UNROLL */
+ kef8(rk, 6);
+}
+
+
+/*
+ * Expand the cipher key into the encryption key schedule.
+ *
+ * Return the number of rounds for the given cipher key size.
+ * The size of the key schedule depends on the number of rounds
+ * (which can be computed from the size of the key), i.e. 4 * (Nr + 1).
+ *
+ * Parameters:
+ * rk AES key schedule 32-bit array to be initialized
+ * cipherKey User key
+ * keyBits AES key size (128, 192, or 256 bits)
+ */
+int
+rijndael_key_setup_enc_amd64(uint32_t rk[], const uint32_t cipherKey[],
+ int keyBits)
+{
+ switch (keyBits) {
+ case 128:
+ aes_encrypt_key128((unsigned char *)&cipherKey[0], rk);
+ return (10);
+ case 192:
+ aes_encrypt_key192((unsigned char *)&cipherKey[0], rk);
+ return (12);
+ case 256:
+ aes_encrypt_key256((unsigned char *)&cipherKey[0], rk);
+ return (14);
+ default: /* should never get here */
+ break;
+ }
+
+ return (0);
+}
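+
+/*
+ * Illustrative (hypothetical) use of the routine above:
+ *
+ *	uint32_t rk[4 * (14 + 1)];	// enough for any supported key size
+ *	int nr = rijndael_key_setup_enc_amd64(rk, cipherKey, 256);
+ *	// on success nr is 14 and rk[] holds 4 * (nr + 1) schedule words
+ */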
+
+
+/*
+ * This is used to store the decryption round keys
+ * in forward or reverse order.
+ */
+
+#ifdef AES_REV_DKS
+#define v(n, i) ((n) - (i) + 2 * ((i) & 3))
+#else
+#define v(n, i) (i)
+#endif
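+
+/*
+ * With AES_REV_DKS defined, v(n, i) mirrors the schedule: the (n) - (i)
+ * term reverses the round order while 2 * ((i) & 3) keeps the four words
+ * inside each round key in their original sequence (e.g. i = 0..3 map to
+ * n..n+3, i = 4..7 map to n-4..n-1).
+ */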
+
+#if DEC_ROUND == NO_TABLES
+#define ff(x) (x)
+#else
+#define ff(x) inv_mcol(x)
+#if defined(dec_imvars)
+#define d_vars dec_imvars
+#endif
+#endif /* DEC_ROUND == NO_TABLES */
+
+
+#define k4e(k, i) \
+{ k[v(40, (4 * (i)) + 4)] = ss[0] ^= ls_box(ss[3], 3) ^ t_use(r, c)[i]; \
+ k[v(40, (4 * (i)) + 5)] = ss[1] ^= ss[0]; \
+ k[v(40, (4 * (i)) + 6)] = ss[2] ^= ss[1]; \
+ k[v(40, (4 * (i)) + 7)] = ss[3] ^= ss[2]; \
+}
+
+#if 1
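+
+/*
+ * Two equivalent formulations of kdf4/kd4/kdl4 follow; the branch selected
+ * here appears to be the optimized form, while the #else branch below is
+ * the more direct transcription of the key-schedule recurrence.
+ */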
+
+#define kdf4(k, i) \
+{ ss[0] = ss[0] ^ ss[2] ^ ss[1] ^ ss[3]; \
+ ss[1] = ss[1] ^ ss[3]; \
+ ss[2] = ss[2] ^ ss[3]; \
+ ss[4] = ls_box(ss[(i + 3) % 4], 3) ^ t_use(r, c)[i]; \
+ ss[i % 4] ^= ss[4]; \
+ ss[4] ^= k[v(40, (4 * (i)))]; k[v(40, (4 * (i)) + 4)] = ff(ss[4]); \
+ ss[4] ^= k[v(40, (4 * (i)) + 1)]; k[v(40, (4 * (i)) + 5)] = ff(ss[4]); \
+ ss[4] ^= k[v(40, (4 * (i)) + 2)]; k[v(40, (4 * (i)) + 6)] = ff(ss[4]); \
+ ss[4] ^= k[v(40, (4 * (i)) + 3)]; k[v(40, (4 * (i)) + 7)] = ff(ss[4]); \
+}
+
+#define kd4(k, i) \
+{ ss[4] = ls_box(ss[(i + 3) % 4], 3) ^ t_use(r, c)[i]; \
+ ss[i % 4] ^= ss[4]; ss[4] = ff(ss[4]); \
+ k[v(40, (4 * (i)) + 4)] = ss[4] ^= k[v(40, (4 * (i)))]; \
+ k[v(40, (4 * (i)) + 5)] = ss[4] ^= k[v(40, (4 * (i)) + 1)]; \
+ k[v(40, (4 * (i)) + 6)] = ss[4] ^= k[v(40, (4 * (i)) + 2)]; \
+ k[v(40, (4 * (i)) + 7)] = ss[4] ^= k[v(40, (4 * (i)) + 3)]; \
+}
+
+#define kdl4(k, i) \
+{ ss[4] = ls_box(ss[(i + 3) % 4], 3) ^ t_use(r, c)[i]; \
+ ss[i % 4] ^= ss[4]; \
+ k[v(40, (4 * (i)) + 4)] = (ss[0] ^= ss[1]) ^ ss[2] ^ ss[3]; \
+ k[v(40, (4 * (i)) + 5)] = ss[1] ^ ss[3]; \
+ k[v(40, (4 * (i)) + 6)] = ss[0]; \
+ k[v(40, (4 * (i)) + 7)] = ss[1]; \
+}
+
+#else
+
+#define kdf4(k, i) \
+{ ss[0] ^= ls_box(ss[3], 3) ^ t_use(r, c)[i]; \
+ k[v(40, (4 * (i)) + 4)] = ff(ss[0]); \
+ ss[1] ^= ss[0]; k[v(40, (4 * (i)) + 5)] = ff(ss[1]); \
+ ss[2] ^= ss[1]; k[v(40, (4 * (i)) + 6)] = ff(ss[2]); \
+ ss[3] ^= ss[2]; k[v(40, (4 * (i)) + 7)] = ff(ss[3]); \
+}
+
+#define kd4(k, i) \
+{ ss[4] = ls_box(ss[3], 3) ^ t_use(r, c)[i]; \
+ ss[0] ^= ss[4]; \
+ ss[4] = ff(ss[4]); \
+ k[v(40, (4 * (i)) + 4)] = ss[4] ^= k[v(40, (4 * (i)))]; \
+ ss[1] ^= ss[0]; \
+ k[v(40, (4 * (i)) + 5)] = ss[4] ^= k[v(40, (4 * (i)) + 1)]; \
+ ss[2] ^= ss[1]; \
+ k[v(40, (4 * (i)) + 6)] = ss[4] ^= k[v(40, (4 * (i)) + 2)]; \
+ ss[3] ^= ss[2]; \
+ k[v(40, (4 * (i)) + 7)] = ss[4] ^= k[v(40, (4 * (i)) + 3)]; \
+}
+
+#define kdl4(k, i) \
+{ ss[0] ^= ls_box(ss[3], 3) ^ t_use(r, c)[i]; \
+ k[v(40, (4 * (i)) + 4)] = ss[0]; \
+ ss[1] ^= ss[0]; k[v(40, (4 * (i)) + 5)] = ss[1]; \
+ ss[2] ^= ss[1]; k[v(40, (4 * (i)) + 6)] = ss[2]; \
+ ss[3] ^= ss[2]; k[v(40, (4 * (i)) + 7)] = ss[3]; \
+}
+
+#endif
+
+static void
+aes_decrypt_key128(const unsigned char *key, uint32_t rk[])
+{
+ uint32_t ss[5];
+#if defined(d_vars)
+ d_vars;
+#endif
+ rk[v(40, (0))] = ss[0] = word_in(key, 0);
+ rk[v(40, (1))] = ss[1] = word_in(key, 1);
+ rk[v(40, (2))] = ss[2] = word_in(key, 2);
+ rk[v(40, (3))] = ss[3] = word_in(key, 3);
+
+#ifdef DEC_KS_UNROLL
+ kdf4(rk, 0); kd4(rk, 1);
+ kd4(rk, 2); kd4(rk, 3);
+ kd4(rk, 4); kd4(rk, 5);
+ kd4(rk, 6); kd4(rk, 7);
+ kd4(rk, 8); kdl4(rk, 9);
+#else
+ {
+ uint32_t i;
+ for (i = 0; i < 10; ++i)
+ k4e(rk, i);
+#if !(DEC_ROUND == NO_TABLES)
+ for (i = MAX_AES_NB; i < 10 * MAX_AES_NB; ++i)
+ rk[i] = inv_mcol(rk[i]);
+#endif
+ }
+#endif /* DEC_KS_UNROLL */
+}
+
+
+
+#define k6ef(k, i) \
+{ k[v(48, (6 * (i)) + 6)] = ss[0] ^= ls_box(ss[5], 3) ^ t_use(r, c)[i]; \
+ k[v(48, (6 * (i)) + 7)] = ss[1] ^= ss[0]; \
+ k[v(48, (6 * (i)) + 8)] = ss[2] ^= ss[1]; \
+ k[v(48, (6 * (i)) + 9)] = ss[3] ^= ss[2]; \
+}
+
+#define k6e(k, i) \
+{ k6ef(k, i); \
+ k[v(48, (6 * (i)) + 10)] = ss[4] ^= ss[3]; \
+ k[v(48, (6 * (i)) + 11)] = ss[5] ^= ss[4]; \
+}
+
+#define kdf6(k, i) \
+{ ss[0] ^= ls_box(ss[5], 3) ^ t_use(r, c)[i]; \
+ k[v(48, (6 * (i)) + 6)] = ff(ss[0]); \
+ ss[1] ^= ss[0]; k[v(48, (6 * (i)) + 7)] = ff(ss[1]); \
+ ss[2] ^= ss[1]; k[v(48, (6 * (i)) + 8)] = ff(ss[2]); \
+ ss[3] ^= ss[2]; k[v(48, (6 * (i)) + 9)] = ff(ss[3]); \
+ ss[4] ^= ss[3]; k[v(48, (6 * (i)) + 10)] = ff(ss[4]); \
+ ss[5] ^= ss[4]; k[v(48, (6 * (i)) + 11)] = ff(ss[5]); \
+}
+
+#define kd6(k, i) \
+{ ss[6] = ls_box(ss[5], 3) ^ t_use(r, c)[i]; \
+ ss[0] ^= ss[6]; ss[6] = ff(ss[6]); \
+ k[v(48, (6 * (i)) + 6)] = ss[6] ^= k[v(48, (6 * (i)))]; \
+ ss[1] ^= ss[0]; \
+ k[v(48, (6 * (i)) + 7)] = ss[6] ^= k[v(48, (6 * (i)) + 1)]; \
+ ss[2] ^= ss[1]; \
+ k[v(48, (6 * (i)) + 8)] = ss[6] ^= k[v(48, (6 * (i)) + 2)]; \
+ ss[3] ^= ss[2]; \
+ k[v(48, (6 * (i)) + 9)] = ss[6] ^= k[v(48, (6 * (i)) + 3)]; \
+ ss[4] ^= ss[3]; \
+ k[v(48, (6 * (i)) + 10)] = ss[6] ^= k[v(48, (6 * (i)) + 4)]; \
+ ss[5] ^= ss[4]; \
+ k[v(48, (6 * (i)) + 11)] = ss[6] ^= k[v(48, (6 * (i)) + 5)]; \
+}
+
+#define kdl6(k, i) \
+{ ss[0] ^= ls_box(ss[5], 3) ^ t_use(r, c)[i]; \
+ k[v(48, (6 * (i)) + 6)] = ss[0]; \
+ ss[1] ^= ss[0]; k[v(48, (6 * (i)) + 7)] = ss[1]; \
+ ss[2] ^= ss[1]; k[v(48, (6 * (i)) + 8)] = ss[2]; \
+ ss[3] ^= ss[2]; k[v(48, (6 * (i)) + 9)] = ss[3]; \
+}
+
+static void
+aes_decrypt_key192(const unsigned char *key, uint32_t rk[])
+{
+ uint32_t ss[7];
+#if defined(d_vars)
+ d_vars;
+#endif
+ rk[v(48, (0))] = ss[0] = word_in(key, 0);
+ rk[v(48, (1))] = ss[1] = word_in(key, 1);
+ rk[v(48, (2))] = ss[2] = word_in(key, 2);
+ rk[v(48, (3))] = ss[3] = word_in(key, 3);
+
+#ifdef DEC_KS_UNROLL
+ ss[4] = word_in(key, 4);
+ rk[v(48, (4))] = ff(ss[4]);
+ ss[5] = word_in(key, 5);
+ rk[v(48, (5))] = ff(ss[5]);
+ kdf6(rk, 0); kd6(rk, 1);
+ kd6(rk, 2); kd6(rk, 3);
+ kd6(rk, 4); kd6(rk, 5);
+ kd6(rk, 6); kdl6(rk, 7);
+#else
+ rk[v(48, (4))] = ss[4] = word_in(key, 4);
+ rk[v(48, (5))] = ss[5] = word_in(key, 5);
+ {
+ uint32_t i;
+
+ for (i = 0; i < 7; ++i)
+ k6e(rk, i);
+ k6ef(rk, 7);
+#if !(DEC_ROUND == NO_TABLES)
+ for (i = MAX_AES_NB; i < 12 * MAX_AES_NB; ++i)
+ rk[i] = inv_mcol(rk[i]);
+#endif
+ }
+#endif
+}
+
+
+
+#define k8ef(k, i) \
+{ k[v(56, (8 * (i)) + 8)] = ss[0] ^= ls_box(ss[7], 3) ^ t_use(r, c)[i]; \
+ k[v(56, (8 * (i)) + 9)] = ss[1] ^= ss[0]; \
+ k[v(56, (8 * (i)) + 10)] = ss[2] ^= ss[1]; \
+ k[v(56, (8 * (i)) + 11)] = ss[3] ^= ss[2]; \
+}
+
+#define k8e(k, i) \
+{ k8ef(k, i); \
+ k[v(56, (8 * (i)) + 12)] = ss[4] ^= ls_box(ss[3], 0); \
+ k[v(56, (8 * (i)) + 13)] = ss[5] ^= ss[4]; \
+ k[v(56, (8 * (i)) + 14)] = ss[6] ^= ss[5]; \
+ k[v(56, (8 * (i)) + 15)] = ss[7] ^= ss[6]; \
+}
+
+#define kdf8(k, i) \
+{ ss[0] ^= ls_box(ss[7], 3) ^ t_use(r, c)[i]; \
+ k[v(56, (8 * (i)) + 8)] = ff(ss[0]); \
+ ss[1] ^= ss[0]; k[v(56, (8 * (i)) + 9)] = ff(ss[1]); \
+ ss[2] ^= ss[1]; k[v(56, (8 * (i)) + 10)] = ff(ss[2]); \
+ ss[3] ^= ss[2]; k[v(56, (8 * (i)) + 11)] = ff(ss[3]); \
+ ss[4] ^= ls_box(ss[3], 0); k[v(56, (8 * (i)) + 12)] = ff(ss[4]); \
+ ss[5] ^= ss[4]; k[v(56, (8 * (i)) + 13)] = ff(ss[5]); \
+ ss[6] ^= ss[5]; k[v(56, (8 * (i)) + 14)] = ff(ss[6]); \
+ ss[7] ^= ss[6]; k[v(56, (8 * (i)) + 15)] = ff(ss[7]); \
+}
+
+#define kd8(k, i) \
+{ ss[8] = ls_box(ss[7], 3) ^ t_use(r, c)[i]; \
+ ss[0] ^= ss[8]; \
+ ss[8] = ff(ss[8]); \
+ k[v(56, (8 * (i)) + 8)] = ss[8] ^= k[v(56, (8 * (i)))]; \
+ ss[1] ^= ss[0]; \
+ k[v(56, (8 * (i)) + 9)] = ss[8] ^= k[v(56, (8 * (i)) + 1)]; \
+ ss[2] ^= ss[1]; \
+ k[v(56, (8 * (i)) + 10)] = ss[8] ^= k[v(56, (8 * (i)) + 2)]; \
+ ss[3] ^= ss[2]; \
+ k[v(56, (8 * (i)) + 11)] = ss[8] ^= k[v(56, (8 * (i)) + 3)]; \
+ ss[8] = ls_box(ss[3], 0); \
+ ss[4] ^= ss[8]; \
+ ss[8] = ff(ss[8]); \
+ k[v(56, (8 * (i)) + 12)] = ss[8] ^= k[v(56, (8 * (i)) + 4)]; \
+ ss[5] ^= ss[4]; \
+ k[v(56, (8 * (i)) + 13)] = ss[8] ^= k[v(56, (8 * (i)) + 5)]; \
+ ss[6] ^= ss[5]; \
+ k[v(56, (8 * (i)) + 14)] = ss[8] ^= k[v(56, (8 * (i)) + 6)]; \
+ ss[7] ^= ss[6]; \
+ k[v(56, (8 * (i)) + 15)] = ss[8] ^= k[v(56, (8 * (i)) + 7)]; \
+}
+
+#define kdl8(k, i) \
+{ ss[0] ^= ls_box(ss[7], 3) ^ t_use(r, c)[i]; \
+ k[v(56, (8 * (i)) + 8)] = ss[0]; \
+ ss[1] ^= ss[0]; k[v(56, (8 * (i)) + 9)] = ss[1]; \
+ ss[2] ^= ss[1]; k[v(56, (8 * (i)) + 10)] = ss[2]; \
+ ss[3] ^= ss[2]; k[v(56, (8 * (i)) + 11)] = ss[3]; \
+}
+
+static void
+aes_decrypt_key256(const unsigned char *key, uint32_t rk[])
+{
+ uint32_t ss[9];
+#if defined(d_vars)
+ d_vars;
+#endif
+ rk[v(56, (0))] = ss[0] = word_in(key, 0);
+ rk[v(56, (1))] = ss[1] = word_in(key, 1);
+ rk[v(56, (2))] = ss[2] = word_in(key, 2);
+ rk[v(56, (3))] = ss[3] = word_in(key, 3);
+
+#ifdef DEC_KS_UNROLL
+ ss[4] = word_in(key, 4);
+ rk[v(56, (4))] = ff(ss[4]);
+ ss[5] = word_in(key, 5);
+ rk[v(56, (5))] = ff(ss[5]);
+ ss[6] = word_in(key, 6);
+ rk[v(56, (6))] = ff(ss[6]);
+ ss[7] = word_in(key, 7);
+ rk[v(56, (7))] = ff(ss[7]);
+ kdf8(rk, 0); kd8(rk, 1);
+ kd8(rk, 2); kd8(rk, 3);
+ kd8(rk, 4); kd8(rk, 5);
+ kdl8(rk, 6);
+#else
+ rk[v(56, (4))] = ss[4] = word_in(key, 4);
+ rk[v(56, (5))] = ss[5] = word_in(key, 5);
+ rk[v(56, (6))] = ss[6] = word_in(key, 6);
+ rk[v(56, (7))] = ss[7] = word_in(key, 7);
+ {
+ uint32_t i;
+
+ for (i = 0; i < 6; ++i)
+ k8e(rk, i);
+ k8ef(rk, 6);
+#if !(DEC_ROUND == NO_TABLES)
+ for (i = MAX_AES_NB; i < 14 * MAX_AES_NB; ++i)
+ rk[i] = inv_mcol(rk[i]);
+#endif
+ }
+#endif /* DEC_KS_UNROLL */
+}
+
+
+/*
+ * Expand the cipher key into the decryption key schedule.
+ *
+ * Return the number of rounds for the given cipher key size.
+ * The size of the key schedule depends on the number of rounds
+ * (which can be computed from the size of the key), i.e. 4 * (Nr + 1).
+ *
+ * Parameters:
+ * rk AES key schedule 32-bit array to be initialized
+ * cipherKey User key
+ * keyBits AES key size (128, 192, or 256 bits)
+ */
+int
+rijndael_key_setup_dec_amd64(uint32_t rk[], const uint32_t cipherKey[],
+ int keyBits)
+{
+ switch (keyBits) {
+ case 128:
+ aes_decrypt_key128((unsigned char *)&cipherKey[0], rk);
+ return (10);
+ case 192:
+ aes_decrypt_key192((unsigned char *)&cipherKey[0], rk);
+ return (12);
+ case 256:
+ aes_decrypt_key256((unsigned char *)&cipherKey[0], rk);
+ return (14);
+ default: /* should never get here */
+ break;
+ }
+
+ return (0);
+}
diff --git a/zfs/module/icp/asm-x86_64/aes/aesopt.h b/zfs/module/icp/asm-x86_64/aes/aesopt.h
new file mode 100644
index 000000000000..6aa61db8275a
--- /dev/null
+++ b/zfs/module/icp/asm-x86_64/aes/aesopt.h
@@ -0,0 +1,770 @@
+/*
+ * ---------------------------------------------------------------------------
+ * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
+ *
+ * LICENSE TERMS
+ *
+ * The free distribution and use of this software is allowed (with or without
+ * changes) provided that:
+ *
+ * 1. source code distributions include the above copyright notice, this
+ * list of conditions and the following disclaimer;
+ *
+ * 2. binary distributions include the above copyright notice, this list
+ * of conditions and the following disclaimer in their documentation;
+ *
+ * 3. the name of the copyright holder is not used to endorse products
+ * built using this software without specific written permission.
+ *
+ * DISCLAIMER
+ *
+ * This software is provided 'as is' with no explicit or implied warranties
+ * in respect of its properties, including, but not limited to, correctness
+ * and/or fitness for purpose.
+ * ---------------------------------------------------------------------------
+ * Issue Date: 20/12/2007
+ *
+ * This file contains the compilation options for AES (Rijndael) and code
+ * that is common across encryption, key scheduling and table generation.
+ *
+ * OPERATION
+ *
+ * These source code files implement the AES algorithm Rijndael designed by
+ * Joan Daemen and Vincent Rijmen. This version is designed for the standard
+ * block size of 16 bytes and for key sizes of 128, 192 and 256 bits (16, 24
+ * and 32 bytes).
+ *
+ * This version is designed for flexibility and speed using operations on
+ * 32-bit words rather than operations on bytes. It can be compiled with
+ * either big or little endian internal byte order but is faster when the
+ * native byte order for the processor is used.
+ *
+ * THE CIPHER INTERFACE
+ *
+ * The cipher interface is implemented as an array of bytes in which lower
+ * AES bit sequence indexes map to higher numeric significance within bytes.
+ */
+
+/*
+ * OpenSolaris changes
+ * 1. Added __cplusplus and _AESTAB_H header guards
+ * 2. Added header files sys/types.h and aes_impl.h
+ * 3. Added defines for AES_ENCRYPT, AES_DECRYPT, AES_REV_DKS, and ASM_AMD64_C
+ * 4. Moved defines for IS_BIG_ENDIAN, IS_LITTLE_ENDIAN, PLATFORM_BYTE_ORDER
+ * from brg_endian.h
+ * 5. Undefined VIA_ACE_POSSIBLE and ASSUME_VIA_ACE_PRESENT
+ * 6. Changed uint_8t and uint_32t to uint8_t and uint32_t
+ * 7. Defined aes_sw32 as htonl() for byte swapping
+ * 8. Cstyled and hdrchk code
+ *
+ */
+
+#ifndef _AESOPT_H
+#define _AESOPT_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/zfs_context.h>
+#include <aes/aes_impl.h>
+
+/* SUPPORT FEATURES */
+#define AES_ENCRYPT /* if support for encryption is needed */
+#define AES_DECRYPT /* if support for decryption is needed */
+
+/* PLATFORM-SPECIFIC FEATURES */
+#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
+#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
+#define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
+#define AES_REV_DKS /* define to reverse decryption key schedule */
+
+
+/*
+ * CONFIGURATION - THE USE OF DEFINES
+ * Later in this section there are a number of defines that control the
+ * operation of the code. In each section, the purpose of each define is
+ * explained so that the relevant form can be included or excluded by
+ * setting the branches of the related #if clauses to 1 or 0 respectively.
+ * The following local defines should not be changed.
+ */
+
+#define ENCRYPTION_IN_C 1
+#define DECRYPTION_IN_C 2
+#define ENC_KEYING_IN_C 4
+#define DEC_KEYING_IN_C 8
+
+#define NO_TABLES 0
+#define ONE_TABLE 1
+#define FOUR_TABLES 4
+#define NONE 0
+#define PARTIAL 1
+#define FULL 2
+
+/* --- START OF USER CONFIGURED OPTIONS --- */
+
+/*
+ * 1. BYTE ORDER WITHIN 32 BIT WORDS
+ *
+ * The fundamental data processing units in Rijndael are 8-bit bytes. The
+ * input, output and key input are all enumerated arrays of bytes in which
+ * bytes are numbered starting at zero and increasing to one less than the
+ * number of bytes in the array in question. This enumeration is only used
+ * for naming bytes and does not imply any adjacency or order relationship
+ * from one byte to another. When these inputs and outputs are considered
+ * as bit sequences, bits 8*n to 8*n+7 of the bit sequence are mapped to
+ * byte[n] with bit 8n+i in the sequence mapped to bit 7-i within the byte.
+ * In this implementation bits are numbered from 0 to 7 starting at the
+ * numerically least significant end of each byte. Bit n represents 2^n.
+ *
+ * However, Rijndael can be implemented more efficiently using 32-bit
+ * words by packing bytes into words so that bytes 4*n to 4*n+3 are placed
+ * into word[n]. While in principle these bytes can be assembled into words
+ * in any positions, this implementation only supports the two formats in
+ * which bytes in adjacent positions within words also have adjacent byte
+ * numbers. This order is called big-endian if the lowest numbered bytes
+ * in words have the highest numeric significance and little-endian if the
+ * opposite applies.
+ *
+ * This code can work in either order irrespective of the order used by the
+ * machine on which it runs. Normally the internal byte order will be set
+ * to the order of the processor on which the code is to be run but this
+ * define can be used to reverse this in special situations.
+ *
+ * WARNING: Assembler code versions rely on PLATFORM_BYTE_ORDER being set.
+ * This define is hence redefined later (where assembler support is
+ * checked) if necessary.
+ */
+
+#if 1
+#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
+#elif 0
+#define ALGORITHM_BYTE_ORDER IS_LITTLE_ENDIAN
+#elif 0
+#define ALGORITHM_BYTE_ORDER IS_BIG_ENDIAN
+#else
+#error The algorithm byte order is not defined
+#endif
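+
+/*
+ * For illustration, packing bytes {0x00, 0x01, 0x02, 0x03} into word[0]:
+ * little-endian order yields the value 0x03020100 (byte 0 in the least
+ * significant position), while big-endian order yields 0x00010203
+ * (byte 0 in the most significant position).
+ */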
+
+/* 2. VIA ACE SUPPORT */
+
+#if defined(__GNUC__) && defined(__i386__) || \
+ defined(_WIN32) && defined(_M_IX86) && \
+ !(defined(_WIN64) || defined(_WIN32_WCE) || \
+ defined(_MSC_VER) && (_MSC_VER <= 800))
+#define VIA_ACE_POSSIBLE
+#endif
+
+/*
+ * Define this option if support for the VIA ACE is required. This uses
+ * inline assembler instructions and is only implemented for the Microsoft,
+ * Intel and GCC compilers. If VIA ACE is known to be present, then defining
+ * ASSUME_VIA_ACE_PRESENT will remove the ordinary encryption/decryption
+ * code. If USE_VIA_ACE_IF_PRESENT is defined then VIA ACE will be used if
+ * it is detected (both present and enabled) but the normal AES code will
+ * also be present.
+ *
+ * When VIA ACE is to be used, all AES encryption contexts MUST be 16 byte
+ * aligned; other input/output buffers do not need to be 16 byte aligned
+ * but there are very large performance gains if this can be arranged.
+ * VIA ACE also requires the decryption key schedule to be in reverse
+ * order (which later checks below ensure).
+ */
+
+/* VIA ACE is not used here for OpenSolaris: */
+#undef VIA_ACE_POSSIBLE
+#undef ASSUME_VIA_ACE_PRESENT
+
+#if 0 && defined(VIA_ACE_POSSIBLE) && !defined(USE_VIA_ACE_IF_PRESENT)
+#define USE_VIA_ACE_IF_PRESENT
+#endif
+
+#if 0 && defined(VIA_ACE_POSSIBLE) && !defined(ASSUME_VIA_ACE_PRESENT)
+#define ASSUME_VIA_ACE_PRESENT
+#endif
+
+
+/*
+ * 3. ASSEMBLER SUPPORT
+ *
+ * This define (which can be on the command line) enables the use of the
+ * assembler code routines for encryption, decryption and key scheduling
+ * as follows:
+ *
+ * ASM_X86_V1C uses the assembler (aes_x86_v1.asm) with large tables for
+ * encryption and decryption but with key scheduling in C
+ * ASM_X86_V2 uses assembler (aes_x86_v2.asm) with compressed tables for
+ * encryption, decryption and key scheduling
+ * ASM_X86_V2C uses assembler (aes_x86_v2.asm) with compressed tables for
+ * encryption and decryption but with key scheduling in C
+ * ASM_AMD64_C uses assembler (aes_amd64.asm) with compressed tables for
+ * encryption and decryption but with key scheduling in C
+ *
+ * Change one 'if 0' below to 'if 1' to select the version or define
+ * as a compilation option.
+ */
+
+#if 0 && !defined(ASM_X86_V1C)
+#define ASM_X86_V1C
+#elif 0 && !defined(ASM_X86_V2)
+#define ASM_X86_V2
+#elif 0 && !defined(ASM_X86_V2C)
+#define ASM_X86_V2C
+#elif 1 && !defined(ASM_AMD64_C)
+#define ASM_AMD64_C
+#endif
+
+#if (defined(ASM_X86_V1C) || defined(ASM_X86_V2) || defined(ASM_X86_V2C)) && \
+ !defined(_M_IX86) || defined(ASM_AMD64_C) && !defined(_M_X64) && \
+ !defined(__amd64)
+#error Assembler code is only available for x86 and AMD64 systems
+#endif
+
+/*
+ * 4. FAST INPUT/OUTPUT OPERATIONS.
+ *
+ * On some machines it is possible to improve speed by transferring the
+ * bytes in the input and output arrays to and from the internal 32-bit
+ * variables by addressing these arrays as if they are arrays of 32-bit
+ * words. On some machines this will always be possible but there may
+ * be a large performance penalty if the byte arrays are not aligned on
+ * the normal word boundaries. On other machines this technique will
+ * lead to memory access errors when such 32-bit word accesses are not
+ * properly aligned. The option SAFE_IO avoids such problems but will
+ * often be slower on those machines that support misaligned access
+ * (especially so if care is taken to align the input and output byte
+ * arrays on 32-bit word boundaries). If SAFE_IO is not defined it is
+ * assumed that access to byte arrays as if they are arrays of 32-bit
+ * words will not cause problems when such accesses are misaligned.
+ */
+#if 1 && !defined(_MSC_VER)
+#define SAFE_IO
+#endif
+
+/*
+ * 5. LOOP UNROLLING
+ *
+ * The code for encryption and decryption cycles through a number of rounds
+ * that can be implemented either in a loop or by expanding the code into a
+ * long sequence of instructions, the latter producing a larger program but
+ * one that will often be much faster. The latter is called loop unrolling.
+ * There are also potential speed advantages in expanding two iterations in
+ * a loop with half the number of iterations, which is called partial loop
+ * unrolling. The following options allow partial or full loop unrolling
+ * to be set independently for encryption and decryption
+ */
+#if 1
+#define ENC_UNROLL FULL
+#elif 0
+#define ENC_UNROLL PARTIAL
+#else
+#define ENC_UNROLL NONE
+#endif
+
+#if 1
+#define DEC_UNROLL FULL
+#elif 0
+#define DEC_UNROLL PARTIAL
+#else
+#define DEC_UNROLL NONE
+#endif
+
+#if 1
+#define ENC_KS_UNROLL
+#endif
+
+#if 1
+#define DEC_KS_UNROLL
+#endif
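+
+/*
+ * As an informal illustration of what these options control (the names
+ * round, b0, b1 and rk are placeholders), a rolled round loop of the form
+ *
+ *	for (r = 1; r < nr; ++r)
+ *		round(b1, b0, rk + 4 * r);
+ *
+ * is expanded under PARTIAL unrolling into a loop doing two rounds per
+ * iteration, and under FULL unrolling into straight-line code with one
+ * copy of the round body per round and no loop at all.
+ */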
+
+/*
+ * 6. FAST FINITE FIELD OPERATIONS
+ *
+ * If this section is included, tables are used to provide faster finite
+ * field arithmetic. This has no effect if FIXED_TABLES is defined.
+ */
+#if 1
+#define FF_TABLES
+#endif
+
+/*
+ * 7. INTERNAL STATE VARIABLE FORMAT
+ *
+ * The internal state of Rijndael is stored in a number of local 32-bit
+ * word variables which can be defined either as an array or as individual
+ * names variables. Include this section if you want to store these local
+ * variables in arrays. Otherwise individual local variables will be used.
+ */
+#if 1
+#define ARRAYS
+#endif
+
+/*
+ * 8. FIXED OR DYNAMIC TABLES
+ *
+ * When this section is included the tables used by the code are compiled
+ * statically into the binary file. Otherwise the subroutine aes_init()
+ * must be called to compute them before the code is first used.
+ */
+#if 1 && !(defined(_MSC_VER) && (_MSC_VER <= 800))
+#define FIXED_TABLES
+#endif
+
+/*
+ * 9. MASKING OR CASTING FROM LONGER VALUES TO BYTES
+ *
+ * In some systems it is better to mask longer values to extract bytes
+ * rather than using a cast. This option allows this choice.
+ */
+#if 0
+#define to_byte(x) ((uint8_t)(x))
+#else
+#define to_byte(x) ((x) & 0xff)
+#endif
+
+/*
+ * 10. TABLE ALIGNMENT
+ *
+ * On some systems speed will be improved by aligning the AES large lookup
+ * tables on particular boundaries. This define should be set to a power of
+ * two giving the desired alignment. It can be left undefined if alignment
+ * is not needed. This option is specific to the Microsoft VC++ compiler -
+ * it seems to sometimes cause trouble for the VC++ version 6 compiler.
+ */
+
+#if 1 && defined(_MSC_VER) && (_MSC_VER >= 1300)
+#define TABLE_ALIGN 32
+#endif
+
+/*
+ * 11. REDUCE CODE AND TABLE SIZE
+ *
+ * This replaces some expanded macros with function calls if ASM_X86_V2 or
+ * ASM_X86_V2C are defined.
+ */
+
+#if 1 && (defined(ASM_X86_V2) || defined(ASM_X86_V2C))
+#define REDUCE_CODE_SIZE
+#endif
+
+/*
+ * 12. TABLE OPTIONS
+ *
+ * This cipher proceeds by repeating a number of cycles known as rounds,
+ * each implemented by a round function that can optionally be speeded
+ * up using tables. The basic tables are 256 32-bit words, with either
+ * one or four tables being required for each round function depending on
+ * how much speed is required. Encryption and decryption round functions
+ * are different and the last encryption and decryption round functions are
+ * different again making four different round functions in all.
+ *
+ * This means that:
+ * 1. Normal encryption and decryption rounds can each use either 0, 1
+ * or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
+ * 2. The last encryption and decryption rounds can also use either 0, 1
+ * or 4 tables and table spaces of 0, 1024 or 4096 bytes each.
+ *
+ * Include or exclude the appropriate definitions below to set the number
+ * of tables used by this implementation.
+ */
+
+#if 1 /* set tables for the normal encryption round */
+#define ENC_ROUND FOUR_TABLES
+#elif 0
+#define ENC_ROUND ONE_TABLE
+#else
+#define ENC_ROUND NO_TABLES
+#endif
+
+#if 1 /* set tables for the last encryption round */
+#define LAST_ENC_ROUND FOUR_TABLES
+#elif 0
+#define LAST_ENC_ROUND ONE_TABLE
+#else
+#define LAST_ENC_ROUND NO_TABLES
+#endif
+
+#if 1 /* set tables for the normal decryption round */
+#define DEC_ROUND FOUR_TABLES
+#elif 0
+#define DEC_ROUND ONE_TABLE
+#else
+#define DEC_ROUND NO_TABLES
+#endif
+
+#if 1 /* set tables for the last decryption round */
+#define LAST_DEC_ROUND FOUR_TABLES
+#elif 0
+#define LAST_DEC_ROUND ONE_TABLE
+#else
+#define LAST_DEC_ROUND NO_TABLES
+#endif
+
+/*
+ * The decryption key schedule can be speeded up with tables in the same
+ * way that the round functions can. Include or exclude the following
+ * defines to set this requirement.
+ */
+#if 1
+#define KEY_SCHED FOUR_TABLES
+#elif 0
+#define KEY_SCHED ONE_TABLE
+#else
+#define KEY_SCHED NO_TABLES
+#endif
+
+/* ---- END OF USER CONFIGURED OPTIONS ---- */
+
+/* VIA ACE support is only available for VC++ and GCC */
+
+#if !defined(_MSC_VER) && !defined(__GNUC__)
+#if defined(ASSUME_VIA_ACE_PRESENT)
+#undef ASSUME_VIA_ACE_PRESENT
+#endif
+#if defined(USE_VIA_ACE_IF_PRESENT)
+#undef USE_VIA_ACE_IF_PRESENT
+#endif
+#endif
+
+#if defined(ASSUME_VIA_ACE_PRESENT) && !defined(USE_VIA_ACE_IF_PRESENT)
+#define USE_VIA_ACE_IF_PRESENT
+#endif
+
+#if defined(USE_VIA_ACE_IF_PRESENT) && !defined(AES_REV_DKS)
+#define AES_REV_DKS
+#endif
+
+/* Assembler support requires the use of platform byte order */
+
+#if (defined(ASM_X86_V1C) || defined(ASM_X86_V2C) || defined(ASM_AMD64_C)) && \
+ (ALGORITHM_BYTE_ORDER != PLATFORM_BYTE_ORDER)
+#undef ALGORITHM_BYTE_ORDER
+#define ALGORITHM_BYTE_ORDER PLATFORM_BYTE_ORDER
+#endif
+
+/*
+ * In this implementation the columns of the state array are each held in
+ * 32-bit words. The state array can be held in various ways: in an array
+ * of words, in a number of individual word variables or in a number of
+ * processor registers. The following define maps a variable name x and
+ * a column number c to the way the state array variable is to be held.
+ * The first define below maps the state into an array x[c] whereas the
+ * second form maps the state into a number of individual variables x0,
+ * x1, etc. Another form could map individual state columns to machine
+ * register names.
+ */
+
+#if defined(ARRAYS)
+#define s(x, c) x[c]
+#else
+#define s(x, c) x##c
+#endif
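+
+/*
+ * For example, with ARRAYS defined, s(y, 2) expands to y[2]; without it
+ * the same text expands to the individual variable y2.
+ */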
+
+/*
+ * This implementation provides subroutines for encryption, decryption
+ * and for setting the three key lengths (separately) for encryption
+ * and decryption. Since not all functions are needed, masks are set
+ * up here to determine which will be implemented in C
+ */
+
+#if !defined(AES_ENCRYPT)
+#define EFUNCS_IN_C 0
+#elif defined(ASSUME_VIA_ACE_PRESENT) || defined(ASM_X86_V1C) || \
+ defined(ASM_X86_V2C) || defined(ASM_AMD64_C)
+#define EFUNCS_IN_C ENC_KEYING_IN_C
+#elif !defined(ASM_X86_V2)
+#define EFUNCS_IN_C (ENCRYPTION_IN_C | ENC_KEYING_IN_C)
+#else
+#define EFUNCS_IN_C 0
+#endif
+
+#if !defined(AES_DECRYPT)
+#define DFUNCS_IN_C 0
+#elif defined(ASSUME_VIA_ACE_PRESENT) || defined(ASM_X86_V1C) || \
+ defined(ASM_X86_V2C) || defined(ASM_AMD64_C)
+#define DFUNCS_IN_C DEC_KEYING_IN_C
+#elif !defined(ASM_X86_V2)
+#define DFUNCS_IN_C (DECRYPTION_IN_C | DEC_KEYING_IN_C)
+#else
+#define DFUNCS_IN_C 0
+#endif
+
+#define FUNCS_IN_C (EFUNCS_IN_C | DFUNCS_IN_C)
+
+/* END OF CONFIGURATION OPTIONS */
+
+/* Disable or report errors on some combinations of options */
+
+#if ENC_ROUND == NO_TABLES && LAST_ENC_ROUND != NO_TABLES
+#undef LAST_ENC_ROUND
+#define LAST_ENC_ROUND NO_TABLES
+#elif ENC_ROUND == ONE_TABLE && LAST_ENC_ROUND == FOUR_TABLES
+#undef LAST_ENC_ROUND
+#define LAST_ENC_ROUND ONE_TABLE
+#endif
+
+#if ENC_ROUND == NO_TABLES && ENC_UNROLL != NONE
+#undef ENC_UNROLL
+#define ENC_UNROLL NONE
+#endif
+
+#if DEC_ROUND == NO_TABLES && LAST_DEC_ROUND != NO_TABLES
+#undef LAST_DEC_ROUND
+#define LAST_DEC_ROUND NO_TABLES
+#elif DEC_ROUND == ONE_TABLE && LAST_DEC_ROUND == FOUR_TABLES
+#undef LAST_DEC_ROUND
+#define LAST_DEC_ROUND ONE_TABLE
+#endif
+
+#if DEC_ROUND == NO_TABLES && DEC_UNROLL != NONE
+#undef DEC_UNROLL
+#define DEC_UNROLL NONE
+#endif
+
+#if (ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+#define aes_sw32 htonl
+#elif defined(bswap32)
+#define aes_sw32 bswap32
+#elif defined(bswap_32)
+#define aes_sw32 bswap_32
+#else
+#define brot(x, n) (((uint32_t)(x) << (n)) | ((uint32_t)(x) >> (32 - (n))))
+#define aes_sw32(x) ((brot((x), 8) & 0x00ff00ff) | (brot((x), 24) & 0xff00ff00))
+#endif
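+
+/*
+ * For example, aes_sw32(0x0a0b0c0d) evaluates to 0x0d0c0b0a on this
+ * little-endian configuration; the fallback brot() form computes the
+ * same swap without relying on htonl() or a bswap builtin.
+ */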
+
+
+/*
+ * upr(x, n): rotates bytes within words by n positions, moving bytes to
+ * higher index positions with wrap around into low positions
+ * ups(x, n): moves bytes by n positions to higher index positions in
+ * words but without wrap around
+ * bval(x, n): extracts a byte from a word
+ *
+ * WARNING: The definitions given here are intended only for use with
+ * unsigned variables and with shift counts that are compile
+ * time constants
+ */
+
+#if (ALGORITHM_BYTE_ORDER == IS_LITTLE_ENDIAN)
+#define upr(x, n) (((uint32_t)(x) << (8 * (n))) | \
+ ((uint32_t)(x) >> (32 - 8 * (n))))
+#define ups(x, n) ((uint32_t)(x) << (8 * (n)))
+#define bval(x, n) to_byte((x) >> (8 * (n)))
+#define bytes2word(b0, b1, b2, b3) \
+ (((uint32_t)(b3) << 24) | ((uint32_t)(b2) << 16) | \
+ ((uint32_t)(b1) << 8) | (b0))
+#endif
+
+#if (ALGORITHM_BYTE_ORDER == IS_BIG_ENDIAN)
+#define upr(x, n) (((uint32_t)(x) >> (8 * (n))) | \
+ ((uint32_t)(x) << (32 - 8 * (n))))
+#define ups(x, n) ((uint32_t)(x) >> (8 * (n)))
+#define bval(x, n) to_byte((x) >> (24 - 8 * (n)))
+#define bytes2word(b0, b1, b2, b3) \
+ (((uint32_t)(b0) << 24) | ((uint32_t)(b1) << 16) | \
+ ((uint32_t)(b2) << 8) | (b3))
+#endif
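+
+/*
+ * Concrete examples for the little-endian variants:
+ * upr(0x01020304, 1) is 0x02030401, ups(0x01020304, 1) is 0x02030400,
+ * bval(0x01020304, 0) is 0x04, and bytes2word(4, 3, 2, 1) rebuilds
+ * 0x01020304.
+ */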
+
+#if defined(SAFE_IO)
+#define word_in(x, c) bytes2word(((const uint8_t *)(x) + 4 * c)[0], \
+ ((const uint8_t *)(x) + 4 * c)[1], \
+ ((const uint8_t *)(x) + 4 * c)[2], \
+ ((const uint8_t *)(x) + 4 * c)[3])
+#define word_out(x, c, v) { ((uint8_t *)(x) + 4 * c)[0] = bval(v, 0); \
+ ((uint8_t *)(x) + 4 * c)[1] = bval(v, 1); \
+ ((uint8_t *)(x) + 4 * c)[2] = bval(v, 2); \
+ ((uint8_t *)(x) + 4 * c)[3] = bval(v, 3); }
+#elif (ALGORITHM_BYTE_ORDER == PLATFORM_BYTE_ORDER)
+#define word_in(x, c) (*((uint32_t *)(x) + (c)))
+#define word_out(x, c, v) (*((uint32_t *)(x) + (c)) = (v))
+#else
+#define word_in(x, c) aes_sw32(*((uint32_t *)(x) + (c)))
+#define word_out(x, c, v) (*((uint32_t *)(x) + (c)) = aes_sw32(v))
+#endif
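+
+/*
+ * All three forms implement the same interface: word_in(x, c) reads the
+ * c-th 32-bit column from the byte array x and word_out(x, c, v) writes
+ * it back, with the SAFE_IO form assembling the word byte by byte so
+ * that x need not be 4-byte aligned.
+ */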
+
+/* the finite field modular polynomial and elements */
+
+#define WPOLY 0x011b
+#define BPOLY 0x1b
+
+/* multiply four bytes in GF(2^8) by 'x' {02} in parallel */
+
+#define m1 0x80808080
+#define m2 0x7f7f7f7f
+#define gf_mulx(x) ((((x) & m2) << 1) ^ ((((x) & m1) >> 7) * BPOLY))
+
+/*
+ * The following defines provide alternative definitions of gf_mulx that might
+ * give improved performance if a fast 32-bit multiply is not available. Note
+ * that a temporary variable u needs to be defined where gf_mulx is used.
+ *
+ * #define gf_mulx(x) (u = (x) & m1, u |= (u >> 1), ((x) & m2) << 1) ^ \
+ * ((u >> 3) | (u >> 6))
+ * #define m4 (0x01010101 * BPOLY)
+ * #define gf_mulx(x) (u = (x) & m1, ((x) & m2) << 1) ^ ((u - (u >> 7)) \
+ * & m4)
+ */
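+
+/*
+ * Worked example: gf_mulx(0x00000080) yields 0x0000001b, since {80} is
+ * x^7 and x^7 * x = x^8 reduces to x^4 + x^3 + x + 1 = {1b} modulo the
+ * AES polynomial (WPOLY). Bytes with the top bit clear are simply
+ * doubled, e.g. gf_mulx(0x00000057) yields 0x000000ae.
+ */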
+
+/* Work out which tables are needed for the different options */
+
+#if defined(ASM_X86_V1C)
+#if defined(ENC_ROUND)
+#undef ENC_ROUND
+#endif
+#define ENC_ROUND FOUR_TABLES
+#if defined(LAST_ENC_ROUND)
+#undef LAST_ENC_ROUND
+#endif
+#define LAST_ENC_ROUND FOUR_TABLES
+#if defined(DEC_ROUND)
+#undef DEC_ROUND
+#endif
+#define DEC_ROUND FOUR_TABLES
+#if defined(LAST_DEC_ROUND)
+#undef LAST_DEC_ROUND
+#endif
+#define LAST_DEC_ROUND FOUR_TABLES
+#if defined(KEY_SCHED)
+#undef KEY_SCHED
+#define KEY_SCHED FOUR_TABLES
+#endif
+#endif
+
+#if (FUNCS_IN_C & ENCRYPTION_IN_C) || defined(ASM_X86_V1C)
+#if ENC_ROUND == ONE_TABLE
+#define FT1_SET
+#elif ENC_ROUND == FOUR_TABLES
+#define FT4_SET
+#else
+#define SBX_SET
+#endif
+#if LAST_ENC_ROUND == ONE_TABLE
+#define FL1_SET
+#elif LAST_ENC_ROUND == FOUR_TABLES
+#define FL4_SET
+#elif !defined(SBX_SET)
+#define SBX_SET
+#endif
+#endif
+
+#if (FUNCS_IN_C & DECRYPTION_IN_C) || defined(ASM_X86_V1C)
+#if DEC_ROUND == ONE_TABLE
+#define IT1_SET
+#elif DEC_ROUND == FOUR_TABLES
+#define IT4_SET
+#else
+#define ISB_SET
+#endif
+#if LAST_DEC_ROUND == ONE_TABLE
+#define IL1_SET
+#elif LAST_DEC_ROUND == FOUR_TABLES
+#define IL4_SET
+#elif !defined(ISB_SET)
+#define ISB_SET
+#endif
+#endif
+
+
+#if !(defined(REDUCE_CODE_SIZE) && (defined(ASM_X86_V2) || \
+ defined(ASM_X86_V2C)))
+#if ((FUNCS_IN_C & ENC_KEYING_IN_C) || (FUNCS_IN_C & DEC_KEYING_IN_C))
+#if KEY_SCHED == ONE_TABLE
+#if !defined(FL1_SET) && !defined(FL4_SET)
+#define LS1_SET
+#endif
+#elif KEY_SCHED == FOUR_TABLES
+#if !defined(FL4_SET)
+#define LS4_SET
+#endif
+#elif !defined(SBX_SET)
+#define SBX_SET
+#endif
+#endif
+#if (FUNCS_IN_C & DEC_KEYING_IN_C)
+#if KEY_SCHED == ONE_TABLE
+#define IM1_SET
+#elif KEY_SCHED == FOUR_TABLES
+#define IM4_SET
+#elif !defined(SBX_SET)
+#define SBX_SET
+#endif
+#endif
+#endif
+
+/* generic definitions of Rijndael macros that use tables */
+
+#define no_table(x, box, vf, rf, c) bytes2word(\
+ box[bval(vf(x, 0, c), rf(0, c))], \
+ box[bval(vf(x, 1, c), rf(1, c))], \
+ box[bval(vf(x, 2, c), rf(2, c))], \
+ box[bval(vf(x, 3, c), rf(3, c))])
+
+#define one_table(x, op, tab, vf, rf, c) \
+ (tab[bval(vf(x, 0, c), rf(0, c))] \
+ ^ op(tab[bval(vf(x, 1, c), rf(1, c))], 1) \
+ ^ op(tab[bval(vf(x, 2, c), rf(2, c))], 2) \
+ ^ op(tab[bval(vf(x, 3, c), rf(3, c))], 3))
+
+#define four_tables(x, tab, vf, rf, c) \
+ (tab[0][bval(vf(x, 0, c), rf(0, c))] \
+ ^ tab[1][bval(vf(x, 1, c), rf(1, c))] \
+ ^ tab[2][bval(vf(x, 2, c), rf(2, c))] \
+ ^ tab[3][bval(vf(x, 3, c), rf(3, c))])
+
+#define vf1(x, r, c) (x)
+#define rf1(r, c) (r)
+#define rf2(r, c) ((8+r-c)&3)
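+
+/*
+ * As an informal example of how these combine, one output column of a
+ * normal four-table encryption round has the shape
+ *
+ *	s1[c] = rk[c] ^ four_tables(s0, t_use(f, n), vf, rf1, c);
+ *
+ * where vf is a selector that picks the source column feeding each byte
+ * position; the actual selector macros live with the round code rather
+ * than in this header.
+ */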
+
+/*
+ * Perform forward and inverse column mix operation on four bytes in long word
+ * x in parallel. NOTE: x must be a simple variable, NOT an expression in
+ * these macros.
+ */
+
+#if !(defined(REDUCE_CODE_SIZE) && (defined(ASM_X86_V2) || \
+ defined(ASM_X86_V2C)))
+
+#if defined(FM4_SET) /* not currently used */
+#define fwd_mcol(x) four_tables(x, t_use(f, m), vf1, rf1, 0)
+#elif defined(FM1_SET) /* not currently used */
+#define fwd_mcol(x) one_table(x, upr, t_use(f, m), vf1, rf1, 0)
+#else
+#define dec_fmvars uint32_t g2
+#define fwd_mcol(x) (g2 = gf_mulx(x), g2 ^ upr((x) ^ g2, 3) ^ \
+ upr((x), 2) ^ upr((x), 1))
+#endif
+
+#if defined(IM4_SET)
+#define inv_mcol(x) four_tables(x, t_use(i, m), vf1, rf1, 0)
+#elif defined(IM1_SET)
+#define inv_mcol(x) one_table(x, upr, t_use(i, m), vf1, rf1, 0)
+#else
+#define dec_imvars uint32_t g2, g4, g9
+#define inv_mcol(x) (g2 = gf_mulx(x), g4 = gf_mulx(g2), g9 = \
+ (x) ^ gf_mulx(g4), g4 ^= g9, \
+ (x) ^ g2 ^ g4 ^ upr(g2 ^ g9, 3) ^ \
+ upr(g4, 2) ^ upr(g9, 1))
+#endif
+
+#if defined(FL4_SET)
+#define ls_box(x, c) four_tables(x, t_use(f, l), vf1, rf2, c)
+#elif defined(LS4_SET)
+#define ls_box(x, c) four_tables(x, t_use(l, s), vf1, rf2, c)
+#elif defined(FL1_SET)
+#define ls_box(x, c) one_table(x, upr, t_use(f, l), vf1, rf2, c)
+#elif defined(LS1_SET)
+#define ls_box(x, c) one_table(x, upr, t_use(l, s), vf1, rf2, c)
+#else
+#define ls_box(x, c) no_table(x, t_use(s, box), vf1, rf2, c)
+#endif
+
+#endif
+
+#if defined(ASM_X86_V1C) && defined(AES_DECRYPT) && !defined(ISB_SET)
+#define ISB_SET
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _AESOPT_H */
diff --git a/zfs/module/icp/asm-x86_64/aes/aestab.h b/zfs/module/icp/asm-x86_64/aes/aestab.h
new file mode 100644
index 000000000000..33cdb6c6f9fe
--- /dev/null
+++ b/zfs/module/icp/asm-x86_64/aes/aestab.h
@@ -0,0 +1,165 @@
+/*
+ * ---------------------------------------------------------------------------
+ * Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
+ *
+ * LICENSE TERMS
+ *
+ * The free distribution and use of this software is allowed (with or without
+ * changes) provided that:
+ *
+ * 1. source code distributions include the above copyright notice, this
+ * list of conditions and the following disclaimer;
+ *
+ * 2. binary distributions include the above copyright notice, this list
+ * of conditions and the following disclaimer in their documentation;
+ *
+ * 3. the name of the copyright holder is not used to endorse products
+ * built using this software without specific written permission.
+ *
+ * DISCLAIMER
+ *
+ * This software is provided 'as is' with no explicit or implied warranties
+ * in respect of its properties, including, but not limited to, correctness
+ * and/or fitness for purpose.
+ * ---------------------------------------------------------------------------
+ * Issue Date: 20/12/2007
+ *
+ * This file contains the code for declaring the tables needed to implement
+ * AES. The file aesopt.h is assumed to be included before this header file.
+ * If there are no global variables, the definitions here can be used to put
+ * the AES tables in a structure so that a pointer can then be added to the
+ * AES context to pass them to the AES routines that need them. If this
+ * facility is used, the calling program has to ensure that this pointer is
+ * managed appropriately. In particular, the value of the t_dec(in, it) item
+ * in the table structure must be set to zero in order to ensure that the
+ * tables are initialised. In practice the three code sequences in aeskey.c
+ * that control the calls to aes_init() and the aes_init() routine itself will
+ * have to be changed for a specific implementation. If global variables are
+ * available it will generally be preferable to use them with the precomputed
+ * FIXED_TABLES option that uses static global tables.
+ *
+ * The following defines can be used to control the way the tables
+ * are defined, initialised and used in embedded environments that
+ * require special features for these purposes
+ *
+ * the 't_dec' construction is used to declare fixed table arrays
+ * the 't_set' construction is used to set fixed table values
+ * the 't_use' construction is used to access fixed table values
+ *
+ * 256 byte tables:
+ *
+ * t_xxx(s, box) => forward S box
+ * t_xxx(i, box) => inverse S box
+ *
+ * 256 32-bit word OR 4 x 256 32-bit word tables:
+ *
+ * t_xxx(f, n) => forward normal round
+ * t_xxx(f, l) => forward last round
+ * t_xxx(i, n) => inverse normal round
+ * t_xxx(i, l) => inverse last round
+ * t_xxx(l, s) => key schedule table
+ * t_xxx(i, m) => key schedule table
+ *
+ * Other variables and tables:
+ *
+ * t_xxx(r, c) => the rcon table
+ */
+
+/*
+ * OpenSolaris OS modifications
+ *
+ * 1. Added __cplusplus and _AESTAB_H header guards
+ * 2. Added header file sys/types.h
+ * 3. Remove code defined for _MSC_VER
+ * 4. Changed all variables to "static const"
+ * 5. Changed uint_8t and uint_32t to uint8_t and uint32_t
+ * 6. Cstyled and hdrchk code
+ */
+
+#ifndef _AESTAB_H
+#define _AESTAB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/types.h>
+
+#define t_dec(m, n) t_##m##n
+#define t_set(m, n) t_##m##n
+#define t_use(m, n) t_##m##n
+
+#if defined(DO_TABLES) && defined(FIXED_TABLES)
+#define d_1(t, n, b, e) static const t n[256] = b(e)
+#define d_4(t, n, b, e, f, g, h) static const t n[4][256] = \
+ {b(e), b(f), b(g), b(h)}
+static const uint32_t t_dec(r, c)[RC_LENGTH] = rc_data(w0);
+#else
+#define d_1(t, n, b, e) static const t n[256]
+#define d_4(t, n, b, e, f, g, h) static const t n[4][256]
+static const uint32_t t_dec(r, c)[RC_LENGTH];
+#endif
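+
+/*
+ * For example, t_dec(s, box) pastes to the identifier t_sbox, so
+ * d_1(uint8_t, t_dec(s, box), sb_data, h0) declares
+ * "static const uint8_t t_sbox[256]", initialised from sb_data(h0)
+ * when DO_TABLES and FIXED_TABLES are both defined.
+ */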
+
+#if defined(SBX_SET)
+ d_1(uint8_t, t_dec(s, box), sb_data, h0);
+#endif
+#if defined(ISB_SET)
+ d_1(uint8_t, t_dec(i, box), isb_data, h0);
+#endif
+
+#if defined(FT1_SET)
+ d_1(uint32_t, t_dec(f, n), sb_data, u0);
+#endif
+#if defined(FT4_SET)
+ d_4(uint32_t, t_dec(f, n), sb_data, u0, u1, u2, u3);
+#endif
+
+#if defined(FL1_SET)
+ d_1(uint32_t, t_dec(f, l), sb_data, w0);
+#endif
+#if defined(FL4_SET)
+ d_4(uint32_t, t_dec(f, l), sb_data, w0, w1, w2, w3);
+#endif
+
+#if defined(IT1_SET)
+ d_1(uint32_t, t_dec(i, n), isb_data, v0);
+#endif
+#if defined(IT4_SET)
+ d_4(uint32_t, t_dec(i, n), isb_data, v0, v1, v2, v3);
+#endif
+
+#if defined(IL1_SET)
+ d_1(uint32_t, t_dec(i, l), isb_data, w0);
+#endif
+#if defined(IL4_SET)
+ d_4(uint32_t, t_dec(i, l), isb_data, w0, w1, w2, w3);
+#endif
+
+#if defined(LS1_SET)
+#if defined(FL1_SET)
+#undef LS1_SET
+#else
+ d_1(uint32_t, t_dec(l, s), sb_data, w0);
+#endif
+#endif
+
+#if defined(LS4_SET)
+#if defined(FL4_SET)
+#undef LS4_SET
+#else
+ d_4(uint32_t, t_dec(l, s), sb_data, w0, w1, w2, w3);
+#endif
+#endif
+
+#if defined(IM1_SET)
+ d_1(uint32_t, t_dec(i, m), mm_data, v0);
+#endif
+#if defined(IM4_SET)
+ d_4(uint32_t, t_dec(i, m), mm_data, v0, v1, v2, v3);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _AESTAB_H */
diff --git a/zfs/module/icp/asm-x86_64/aes/aestab2.h b/zfs/module/icp/asm-x86_64/aes/aestab2.h
new file mode 100644
index 000000000000..eb13f72b10d8
--- /dev/null
+++ b/zfs/module/icp/asm-x86_64/aes/aestab2.h
@@ -0,0 +1,594 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _AESTAB2_H
+#define _AESTAB2_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * To create this file for OpenSolaris:
+ * 1. Compile and run tablegen.c, from aes-src-04-03-08.zip,
+ * after defining ASM_AMD64_C
+ * 2. mv aestab2.c aestab2.h
+ * 3. Add __cplusplus and _AESTAB2_H header guards
+ * 4. Add #include <aes_impl.h>
+ * 5. Change "uint_32t" to "uint32_t"
+ * 6. Change all variables to "static const"
+ * 7. Cstyle and hdrchk this file
+ */
+
+#include <aes/aes_impl.h>
+
+static const uint32_t t_rc[RC_LENGTH] =
+{
+ 0x00000001, 0x00000002, 0x00000004, 0x00000008,
+ 0x00000010, 0x00000020, 0x00000040, 0x00000080,
+ 0x0000001b, 0x00000036
+};
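+
+/*
+ * These are the AES key schedule round constants, the successive powers
+ * of x in GF(2^8): each entry is twice the previous one, XORed with 0x1b
+ * whenever the doubling overflows a byte (hence 0x80 -> 0x1b -> 0x36).
+ */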
+
+static const uint32_t t_ls[4][256] =
+{
+ {
+ 0x00000063, 0x0000007c, 0x00000077, 0x0000007b,
+ 0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5,
+ 0x00000030, 0x00000001, 0x00000067, 0x0000002b,
+ 0x000000fe, 0x000000d7, 0x000000ab, 0x00000076,
+ 0x000000ca, 0x00000082, 0x000000c9, 0x0000007d,
+ 0x000000fa, 0x00000059, 0x00000047, 0x000000f0,
+ 0x000000ad, 0x000000d4, 0x000000a2, 0x000000af,
+ 0x0000009c, 0x000000a4, 0x00000072, 0x000000c0,
+ 0x000000b7, 0x000000fd, 0x00000093, 0x00000026,
+ 0x00000036, 0x0000003f, 0x000000f7, 0x000000cc,
+ 0x00000034, 0x000000a5, 0x000000e5, 0x000000f1,
+ 0x00000071, 0x000000d8, 0x00000031, 0x00000015,
+ 0x00000004, 0x000000c7, 0x00000023, 0x000000c3,
+ 0x00000018, 0x00000096, 0x00000005, 0x0000009a,
+ 0x00000007, 0x00000012, 0x00000080, 0x000000e2,
+ 0x000000eb, 0x00000027, 0x000000b2, 0x00000075,
+ 0x00000009, 0x00000083, 0x0000002c, 0x0000001a,
+ 0x0000001b, 0x0000006e, 0x0000005a, 0x000000a0,
+ 0x00000052, 0x0000003b, 0x000000d6, 0x000000b3,
+ 0x00000029, 0x000000e3, 0x0000002f, 0x00000084,
+ 0x00000053, 0x000000d1, 0x00000000, 0x000000ed,
+ 0x00000020, 0x000000fc, 0x000000b1, 0x0000005b,
+ 0x0000006a, 0x000000cb, 0x000000be, 0x00000039,
+ 0x0000004a, 0x0000004c, 0x00000058, 0x000000cf,
+ 0x000000d0, 0x000000ef, 0x000000aa, 0x000000fb,
+ 0x00000043, 0x0000004d, 0x00000033, 0x00000085,
+ 0x00000045, 0x000000f9, 0x00000002, 0x0000007f,
+ 0x00000050, 0x0000003c, 0x0000009f, 0x000000a8,
+ 0x00000051, 0x000000a3, 0x00000040, 0x0000008f,
+ 0x00000092, 0x0000009d, 0x00000038, 0x000000f5,
+ 0x000000bc, 0x000000b6, 0x000000da, 0x00000021,
+ 0x00000010, 0x000000ff, 0x000000f3, 0x000000d2,
+ 0x000000cd, 0x0000000c, 0x00000013, 0x000000ec,
+ 0x0000005f, 0x00000097, 0x00000044, 0x00000017,
+ 0x000000c4, 0x000000a7, 0x0000007e, 0x0000003d,
+ 0x00000064, 0x0000005d, 0x00000019, 0x00000073,
+ 0x00000060, 0x00000081, 0x0000004f, 0x000000dc,
+ 0x00000022, 0x0000002a, 0x00000090, 0x00000088,
+ 0x00000046, 0x000000ee, 0x000000b8, 0x00000014,
+ 0x000000de, 0x0000005e, 0x0000000b, 0x000000db,
+ 0x000000e0, 0x00000032, 0x0000003a, 0x0000000a,
+ 0x00000049, 0x00000006, 0x00000024, 0x0000005c,
+ 0x000000c2, 0x000000d3, 0x000000ac, 0x00000062,
+ 0x00000091, 0x00000095, 0x000000e4, 0x00000079,
+ 0x000000e7, 0x000000c8, 0x00000037, 0x0000006d,
+ 0x0000008d, 0x000000d5, 0x0000004e, 0x000000a9,
+ 0x0000006c, 0x00000056, 0x000000f4, 0x000000ea,
+ 0x00000065, 0x0000007a, 0x000000ae, 0x00000008,
+ 0x000000ba, 0x00000078, 0x00000025, 0x0000002e,
+ 0x0000001c, 0x000000a6, 0x000000b4, 0x000000c6,
+ 0x000000e8, 0x000000dd, 0x00000074, 0x0000001f,
+ 0x0000004b, 0x000000bd, 0x0000008b, 0x0000008a,
+ 0x00000070, 0x0000003e, 0x000000b5, 0x00000066,
+ 0x00000048, 0x00000003, 0x000000f6, 0x0000000e,
+ 0x00000061, 0x00000035, 0x00000057, 0x000000b9,
+ 0x00000086, 0x000000c1, 0x0000001d, 0x0000009e,
+ 0x000000e1, 0x000000f8, 0x00000098, 0x00000011,
+ 0x00000069, 0x000000d9, 0x0000008e, 0x00000094,
+ 0x0000009b, 0x0000001e, 0x00000087, 0x000000e9,
+ 0x000000ce, 0x00000055, 0x00000028, 0x000000df,
+ 0x0000008c, 0x000000a1, 0x00000089, 0x0000000d,
+ 0x000000bf, 0x000000e6, 0x00000042, 0x00000068,
+ 0x00000041, 0x00000099, 0x0000002d, 0x0000000f,
+ 0x000000b0, 0x00000054, 0x000000bb, 0x00000016
+ },
+ {
+ 0x00006300, 0x00007c00, 0x00007700, 0x00007b00,
+ 0x0000f200, 0x00006b00, 0x00006f00, 0x0000c500,
+ 0x00003000, 0x00000100, 0x00006700, 0x00002b00,
+ 0x0000fe00, 0x0000d700, 0x0000ab00, 0x00007600,
+ 0x0000ca00, 0x00008200, 0x0000c900, 0x00007d00,
+ 0x0000fa00, 0x00005900, 0x00004700, 0x0000f000,
+ 0x0000ad00, 0x0000d400, 0x0000a200, 0x0000af00,
+ 0x00009c00, 0x0000a400, 0x00007200, 0x0000c000,
+ 0x0000b700, 0x0000fd00, 0x00009300, 0x00002600,
+ 0x00003600, 0x00003f00, 0x0000f700, 0x0000cc00,
+ 0x00003400, 0x0000a500, 0x0000e500, 0x0000f100,
+ 0x00007100, 0x0000d800, 0x00003100, 0x00001500,
+ 0x00000400, 0x0000c700, 0x00002300, 0x0000c300,
+ 0x00001800, 0x00009600, 0x00000500, 0x00009a00,
+ 0x00000700, 0x00001200, 0x00008000, 0x0000e200,
+ 0x0000eb00, 0x00002700, 0x0000b200, 0x00007500,
+ 0x00000900, 0x00008300, 0x00002c00, 0x00001a00,
+ 0x00001b00, 0x00006e00, 0x00005a00, 0x0000a000,
+ 0x00005200, 0x00003b00, 0x0000d600, 0x0000b300,
+ 0x00002900, 0x0000e300, 0x00002f00, 0x00008400,
+ 0x00005300, 0x0000d100, 0x00000000, 0x0000ed00,
+ 0x00002000, 0x0000fc00, 0x0000b100, 0x00005b00,
+ 0x00006a00, 0x0000cb00, 0x0000be00, 0x00003900,
+ 0x00004a00, 0x00004c00, 0x00005800, 0x0000cf00,
+ 0x0000d000, 0x0000ef00, 0x0000aa00, 0x0000fb00,
+ 0x00004300, 0x00004d00, 0x00003300, 0x00008500,
+ 0x00004500, 0x0000f900, 0x00000200, 0x00007f00,
+ 0x00005000, 0x00003c00, 0x00009f00, 0x0000a800,
+ 0x00005100, 0x0000a300, 0x00004000, 0x00008f00,
+ 0x00009200, 0x00009d00, 0x00003800, 0x0000f500,
+ 0x0000bc00, 0x0000b600, 0x0000da00, 0x00002100,
+ 0x00001000, 0x0000ff00, 0x0000f300, 0x0000d200,
+ 0x0000cd00, 0x00000c00, 0x00001300, 0x0000ec00,
+ 0x00005f00, 0x00009700, 0x00004400, 0x00001700,
+ 0x0000c400, 0x0000a700, 0x00007e00, 0x00003d00,
+ 0x00006400, 0x00005d00, 0x00001900, 0x00007300,
+ 0x00006000, 0x00008100, 0x00004f00, 0x0000dc00,
+ 0x00002200, 0x00002a00, 0x00009000, 0x00008800,
+ 0x00004600, 0x0000ee00, 0x0000b800, 0x00001400,
+ 0x0000de00, 0x00005e00, 0x00000b00, 0x0000db00,
+ 0x0000e000, 0x00003200, 0x00003a00, 0x00000a00,
+ 0x00004900, 0x00000600, 0x00002400, 0x00005c00,
+ 0x0000c200, 0x0000d300, 0x0000ac00, 0x00006200,
+ 0x00009100, 0x00009500, 0x0000e400, 0x00007900,
+ 0x0000e700, 0x0000c800, 0x00003700, 0x00006d00,
+ 0x00008d00, 0x0000d500, 0x00004e00, 0x0000a900,
+ 0x00006c00, 0x00005600, 0x0000f400, 0x0000ea00,
+ 0x00006500, 0x00007a00, 0x0000ae00, 0x00000800,
+ 0x0000ba00, 0x00007800, 0x00002500, 0x00002e00,
+ 0x00001c00, 0x0000a600, 0x0000b400, 0x0000c600,
+ 0x0000e800, 0x0000dd00, 0x00007400, 0x00001f00,
+ 0x00004b00, 0x0000bd00, 0x00008b00, 0x00008a00,
+ 0x00007000, 0x00003e00, 0x0000b500, 0x00006600,
+ 0x00004800, 0x00000300, 0x0000f600, 0x00000e00,
+ 0x00006100, 0x00003500, 0x00005700, 0x0000b900,
+ 0x00008600, 0x0000c100, 0x00001d00, 0x00009e00,
+ 0x0000e100, 0x0000f800, 0x00009800, 0x00001100,
+ 0x00006900, 0x0000d900, 0x00008e00, 0x00009400,
+ 0x00009b00, 0x00001e00, 0x00008700, 0x0000e900,
+ 0x0000ce00, 0x00005500, 0x00002800, 0x0000df00,
+ 0x00008c00, 0x0000a100, 0x00008900, 0x00000d00,
+ 0x0000bf00, 0x0000e600, 0x00004200, 0x00006800,
+ 0x00004100, 0x00009900, 0x00002d00, 0x00000f00,
+ 0x0000b000, 0x00005400, 0x0000bb00, 0x00001600
+ },
+ {
+ 0x00630000, 0x007c0000, 0x00770000, 0x007b0000,
+ 0x00f20000, 0x006b0000, 0x006f0000, 0x00c50000,
+ 0x00300000, 0x00010000, 0x00670000, 0x002b0000,
+ 0x00fe0000, 0x00d70000, 0x00ab0000, 0x00760000,
+ 0x00ca0000, 0x00820000, 0x00c90000, 0x007d0000,
+ 0x00fa0000, 0x00590000, 0x00470000, 0x00f00000,
+ 0x00ad0000, 0x00d40000, 0x00a20000, 0x00af0000,
+ 0x009c0000, 0x00a40000, 0x00720000, 0x00c00000,
+ 0x00b70000, 0x00fd0000, 0x00930000, 0x00260000,
+ 0x00360000, 0x003f0000, 0x00f70000, 0x00cc0000,
+ 0x00340000, 0x00a50000, 0x00e50000, 0x00f10000,
+ 0x00710000, 0x00d80000, 0x00310000, 0x00150000,
+ 0x00040000, 0x00c70000, 0x00230000, 0x00c30000,
+ 0x00180000, 0x00960000, 0x00050000, 0x009a0000,
+ 0x00070000, 0x00120000, 0x00800000, 0x00e20000,
+ 0x00eb0000, 0x00270000, 0x00b20000, 0x00750000,
+ 0x00090000, 0x00830000, 0x002c0000, 0x001a0000,
+ 0x001b0000, 0x006e0000, 0x005a0000, 0x00a00000,
+ 0x00520000, 0x003b0000, 0x00d60000, 0x00b30000,
+ 0x00290000, 0x00e30000, 0x002f0000, 0x00840000,
+ 0x00530000, 0x00d10000, 0x00000000, 0x00ed0000,
+ 0x00200000, 0x00fc0000, 0x00b10000, 0x005b0000,
+ 0x006a0000, 0x00cb0000, 0x00be0000, 0x00390000,
+ 0x004a0000, 0x004c0000, 0x00580000, 0x00cf0000,
+ 0x00d00000, 0x00ef0000, 0x00aa0000, 0x00fb0000,
+ 0x00430000, 0x004d0000, 0x00330000, 0x00850000,
+ 0x00450000, 0x00f90000, 0x00020000, 0x007f0000,
+ 0x00500000, 0x003c0000, 0x009f0000, 0x00a80000,
+ 0x00510000, 0x00a30000, 0x00400000, 0x008f0000,
+ 0x00920000, 0x009d0000, 0x00380000, 0x00f50000,
+ 0x00bc0000, 0x00b60000, 0x00da0000, 0x00210000,
+ 0x00100000, 0x00ff0000, 0x00f30000, 0x00d20000,
+ 0x00cd0000, 0x000c0000, 0x00130000, 0x00ec0000,
+ 0x005f0000, 0x00970000, 0x00440000, 0x00170000,
+ 0x00c40000, 0x00a70000, 0x007e0000, 0x003d0000,
+ 0x00640000, 0x005d0000, 0x00190000, 0x00730000,
+ 0x00600000, 0x00810000, 0x004f0000, 0x00dc0000,
+ 0x00220000, 0x002a0000, 0x00900000, 0x00880000,
+ 0x00460000, 0x00ee0000, 0x00b80000, 0x00140000,
+ 0x00de0000, 0x005e0000, 0x000b0000, 0x00db0000,
+ 0x00e00000, 0x00320000, 0x003a0000, 0x000a0000,
+ 0x00490000, 0x00060000, 0x00240000, 0x005c0000,
+ 0x00c20000, 0x00d30000, 0x00ac0000, 0x00620000,
+ 0x00910000, 0x00950000, 0x00e40000, 0x00790000,
+ 0x00e70000, 0x00c80000, 0x00370000, 0x006d0000,
+ 0x008d0000, 0x00d50000, 0x004e0000, 0x00a90000,
+ 0x006c0000, 0x00560000, 0x00f40000, 0x00ea0000,
+ 0x00650000, 0x007a0000, 0x00ae0000, 0x00080000,
+ 0x00ba0000, 0x00780000, 0x00250000, 0x002e0000,
+ 0x001c0000, 0x00a60000, 0x00b40000, 0x00c60000,
+ 0x00e80000, 0x00dd0000, 0x00740000, 0x001f0000,
+ 0x004b0000, 0x00bd0000, 0x008b0000, 0x008a0000,
+ 0x00700000, 0x003e0000, 0x00b50000, 0x00660000,
+ 0x00480000, 0x00030000, 0x00f60000, 0x000e0000,
+ 0x00610000, 0x00350000, 0x00570000, 0x00b90000,
+ 0x00860000, 0x00c10000, 0x001d0000, 0x009e0000,
+ 0x00e10000, 0x00f80000, 0x00980000, 0x00110000,
+ 0x00690000, 0x00d90000, 0x008e0000, 0x00940000,
+ 0x009b0000, 0x001e0000, 0x00870000, 0x00e90000,
+ 0x00ce0000, 0x00550000, 0x00280000, 0x00df0000,
+ 0x008c0000, 0x00a10000, 0x00890000, 0x000d0000,
+ 0x00bf0000, 0x00e60000, 0x00420000, 0x00680000,
+ 0x00410000, 0x00990000, 0x002d0000, 0x000f0000,
+ 0x00b00000, 0x00540000, 0x00bb0000, 0x00160000
+ },
+ {
+ 0x63000000, 0x7c000000, 0x77000000, 0x7b000000,
+ 0xf2000000, 0x6b000000, 0x6f000000, 0xc5000000,
+ 0x30000000, 0x01000000, 0x67000000, 0x2b000000,
+ 0xfe000000, 0xd7000000, 0xab000000, 0x76000000,
+ 0xca000000, 0x82000000, 0xc9000000, 0x7d000000,
+ 0xfa000000, 0x59000000, 0x47000000, 0xf0000000,
+ 0xad000000, 0xd4000000, 0xa2000000, 0xaf000000,
+ 0x9c000000, 0xa4000000, 0x72000000, 0xc0000000,
+ 0xb7000000, 0xfd000000, 0x93000000, 0x26000000,
+ 0x36000000, 0x3f000000, 0xf7000000, 0xcc000000,
+ 0x34000000, 0xa5000000, 0xe5000000, 0xf1000000,
+ 0x71000000, 0xd8000000, 0x31000000, 0x15000000,
+ 0x04000000, 0xc7000000, 0x23000000, 0xc3000000,
+ 0x18000000, 0x96000000, 0x05000000, 0x9a000000,
+ 0x07000000, 0x12000000, 0x80000000, 0xe2000000,
+ 0xeb000000, 0x27000000, 0xb2000000, 0x75000000,
+ 0x09000000, 0x83000000, 0x2c000000, 0x1a000000,
+ 0x1b000000, 0x6e000000, 0x5a000000, 0xa0000000,
+ 0x52000000, 0x3b000000, 0xd6000000, 0xb3000000,
+ 0x29000000, 0xe3000000, 0x2f000000, 0x84000000,
+ 0x53000000, 0xd1000000, 0x00000000, 0xed000000,
+ 0x20000000, 0xfc000000, 0xb1000000, 0x5b000000,
+ 0x6a000000, 0xcb000000, 0xbe000000, 0x39000000,
+ 0x4a000000, 0x4c000000, 0x58000000, 0xcf000000,
+ 0xd0000000, 0xef000000, 0xaa000000, 0xfb000000,
+ 0x43000000, 0x4d000000, 0x33000000, 0x85000000,
+ 0x45000000, 0xf9000000, 0x02000000, 0x7f000000,
+ 0x50000000, 0x3c000000, 0x9f000000, 0xa8000000,
+ 0x51000000, 0xa3000000, 0x40000000, 0x8f000000,
+ 0x92000000, 0x9d000000, 0x38000000, 0xf5000000,
+ 0xbc000000, 0xb6000000, 0xda000000, 0x21000000,
+ 0x10000000, 0xff000000, 0xf3000000, 0xd2000000,
+ 0xcd000000, 0x0c000000, 0x13000000, 0xec000000,
+ 0x5f000000, 0x97000000, 0x44000000, 0x17000000,
+ 0xc4000000, 0xa7000000, 0x7e000000, 0x3d000000,
+ 0x64000000, 0x5d000000, 0x19000000, 0x73000000,
+ 0x60000000, 0x81000000, 0x4f000000, 0xdc000000,
+ 0x22000000, 0x2a000000, 0x90000000, 0x88000000,
+ 0x46000000, 0xee000000, 0xb8000000, 0x14000000,
+ 0xde000000, 0x5e000000, 0x0b000000, 0xdb000000,
+ 0xe0000000, 0x32000000, 0x3a000000, 0x0a000000,
+ 0x49000000, 0x06000000, 0x24000000, 0x5c000000,
+ 0xc2000000, 0xd3000000, 0xac000000, 0x62000000,
+ 0x91000000, 0x95000000, 0xe4000000, 0x79000000,
+ 0xe7000000, 0xc8000000, 0x37000000, 0x6d000000,
+ 0x8d000000, 0xd5000000, 0x4e000000, 0xa9000000,
+ 0x6c000000, 0x56000000, 0xf4000000, 0xea000000,
+ 0x65000000, 0x7a000000, 0xae000000, 0x08000000,
+ 0xba000000, 0x78000000, 0x25000000, 0x2e000000,
+ 0x1c000000, 0xa6000000, 0xb4000000, 0xc6000000,
+ 0xe8000000, 0xdd000000, 0x74000000, 0x1f000000,
+ 0x4b000000, 0xbd000000, 0x8b000000, 0x8a000000,
+ 0x70000000, 0x3e000000, 0xb5000000, 0x66000000,
+ 0x48000000, 0x03000000, 0xf6000000, 0x0e000000,
+ 0x61000000, 0x35000000, 0x57000000, 0xb9000000,
+ 0x86000000, 0xc1000000, 0x1d000000, 0x9e000000,
+ 0xe1000000, 0xf8000000, 0x98000000, 0x11000000,
+ 0x69000000, 0xd9000000, 0x8e000000, 0x94000000,
+ 0x9b000000, 0x1e000000, 0x87000000, 0xe9000000,
+ 0xce000000, 0x55000000, 0x28000000, 0xdf000000,
+ 0x8c000000, 0xa1000000, 0x89000000, 0x0d000000,
+ 0xbf000000, 0xe6000000, 0x42000000, 0x68000000,
+ 0x41000000, 0x99000000, 0x2d000000, 0x0f000000,
+ 0xb0000000, 0x54000000, 0xbb000000, 0x16000000
+ }
+};
+
+static const uint32_t t_im[4][256] =
+{
+ {
+ 0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12,
+ 0x2c342438, 0x27392d36, 0x3a2e3624, 0x31233f2a,
+ 0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
+ 0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a,
+ 0xb0d090e0, 0xbbdd99ee, 0xa6ca82fc, 0xadc78bf2,
+ 0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
+ 0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382,
+ 0xc48cfca8, 0xcf81f5a6, 0xd296eeb4, 0xd99be7ba,
+ 0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
+ 0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1,
+ 0x23d373ab, 0x28de7aa5, 0x35c961b7, 0x3ec468b9,
+ 0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
+ 0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029,
+ 0xe75f8f03, 0xec52860d, 0xf1459d1f, 0xfa489411,
+ 0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
+ 0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61,
+ 0xf66d76ad, 0xfd607fa3, 0xe07764b1, 0xeb7a6dbf,
+ 0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
+ 0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf,
+ 0x82311ae5, 0x893c13eb, 0x942b08f9, 0x9f2601f7,
+ 0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
+ 0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967,
+ 0x1ed5ae3d, 0x15d8a733, 0x08cfbc21, 0x03c2b52f,
+ 0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
+ 0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664,
+ 0xa1e2694e, 0xaaef6040, 0xb7f87b52, 0xbcf5725c,
+ 0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
+ 0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c,
+ 0x3d06dd96, 0x360bd498, 0x2b1ccf8a, 0x2011c684,
+ 0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
+ 0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4,
+ 0x495ab1de, 0x4257b8d0, 0x5f40a3c2, 0x544daacc,
+ 0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
+ 0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b,
+ 0xafb2a431, 0xa4bfad3f, 0xb9a8b62d, 0xb2a5bf23,
+ 0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
+ 0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3,
+ 0x6b3e5899, 0x60335197, 0x7d244a85, 0x7629438b,
+ 0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
+ 0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb,
+ 0x8c61d79a, 0x876cde94, 0x9a7bc586, 0x9176cc88,
+ 0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
+ 0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8,
+ 0xf83dbbd2, 0xf330b2dc, 0xee27a9ce, 0xe52aa0c0,
+ 0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
+ 0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850,
+ 0x64d90f0a, 0x6fd40604, 0x72c31d16, 0x79ce1418,
+ 0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
+ 0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe,
+ 0x2d83bed4, 0x268eb7da, 0x3b99acc8, 0x3094a5c6,
+ 0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
+ 0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6,
+ 0xb1670a0c, 0xba6a0302, 0xa77d1810, 0xac70111e,
+ 0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
+ 0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e,
+ 0xc53b6644, 0xce366f4a, 0xd3217458, 0xd82c7d56,
+ 0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
+ 0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d,
+ 0x2264e947, 0x2969e049, 0x347efb5b, 0x3f73f255,
+ 0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
+ 0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5,
+ 0xe6e815ef, 0xede51ce1, 0xf0f207f3, 0xfbff0efd,
+ 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
+ 0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d
+ },
+ {
+ 0x00000000, 0x0d090e0b, 0x1a121c16, 0x171b121d,
+ 0x3424382c, 0x392d3627, 0x2e36243a, 0x233f2a31,
+ 0x68487058, 0x65417e53, 0x725a6c4e, 0x7f536245,
+ 0x5c6c4874, 0x5165467f, 0x467e5462, 0x4b775a69,
+ 0xd090e0b0, 0xdd99eebb, 0xca82fca6, 0xc78bf2ad,
+ 0xe4b4d89c, 0xe9bdd697, 0xfea6c48a, 0xf3afca81,
+ 0xb8d890e8, 0xb5d19ee3, 0xa2ca8cfe, 0xafc382f5,
+ 0x8cfca8c4, 0x81f5a6cf, 0x96eeb4d2, 0x9be7bad9,
+ 0xbb3bdb7b, 0xb632d570, 0xa129c76d, 0xac20c966,
+ 0x8f1fe357, 0x8216ed5c, 0x950dff41, 0x9804f14a,
+ 0xd373ab23, 0xde7aa528, 0xc961b735, 0xc468b93e,
+ 0xe757930f, 0xea5e9d04, 0xfd458f19, 0xf04c8112,
+ 0x6bab3bcb, 0x66a235c0, 0x71b927dd, 0x7cb029d6,
+ 0x5f8f03e7, 0x52860dec, 0x459d1ff1, 0x489411fa,
+ 0x03e34b93, 0x0eea4598, 0x19f15785, 0x14f8598e,
+ 0x37c773bf, 0x3ace7db4, 0x2dd56fa9, 0x20dc61a2,
+ 0x6d76adf6, 0x607fa3fd, 0x7764b1e0, 0x7a6dbfeb,
+ 0x595295da, 0x545b9bd1, 0x434089cc, 0x4e4987c7,
+ 0x053eddae, 0x0837d3a5, 0x1f2cc1b8, 0x1225cfb3,
+ 0x311ae582, 0x3c13eb89, 0x2b08f994, 0x2601f79f,
+ 0xbde64d46, 0xb0ef434d, 0xa7f45150, 0xaafd5f5b,
+ 0x89c2756a, 0x84cb7b61, 0x93d0697c, 0x9ed96777,
+ 0xd5ae3d1e, 0xd8a73315, 0xcfbc2108, 0xc2b52f03,
+ 0xe18a0532, 0xec830b39, 0xfb981924, 0xf691172f,
+ 0xd64d768d, 0xdb447886, 0xcc5f6a9b, 0xc1566490,
+ 0xe2694ea1, 0xef6040aa, 0xf87b52b7, 0xf5725cbc,
+ 0xbe0506d5, 0xb30c08de, 0xa4171ac3, 0xa91e14c8,
+ 0x8a213ef9, 0x872830f2, 0x903322ef, 0x9d3a2ce4,
+ 0x06dd963d, 0x0bd49836, 0x1ccf8a2b, 0x11c68420,
+ 0x32f9ae11, 0x3ff0a01a, 0x28ebb207, 0x25e2bc0c,
+ 0x6e95e665, 0x639ce86e, 0x7487fa73, 0x798ef478,
+ 0x5ab1de49, 0x57b8d042, 0x40a3c25f, 0x4daacc54,
+ 0xdaec41f7, 0xd7e54ffc, 0xc0fe5de1, 0xcdf753ea,
+ 0xeec879db, 0xe3c177d0, 0xf4da65cd, 0xf9d36bc6,
+ 0xb2a431af, 0xbfad3fa4, 0xa8b62db9, 0xa5bf23b2,
+ 0x86800983, 0x8b890788, 0x9c921595, 0x919b1b9e,
+ 0x0a7ca147, 0x0775af4c, 0x106ebd51, 0x1d67b35a,
+ 0x3e58996b, 0x33519760, 0x244a857d, 0x29438b76,
+ 0x6234d11f, 0x6f3ddf14, 0x7826cd09, 0x752fc302,
+ 0x5610e933, 0x5b19e738, 0x4c02f525, 0x410bfb2e,
+ 0x61d79a8c, 0x6cde9487, 0x7bc5869a, 0x76cc8891,
+ 0x55f3a2a0, 0x58faacab, 0x4fe1beb6, 0x42e8b0bd,
+ 0x099fead4, 0x0496e4df, 0x138df6c2, 0x1e84f8c9,
+ 0x3dbbd2f8, 0x30b2dcf3, 0x27a9ceee, 0x2aa0c0e5,
+ 0xb1477a3c, 0xbc4e7437, 0xab55662a, 0xa65c6821,
+ 0x85634210, 0x886a4c1b, 0x9f715e06, 0x9278500d,
+ 0xd90f0a64, 0xd406046f, 0xc31d1672, 0xce141879,
+ 0xed2b3248, 0xe0223c43, 0xf7392e5e, 0xfa302055,
+ 0xb79aec01, 0xba93e20a, 0xad88f017, 0xa081fe1c,
+ 0x83bed42d, 0x8eb7da26, 0x99acc83b, 0x94a5c630,
+ 0xdfd29c59, 0xd2db9252, 0xc5c0804f, 0xc8c98e44,
+ 0xebf6a475, 0xe6ffaa7e, 0xf1e4b863, 0xfcedb668,
+ 0x670a0cb1, 0x6a0302ba, 0x7d1810a7, 0x70111eac,
+ 0x532e349d, 0x5e273a96, 0x493c288b, 0x44352680,
+ 0x0f427ce9, 0x024b72e2, 0x155060ff, 0x18596ef4,
+ 0x3b6644c5, 0x366f4ace, 0x217458d3, 0x2c7d56d8,
+ 0x0ca1377a, 0x01a83971, 0x16b32b6c, 0x1bba2567,
+ 0x38850f56, 0x358c015d, 0x22971340, 0x2f9e1d4b,
+ 0x64e94722, 0x69e04929, 0x7efb5b34, 0x73f2553f,
+ 0x50cd7f0e, 0x5dc47105, 0x4adf6318, 0x47d66d13,
+ 0xdc31d7ca, 0xd138d9c1, 0xc623cbdc, 0xcb2ac5d7,
+ 0xe815efe6, 0xe51ce1ed, 0xf207f3f0, 0xff0efdfb,
+ 0xb479a792, 0xb970a999, 0xae6bbb84, 0xa362b58f,
+ 0x805d9fbe, 0x8d5491b5, 0x9a4f83a8, 0x97468da3
+ },
+ {
+ 0x00000000, 0x090e0b0d, 0x121c161a, 0x1b121d17,
+ 0x24382c34, 0x2d362739, 0x36243a2e, 0x3f2a3123,
+ 0x48705868, 0x417e5365, 0x5a6c4e72, 0x5362457f,
+ 0x6c48745c, 0x65467f51, 0x7e546246, 0x775a694b,
+ 0x90e0b0d0, 0x99eebbdd, 0x82fca6ca, 0x8bf2adc7,
+ 0xb4d89ce4, 0xbdd697e9, 0xa6c48afe, 0xafca81f3,
+ 0xd890e8b8, 0xd19ee3b5, 0xca8cfea2, 0xc382f5af,
+ 0xfca8c48c, 0xf5a6cf81, 0xeeb4d296, 0xe7bad99b,
+ 0x3bdb7bbb, 0x32d570b6, 0x29c76da1, 0x20c966ac,
+ 0x1fe3578f, 0x16ed5c82, 0x0dff4195, 0x04f14a98,
+ 0x73ab23d3, 0x7aa528de, 0x61b735c9, 0x68b93ec4,
+ 0x57930fe7, 0x5e9d04ea, 0x458f19fd, 0x4c8112f0,
+ 0xab3bcb6b, 0xa235c066, 0xb927dd71, 0xb029d67c,
+ 0x8f03e75f, 0x860dec52, 0x9d1ff145, 0x9411fa48,
+ 0xe34b9303, 0xea45980e, 0xf1578519, 0xf8598e14,
+ 0xc773bf37, 0xce7db43a, 0xd56fa92d, 0xdc61a220,
+ 0x76adf66d, 0x7fa3fd60, 0x64b1e077, 0x6dbfeb7a,
+ 0x5295da59, 0x5b9bd154, 0x4089cc43, 0x4987c74e,
+ 0x3eddae05, 0x37d3a508, 0x2cc1b81f, 0x25cfb312,
+ 0x1ae58231, 0x13eb893c, 0x08f9942b, 0x01f79f26,
+ 0xe64d46bd, 0xef434db0, 0xf45150a7, 0xfd5f5baa,
+ 0xc2756a89, 0xcb7b6184, 0xd0697c93, 0xd967779e,
+ 0xae3d1ed5, 0xa73315d8, 0xbc2108cf, 0xb52f03c2,
+ 0x8a0532e1, 0x830b39ec, 0x981924fb, 0x91172ff6,
+ 0x4d768dd6, 0x447886db, 0x5f6a9bcc, 0x566490c1,
+ 0x694ea1e2, 0x6040aaef, 0x7b52b7f8, 0x725cbcf5,
+ 0x0506d5be, 0x0c08deb3, 0x171ac3a4, 0x1e14c8a9,
+ 0x213ef98a, 0x2830f287, 0x3322ef90, 0x3a2ce49d,
+ 0xdd963d06, 0xd498360b, 0xcf8a2b1c, 0xc6842011,
+ 0xf9ae1132, 0xf0a01a3f, 0xebb20728, 0xe2bc0c25,
+ 0x95e6656e, 0x9ce86e63, 0x87fa7374, 0x8ef47879,
+ 0xb1de495a, 0xb8d04257, 0xa3c25f40, 0xaacc544d,
+ 0xec41f7da, 0xe54ffcd7, 0xfe5de1c0, 0xf753eacd,
+ 0xc879dbee, 0xc177d0e3, 0xda65cdf4, 0xd36bc6f9,
+ 0xa431afb2, 0xad3fa4bf, 0xb62db9a8, 0xbf23b2a5,
+ 0x80098386, 0x8907888b, 0x9215959c, 0x9b1b9e91,
+ 0x7ca1470a, 0x75af4c07, 0x6ebd5110, 0x67b35a1d,
+ 0x58996b3e, 0x51976033, 0x4a857d24, 0x438b7629,
+ 0x34d11f62, 0x3ddf146f, 0x26cd0978, 0x2fc30275,
+ 0x10e93356, 0x19e7385b, 0x02f5254c, 0x0bfb2e41,
+ 0xd79a8c61, 0xde94876c, 0xc5869a7b, 0xcc889176,
+ 0xf3a2a055, 0xfaacab58, 0xe1beb64f, 0xe8b0bd42,
+ 0x9fead409, 0x96e4df04, 0x8df6c213, 0x84f8c91e,
+ 0xbbd2f83d, 0xb2dcf330, 0xa9ceee27, 0xa0c0e52a,
+ 0x477a3cb1, 0x4e7437bc, 0x55662aab, 0x5c6821a6,
+ 0x63421085, 0x6a4c1b88, 0x715e069f, 0x78500d92,
+ 0x0f0a64d9, 0x06046fd4, 0x1d1672c3, 0x141879ce,
+ 0x2b3248ed, 0x223c43e0, 0x392e5ef7, 0x302055fa,
+ 0x9aec01b7, 0x93e20aba, 0x88f017ad, 0x81fe1ca0,
+ 0xbed42d83, 0xb7da268e, 0xacc83b99, 0xa5c63094,
+ 0xd29c59df, 0xdb9252d2, 0xc0804fc5, 0xc98e44c8,
+ 0xf6a475eb, 0xffaa7ee6, 0xe4b863f1, 0xedb668fc,
+ 0x0a0cb167, 0x0302ba6a, 0x1810a77d, 0x111eac70,
+ 0x2e349d53, 0x273a965e, 0x3c288b49, 0x35268044,
+ 0x427ce90f, 0x4b72e202, 0x5060ff15, 0x596ef418,
+ 0x6644c53b, 0x6f4ace36, 0x7458d321, 0x7d56d82c,
+ 0xa1377a0c, 0xa8397101, 0xb32b6c16, 0xba25671b,
+ 0x850f5638, 0x8c015d35, 0x97134022, 0x9e1d4b2f,
+ 0xe9472264, 0xe0492969, 0xfb5b347e, 0xf2553f73,
+ 0xcd7f0e50, 0xc471055d, 0xdf63184a, 0xd66d1347,
+ 0x31d7cadc, 0x38d9c1d1, 0x23cbdcc6, 0x2ac5d7cb,
+ 0x15efe6e8, 0x1ce1ede5, 0x07f3f0f2, 0x0efdfbff,
+ 0x79a792b4, 0x70a999b9, 0x6bbb84ae, 0x62b58fa3,
+ 0x5d9fbe80, 0x5491b58d, 0x4f83a89a, 0x468da397
+ },
+ {
+ 0x00000000, 0x0e0b0d09, 0x1c161a12, 0x121d171b,
+ 0x382c3424, 0x3627392d, 0x243a2e36, 0x2a31233f,
+ 0x70586848, 0x7e536541, 0x6c4e725a, 0x62457f53,
+ 0x48745c6c, 0x467f5165, 0x5462467e, 0x5a694b77,
+ 0xe0b0d090, 0xeebbdd99, 0xfca6ca82, 0xf2adc78b,
+ 0xd89ce4b4, 0xd697e9bd, 0xc48afea6, 0xca81f3af,
+ 0x90e8b8d8, 0x9ee3b5d1, 0x8cfea2ca, 0x82f5afc3,
+ 0xa8c48cfc, 0xa6cf81f5, 0xb4d296ee, 0xbad99be7,
+ 0xdb7bbb3b, 0xd570b632, 0xc76da129, 0xc966ac20,
+ 0xe3578f1f, 0xed5c8216, 0xff41950d, 0xf14a9804,
+ 0xab23d373, 0xa528de7a, 0xb735c961, 0xb93ec468,
+ 0x930fe757, 0x9d04ea5e, 0x8f19fd45, 0x8112f04c,
+ 0x3bcb6bab, 0x35c066a2, 0x27dd71b9, 0x29d67cb0,
+ 0x03e75f8f, 0x0dec5286, 0x1ff1459d, 0x11fa4894,
+ 0x4b9303e3, 0x45980eea, 0x578519f1, 0x598e14f8,
+ 0x73bf37c7, 0x7db43ace, 0x6fa92dd5, 0x61a220dc,
+ 0xadf66d76, 0xa3fd607f, 0xb1e07764, 0xbfeb7a6d,
+ 0x95da5952, 0x9bd1545b, 0x89cc4340, 0x87c74e49,
+ 0xddae053e, 0xd3a50837, 0xc1b81f2c, 0xcfb31225,
+ 0xe582311a, 0xeb893c13, 0xf9942b08, 0xf79f2601,
+ 0x4d46bde6, 0x434db0ef, 0x5150a7f4, 0x5f5baafd,
+ 0x756a89c2, 0x7b6184cb, 0x697c93d0, 0x67779ed9,
+ 0x3d1ed5ae, 0x3315d8a7, 0x2108cfbc, 0x2f03c2b5,
+ 0x0532e18a, 0x0b39ec83, 0x1924fb98, 0x172ff691,
+ 0x768dd64d, 0x7886db44, 0x6a9bcc5f, 0x6490c156,
+ 0x4ea1e269, 0x40aaef60, 0x52b7f87b, 0x5cbcf572,
+ 0x06d5be05, 0x08deb30c, 0x1ac3a417, 0x14c8a91e,
+ 0x3ef98a21, 0x30f28728, 0x22ef9033, 0x2ce49d3a,
+ 0x963d06dd, 0x98360bd4, 0x8a2b1ccf, 0x842011c6,
+ 0xae1132f9, 0xa01a3ff0, 0xb20728eb, 0xbc0c25e2,
+ 0xe6656e95, 0xe86e639c, 0xfa737487, 0xf478798e,
+ 0xde495ab1, 0xd04257b8, 0xc25f40a3, 0xcc544daa,
+ 0x41f7daec, 0x4ffcd7e5, 0x5de1c0fe, 0x53eacdf7,
+ 0x79dbeec8, 0x77d0e3c1, 0x65cdf4da, 0x6bc6f9d3,
+ 0x31afb2a4, 0x3fa4bfad, 0x2db9a8b6, 0x23b2a5bf,
+ 0x09838680, 0x07888b89, 0x15959c92, 0x1b9e919b,
+ 0xa1470a7c, 0xaf4c0775, 0xbd51106e, 0xb35a1d67,
+ 0x996b3e58, 0x97603351, 0x857d244a, 0x8b762943,
+ 0xd11f6234, 0xdf146f3d, 0xcd097826, 0xc302752f,
+ 0xe9335610, 0xe7385b19, 0xf5254c02, 0xfb2e410b,
+ 0x9a8c61d7, 0x94876cde, 0x869a7bc5, 0x889176cc,
+ 0xa2a055f3, 0xacab58fa, 0xbeb64fe1, 0xb0bd42e8,
+ 0xead4099f, 0xe4df0496, 0xf6c2138d, 0xf8c91e84,
+ 0xd2f83dbb, 0xdcf330b2, 0xceee27a9, 0xc0e52aa0,
+ 0x7a3cb147, 0x7437bc4e, 0x662aab55, 0x6821a65c,
+ 0x42108563, 0x4c1b886a, 0x5e069f71, 0x500d9278,
+ 0x0a64d90f, 0x046fd406, 0x1672c31d, 0x1879ce14,
+ 0x3248ed2b, 0x3c43e022, 0x2e5ef739, 0x2055fa30,
+ 0xec01b79a, 0xe20aba93, 0xf017ad88, 0xfe1ca081,
+ 0xd42d83be, 0xda268eb7, 0xc83b99ac, 0xc63094a5,
+ 0x9c59dfd2, 0x9252d2db, 0x804fc5c0, 0x8e44c8c9,
+ 0xa475ebf6, 0xaa7ee6ff, 0xb863f1e4, 0xb668fced,
+ 0x0cb1670a, 0x02ba6a03, 0x10a77d18, 0x1eac7011,
+ 0x349d532e, 0x3a965e27, 0x288b493c, 0x26804435,
+ 0x7ce90f42, 0x72e2024b, 0x60ff1550, 0x6ef41859,
+ 0x44c53b66, 0x4ace366f, 0x58d32174, 0x56d82c7d,
+ 0x377a0ca1, 0x397101a8, 0x2b6c16b3, 0x25671bba,
+ 0x0f563885, 0x015d358c, 0x13402297, 0x1d4b2f9e,
+ 0x472264e9, 0x492969e0, 0x5b347efb, 0x553f73f2,
+ 0x7f0e50cd, 0x71055dc4, 0x63184adf, 0x6d1347d6,
+ 0xd7cadc31, 0xd9c1d138, 0xcbdcc623, 0xc5d7cb2a,
+ 0xefe6e815, 0xe1ede51c, 0xf3f0f207, 0xfdfbff0e,
+ 0xa792b479, 0xa999b970, 0xbb84ae6b, 0xb58fa362,
+ 0x9fbe805d, 0x91b58d54, 0x83a89a4f, 0x8da39746
+ }
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _AESTAB2_H */
diff --git a/zfs/module/icp/asm-x86_64/modes/gcm_intel.S b/zfs/module/icp/asm-x86_64/modes/gcm_intel.S
new file mode 100644
index 000000000000..a43b5ebcb7e5
--- /dev/null
+++ b/zfs/module/icp/asm-x86_64/modes/gcm_intel.S
@@ -0,0 +1,254 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2009 Intel Corporation
+ * All Rights Reserved.
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * Accelerated GHASH implementation with Intel PCLMULQDQ-NI
+ * instructions. This file contains an accelerated
+ * Galois Field Multiplication implementation.
+ *
+ * PCLMULQDQ is used to accelerate the most time-consuming part of GHASH,
+ * carry-less multiplication. More information about PCLMULQDQ can be
+ * found at:
+ * http://software.intel.com/en-us/articles/
+ * carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
+ *
+ */
+
+/*
+ * ====================================================================
+ * OpenSolaris OS modifications
+ *
+ * This source originates as file galois_hash_asm.c from
+ * Intel Corporation dated September 21, 2009.
+ *
+ * This OpenSolaris version has these major changes from the original source:
+ *
+ * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
+ * /usr/include/sys/asm_linkage.h, lint(1B) guards, and a dummy C function
+ * definition for lint.
+ *
+ * 2. Formatted code, added comments, and added #includes and #defines.
+ *
+ * 3. If bit CR0.TS is set, clear the TS bit after calling
+ *    kpreempt_disable() and set it again before calling kpreempt_enable().
+ *    If the TS bit is not set, save and restore the %xmm registers at the
+ *    beginning and end of function calls (%xmm* registers are not saved
+ *    and restored during kernel thread preemption).
+ *
+ * 4. Removed code to perform hashing. This is already done with C macro
+ * GHASH in gcm.c. For better performance, this removed code should be
+ * reintegrated in the future to replace the C GHASH macro.
+ *
+ * 5. Added code to byte swap 16-byte input and output.
+ *
+ * 6. Folded in comments from the original C source with embedded assembly
+ * (SB_w_shift_xor.c)
+ *
+ * 7. Renamed function and reordered parameters to match OpenSolaris:
+ * Intel interface:
+ * void galois_hash_asm(unsigned char *hk, unsigned char *s,
+ * unsigned char *d, int length)
+ * OpenSolaris OS interface:
+ * void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res);
+ * ====================================================================
+ */
+
+
+#if defined(lint) || defined(__lint)
+
+#include <sys/types.h>
+
+/* ARGSUSED */
+void
+gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res)
+{
+}
+
+#else /* lint */
+
+#define _ASM
+#include <sys/asm_linkage.h>
+
+/*
+ * Use this mask to byte-swap a 16-byte integer with the pshufb instruction
+ */
+
+// static uint8_t byte_swap16_mask[] = {
+//	15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+.data
+.align XMM_ALIGN
+.Lbyte_swap16_mask:
+ .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
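+
+// Note (explanatory, annotation only): applied with pshufb below, this
+// descending index pattern reverses all 16 bytes of an xmm register,
+// converting the little-endian load order of the input blocks to the
+// big-endian bit order that GHASH expects.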
+
+
+/*
+ * void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res);
+ *
+ * Perform a carry-less multiplication (that is, accumulate the partial
+ * products with XOR instead of addition) on P1 and P2 and place the
+ * result in P3.
+ *
+ * Byte swap the input and the output.
+ *
+ * Note: x_in, y, and res all point to a 16-byte block
+ * (an array of two 64-bit integers).
+ *
+ * Note2: For kernel code, caller is responsible for ensuring
+ * kpreempt_disable() has been called. This is because %xmm registers are
+ * not saved/restored. Clear and set the CR0.TS bit on entry and exit,
+ * respectively, if TS is set on entry. Otherwise, if TS is not set,
+ * save and restore %xmm registers on the stack.
+ *
+ * Note3: Original Intel definition:
+ * void galois_hash_asm(unsigned char *hk, unsigned char *s,
+ * unsigned char *d, int length)
+ *
+ * Note4: Register/parameter mapping:
+ * Intel:
+ * Parameter 1: %rcx (copied to %xmm0) hk or x_in
+ * Parameter 2: %rdx (copied to %xmm1) s or y
+ * Parameter 3: %rdi (result) d or res
+ * OpenSolaris:
+ * Parameter 1: %rdi (copied to %xmm0) x_in
+ * Parameter 2: %rsi (copied to %xmm1) y
+ * Parameter 3: %rdx (result) res
+ */
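+
+/*
+ * Illustrative sketch only (comment, not assembled): a bit-level C model
+ * of the carry-less multiply that the PCLMULQDQ instructions below compute
+ * in hardware.  The helper name clmul64 is hypothetical.
+ *
+ *	static void
+ *	clmul64(uint64_t a, uint64_t b, uint64_t r[2])
+ *	{
+ *		int i;
+ *
+ *		r[0] = r[1] = 0;
+ *		for (i = 0; i < 64; i++) {
+ *			if ((b >> i) & 1) {
+ *				r[0] ^= a << i;
+ *				if (i > 0)
+ *					r[1] ^= a >> (64 - i);
+ *			}
+ *		}
+ *	}
+ *
+ * The four pclmulqdq invocations below apply this to the 64-bit halves:
+ * immediate $0 selects a0*b0, $16 a0*b1, $1 a1*b0, and $17 a1*b1; the two
+ * middle products are then folded into the 128-bit halves with pxor.
+ */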
+
+ENTRY_NP(gcm_mul_pclmulqdq)
+ //
+ // Copy Parameters
+ //
+ movdqu (%rdi), %xmm0 // P1
+ movdqu (%rsi), %xmm1 // P2
+
+ //
+ // Byte swap 16-byte input
+ //
+ lea .Lbyte_swap16_mask(%rip), %rax
+ movaps (%rax), %xmm10
+ pshufb %xmm10, %xmm0
+ pshufb %xmm10, %xmm1
+
+
+ //
+ // Multiply with the hash key
+ //
+ movdqu %xmm0, %xmm3
+ pclmulqdq $0, %xmm1, %xmm3 // xmm3 holds a0*b0
+
+ movdqu %xmm0, %xmm4
+ pclmulqdq $16, %xmm1, %xmm4 // xmm4 holds a0*b1
+
+ movdqu %xmm0, %xmm5
+ pclmulqdq $1, %xmm1, %xmm5 // xmm5 holds a1*b0
+ movdqu %xmm0, %xmm6
+ pclmulqdq $17, %xmm1, %xmm6 // xmm6 holds a1*b1
+
+ pxor %xmm5, %xmm4 // xmm4 holds a0*b1 + a1*b0
+
+ movdqu %xmm4, %xmm5 // move the contents of xmm4 to xmm5
+	psrldq	$8, %xmm4	// shift xmm4 right by 64 bits
+	pslldq	$8, %xmm5	// shift xmm5 left by 64 bits
+ pxor %xmm5, %xmm3
+ pxor %xmm4, %xmm6 // Register pair <xmm6:xmm3> holds the result
+ // of the carry-less multiplication of
+ // xmm0 by xmm1.
+
+	// We shift the result of the multiplication by one bit position
+	// to the left to account for the fact that the bits are reversed.
+ movdqu %xmm3, %xmm7
+ movdqu %xmm6, %xmm8
+ pslld $1, %xmm3
+ pslld $1, %xmm6
+ psrld $31, %xmm7
+ psrld $31, %xmm8
+ movdqu %xmm7, %xmm9
+ pslldq $4, %xmm8
+ pslldq $4, %xmm7
+ psrldq $12, %xmm9
+ por %xmm7, %xmm3
+ por %xmm8, %xmm6
+ por %xmm9, %xmm6
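+
+	// Explanatory note (annotation only): the sequence above is a
+	// 256-bit left shift by one over the <xmm6:xmm3> pair.  Per 32-bit
+	// lane, in C terms: w[i] = (w[i] << 1) | (w[i-1] >> 31).  The
+	// pslld/psrld pairs produce the shifted lanes and their carry
+	// bits, and pslldq/por move each carry into the neighbouring lane
+	// and across the xmm3/xmm6 boundary.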
+
+ //
+ // First phase of the reduction
+ //
+ // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts
+ // independently.
+ movdqu %xmm3, %xmm7
+ movdqu %xmm3, %xmm8
+ movdqu %xmm3, %xmm9
+	pslld	$31, %xmm7	// packed left shift by 31
+	pslld	$30, %xmm8	// packed left shift by 30
+	pslld	$25, %xmm9	// packed left shift by 25
+ pxor %xmm8, %xmm7 // xor the shifted versions
+ pxor %xmm9, %xmm7
+ movdqu %xmm7, %xmm8
+ pslldq $12, %xmm7
+ psrldq $4, %xmm8
+ pxor %xmm7, %xmm3 // first phase of the reduction complete
+
+ //
+ // Second phase of the reduction
+ //
+ // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these
+ // shift operations.
+	movdqu	%xmm3, %xmm2
+	movdqu	%xmm3, %xmm4
+	movdqu	%xmm3, %xmm5
+	psrld	$1, %xmm2	// packed right shift by 1
+	psrld	$2, %xmm4	// packed right shift by 2
+	psrld	$7, %xmm5	// packed right shift by 7
+ pxor %xmm4, %xmm2 // xor the shifted versions
+ pxor %xmm5, %xmm2
+ pxor %xmm8, %xmm2
+ pxor %xmm2, %xmm3
+ pxor %xmm3, %xmm6 // the result is in xmm6
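+
+	// Background note (annotation only): the two reduction phases
+	// together fold the 256-bit product back to 128 bits modulo the
+	// GHASH polynomial
+	//	g(x) = x^128 + x^7 + x^2 + x + 1,
+	// which is why the shift counts above are 31/30/25 (i.e. 32-1,
+	// 32-2, 32-7) in the first phase and 1/2/7 in the second.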
+
+ //
+ // Byte swap 16-byte result
+ //
+ pshufb %xmm10, %xmm6 // %xmm10 has the swap mask
+
+ //
+ // Store the result
+ //
+ movdqu %xmm6, (%rdx) // P3
+
+
+ //
+ // Return
+ //
+ ret
+ SET_SIZE(gcm_mul_pclmulqdq)
+
+#endif /* lint || __lint */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/zfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S b/zfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S
new file mode 100644
index 000000000000..cb923784a730
--- /dev/null
+++ b/zfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S
@@ -0,0 +1,1353 @@
+/*
+ * !/usr/bin/env perl
+ *
+ * ====================================================================
+ * Written by Andy Polyakov <appro at fy.chalmers.se> for the OpenSSL
+ * project. The module is, however, dual licensed under OpenSSL and
+ * CRYPTOGAMS licenses depending on where you obtain it. For further
+ * details see http://www.openssl.org/~appro/cryptogams/.
+ * ====================================================================
+ *
+ * sha1_block procedure for x86_64.
+ *
+ * It was brought to my attention that on EM64T compiler-generated code
+ * was far behind the 32-bit assembler implementation. This is unlike
+ * Opteron, where compiler-generated code was only 15% behind the 32-bit
+ * assembler, which originally made it hard to motivate the effort.
+ * There was a suggestion to mechanically translate the 32-bit code, but
+ * I dismissed it, reasoning that x86_64 offers enough register bank
+ * capacity to fully utilize SHA-1 parallelism. Therefore this fresh
+ * implementation:-) However! While the 64-bit code does perform better
+ * on Opteron, I failed to beat the 32-bit assembler on an EM64T core.
+ * Well, x86_64 does offer a larger *addressable* bank, but the
+ * out-of-order core reaches for even more registers through dynamic
+ * aliasing, and the EM64T core must have managed to run-time optimize
+ * even 32-bit code just as well as the 64-bit code. The performance
+ * improvement is summarized in the following table:
+ *
+ *			gcc 3.4	32-bit asm	cycles/byte
+ *	Opteron		+45%	+20%		6.8
+ *	Xeon P4		+65%	+0%		9.9
+ *	Core2		+60%	+10%		7.0
+ *
+ *
+ * OpenSolaris OS modifications
+ *
+ * Sun elects to use this software under the BSD license.
+ *
+ * This source originates from OpenSSL file sha1-x86_64.pl at
+ * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz
+ * (presumably for future OpenSSL release 0.9.8h), with these changes:
+ *
+ * 1. Added perl "use strict" and declared variables.
+ *
+ * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
+ * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards.
+ *
+ * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1)
+ * assemblers).
+ *
+ */
+
+/*
+ * This file was generated by a perl script (sha1-x86_64.pl). The comments from
+ * the original file have been pasted above.
+ */
+
+#if defined(lint) || defined(__lint)
+#include <sys/stdint.h>
+#include <sys/sha1.h>
+
+
+/* ARGSUSED */
+void
+sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t blocks)
+{
+}
+
+#else
+#define _ASM
+#include <sys/asm_linkage.h>
+ENTRY_NP(sha1_block_data_order)
+ push %rbx
+ push %rbp
+ push %r12
+ mov %rsp,%rax
+ mov %rdi,%r8 # reassigned argument
+ sub $72,%rsp
+ mov %rsi,%r9 # reassigned argument
+ and $-64,%rsp
+ mov %rdx,%r10 # reassigned argument
+ mov %rax,64(%rsp)
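+
+	# Frame sketch (annotation only): the 16 message words W[0..15]
+	# live at 0..63(%rsp); the caller's %rsp, captured in %rax above,
+	# is kept at 64(%rsp) and restored after the last block.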
+
+ mov 0(%r8),%edx
+ mov 4(%r8),%esi
+ mov 8(%r8),%edi
+ mov 12(%r8),%ebp
+ mov 16(%r8),%r11d
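+
+	# Round-function sketch (annotation only).  Rounds 0-19 compute
+	# F(b,c,d) = (b & c) | (~b & d) via the equivalent ((c ^ d) & b) ^ d,
+	# visible below as the xor/and/xor dance on %ebx; rounds 20-39 and
+	# 60-79 use F = b ^ c ^ d; rounds 40-59 use the majority function
+	# (b & c) | ((b | c) & d).  The lea instructions fold in the round
+	# constants 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc (written as
+	# -0x70e44324) and 0xca62c1d6 (written as -0x359d3e2a).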
+.align 4
+.Lloop:
+ mov 0(%r9),%eax
+ bswap %eax
+ mov %eax,0(%rsp)
+ lea 0x5a827999(%eax,%r11d),%r12d
+ mov %edi,%ebx
+ mov 4(%r9),%eax
+ mov %edx,%r11d
+ xor %ebp,%ebx
+ bswap %eax
+ rol $5,%r11d
+ and %esi,%ebx
+ mov %eax,4(%rsp)
+ add %r11d,%r12d
+ xor %ebp,%ebx
+ rol $30,%esi
+ add %ebx,%r12d
+ lea 0x5a827999(%eax,%ebp),%r11d
+ mov %esi,%ebx
+ mov 8(%r9),%eax
+ mov %r12d,%ebp
+ xor %edi,%ebx
+ bswap %eax
+ rol $5,%ebp
+ and %edx,%ebx
+ mov %eax,8(%rsp)
+ add %ebp,%r11d
+ xor %edi,%ebx
+ rol $30,%edx
+ add %ebx,%r11d
+ lea 0x5a827999(%eax,%edi),%ebp
+ mov %edx,%ebx
+ mov 12(%r9),%eax
+ mov %r11d,%edi
+ xor %esi,%ebx
+ bswap %eax
+ rol $5,%edi
+ and %r12d,%ebx
+ mov %eax,12(%rsp)
+ add %edi,%ebp
+ xor %esi,%ebx
+ rol $30,%r12d
+ add %ebx,%ebp
+ lea 0x5a827999(%eax,%esi),%edi
+ mov %r12d,%ebx
+ mov 16(%r9),%eax
+ mov %ebp,%esi
+ xor %edx,%ebx
+ bswap %eax
+ rol $5,%esi
+ and %r11d,%ebx
+ mov %eax,16(%rsp)
+ add %esi,%edi
+ xor %edx,%ebx
+ rol $30,%r11d
+ add %ebx,%edi
+ lea 0x5a827999(%eax,%edx),%esi
+ mov %r11d,%ebx
+ mov 20(%r9),%eax
+ mov %edi,%edx
+ xor %r12d,%ebx
+ bswap %eax
+ rol $5,%edx
+ and %ebp,%ebx
+ mov %eax,20(%rsp)
+ add %edx,%esi
+ xor %r12d,%ebx
+ rol $30,%ebp
+ add %ebx,%esi
+ lea 0x5a827999(%eax,%r12d),%edx
+ mov %ebp,%ebx
+ mov 24(%r9),%eax
+ mov %esi,%r12d
+ xor %r11d,%ebx
+ bswap %eax
+ rol $5,%r12d
+ and %edi,%ebx
+ mov %eax,24(%rsp)
+ add %r12d,%edx
+ xor %r11d,%ebx
+ rol $30,%edi
+ add %ebx,%edx
+ lea 0x5a827999(%eax,%r11d),%r12d
+ mov %edi,%ebx
+ mov 28(%r9),%eax
+ mov %edx,%r11d
+ xor %ebp,%ebx
+ bswap %eax
+ rol $5,%r11d
+ and %esi,%ebx
+ mov %eax,28(%rsp)
+ add %r11d,%r12d
+ xor %ebp,%ebx
+ rol $30,%esi
+ add %ebx,%r12d
+ lea 0x5a827999(%eax,%ebp),%r11d
+ mov %esi,%ebx
+ mov 32(%r9),%eax
+ mov %r12d,%ebp
+ xor %edi,%ebx
+ bswap %eax
+ rol $5,%ebp
+ and %edx,%ebx
+ mov %eax,32(%rsp)
+ add %ebp,%r11d
+ xor %edi,%ebx
+ rol $30,%edx
+ add %ebx,%r11d
+ lea 0x5a827999(%eax,%edi),%ebp
+ mov %edx,%ebx
+ mov 36(%r9),%eax
+ mov %r11d,%edi
+ xor %esi,%ebx
+ bswap %eax
+ rol $5,%edi
+ and %r12d,%ebx
+ mov %eax,36(%rsp)
+ add %edi,%ebp
+ xor %esi,%ebx
+ rol $30,%r12d
+ add %ebx,%ebp
+ lea 0x5a827999(%eax,%esi),%edi
+ mov %r12d,%ebx
+ mov 40(%r9),%eax
+ mov %ebp,%esi
+ xor %edx,%ebx
+ bswap %eax
+ rol $5,%esi
+ and %r11d,%ebx
+ mov %eax,40(%rsp)
+ add %esi,%edi
+ xor %edx,%ebx
+ rol $30,%r11d
+ add %ebx,%edi
+ lea 0x5a827999(%eax,%edx),%esi
+ mov %r11d,%ebx
+ mov 44(%r9),%eax
+ mov %edi,%edx
+ xor %r12d,%ebx
+ bswap %eax
+ rol $5,%edx
+ and %ebp,%ebx
+ mov %eax,44(%rsp)
+ add %edx,%esi
+ xor %r12d,%ebx
+ rol $30,%ebp
+ add %ebx,%esi
+ lea 0x5a827999(%eax,%r12d),%edx
+ mov %ebp,%ebx
+ mov 48(%r9),%eax
+ mov %esi,%r12d
+ xor %r11d,%ebx
+ bswap %eax
+ rol $5,%r12d
+ and %edi,%ebx
+ mov %eax,48(%rsp)
+ add %r12d,%edx
+ xor %r11d,%ebx
+ rol $30,%edi
+ add %ebx,%edx
+ lea 0x5a827999(%eax,%r11d),%r12d
+ mov %edi,%ebx
+ mov 52(%r9),%eax
+ mov %edx,%r11d
+ xor %ebp,%ebx
+ bswap %eax
+ rol $5,%r11d
+ and %esi,%ebx
+ mov %eax,52(%rsp)
+ add %r11d,%r12d
+ xor %ebp,%ebx
+ rol $30,%esi
+ add %ebx,%r12d
+ lea 0x5a827999(%eax,%ebp),%r11d
+ mov %esi,%ebx
+ mov 56(%r9),%eax
+ mov %r12d,%ebp
+ xor %edi,%ebx
+ bswap %eax
+ rol $5,%ebp
+ and %edx,%ebx
+ mov %eax,56(%rsp)
+ add %ebp,%r11d
+ xor %edi,%ebx
+ rol $30,%edx
+ add %ebx,%r11d
+ lea 0x5a827999(%eax,%edi),%ebp
+ mov %edx,%ebx
+ mov 60(%r9),%eax
+ mov %r11d,%edi
+ xor %esi,%ebx
+ bswap %eax
+ rol $5,%edi
+ and %r12d,%ebx
+ mov %eax,60(%rsp)
+ add %edi,%ebp
+ xor %esi,%ebx
+ rol $30,%r12d
+ add %ebx,%ebp
+ lea 0x5a827999(%eax,%esi),%edi
+ mov 0(%rsp),%eax
+ mov %r12d,%ebx
+ mov %ebp,%esi
+ xor 8(%rsp),%eax
+ xor %edx,%ebx
+ rol $5,%esi
+ xor 32(%rsp),%eax
+ and %r11d,%ebx
+ add %esi,%edi
+ xor 52(%rsp),%eax
+ xor %edx,%ebx
+ rol $30,%r11d
+ add %ebx,%edi
+ rol $1,%eax
+ mov %eax,0(%rsp)
+ lea 0x5a827999(%eax,%edx),%esi
+ mov 4(%rsp),%eax
+ mov %r11d,%ebx
+ mov %edi,%edx
+ xor 12(%rsp),%eax
+ xor %r12d,%ebx
+ rol $5,%edx
+ xor 36(%rsp),%eax
+ and %ebp,%ebx
+ add %edx,%esi
+ xor 56(%rsp),%eax
+ xor %r12d,%ebx
+ rol $30,%ebp
+ add %ebx,%esi
+ rol $1,%eax
+ mov %eax,4(%rsp)
+ lea 0x5a827999(%eax,%r12d),%edx
+ mov 8(%rsp),%eax
+ mov %ebp,%ebx
+ mov %esi,%r12d
+ xor 16(%rsp),%eax
+ xor %r11d,%ebx
+ rol $5,%r12d
+ xor 40(%rsp),%eax
+ and %edi,%ebx
+ add %r12d,%edx
+ xor 60(%rsp),%eax
+ xor %r11d,%ebx
+ rol $30,%edi
+ add %ebx,%edx
+ rol $1,%eax
+ mov %eax,8(%rsp)
+ lea 0x5a827999(%eax,%r11d),%r12d
+ mov 12(%rsp),%eax
+ mov %edi,%ebx
+ mov %edx,%r11d
+ xor 20(%rsp),%eax
+ xor %ebp,%ebx
+ rol $5,%r11d
+ xor 44(%rsp),%eax
+ and %esi,%ebx
+ add %r11d,%r12d
+ xor 0(%rsp),%eax
+ xor %ebp,%ebx
+ rol $30,%esi
+ add %ebx,%r12d
+ rol $1,%eax
+ mov %eax,12(%rsp)
+ lea 0x5a827999(%eax,%ebp),%r11d
+ mov 16(%rsp),%eax
+ mov %esi,%ebx
+ mov %r12d,%ebp
+ xor 24(%rsp),%eax
+ xor %edi,%ebx
+ rol $5,%ebp
+ xor 48(%rsp),%eax
+ and %edx,%ebx
+ add %ebp,%r11d
+ xor 4(%rsp),%eax
+ xor %edi,%ebx
+ rol $30,%edx
+ add %ebx,%r11d
+ rol $1,%eax
+ mov %eax,16(%rsp)
+ lea 0x6ed9eba1(%eax,%edi),%ebp
+ mov 20(%rsp),%eax
+ mov %edx,%ebx
+ mov %r11d,%edi
+ xor 28(%rsp),%eax
+ xor %r12d,%ebx
+ rol $5,%edi
+ xor 52(%rsp),%eax
+ xor %esi,%ebx
+ add %edi,%ebp
+ xor 8(%rsp),%eax
+ rol $30,%r12d
+ add %ebx,%ebp
+ rol $1,%eax
+ mov %eax,20(%rsp)
+ lea 0x6ed9eba1(%eax,%esi),%edi
+ mov 24(%rsp),%eax
+ mov %r12d,%ebx
+ mov %ebp,%esi
+ xor 32(%rsp),%eax
+ xor %r11d,%ebx
+ rol $5,%esi
+ xor 56(%rsp),%eax
+ xor %edx,%ebx
+ add %esi,%edi
+ xor 12(%rsp),%eax
+ rol $30,%r11d
+ add %ebx,%edi
+ rol $1,%eax
+ mov %eax,24(%rsp)
+ lea 0x6ed9eba1(%eax,%edx),%esi
+ mov 28(%rsp),%eax
+ mov %r11d,%ebx
+ mov %edi,%edx
+ xor 36(%rsp),%eax
+ xor %ebp,%ebx
+ rol $5,%edx
+ xor 60(%rsp),%eax
+ xor %r12d,%ebx
+ add %edx,%esi
+ xor 16(%rsp),%eax
+ rol $30,%ebp
+ add %ebx,%esi
+ rol $1,%eax
+ mov %eax,28(%rsp)
+ lea 0x6ed9eba1(%eax,%r12d),%edx
+ mov 32(%rsp),%eax
+ mov %ebp,%ebx
+ mov %esi,%r12d
+ xor 40(%rsp),%eax
+ xor %edi,%ebx
+ rol $5,%r12d
+ xor 0(%rsp),%eax
+ xor %r11d,%ebx
+ add %r12d,%edx
+ xor 20(%rsp),%eax
+ rol $30,%edi
+ add %ebx,%edx
+ rol $1,%eax
+ mov %eax,32(%rsp)
+ lea 0x6ed9eba1(%eax,%r11d),%r12d
+ mov 36(%rsp),%eax
+ mov %edi,%ebx
+ mov %edx,%r11d
+ xor 44(%rsp),%eax
+ xor %esi,%ebx
+ rol $5,%r11d
+ xor 4(%rsp),%eax
+ xor %ebp,%ebx
+ add %r11d,%r12d
+ xor 24(%rsp),%eax
+ rol $30,%esi
+ add %ebx,%r12d
+ rol $1,%eax
+ mov %eax,36(%rsp)
+ lea 0x6ed9eba1(%eax,%ebp),%r11d
+ mov 40(%rsp),%eax
+ mov %esi,%ebx
+ mov %r12d,%ebp
+ xor 48(%rsp),%eax
+ xor %edx,%ebx
+ rol $5,%ebp
+ xor 8(%rsp),%eax
+ xor %edi,%ebx
+ add %ebp,%r11d
+ xor 28(%rsp),%eax
+ rol $30,%edx
+ add %ebx,%r11d
+ rol $1,%eax
+ mov %eax,40(%rsp)
+ lea 0x6ed9eba1(%eax,%edi),%ebp
+ mov 44(%rsp),%eax
+ mov %edx,%ebx
+ mov %r11d,%edi
+ xor 52(%rsp),%eax
+ xor %r12d,%ebx
+ rol $5,%edi
+ xor 12(%rsp),%eax
+ xor %esi,%ebx
+ add %edi,%ebp
+ xor 32(%rsp),%eax
+ rol $30,%r12d
+ add %ebx,%ebp
+ rol $1,%eax
+ mov %eax,44(%rsp)
+ lea 0x6ed9eba1(%eax,%esi),%edi
+ mov 48(%rsp),%eax
+ mov %r12d,%ebx
+ mov %ebp,%esi
+ xor 56(%rsp),%eax
+ xor %r11d,%ebx
+ rol $5,%esi
+ xor 16(%rsp),%eax
+ xor %edx,%ebx
+ add %esi,%edi
+ xor 36(%rsp),%eax
+ rol $30,%r11d
+ add %ebx,%edi
+ rol $1,%eax
+ mov %eax,48(%rsp)
+ lea 0x6ed9eba1(%eax,%edx),%esi
+ mov 52(%rsp),%eax
+ mov %r11d,%ebx
+ mov %edi,%edx
+ xor 60(%rsp),%eax
+ xor %ebp,%ebx
+ rol $5,%edx
+ xor 20(%rsp),%eax
+ xor %r12d,%ebx
+ add %edx,%esi
+ xor 40(%rsp),%eax
+ rol $30,%ebp
+ add %ebx,%esi
+ rol $1,%eax
+ mov %eax,52(%rsp)
+ lea 0x6ed9eba1(%eax,%r12d),%edx
+ mov 56(%rsp),%eax
+ mov %ebp,%ebx
+ mov %esi,%r12d
+ xor 0(%rsp),%eax
+ xor %edi,%ebx
+ rol $5,%r12d
+ xor 24(%rsp),%eax
+ xor %r11d,%ebx
+ add %r12d,%edx
+ xor 44(%rsp),%eax
+ rol $30,%edi
+ add %ebx,%edx
+ rol $1,%eax
+ mov %eax,56(%rsp)
+ lea 0x6ed9eba1(%eax,%r11d),%r12d
+ mov 60(%rsp),%eax
+ mov %edi,%ebx
+ mov %edx,%r11d
+ xor 4(%rsp),%eax
+ xor %esi,%ebx
+ rol $5,%r11d
+ xor 28(%rsp),%eax
+ xor %ebp,%ebx
+ add %r11d,%r12d
+ xor 48(%rsp),%eax
+ rol $30,%esi
+ add %ebx,%r12d
+ rol $1,%eax
+ mov %eax,60(%rsp)
+ lea 0x6ed9eba1(%eax,%ebp),%r11d
+ mov 0(%rsp),%eax
+ mov %esi,%ebx
+ mov %r12d,%ebp
+ xor 8(%rsp),%eax
+ xor %edx,%ebx
+ rol $5,%ebp
+ xor 32(%rsp),%eax
+ xor %edi,%ebx
+ add %ebp,%r11d
+ xor 52(%rsp),%eax
+ rol $30,%edx
+ add %ebx,%r11d
+ rol $1,%eax
+ mov %eax,0(%rsp)
+ lea 0x6ed9eba1(%eax,%edi),%ebp
+ mov 4(%rsp),%eax
+ mov %edx,%ebx
+ mov %r11d,%edi
+ xor 12(%rsp),%eax
+ xor %r12d,%ebx
+ rol $5,%edi
+ xor 36(%rsp),%eax
+ xor %esi,%ebx
+ add %edi,%ebp
+ xor 56(%rsp),%eax
+ rol $30,%r12d
+ add %ebx,%ebp
+ rol $1,%eax
+ mov %eax,4(%rsp)
+ lea 0x6ed9eba1(%eax,%esi),%edi
+ mov 8(%rsp),%eax
+ mov %r12d,%ebx
+ mov %ebp,%esi
+ xor 16(%rsp),%eax
+ xor %r11d,%ebx
+ rol $5,%esi
+ xor 40(%rsp),%eax
+ xor %edx,%ebx
+ add %esi,%edi
+ xor 60(%rsp),%eax
+ rol $30,%r11d
+ add %ebx,%edi
+ rol $1,%eax
+ mov %eax,8(%rsp)
+ lea 0x6ed9eba1(%eax,%edx),%esi
+ mov 12(%rsp),%eax
+ mov %r11d,%ebx
+ mov %edi,%edx
+ xor 20(%rsp),%eax
+ xor %ebp,%ebx
+ rol $5,%edx
+ xor 44(%rsp),%eax
+ xor %r12d,%ebx
+ add %edx,%esi
+ xor 0(%rsp),%eax
+ rol $30,%ebp
+ add %ebx,%esi
+ rol $1,%eax
+ mov %eax,12(%rsp)
+ lea 0x6ed9eba1(%eax,%r12d),%edx
+ mov 16(%rsp),%eax
+ mov %ebp,%ebx
+ mov %esi,%r12d
+ xor 24(%rsp),%eax
+ xor %edi,%ebx
+ rol $5,%r12d
+ xor 48(%rsp),%eax
+ xor %r11d,%ebx
+ add %r12d,%edx
+ xor 4(%rsp),%eax
+ rol $30,%edi
+ add %ebx,%edx
+ rol $1,%eax
+ mov %eax,16(%rsp)
+ lea 0x6ed9eba1(%eax,%r11d),%r12d
+ mov 20(%rsp),%eax
+ mov %edi,%ebx
+ mov %edx,%r11d
+ xor 28(%rsp),%eax
+ xor %esi,%ebx
+ rol $5,%r11d
+ xor 52(%rsp),%eax
+ xor %ebp,%ebx
+ add %r11d,%r12d
+ xor 8(%rsp),%eax
+ rol $30,%esi
+ add %ebx,%r12d
+ rol $1,%eax
+ mov %eax,20(%rsp)
+ lea 0x6ed9eba1(%eax,%ebp),%r11d
+ mov 24(%rsp),%eax
+ mov %esi,%ebx
+ mov %r12d,%ebp
+ xor 32(%rsp),%eax
+ xor %edx,%ebx
+ rol $5,%ebp
+ xor 56(%rsp),%eax
+ xor %edi,%ebx
+ add %ebp,%r11d
+ xor 12(%rsp),%eax
+ rol $30,%edx
+ add %ebx,%r11d
+ rol $1,%eax
+ mov %eax,24(%rsp)
+ lea 0x6ed9eba1(%eax,%edi),%ebp
+ mov 28(%rsp),%eax
+ mov %edx,%ebx
+ mov %r11d,%edi
+ xor 36(%rsp),%eax
+ xor %r12d,%ebx
+ rol $5,%edi
+ xor 60(%rsp),%eax
+ xor %esi,%ebx
+ add %edi,%ebp
+ xor 16(%rsp),%eax
+ rol $30,%r12d
+ add %ebx,%ebp
+ rol $1,%eax
+ mov %eax,28(%rsp)
+ lea 0x6ed9eba1(%eax,%esi),%edi
+ mov 32(%rsp),%eax
+ mov %r12d,%ebx
+ mov %ebp,%esi
+ xor 40(%rsp),%eax
+ xor %r11d,%ebx
+ rol $5,%esi
+ xor 0(%rsp),%eax
+ xor %edx,%ebx
+ add %esi,%edi
+ xor 20(%rsp),%eax
+ rol $30,%r11d
+ add %ebx,%edi
+ rol $1,%eax
+ mov %eax,32(%rsp)
+ lea -0x70e44324(%eax,%edx),%esi
+ mov 36(%rsp),%eax
+ mov %ebp,%ebx
+ mov %ebp,%ecx
+ xor 44(%rsp),%eax
+ mov %edi,%edx
+ and %r11d,%ebx
+ xor 4(%rsp),%eax
+ or %r11d,%ecx
+ rol $5,%edx
+ xor 24(%rsp),%eax
+ and %r12d,%ecx
+ add %edx,%esi
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%ebp
+ mov %eax,36(%rsp)
+ add %ebx,%esi
+ lea -0x70e44324(%eax,%r12d),%edx
+ mov 40(%rsp),%eax
+ mov %edi,%ebx
+ mov %edi,%ecx
+ xor 48(%rsp),%eax
+ mov %esi,%r12d
+ and %ebp,%ebx
+ xor 8(%rsp),%eax
+ or %ebp,%ecx
+ rol $5,%r12d
+ xor 28(%rsp),%eax
+ and %r11d,%ecx
+ add %r12d,%edx
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%edi
+ mov %eax,40(%rsp)
+ add %ebx,%edx
+ lea -0x70e44324(%eax,%r11d),%r12d
+ mov 44(%rsp),%eax
+ mov %esi,%ebx
+ mov %esi,%ecx
+ xor 52(%rsp),%eax
+ mov %edx,%r11d
+ and %edi,%ebx
+ xor 12(%rsp),%eax
+ or %edi,%ecx
+ rol $5,%r11d
+ xor 32(%rsp),%eax
+ and %ebp,%ecx
+ add %r11d,%r12d
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%esi
+ mov %eax,44(%rsp)
+ add %ebx,%r12d
+ lea -0x70e44324(%eax,%ebp),%r11d
+ mov 48(%rsp),%eax
+ mov %edx,%ebx
+ mov %edx,%ecx
+ xor 56(%rsp),%eax
+ mov %r12d,%ebp
+ and %esi,%ebx
+ xor 16(%rsp),%eax
+ or %esi,%ecx
+ rol $5,%ebp
+ xor 36(%rsp),%eax
+ and %edi,%ecx
+ add %ebp,%r11d
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%edx
+ mov %eax,48(%rsp)
+ add %ebx,%r11d
+ lea -0x70e44324(%eax,%edi),%ebp
+ mov 52(%rsp),%eax
+ mov %r12d,%ebx
+ mov %r12d,%ecx
+ xor 60(%rsp),%eax
+ mov %r11d,%edi
+ and %edx,%ebx
+ xor 20(%rsp),%eax
+ or %edx,%ecx
+ rol $5,%edi
+ xor 40(%rsp),%eax
+ and %esi,%ecx
+ add %edi,%ebp
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%r12d
+ mov %eax,52(%rsp)
+ add %ebx,%ebp
+ lea -0x70e44324(%eax,%esi),%edi
+ mov 56(%rsp),%eax
+ mov %r11d,%ebx
+ mov %r11d,%ecx
+ xor 0(%rsp),%eax
+ mov %ebp,%esi
+ and %r12d,%ebx
+ xor 24(%rsp),%eax
+ or %r12d,%ecx
+ rol $5,%esi
+ xor 44(%rsp),%eax
+ and %edx,%ecx
+ add %esi,%edi
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%r11d
+ mov %eax,56(%rsp)
+ add %ebx,%edi
+ lea -0x70e44324(%eax,%edx),%esi
+ mov 60(%rsp),%eax
+ mov %ebp,%ebx
+ mov %ebp,%ecx
+ xor 4(%rsp),%eax
+ mov %edi,%edx
+ and %r11d,%ebx
+ xor 28(%rsp),%eax
+ or %r11d,%ecx
+ rol $5,%edx
+ xor 48(%rsp),%eax
+ and %r12d,%ecx
+ add %edx,%esi
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%ebp
+ mov %eax,60(%rsp)
+ add %ebx,%esi
+ lea -0x70e44324(%eax,%r12d),%edx
+ mov 0(%rsp),%eax
+ mov %edi,%ebx
+ mov %edi,%ecx
+ xor 8(%rsp),%eax
+ mov %esi,%r12d
+ and %ebp,%ebx
+ xor 32(%rsp),%eax
+ or %ebp,%ecx
+ rol $5,%r12d
+ xor 52(%rsp),%eax
+ and %r11d,%ecx
+ add %r12d,%edx
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%edi
+ mov %eax,0(%rsp)
+ add %ebx,%edx
+ lea -0x70e44324(%eax,%r11d),%r12d
+ mov 4(%rsp),%eax
+ mov %esi,%ebx
+ mov %esi,%ecx
+ xor 12(%rsp),%eax
+ mov %edx,%r11d
+ and %edi,%ebx
+ xor 36(%rsp),%eax
+ or %edi,%ecx
+ rol $5,%r11d
+ xor 56(%rsp),%eax
+ and %ebp,%ecx
+ add %r11d,%r12d
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%esi
+ mov %eax,4(%rsp)
+ add %ebx,%r12d
+ lea -0x70e44324(%eax,%ebp),%r11d
+ mov 8(%rsp),%eax
+ mov %edx,%ebx
+ mov %edx,%ecx
+ xor 16(%rsp),%eax
+ mov %r12d,%ebp
+ and %esi,%ebx
+ xor 40(%rsp),%eax
+ or %esi,%ecx
+ rol $5,%ebp
+ xor 60(%rsp),%eax
+ and %edi,%ecx
+ add %ebp,%r11d
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%edx
+ mov %eax,8(%rsp)
+ add %ebx,%r11d
+ lea -0x70e44324(%eax,%edi),%ebp
+ mov 12(%rsp),%eax
+ mov %r12d,%ebx
+ mov %r12d,%ecx
+ xor 20(%rsp),%eax
+ mov %r11d,%edi
+ and %edx,%ebx
+ xor 44(%rsp),%eax
+ or %edx,%ecx
+ rol $5,%edi
+ xor 0(%rsp),%eax
+ and %esi,%ecx
+ add %edi,%ebp
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%r12d
+ mov %eax,12(%rsp)
+ add %ebx,%ebp
+ lea -0x70e44324(%eax,%esi),%edi
+ mov 16(%rsp),%eax
+ mov %r11d,%ebx
+ mov %r11d,%ecx
+ xor 24(%rsp),%eax
+ mov %ebp,%esi
+ and %r12d,%ebx
+ xor 48(%rsp),%eax
+ or %r12d,%ecx
+ rol $5,%esi
+ xor 4(%rsp),%eax
+ and %edx,%ecx
+ add %esi,%edi
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%r11d
+ mov %eax,16(%rsp)
+ add %ebx,%edi
+ lea -0x70e44324(%eax,%edx),%esi
+ mov 20(%rsp),%eax
+ mov %ebp,%ebx
+ mov %ebp,%ecx
+ xor 28(%rsp),%eax
+ mov %edi,%edx
+ and %r11d,%ebx
+ xor 52(%rsp),%eax
+ or %r11d,%ecx
+ rol $5,%edx
+ xor 8(%rsp),%eax
+ and %r12d,%ecx
+ add %edx,%esi
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%ebp
+ mov %eax,20(%rsp)
+ add %ebx,%esi
+ lea -0x70e44324(%eax,%r12d),%edx
+ mov 24(%rsp),%eax
+ mov %edi,%ebx
+ mov %edi,%ecx
+ xor 32(%rsp),%eax
+ mov %esi,%r12d
+ and %ebp,%ebx
+ xor 56(%rsp),%eax
+ or %ebp,%ecx
+ rol $5,%r12d
+ xor 12(%rsp),%eax
+ and %r11d,%ecx
+ add %r12d,%edx
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%edi
+ mov %eax,24(%rsp)
+ add %ebx,%edx
+ lea -0x70e44324(%eax,%r11d),%r12d
+ mov 28(%rsp),%eax
+ mov %esi,%ebx
+ mov %esi,%ecx
+ xor 36(%rsp),%eax
+ mov %edx,%r11d
+ and %edi,%ebx
+ xor 60(%rsp),%eax
+ or %edi,%ecx
+ rol $5,%r11d
+ xor 16(%rsp),%eax
+ and %ebp,%ecx
+ add %r11d,%r12d
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%esi
+ mov %eax,28(%rsp)
+ add %ebx,%r12d
+ lea -0x70e44324(%eax,%ebp),%r11d
+ mov 32(%rsp),%eax
+ mov %edx,%ebx
+ mov %edx,%ecx
+ xor 40(%rsp),%eax
+ mov %r12d,%ebp
+ and %esi,%ebx
+ xor 0(%rsp),%eax
+ or %esi,%ecx
+ rol $5,%ebp
+ xor 20(%rsp),%eax
+ and %edi,%ecx
+ add %ebp,%r11d
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%edx
+ mov %eax,32(%rsp)
+ add %ebx,%r11d
+ lea -0x70e44324(%eax,%edi),%ebp
+ mov 36(%rsp),%eax
+ mov %r12d,%ebx
+ mov %r12d,%ecx
+ xor 44(%rsp),%eax
+ mov %r11d,%edi
+ and %edx,%ebx
+ xor 4(%rsp),%eax
+ or %edx,%ecx
+ rol $5,%edi
+ xor 24(%rsp),%eax
+ and %esi,%ecx
+ add %edi,%ebp
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%r12d
+ mov %eax,36(%rsp)
+ add %ebx,%ebp
+ lea -0x70e44324(%eax,%esi),%edi
+ mov 40(%rsp),%eax
+ mov %r11d,%ebx
+ mov %r11d,%ecx
+ xor 48(%rsp),%eax
+ mov %ebp,%esi
+ and %r12d,%ebx
+ xor 8(%rsp),%eax
+ or %r12d,%ecx
+ rol $5,%esi
+ xor 28(%rsp),%eax
+ and %edx,%ecx
+ add %esi,%edi
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%r11d
+ mov %eax,40(%rsp)
+ add %ebx,%edi
+ lea -0x70e44324(%eax,%edx),%esi
+ mov 44(%rsp),%eax
+ mov %ebp,%ebx
+ mov %ebp,%ecx
+ xor 52(%rsp),%eax
+ mov %edi,%edx
+ and %r11d,%ebx
+ xor 12(%rsp),%eax
+ or %r11d,%ecx
+ rol $5,%edx
+ xor 32(%rsp),%eax
+ and %r12d,%ecx
+ add %edx,%esi
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%ebp
+ mov %eax,44(%rsp)
+ add %ebx,%esi
+ lea -0x70e44324(%eax,%r12d),%edx
+ mov 48(%rsp),%eax
+ mov %edi,%ebx
+ mov %edi,%ecx
+ xor 56(%rsp),%eax
+ mov %esi,%r12d
+ and %ebp,%ebx
+ xor 16(%rsp),%eax
+ or %ebp,%ecx
+ rol $5,%r12d
+ xor 36(%rsp),%eax
+ and %r11d,%ecx
+ add %r12d,%edx
+ rol $1,%eax
+ or %ecx,%ebx
+ rol $30,%edi
+ mov %eax,48(%rsp)
+ add %ebx,%edx
+ lea -0x359d3e2a(%eax,%r11d),%r12d
+ mov 52(%rsp),%eax
+ mov %edi,%ebx
+ mov %edx,%r11d
+ xor 60(%rsp),%eax
+ xor %esi,%ebx
+ rol $5,%r11d
+ xor 20(%rsp),%eax
+ xor %ebp,%ebx
+ add %r11d,%r12d
+ xor 40(%rsp),%eax
+ rol $30,%esi
+ add %ebx,%r12d
+ rol $1,%eax
+ mov %eax,52(%rsp)
+ lea -0x359d3e2a(%eax,%ebp),%r11d
+ mov 56(%rsp),%eax
+ mov %esi,%ebx
+ mov %r12d,%ebp
+ xor 0(%rsp),%eax
+ xor %edx,%ebx
+ rol $5,%ebp
+ xor 24(%rsp),%eax
+ xor %edi,%ebx
+ add %ebp,%r11d
+ xor 44(%rsp),%eax
+ rol $30,%edx
+ add %ebx,%r11d
+ rol $1,%eax
+ mov %eax,56(%rsp)
+ lea -0x359d3e2a(%eax,%edi),%ebp
+ mov 60(%rsp),%eax
+ mov %edx,%ebx
+ mov %r11d,%edi
+ xor 4(%rsp),%eax
+ xor %r12d,%ebx
+ rol $5,%edi
+ xor 28(%rsp),%eax
+ xor %esi,%ebx
+ add %edi,%ebp
+ xor 48(%rsp),%eax
+ rol $30,%r12d
+ add %ebx,%ebp
+ rol $1,%eax
+ mov %eax,60(%rsp)
+ lea -0x359d3e2a(%eax,%esi),%edi
+ mov 0(%rsp),%eax
+ mov %r12d,%ebx
+ mov %ebp,%esi
+ xor 8(%rsp),%eax
+ xor %r11d,%ebx
+ rol $5,%esi
+ xor 32(%rsp),%eax
+ xor %edx,%ebx
+ add %esi,%edi
+ xor 52(%rsp),%eax
+ rol $30,%r11d
+ add %ebx,%edi
+ rol $1,%eax
+ mov %eax,0(%rsp)
+ lea -0x359d3e2a(%eax,%edx),%esi
+ mov 4(%rsp),%eax
+ mov %r11d,%ebx
+ mov %edi,%edx
+ xor 12(%rsp),%eax
+ xor %ebp,%ebx
+ rol $5,%edx
+ xor 36(%rsp),%eax
+ xor %r12d,%ebx
+ add %edx,%esi
+ xor 56(%rsp),%eax
+ rol $30,%ebp
+ add %ebx,%esi
+ rol $1,%eax
+ mov %eax,4(%rsp)
+ lea -0x359d3e2a(%eax,%r12d),%edx
+ mov 8(%rsp),%eax
+ mov %ebp,%ebx
+ mov %esi,%r12d
+ xor 16(%rsp),%eax
+ xor %edi,%ebx
+ rol $5,%r12d
+ xor 40(%rsp),%eax
+ xor %r11d,%ebx
+ add %r12d,%edx
+ xor 60(%rsp),%eax
+ rol $30,%edi
+ add %ebx,%edx
+ rol $1,%eax
+ mov %eax,8(%rsp)
+ lea -0x359d3e2a(%eax,%r11d),%r12d
+ mov 12(%rsp),%eax
+ mov %edi,%ebx
+ mov %edx,%r11d
+ xor 20(%rsp),%eax
+ xor %esi,%ebx
+ rol $5,%r11d
+ xor 44(%rsp),%eax
+ xor %ebp,%ebx
+ add %r11d,%r12d
+ xor 0(%rsp),%eax
+ rol $30,%esi
+ add %ebx,%r12d
+ rol $1,%eax
+ mov %eax,12(%rsp)
+ lea -0x359d3e2a(%eax,%ebp),%r11d
+ mov 16(%rsp),%eax
+ mov %esi,%ebx
+ mov %r12d,%ebp
+ xor 24(%rsp),%eax
+ xor %edx,%ebx
+ rol $5,%ebp
+ xor 48(%rsp),%eax
+ xor %edi,%ebx
+ add %ebp,%r11d
+ xor 4(%rsp),%eax
+ rol $30,%edx
+ add %ebx,%r11d
+ rol $1,%eax
+ mov %eax,16(%rsp)
+ lea -0x359d3e2a(%eax,%edi),%ebp
+ mov 20(%rsp),%eax
+ mov %edx,%ebx
+ mov %r11d,%edi
+ xor 28(%rsp),%eax
+ xor %r12d,%ebx
+ rol $5,%edi
+ xor 52(%rsp),%eax
+ xor %esi,%ebx
+ add %edi,%ebp
+ xor 8(%rsp),%eax
+ rol $30,%r12d
+ add %ebx,%ebp
+ rol $1,%eax
+ mov %eax,20(%rsp)
+ lea -0x359d3e2a(%eax,%esi),%edi
+ mov 24(%rsp),%eax
+ mov %r12d,%ebx
+ mov %ebp,%esi
+ xor 32(%rsp),%eax
+ xor %r11d,%ebx
+ rol $5,%esi
+ xor 56(%rsp),%eax
+ xor %edx,%ebx
+ add %esi,%edi
+ xor 12(%rsp),%eax
+ rol $30,%r11d
+ add %ebx,%edi
+ rol $1,%eax
+ mov %eax,24(%rsp)
+ lea -0x359d3e2a(%eax,%edx),%esi
+ mov 28(%rsp),%eax
+ mov %r11d,%ebx
+ mov %edi,%edx
+ xor 36(%rsp),%eax
+ xor %ebp,%ebx
+ rol $5,%edx
+ xor 60(%rsp),%eax
+ xor %r12d,%ebx
+ add %edx,%esi
+ xor 16(%rsp),%eax
+ rol $30,%ebp
+ add %ebx,%esi
+ rol $1,%eax
+ mov %eax,28(%rsp)
+ lea -0x359d3e2a(%eax,%r12d),%edx
+ mov 32(%rsp),%eax
+ mov %ebp,%ebx
+ mov %esi,%r12d
+ xor 40(%rsp),%eax
+ xor %edi,%ebx
+ rol $5,%r12d
+ xor 0(%rsp),%eax
+ xor %r11d,%ebx
+ add %r12d,%edx
+ xor 20(%rsp),%eax
+ rol $30,%edi
+ add %ebx,%edx
+ rol $1,%eax
+ mov %eax,32(%rsp)
+ lea -0x359d3e2a(%eax,%r11d),%r12d
+ mov 36(%rsp),%eax
+ mov %edi,%ebx
+ mov %edx,%r11d
+ xor 44(%rsp),%eax
+ xor %esi,%ebx
+ rol $5,%r11d
+ xor 4(%rsp),%eax
+ xor %ebp,%ebx
+ add %r11d,%r12d
+ xor 24(%rsp),%eax
+ rol $30,%esi
+ add %ebx,%r12d
+ rol $1,%eax
+ mov %eax,36(%rsp)
+ lea -0x359d3e2a(%eax,%ebp),%r11d
+ mov 40(%rsp),%eax
+ mov %esi,%ebx
+ mov %r12d,%ebp
+ xor 48(%rsp),%eax
+ xor %edx,%ebx
+ rol $5,%ebp
+ xor 8(%rsp),%eax
+ xor %edi,%ebx
+ add %ebp,%r11d
+ xor 28(%rsp),%eax
+ rol $30,%edx
+ add %ebx,%r11d
+ rol $1,%eax
+ mov %eax,40(%rsp)
+ lea -0x359d3e2a(%eax,%edi),%ebp
+ mov 44(%rsp),%eax
+ mov %edx,%ebx
+ mov %r11d,%edi
+ xor 52(%rsp),%eax
+ xor %r12d,%ebx
+ rol $5,%edi
+ xor 12(%rsp),%eax
+ xor %esi,%ebx
+ add %edi,%ebp
+ xor 32(%rsp),%eax
+ rol $30,%r12d
+ add %ebx,%ebp
+ rol $1,%eax
+ mov %eax,44(%rsp)
+ lea -0x359d3e2a(%eax,%esi),%edi
+ mov 48(%rsp),%eax
+ mov %r12d,%ebx
+ mov %ebp,%esi
+ xor 56(%rsp),%eax
+ xor %r11d,%ebx
+ rol $5,%esi
+ xor 16(%rsp),%eax
+ xor %edx,%ebx
+ add %esi,%edi
+ xor 36(%rsp),%eax
+ rol $30,%r11d
+ add %ebx,%edi
+ rol $1,%eax
+ mov %eax,48(%rsp)
+ lea -0x359d3e2a(%eax,%edx),%esi
+ mov 52(%rsp),%eax
+ mov %r11d,%ebx
+ mov %edi,%edx
+ xor 60(%rsp),%eax
+ xor %ebp,%ebx
+ rol $5,%edx
+ xor 20(%rsp),%eax
+ xor %r12d,%ebx
+ add %edx,%esi
+ xor 40(%rsp),%eax
+ rol $30,%ebp
+ add %ebx,%esi
+ rol $1,%eax
+ lea -0x359d3e2a(%eax,%r12d),%edx
+ mov 56(%rsp),%eax
+ mov %ebp,%ebx
+ mov %esi,%r12d
+ xor 0(%rsp),%eax
+ xor %edi,%ebx
+ rol $5,%r12d
+ xor 24(%rsp),%eax
+ xor %r11d,%ebx
+ add %r12d,%edx
+ xor 44(%rsp),%eax
+ rol $30,%edi
+ add %ebx,%edx
+ rol $1,%eax
+ lea -0x359d3e2a(%eax,%r11d),%r12d
+ mov 60(%rsp),%eax
+ mov %edi,%ebx
+ mov %edx,%r11d
+ xor 4(%rsp),%eax
+ xor %esi,%ebx
+ rol $5,%r11d
+ xor 28(%rsp),%eax
+ xor %ebp,%ebx
+ add %r11d,%r12d
+ xor 48(%rsp),%eax
+ rol $30,%esi
+ add %ebx,%r12d
+ rol $1,%eax
+ lea -0x359d3e2a(%eax,%ebp),%r11d
+ mov %esi,%ebx
+ mov %r12d,%ebp
+ xor %edx,%ebx
+ rol $5,%ebp
+ xor %edi,%ebx
+ add %ebp,%r11d
+ rol $30,%edx
+ add %ebx,%r11d
+ // Update and save state information in SHA-1 context
+ add 0(%r8),%r11d
+ add 4(%r8),%r12d
+ add 8(%r8),%edx
+ add 12(%r8),%esi
+ add 16(%r8),%edi
+ mov %r11d,0(%r8)
+ mov %r12d,4(%r8)
+ mov %edx,8(%r8)
+ mov %esi,12(%r8)
+ mov %edi,16(%r8)
+
+ xchg %r11d,%edx # mov %r11d,%edx
+ xchg %r12d,%esi # mov %r12d,%esi
+ xchg %r11d,%edi # mov %edx,%edi
+ xchg %r12d,%ebp # mov %esi,%ebp
+ # mov %edi,%r11d
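+	# The xchg chain above rotates the updated hash values back into
+	# the registers the top of .Lloop expects (a=%edx, b=%esi, c=%edi,
+	# d=%ebp, e=%r11d); the trailing comments give the equivalent mov
+	# sequence.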
+ lea 64(%r9),%r9
+ sub $1,%r10
+ jnz .Lloop
+ mov 64(%rsp),%rsp
+ pop %r12
+ pop %rbp
+ pop %rbx
+ ret
+SET_SIZE(sha1_block_data_order)
+
+.data
+.asciz "SHA1 block transform for x86_64, CRYPTOGAMS by <appro at openssl.org>"
+
+#endif /* lint || __lint */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/zfs/module/icp/asm-x86_64/sha2/sha256_impl.S b/zfs/module/icp/asm-x86_64/sha2/sha256_impl.S
new file mode 100644
index 000000000000..766b75355f0b
--- /dev/null
+++ b/zfs/module/icp/asm-x86_64/sha2/sha256_impl.S
@@ -0,0 +1,2063 @@
+/*
+ * ====================================================================
+ * Written by Andy Polyakov <appro at fy.chalmers.se> for the OpenSSL
+ * project. Rights for redistribution and usage in source and binary
+ * forms are granted according to the OpenSSL license.
+ * ====================================================================
+ *
+ * sha256/512_block procedure for x86_64.
+ *
+ * 40% improvement over compiler-generated code on Opteron. On EM64T
+ * sha256 was observed to run >80% faster and sha512 >40% faster. No
+ * magical tricks, just straight implementation... I really wonder why
+ * gcc [being armed with inline assembler] fails to generate code as
+ * fast. The only genuinely cool thing about this module is that the
+ * very same instruction sequence is used for both SHA-256 and SHA-512.
+ * In the former case the instructions operate on 32-bit operands, in
+ * the latter on 64-bit ones. All I had to do was get one flavor right;
+ * the other one passed the test right away:-)
+ *
+ * sha256_block runs in ~1005 cycles on Opteron, which gives you
+ * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock
+ * frequency in GHz. sha512_block runs in ~1275 cycles, which results
+ * in 128*1000/1275=100MBps per GHz. Is there room for improvement?
+ * Well, if you compare it to the IA-64 implementation, which maintains
+ * X[16] in the register bank[!], approaches 4 instructions per CPU
+ * clock cycle and runs in 1003 cycles, then 1275 is a very good result
+ * for the 3-way issue Opteron pipeline with X[16] maintained in memory.
+ * So *if* there is a way to improve it, *then* the only way would be to
+ * try to offload the X[16] updates to the SSE unit, but that would
+ * require a "deeper" loop unroll, which in turn would naturally cause
+ * size blow-up, not to mention increased complexity! And once again,
+ * only *if* it is actually possible to noticeably improve overall ILP
+ * (instruction-level parallelism) on a given CPU implementation.
+ *
+ * Special note on Intel EM64T. While the Opteron CPU exhibits a perfect
+ * performance ratio of 1.5 between the 64- and 32-bit flavors [see
+ * above], [currently available] EM64T CPUs are apparently far from it.
+ * On the contrary, the 64-bit version, sha512_block, is ~30% *slower*
+ * than the 32-bit sha256_block:-( This is presumably because 64-bit
+ * shifts/rotates are apparently not atomic instructions but are
+ * implemented in microcode.
+ */
+
+/*
+ * OpenSolaris OS modifications
+ *
+ * Sun elects to use this software under the BSD license.
+ *
+ * This source originates from OpenSSL file sha512-x86_64.pl at
+ * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz
+ * (presumably for future OpenSSL release 0.9.8h), with these changes:
+ *
+ * 1. Added perl "use strict" and declared variables.
+ *
+ * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
+ * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards.
+ *
+ * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1)
+ * assemblers). Replaced the .picmeup macro with assembler code.
+ *
+ * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype",
+ * at the beginning of SHA2_CTX (the next field is 8-byte aligned).
+ */
+
+/*
+ * This file was generated by a perl script (sha512-x86_64.pl) that was
+ * used to generate the sha256 and sha512 variants from the same code base.
+ * The comments from the original file have been pasted above.
+ */
+
+#if defined(lint) || defined(__lint)
+#include <sys/stdint.h>
+#include <sha2/sha2.h>
+
+/* ARGSUSED */
+void
+SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
+{
+}
+
+
+#else
+#define _ASM
+#include <sys/asm_linkage.h>
+
+ENTRY_NP(SHA256TransformBlocks)
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ mov %rsp,%rbp # copy %rsp
+ shl $4,%rdx # num*16
+ sub $16*4+4*8,%rsp
+ lea (%rsi,%rdx,4),%rdx # inp+num*16*4
+ and $-64,%rsp # align stack frame
+ add $8,%rdi # Skip OpenSolaris field, "algotype"
+ mov %rdi,16*4+0*8(%rsp) # save ctx, 1st arg
+ mov %rsi,16*4+1*8(%rsp) # save inp, 2nd arg
+ mov %rdx,16*4+2*8(%rsp) # save end pointer, "3rd" arg
+ mov %rbp,16*4+3*8(%rsp) # save copy of %rsp
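+
+	# Frame sketch (annotation only): the schedule W[0..15] occupies
+	# 0..63(%rsp); ctx, inp, the input end pointer and the saved %rsp
+	# copy follow at 16*4(%rsp) through 16*4+3*8(%rsp).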
+
+ #.picmeup %rbp
+ # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts
+ # the address of the "next" instruction into the target register
+	# (%rbp).  Here it expands to the following 2 instructions:
+ lea .Llea(%rip),%rbp
+ #nop # .picmeup generates a nop for mod 8 alignment--not needed here
+
+.Llea:
+ lea K256-.(%rbp),%rbp
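+
+	# Net effect of the two lea instructions above: %rbp = &K256,
+	# computed PC-relative so no absolute relocation is needed; the
+	# round code then indexes the constant table as (%rbp,%rdi,4),
+	# i.e. K256[round].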
+
+ mov 4*0(%rdi),%eax
+ mov 4*1(%rdi),%ebx
+ mov 4*2(%rdi),%ecx
+ mov 4*3(%rdi),%edx
+ mov 4*4(%rdi),%r8d
+ mov 4*5(%rdi),%r9d
+ mov 4*6(%rdi),%r10d
+ mov 4*7(%rdi),%r11d
+ jmp .Lloop
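+
+	# One-round sketch in C (annotation only), matching the per-round
+	# comments below:
+	#	T1 = h + Sigma1(e) + Ch(e,f,g) + K[i] + W[i];
+	#	T2 = Sigma0(a) + Maj(a,b,c);
+	#	h = g; g = f; f = e; e = d + T1;
+	#	d = c; c = b; b = a; a = T1 + T2;
+	# where Sigma1(e) = ror(e,6) ^ ror(e,11) ^ ror(e,25) and
+	# Sigma0(a) = ror(a,2) ^ ror(a,13) ^ ror(a,22).  The generated
+	# code renames registers each round instead of shuffling values.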
+
+.align 16
+.Lloop:
+ xor %rdi,%rdi
+ mov 4*0(%rsi),%r12d
+ bswap %r12d
+ mov %r8d,%r13d
+ mov %r8d,%r14d
+ mov %r9d,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %r10d,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %r8d,%r15d # (f^g)&e
+ mov %r12d,0(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %r11d,%r12d # T1+=h
+
+ mov %eax,%r11d
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %eax,%r13d
+ mov %eax,%r14d
+
+ ror $2,%r11d
+ ror $13,%r13d
+ mov %eax,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%r11d
+ ror $9,%r13d
+ or %ecx,%r14d # a|c
+
+ xor %r13d,%r11d # h=Sigma0(a)
+ and %ecx,%r15d # a&c
+ add %r12d,%edx # d+=T1
+
+ and %ebx,%r14d # (a|c)&b
+ add %r12d,%r11d # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%r11d # h+=Maj(a,b,c)
+ mov 4*1(%rsi),%r12d
+ bswap %r12d
+ mov %edx,%r13d
+ mov %edx,%r14d
+ mov %r8d,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %r9d,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %edx,%r15d # (f^g)&e
+ mov %r12d,4(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %r10d,%r12d # T1+=h
+
+ mov %r11d,%r10d
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %r11d,%r13d
+ mov %r11d,%r14d
+
+ ror $2,%r10d
+ ror $13,%r13d
+ mov %r11d,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%r10d
+ ror $9,%r13d
+ or %ebx,%r14d # a|c
+
+ xor %r13d,%r10d # h=Sigma0(a)
+ and %ebx,%r15d # a&c
+ add %r12d,%ecx # d+=T1
+
+ and %eax,%r14d # (a|c)&b
+ add %r12d,%r10d # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%r10d # h+=Maj(a,b,c)
+ mov 4*2(%rsi),%r12d
+ bswap %r12d
+ mov %ecx,%r13d
+ mov %ecx,%r14d
+ mov %edx,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %r8d,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %ecx,%r15d # (f^g)&e
+ mov %r12d,8(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %r9d,%r12d # T1+=h
+
+ mov %r10d,%r9d
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %r10d,%r13d
+ mov %r10d,%r14d
+
+ ror $2,%r9d
+ ror $13,%r13d
+ mov %r10d,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%r9d
+ ror $9,%r13d
+ or %eax,%r14d # a|c
+
+ xor %r13d,%r9d # h=Sigma0(a)
+ and %eax,%r15d # a&c
+ add %r12d,%ebx # d+=T1
+
+ and %r11d,%r14d # (a|c)&b
+ add %r12d,%r9d # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%r9d # h+=Maj(a,b,c)
+ mov 4*3(%rsi),%r12d
+ bswap %r12d
+ mov %ebx,%r13d
+ mov %ebx,%r14d
+ mov %ecx,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %edx,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %ebx,%r15d # (f^g)&e
+ mov %r12d,12(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %r8d,%r12d # T1+=h
+
+ mov %r9d,%r8d
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %r9d,%r13d
+ mov %r9d,%r14d
+
+ ror $2,%r8d
+ ror $13,%r13d
+ mov %r9d,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%r8d
+ ror $9,%r13d
+ or %r11d,%r14d # a|c
+
+ xor %r13d,%r8d # h=Sigma0(a)
+ and %r11d,%r15d # a&c
+ add %r12d,%eax # d+=T1
+
+ and %r10d,%r14d # (a|c)&b
+ add %r12d,%r8d # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%r8d # h+=Maj(a,b,c)
+ mov 4*4(%rsi),%r12d
+ bswap %r12d
+ mov %eax,%r13d
+ mov %eax,%r14d
+ mov %ebx,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %ecx,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %eax,%r15d # (f^g)&e
+ mov %r12d,16(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %edx,%r12d # T1+=h
+
+ mov %r8d,%edx
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %r8d,%r13d
+ mov %r8d,%r14d
+
+ ror $2,%edx
+ ror $13,%r13d
+ mov %r8d,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%edx
+ ror $9,%r13d
+ or %r10d,%r14d # a|c
+
+ xor %r13d,%edx # h=Sigma0(a)
+ and %r10d,%r15d # a&c
+ add %r12d,%r11d # d+=T1
+
+ and %r9d,%r14d # (a|c)&b
+ add %r12d,%edx # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%edx # h+=Maj(a,b,c)
+ mov 4*5(%rsi),%r12d
+ bswap %r12d
+ mov %r11d,%r13d
+ mov %r11d,%r14d
+ mov %eax,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %ebx,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %r11d,%r15d # (f^g)&e
+ mov %r12d,20(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %ecx,%r12d # T1+=h
+
+ mov %edx,%ecx
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %edx,%r13d
+ mov %edx,%r14d
+
+ ror $2,%ecx
+ ror $13,%r13d
+ mov %edx,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%ecx
+ ror $9,%r13d
+ or %r9d,%r14d # a|c
+
+ xor %r13d,%ecx # h=Sigma0(a)
+ and %r9d,%r15d # a&c
+ add %r12d,%r10d # d+=T1
+
+ and %r8d,%r14d # (a|c)&b
+ add %r12d,%ecx # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%ecx # h+=Maj(a,b,c)
+ mov 4*6(%rsi),%r12d
+ bswap %r12d
+ mov %r10d,%r13d
+ mov %r10d,%r14d
+ mov %r11d,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %eax,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %r10d,%r15d # (f^g)&e
+ mov %r12d,24(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %ebx,%r12d # T1+=h
+
+ mov %ecx,%ebx
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %ecx,%r13d
+ mov %ecx,%r14d
+
+ ror $2,%ebx
+ ror $13,%r13d
+ mov %ecx,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%ebx
+ ror $9,%r13d
+ or %r8d,%r14d # a|c
+
+ xor %r13d,%ebx # h=Sigma0(a)
+ and %r8d,%r15d # a&c
+ add %r12d,%r9d # d+=T1
+
+ and %edx,%r14d # (a|c)&b
+ add %r12d,%ebx # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%ebx # h+=Maj(a,b,c)
+ mov 4*7(%rsi),%r12d
+ bswap %r12d
+ mov %r9d,%r13d
+ mov %r9d,%r14d
+ mov %r10d,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %r11d,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %r9d,%r15d # (f^g)&e
+ mov %r12d,28(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %eax,%r12d # T1+=h
+
+ mov %ebx,%eax
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %ebx,%r13d
+ mov %ebx,%r14d
+
+ ror $2,%eax
+ ror $13,%r13d
+ mov %ebx,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%eax
+ ror $9,%r13d
+ or %edx,%r14d # a|c
+
+ xor %r13d,%eax # h=Sigma0(a)
+ and %edx,%r15d # a&c
+ add %r12d,%r8d # d+=T1
+
+ and %ecx,%r14d # (a|c)&b
+ add %r12d,%eax # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%eax # h+=Maj(a,b,c)
+ mov 4*8(%rsi),%r12d
+ bswap %r12d
+ mov %r8d,%r13d
+ mov %r8d,%r14d
+ mov %r9d,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %r10d,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %r8d,%r15d # (f^g)&e
+ mov %r12d,32(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %r11d,%r12d # T1+=h
+
+ mov %eax,%r11d
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %eax,%r13d
+ mov %eax,%r14d
+
+ ror $2,%r11d
+ ror $13,%r13d
+ mov %eax,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%r11d
+ ror $9,%r13d
+ or %ecx,%r14d # a|c
+
+ xor %r13d,%r11d # h=Sigma0(a)
+ and %ecx,%r15d # a&c
+ add %r12d,%edx # d+=T1
+
+ and %ebx,%r14d # (a|c)&b
+ add %r12d,%r11d # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%r11d # h+=Maj(a,b,c)
+ mov 4*9(%rsi),%r12d
+ bswap %r12d
+ mov %edx,%r13d
+ mov %edx,%r14d
+ mov %r8d,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %r9d,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %edx,%r15d # (f^g)&e
+ mov %r12d,36(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %r10d,%r12d # T1+=h
+
+ mov %r11d,%r10d
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %r11d,%r13d
+ mov %r11d,%r14d
+
+ ror $2,%r10d
+ ror $13,%r13d
+ mov %r11d,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%r10d
+ ror $9,%r13d
+ or %ebx,%r14d # a|c
+
+ xor %r13d,%r10d # h=Sigma0(a)
+ and %ebx,%r15d # a&c
+ add %r12d,%ecx # d+=T1
+
+ and %eax,%r14d # (a|c)&b
+ add %r12d,%r10d # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%r10d # h+=Maj(a,b,c)
+ mov 4*10(%rsi),%r12d
+ bswap %r12d
+ mov %ecx,%r13d
+ mov %ecx,%r14d
+ mov %edx,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %r8d,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %ecx,%r15d # (f^g)&e
+ mov %r12d,40(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %r9d,%r12d # T1+=h
+
+ mov %r10d,%r9d
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %r10d,%r13d
+ mov %r10d,%r14d
+
+ ror $2,%r9d
+ ror $13,%r13d
+ mov %r10d,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%r9d
+ ror $9,%r13d
+ or %eax,%r14d # a|c
+
+ xor %r13d,%r9d # h=Sigma0(a)
+ and %eax,%r15d # a&c
+ add %r12d,%ebx # d+=T1
+
+ and %r11d,%r14d # (a|c)&b
+ add %r12d,%r9d # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%r9d # h+=Maj(a,b,c)
+ mov 4*11(%rsi),%r12d
+ bswap %r12d
+ mov %ebx,%r13d
+ mov %ebx,%r14d
+ mov %ecx,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %edx,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %ebx,%r15d # (f^g)&e
+ mov %r12d,44(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %r8d,%r12d # T1+=h
+
+ mov %r9d,%r8d
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %r9d,%r13d
+ mov %r9d,%r14d
+
+ ror $2,%r8d
+ ror $13,%r13d
+ mov %r9d,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%r8d
+ ror $9,%r13d
+ or %r11d,%r14d # a|c
+
+ xor %r13d,%r8d # h=Sigma0(a)
+ and %r11d,%r15d # a&c
+ add %r12d,%eax # d+=T1
+
+ and %r10d,%r14d # (a|c)&b
+ add %r12d,%r8d # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%r8d # h+=Maj(a,b,c)
+ mov 4*12(%rsi),%r12d
+ bswap %r12d
+ mov %eax,%r13d
+ mov %eax,%r14d
+ mov %ebx,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %ecx,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %eax,%r15d # (f^g)&e
+ mov %r12d,48(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %edx,%r12d # T1+=h
+
+ mov %r8d,%edx
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %r8d,%r13d
+ mov %r8d,%r14d
+
+ ror $2,%edx
+ ror $13,%r13d
+ mov %r8d,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%edx
+ ror $9,%r13d
+ or %r10d,%r14d # a|c
+
+ xor %r13d,%edx # h=Sigma0(a)
+ and %r10d,%r15d # a&c
+ add %r12d,%r11d # d+=T1
+
+ and %r9d,%r14d # (a|c)&b
+ add %r12d,%edx # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%edx # h+=Maj(a,b,c)
+ mov 4*13(%rsi),%r12d
+ bswap %r12d
+ mov %r11d,%r13d
+ mov %r11d,%r14d
+ mov %eax,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %ebx,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %r11d,%r15d # (f^g)&e
+ mov %r12d,52(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %ecx,%r12d # T1+=h
+
+ mov %edx,%ecx
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %edx,%r13d
+ mov %edx,%r14d
+
+ ror $2,%ecx
+ ror $13,%r13d
+ mov %edx,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%ecx
+ ror $9,%r13d
+ or %r9d,%r14d # a|c
+
+ xor %r13d,%ecx # h=Sigma0(a)
+ and %r9d,%r15d # a&c
+ add %r12d,%r10d # d+=T1
+
+ and %r8d,%r14d # (a|c)&b
+ add %r12d,%ecx # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%ecx # h+=Maj(a,b,c)
+ mov 4*14(%rsi),%r12d
+ bswap %r12d
+ mov %r10d,%r13d
+ mov %r10d,%r14d
+ mov %r11d,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %eax,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %r10d,%r15d # (f^g)&e
+ mov %r12d,56(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %ebx,%r12d # T1+=h
+
+ mov %ecx,%ebx
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %ecx,%r13d
+ mov %ecx,%r14d
+
+ ror $2,%ebx
+ ror $13,%r13d
+ mov %ecx,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%ebx
+ ror $9,%r13d
+ or %r8d,%r14d # a|c
+
+ xor %r13d,%ebx # h=Sigma0(a)
+ and %r8d,%r15d # a&c
+ add %r12d,%r9d # d+=T1
+
+ and %edx,%r14d # (a|c)&b
+ add %r12d,%ebx # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%ebx # h+=Maj(a,b,c)
+ mov 4*15(%rsi),%r12d
+ bswap %r12d
+ mov %r9d,%r13d
+ mov %r9d,%r14d
+ mov %r10d,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %r11d,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %r9d,%r15d # (f^g)&e
+ mov %r12d,60(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %eax,%r12d # T1+=h
+
+ mov %ebx,%eax
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %ebx,%r13d
+ mov %ebx,%r14d
+
+ ror $2,%eax
+ ror $13,%r13d
+ mov %ebx,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%eax
+ ror $9,%r13d
+ or %edx,%r14d # a|c
+
+ xor %r13d,%eax # h=Sigma0(a)
+ and %edx,%r15d # a&c
+ add %r12d,%r8d # d+=T1
+
+ and %ecx,%r14d # (a|c)&b
+ add %r12d,%eax # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%eax # h+=Maj(a,b,c)
+ jmp .Lrounds_16_xx
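+
+	# Message-schedule sketch (annotation only): each step of
+	# .Lrounds_16_xx first expands, in place on the stack,
+	#	W[i] = sigma1(W[i-2]) + W[i-7] + sigma0(W[i-15]) + W[i-16]
+	# with sigma0(x) = ror(x,7) ^ ror(x,18) ^ (x >> 3) and
+	# sigma1(x) = ror(x,17) ^ ror(x,19) ^ (x >> 10), then runs the
+	# same round body as above.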
+.align 16
+.Lrounds_16_xx:
+ mov 4(%rsp),%r13d
+ mov 56(%rsp),%r12d
+
+ mov %r13d,%r15d
+
+ shr $3,%r13d
+ ror $7,%r15d
+
+ xor %r15d,%r13d
+ ror $11,%r15d
+
+ xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
+ mov %r12d,%r14d
+
+ shr $10,%r12d
+ ror $17,%r14d
+
+ xor %r14d,%r12d
+ ror $2,%r14d
+
+ xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
+
+ add %r13d,%r12d
+
+ add 36(%rsp),%r12d
+
+ add 0(%rsp),%r12d
+ mov %r8d,%r13d
+ mov %r8d,%r14d
+ mov %r9d,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %r10d,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %r8d,%r15d # (f^g)&e
+ mov %r12d,0(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %r11d,%r12d # T1+=h
+
+ mov %eax,%r11d
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %eax,%r13d
+ mov %eax,%r14d
+
+ ror $2,%r11d
+ ror $13,%r13d
+ mov %eax,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%r11d
+ ror $9,%r13d
+ or %ecx,%r14d # a|c
+
+ xor %r13d,%r11d # h=Sigma0(a)
+ and %ecx,%r15d # a&c
+ add %r12d,%edx # d+=T1
+
+ and %ebx,%r14d # (a|c)&b
+ add %r12d,%r11d # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%r11d # h+=Maj(a,b,c)
+ mov 8(%rsp),%r13d
+ mov 60(%rsp),%r12d
+
+ mov %r13d,%r15d
+
+ shr $3,%r13d
+ ror $7,%r15d
+
+ xor %r15d,%r13d
+ ror $11,%r15d
+
+ xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
+ mov %r12d,%r14d
+
+ shr $10,%r12d
+ ror $17,%r14d
+
+ xor %r14d,%r12d
+ ror $2,%r14d
+
+ xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
+
+ add %r13d,%r12d
+
+ add 40(%rsp),%r12d
+
+ add 4(%rsp),%r12d
+ mov %edx,%r13d
+ mov %edx,%r14d
+ mov %r8d,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %r9d,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %edx,%r15d # (f^g)&e
+ mov %r12d,4(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %r10d,%r12d # T1+=h
+
+ mov %r11d,%r10d
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %r11d,%r13d
+ mov %r11d,%r14d
+
+ ror $2,%r10d
+ ror $13,%r13d
+ mov %r11d,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%r10d
+ ror $9,%r13d
+ or %ebx,%r14d # a|c
+
+ xor %r13d,%r10d # h=Sigma0(a)
+ and %ebx,%r15d # a&c
+ add %r12d,%ecx # d+=T1
+
+ and %eax,%r14d # (a|c)&b
+ add %r12d,%r10d # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%r10d # h+=Maj(a,b,c)
+ mov 12(%rsp),%r13d
+ mov 0(%rsp),%r12d
+
+ mov %r13d,%r15d
+
+ shr $3,%r13d
+ ror $7,%r15d
+
+ xor %r15d,%r13d
+ ror $11,%r15d
+
+ xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
+ mov %r12d,%r14d
+
+ shr $10,%r12d
+ ror $17,%r14d
+
+ xor %r14d,%r12d
+ ror $2,%r14d
+
+ xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
+
+ add %r13d,%r12d
+
+ add 44(%rsp),%r12d
+
+ add 8(%rsp),%r12d
+ mov %ecx,%r13d
+ mov %ecx,%r14d
+ mov %edx,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %r8d,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %ecx,%r15d # (f^g)&e
+ mov %r12d,8(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %r9d,%r12d # T1+=h
+
+ mov %r10d,%r9d
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %r10d,%r13d
+ mov %r10d,%r14d
+
+ ror $2,%r9d
+ ror $13,%r13d
+ mov %r10d,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%r9d
+ ror $9,%r13d
+ or %eax,%r14d # a|c
+
+ xor %r13d,%r9d # h=Sigma0(a)
+ and %eax,%r15d # a&c
+ add %r12d,%ebx # d+=T1
+
+ and %r11d,%r14d # (a|c)&b
+ add %r12d,%r9d # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%r9d # h+=Maj(a,b,c)
+ mov 16(%rsp),%r13d
+ mov 4(%rsp),%r12d
+
+ mov %r13d,%r15d
+
+ shr $3,%r13d
+ ror $7,%r15d
+
+ xor %r15d,%r13d
+ ror $11,%r15d
+
+ xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
+ mov %r12d,%r14d
+
+ shr $10,%r12d
+ ror $17,%r14d
+
+ xor %r14d,%r12d
+ ror $2,%r14d
+
+ xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
+
+ add %r13d,%r12d
+
+ add 48(%rsp),%r12d
+
+ add 12(%rsp),%r12d
+ mov %ebx,%r13d
+ mov %ebx,%r14d
+ mov %ecx,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %edx,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %ebx,%r15d # (f^g)&e
+ mov %r12d,12(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %r8d,%r12d # T1+=h
+
+ mov %r9d,%r8d
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %r9d,%r13d
+ mov %r9d,%r14d
+
+ ror $2,%r8d
+ ror $13,%r13d
+ mov %r9d,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%r8d
+ ror $9,%r13d
+ or %r11d,%r14d # a|c
+
+ xor %r13d,%r8d # h=Sigma0(a)
+ and %r11d,%r15d # a&c
+ add %r12d,%eax # d+=T1
+
+ and %r10d,%r14d # (a|c)&b
+ add %r12d,%r8d # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%r8d # h+=Maj(a,b,c)
+ mov 20(%rsp),%r13d
+ mov 8(%rsp),%r12d
+
+ mov %r13d,%r15d
+
+ shr $3,%r13d
+ ror $7,%r15d
+
+ xor %r15d,%r13d
+ ror $11,%r15d
+
+ xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
+ mov %r12d,%r14d
+
+ shr $10,%r12d
+ ror $17,%r14d
+
+ xor %r14d,%r12d
+ ror $2,%r14d
+
+ xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
+
+ add %r13d,%r12d
+
+ add 52(%rsp),%r12d
+
+ add 16(%rsp),%r12d
+ mov %eax,%r13d
+ mov %eax,%r14d
+ mov %ebx,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %ecx,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %eax,%r15d # (f^g)&e
+ mov %r12d,16(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %edx,%r12d # T1+=h
+
+ mov %r8d,%edx
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %r8d,%r13d
+ mov %r8d,%r14d
+
+ ror $2,%edx
+ ror $13,%r13d
+ mov %r8d,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%edx
+ ror $9,%r13d
+ or %r10d,%r14d # a|c
+
+ xor %r13d,%edx # h=Sigma0(a)
+ and %r10d,%r15d # a&c
+ add %r12d,%r11d # d+=T1
+
+ and %r9d,%r14d # (a|c)&b
+ add %r12d,%edx # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%edx # h+=Maj(a,b,c)
+ mov 24(%rsp),%r13d
+ mov 12(%rsp),%r12d
+
+ mov %r13d,%r15d
+
+ shr $3,%r13d
+ ror $7,%r15d
+
+ xor %r15d,%r13d
+ ror $11,%r15d
+
+ xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
+ mov %r12d,%r14d
+
+ shr $10,%r12d
+ ror $17,%r14d
+
+ xor %r14d,%r12d
+ ror $2,%r14d
+
+ xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
+
+ add %r13d,%r12d
+
+ add 56(%rsp),%r12d
+
+ add 20(%rsp),%r12d
+ mov %r11d,%r13d
+ mov %r11d,%r14d
+ mov %eax,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %ebx,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %r11d,%r15d # (f^g)&e
+ mov %r12d,20(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %ecx,%r12d # T1+=h
+
+ mov %edx,%ecx
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %edx,%r13d
+ mov %edx,%r14d
+
+ ror $2,%ecx
+ ror $13,%r13d
+ mov %edx,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%ecx
+ ror $9,%r13d
+ or %r9d,%r14d # a|c
+
+ xor %r13d,%ecx # h=Sigma0(a)
+ and %r9d,%r15d # a&c
+ add %r12d,%r10d # d+=T1
+
+ and %r8d,%r14d # (a|c)&b
+ add %r12d,%ecx # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%ecx # h+=Maj(a,b,c)
+ mov 28(%rsp),%r13d
+ mov 16(%rsp),%r12d
+
+ mov %r13d,%r15d
+
+ shr $3,%r13d
+ ror $7,%r15d
+
+ xor %r15d,%r13d
+ ror $11,%r15d
+
+ xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
+ mov %r12d,%r14d
+
+ shr $10,%r12d
+ ror $17,%r14d
+
+ xor %r14d,%r12d
+ ror $2,%r14d
+
+ xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
+
+ add %r13d,%r12d
+
+ add 60(%rsp),%r12d
+
+ add 24(%rsp),%r12d
+ mov %r10d,%r13d
+ mov %r10d,%r14d
+ mov %r11d,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %eax,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %r10d,%r15d # (f^g)&e
+ mov %r12d,24(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %ebx,%r12d # T1+=h
+
+ mov %ecx,%ebx
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %ecx,%r13d
+ mov %ecx,%r14d
+
+ ror $2,%ebx
+ ror $13,%r13d
+ mov %ecx,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%ebx
+ ror $9,%r13d
+ or %r8d,%r14d # a|c
+
+ xor %r13d,%ebx # h=Sigma0(a)
+ and %r8d,%r15d # a&c
+ add %r12d,%r9d # d+=T1
+
+ and %edx,%r14d # (a|c)&b
+ add %r12d,%ebx # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%ebx # h+=Maj(a,b,c)
+ mov 32(%rsp),%r13d
+ mov 20(%rsp),%r12d
+
+ mov %r13d,%r15d
+
+ shr $3,%r13d
+ ror $7,%r15d
+
+ xor %r15d,%r13d
+ ror $11,%r15d
+
+ xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
+ mov %r12d,%r14d
+
+ shr $10,%r12d
+ ror $17,%r14d
+
+ xor %r14d,%r12d
+ ror $2,%r14d
+
+ xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
+
+ add %r13d,%r12d
+
+ add 0(%rsp),%r12d
+
+ add 28(%rsp),%r12d
+ mov %r9d,%r13d
+ mov %r9d,%r14d
+ mov %r10d,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %r11d,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %r9d,%r15d # (f^g)&e
+ mov %r12d,28(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %eax,%r12d # T1+=h
+
+ mov %ebx,%eax
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %ebx,%r13d
+ mov %ebx,%r14d
+
+ ror $2,%eax
+ ror $13,%r13d
+ mov %ebx,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%eax
+ ror $9,%r13d
+ or %edx,%r14d # a|c
+
+ xor %r13d,%eax # h=Sigma0(a)
+ and %edx,%r15d # a&c
+ add %r12d,%r8d # d+=T1
+
+ and %ecx,%r14d # (a|c)&b
+ add %r12d,%eax # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%eax # h+=Maj(a,b,c)
+ mov 36(%rsp),%r13d
+ mov 24(%rsp),%r12d
+
+ mov %r13d,%r15d
+
+ shr $3,%r13d
+ ror $7,%r15d
+
+ xor %r15d,%r13d
+ ror $11,%r15d
+
+ xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
+ mov %r12d,%r14d
+
+ shr $10,%r12d
+ ror $17,%r14d
+
+ xor %r14d,%r12d
+ ror $2,%r14d
+
+ xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
+
+ add %r13d,%r12d
+
+ add 4(%rsp),%r12d
+
+ add 32(%rsp),%r12d
+ mov %r8d,%r13d
+ mov %r8d,%r14d
+ mov %r9d,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %r10d,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %r8d,%r15d # (f^g)&e
+ mov %r12d,32(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %r11d,%r12d # T1+=h
+
+ mov %eax,%r11d
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %eax,%r13d
+ mov %eax,%r14d
+
+ ror $2,%r11d
+ ror $13,%r13d
+ mov %eax,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%r11d
+ ror $9,%r13d
+ or %ecx,%r14d # a|c
+
+ xor %r13d,%r11d # h=Sigma0(a)
+ and %ecx,%r15d # a&c
+ add %r12d,%edx # d+=T1
+
+ and %ebx,%r14d # (a|c)&b
+ add %r12d,%r11d # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%r11d # h+=Maj(a,b,c)
+ mov 40(%rsp),%r13d
+ mov 28(%rsp),%r12d
+
+ mov %r13d,%r15d
+
+ shr $3,%r13d
+ ror $7,%r15d
+
+ xor %r15d,%r13d
+ ror $11,%r15d
+
+ xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
+ mov %r12d,%r14d
+
+ shr $10,%r12d
+ ror $17,%r14d
+
+ xor %r14d,%r12d
+ ror $2,%r14d
+
+ xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
+
+ add %r13d,%r12d
+
+ add 8(%rsp),%r12d
+
+ add 36(%rsp),%r12d
+ mov %edx,%r13d
+ mov %edx,%r14d
+ mov %r8d,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %r9d,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %edx,%r15d # (f^g)&e
+ mov %r12d,36(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %r10d,%r12d # T1+=h
+
+ mov %r11d,%r10d
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %r11d,%r13d
+ mov %r11d,%r14d
+
+ ror $2,%r10d
+ ror $13,%r13d
+ mov %r11d,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%r10d
+ ror $9,%r13d
+ or %ebx,%r14d # a|c
+
+ xor %r13d,%r10d # h=Sigma0(a)
+ and %ebx,%r15d # a&c
+ add %r12d,%ecx # d+=T1
+
+ and %eax,%r14d # (a|c)&b
+ add %r12d,%r10d # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%r10d # h+=Maj(a,b,c)
+ mov 44(%rsp),%r13d
+ mov 32(%rsp),%r12d
+
+ mov %r13d,%r15d
+
+ shr $3,%r13d
+ ror $7,%r15d
+
+ xor %r15d,%r13d
+ ror $11,%r15d
+
+ xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
+ mov %r12d,%r14d
+
+ shr $10,%r12d
+ ror $17,%r14d
+
+ xor %r14d,%r12d
+ ror $2,%r14d
+
+ xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
+
+ add %r13d,%r12d
+
+ add 12(%rsp),%r12d
+
+ add 40(%rsp),%r12d
+ mov %ecx,%r13d
+ mov %ecx,%r14d
+ mov %edx,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %r8d,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %ecx,%r15d # (f^g)&e
+ mov %r12d,40(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %r9d,%r12d # T1+=h
+
+ mov %r10d,%r9d
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %r10d,%r13d
+ mov %r10d,%r14d
+
+ ror $2,%r9d
+ ror $13,%r13d
+ mov %r10d,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%r9d
+ ror $9,%r13d
+ or %eax,%r14d # a|c
+
+ xor %r13d,%r9d # h=Sigma0(a)
+ and %eax,%r15d # a&c
+ add %r12d,%ebx # d+=T1
+
+ and %r11d,%r14d # (a|c)&b
+ add %r12d,%r9d # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%r9d # h+=Maj(a,b,c)
+ mov 48(%rsp),%r13d
+ mov 36(%rsp),%r12d
+
+ mov %r13d,%r15d
+
+ shr $3,%r13d
+ ror $7,%r15d
+
+ xor %r15d,%r13d
+ ror $11,%r15d
+
+ xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
+ mov %r12d,%r14d
+
+ shr $10,%r12d
+ ror $17,%r14d
+
+ xor %r14d,%r12d
+ ror $2,%r14d
+
+ xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
+
+ add %r13d,%r12d
+
+ add 16(%rsp),%r12d
+
+ add 44(%rsp),%r12d
+ mov %ebx,%r13d
+ mov %ebx,%r14d
+ mov %ecx,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %edx,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %ebx,%r15d # (f^g)&e
+ mov %r12d,44(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %r8d,%r12d # T1+=h
+
+ mov %r9d,%r8d
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %r9d,%r13d
+ mov %r9d,%r14d
+
+ ror $2,%r8d
+ ror $13,%r13d
+ mov %r9d,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%r8d
+ ror $9,%r13d
+ or %r11d,%r14d # a|c
+
+ xor %r13d,%r8d # h=Sigma0(a)
+ and %r11d,%r15d # a&c
+ add %r12d,%eax # d+=T1
+
+ and %r10d,%r14d # (a|c)&b
+ add %r12d,%r8d # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%r8d # h+=Maj(a,b,c)
+ mov 52(%rsp),%r13d
+ mov 40(%rsp),%r12d
+
+ mov %r13d,%r15d
+
+ shr $3,%r13d
+ ror $7,%r15d
+
+ xor %r15d,%r13d
+ ror $11,%r15d
+
+ xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
+ mov %r12d,%r14d
+
+ shr $10,%r12d
+ ror $17,%r14d
+
+ xor %r14d,%r12d
+ ror $2,%r14d
+
+ xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
+
+ add %r13d,%r12d
+
+ add 20(%rsp),%r12d
+
+ add 48(%rsp),%r12d
+ mov %eax,%r13d
+ mov %eax,%r14d
+ mov %ebx,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %ecx,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %eax,%r15d # (f^g)&e
+ mov %r12d,48(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %edx,%r12d # T1+=h
+
+ mov %r8d,%edx
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %r8d,%r13d
+ mov %r8d,%r14d
+
+ ror $2,%edx
+ ror $13,%r13d
+ mov %r8d,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%edx
+ ror $9,%r13d
+ or %r10d,%r14d # a|c
+
+ xor %r13d,%edx # h=Sigma0(a)
+ and %r10d,%r15d # a&c
+ add %r12d,%r11d # d+=T1
+
+ and %r9d,%r14d # (a|c)&b
+ add %r12d,%edx # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%edx # h+=Maj(a,b,c)
+ mov 56(%rsp),%r13d
+ mov 44(%rsp),%r12d
+
+ mov %r13d,%r15d
+
+ shr $3,%r13d
+ ror $7,%r15d
+
+ xor %r15d,%r13d
+ ror $11,%r15d
+
+ xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
+ mov %r12d,%r14d
+
+ shr $10,%r12d
+ ror $17,%r14d
+
+ xor %r14d,%r12d
+ ror $2,%r14d
+
+ xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
+
+ add %r13d,%r12d
+
+ add 24(%rsp),%r12d
+
+ add 52(%rsp),%r12d
+ mov %r11d,%r13d
+ mov %r11d,%r14d
+ mov %eax,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %ebx,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %r11d,%r15d # (f^g)&e
+ mov %r12d,52(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %ecx,%r12d # T1+=h
+
+ mov %edx,%ecx
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %edx,%r13d
+ mov %edx,%r14d
+
+ ror $2,%ecx
+ ror $13,%r13d
+ mov %edx,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%ecx
+ ror $9,%r13d
+ or %r9d,%r14d # a|c
+
+ xor %r13d,%ecx # h=Sigma0(a)
+ and %r9d,%r15d # a&c
+ add %r12d,%r10d # d+=T1
+
+ and %r8d,%r14d # (a|c)&b
+ add %r12d,%ecx # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%ecx # h+=Maj(a,b,c)
+ mov 60(%rsp),%r13d
+ mov 48(%rsp),%r12d
+
+ mov %r13d,%r15d
+
+ shr $3,%r13d
+ ror $7,%r15d
+
+ xor %r15d,%r13d
+ ror $11,%r15d
+
+ xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
+ mov %r12d,%r14d
+
+ shr $10,%r12d
+ ror $17,%r14d
+
+ xor %r14d,%r12d
+ ror $2,%r14d
+
+ xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
+
+ add %r13d,%r12d
+
+ add 28(%rsp),%r12d
+
+ add 56(%rsp),%r12d
+ mov %r10d,%r13d
+ mov %r10d,%r14d
+ mov %r11d,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %eax,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %r10d,%r15d # (f^g)&e
+ mov %r12d,56(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %ebx,%r12d # T1+=h
+
+ mov %ecx,%ebx
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %ecx,%r13d
+ mov %ecx,%r14d
+
+ ror $2,%ebx
+ ror $13,%r13d
+ mov %ecx,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%ebx
+ ror $9,%r13d
+ or %r8d,%r14d # a|c
+
+ xor %r13d,%ebx # h=Sigma0(a)
+ and %r8d,%r15d # a&c
+ add %r12d,%r9d # d+=T1
+
+ and %edx,%r14d # (a|c)&b
+ add %r12d,%ebx # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%ebx # h+=Maj(a,b,c)
+ mov 0(%rsp),%r13d
+ mov 52(%rsp),%r12d
+
+ mov %r13d,%r15d
+
+ shr $3,%r13d
+ ror $7,%r15d
+
+ xor %r15d,%r13d
+ ror $11,%r15d
+
+ xor %r15d,%r13d # sigma0(X[(i+1)&0xf])
+ mov %r12d,%r14d
+
+ shr $10,%r12d
+ ror $17,%r14d
+
+ xor %r14d,%r12d
+ ror $2,%r14d
+
+ xor %r14d,%r12d # sigma1(X[(i+14)&0xf])
+
+ add %r13d,%r12d
+
+ add 32(%rsp),%r12d
+
+ add 60(%rsp),%r12d
+ mov %r9d,%r13d
+ mov %r9d,%r14d
+ mov %r10d,%r15d
+
+ ror $6,%r13d
+ ror $11,%r14d
+ xor %r11d,%r15d # f^g
+
+ xor %r14d,%r13d
+ ror $14,%r14d
+ and %r9d,%r15d # (f^g)&e
+ mov %r12d,60(%rsp)
+
+ xor %r14d,%r13d # Sigma1(e)
+ xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g
+ add %eax,%r12d # T1+=h
+
+ mov %ebx,%eax
+ add %r13d,%r12d # T1+=Sigma1(e)
+
+ add %r15d,%r12d # T1+=Ch(e,f,g)
+ mov %ebx,%r13d
+ mov %ebx,%r14d
+
+ ror $2,%eax
+ ror $13,%r13d
+ mov %ebx,%r15d
+ add (%rbp,%rdi,4),%r12d # T1+=K[round]
+
+ xor %r13d,%eax
+ ror $9,%r13d
+ or %edx,%r14d # a|c
+
+ xor %r13d,%eax # h=Sigma0(a)
+ and %edx,%r15d # a&c
+ add %r12d,%r8d # d+=T1
+
+ and %ecx,%r14d # (a|c)&b
+ add %r12d,%eax # h+=T1
+
+ or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14d,%eax # h+=Maj(a,b,c)
+ cmp $64,%rdi
+ jb .Lrounds_16_xx
+
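+	# All 64 rounds done for this 64-byte block: reload the ctx
+	# pointer, advance the input pointer, fold the working variables
+	# a..h back into the saved hash state, and loop while input
+	# remains.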
+ mov 16*4+0*8(%rsp),%rdi
+ lea 16*4(%rsi),%rsi
+
+ add 4*0(%rdi),%eax
+ add 4*1(%rdi),%ebx
+ add 4*2(%rdi),%ecx
+ add 4*3(%rdi),%edx
+ add 4*4(%rdi),%r8d
+ add 4*5(%rdi),%r9d
+ add 4*6(%rdi),%r10d
+ add 4*7(%rdi),%r11d
+
+ cmp 16*4+2*8(%rsp),%rsi
+
+ mov %eax,4*0(%rdi)
+ mov %ebx,4*1(%rdi)
+ mov %ecx,4*2(%rdi)
+ mov %edx,4*3(%rdi)
+ mov %r8d,4*4(%rdi)
+ mov %r9d,4*5(%rdi)
+ mov %r10d,4*6(%rdi)
+ mov %r11d,4*7(%rdi)
+ jb .Lloop
+
+ mov 16*4+3*8(%rsp),%rsp
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+SET_SIZE(SHA256TransformBlocks)
+
+.data
+.align 64
+.type K256,@object
+K256:
+ .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+ .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+ .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+ .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+ .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+ .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+ .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+ .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+ .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+ .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+ .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+ .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+ .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+ .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+ .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+ .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+#endif /* !lint && !__lint */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/zfs/module/icp/asm-x86_64/sha2/sha512_impl.S b/zfs/module/icp/asm-x86_64/sha2/sha512_impl.S
new file mode 100644
index 000000000000..6e37618761b2
--- /dev/null
+++ b/zfs/module/icp/asm-x86_64/sha2/sha512_impl.S
@@ -0,0 +1,2088 @@
+/*
+ * ====================================================================
+ * Written by Andy Polyakov <appro at fy.chalmers.se> for the OpenSSL
+ * project. Rights for redistribution and usage in source and binary
+ * forms are granted according to the OpenSSL license.
+ * ====================================================================
+ *
+ * sha256/512_block procedure for x86_64.
+ *
+ * 40% improvement over compiler-generated code on Opteron. On EM64T
+ * sha256 was observed to run >80% faster and sha512 >40% faster. No
+ * magical tricks, just a straight implementation... I really wonder
+ * why gcc [being armed with an inline assembler] fails to generate
+ * code as fast. The only thing which is cool about this module is
+ * that the very same instruction sequence is used for both SHA-256
+ * and SHA-512. In the former case the instructions operate on 32-bit
+ * operands, while in the latter on 64-bit ones. All I had to do was
+ * get one flavor right; the other one passed the test right away:-)
+ *
+ * sha256_block runs in ~1005 cycles on Opteron, which gives you
+ * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock
+ * frequency in GHz. sha512_block runs in ~1275 cycles, which results
+ * in 128*1000/1275=100MBps per GHz. Is there room for improvement?
+ * Well, if you compare it to the IA-64 implementation, which maintains
+ * X[16] in the register bank[!], tends to 4 instructions per CPU clock
+ * cycle and runs in 1003 cycles, 1275 is a very good result for the
+ * 3-way issue Opteron pipeline with X[16] maintained in memory. So *if*
+ * there is a way to improve it, *then* the only way would be to try to
+ * offload the X[16] updates to the SSE unit, but that would require a
+ * "deeper" loop unroll, which in turn would naturally cause size
+ * blow-up, not to mention increased complexity! And once again, that
+ * is only *if* it's actually possible to noticeably improve overall
+ * instruction-level parallelism (ILP) on a given CPU implementation.
+ *
+ * Special note on Intel EM64T. While the Opteron CPU exhibits a perfect
+ * performance ratio of 1.5 between the 64- and 32-bit flavors [see
+ * above], [currently available] EM64T CPUs are apparently far from it.
+ * On the contrary, the 64-bit version, sha512_block, is ~30% *slower*
+ * than the 32-bit sha256_block:-( This is presumably because 64-bit
+ * shifts/rotates are apparently not atomic instructions, but are
+ * implemented in microcode.
+ */
+
+/*
+ * OpenSolaris OS modifications
+ *
+ * Sun elects to use this software under the BSD license.
+ *
+ * This source originates from OpenSSL file sha512-x86_64.pl at
+ * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz
+ * (presumably for future OpenSSL release 0.9.8h), with these changes:
+ *
+ * 1. Added perl "use strict" and declared variables.
+ *
+ * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
+ * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards.
+ *
+ * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1)
+ * assemblers). Replaced the .picmeup macro with assembler code.
+ *
+ * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype",
+ * at the beginning of SHA2_CTX (the next field is 8-byte aligned).
+ */
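+
+/*
+ * To illustrate change 4 above (a sketch under an assumed layout, not
+ * part of the build): with a SHA2_CTX along the lines of
+ *
+ *	typedef struct {
+ *		uint32_t algotype;	(algorithm selector)
+ *		(4 bytes of implicit padding: next member is 8-byte aligned)
+ *		union {
+ *			uint32_t s32[8];	(SHA-256 state H[0..7])
+ *			uint64_t s64[8];	(SHA-512 state H[0..7])
+ *		} state;
+ *		(bit count and message buffer follow)
+ *	} SHA2_CTX;
+ *
+ * the "add $8,%rdi" in the prologue below makes %rdi point at the hash
+ * state words, which the original OpenSSL code expects at offset 0.
+ */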
+
+/*
+ * This file was generated by a perl script (sha512-x86_64.pl) that was
+ * used to generate the sha256 and sha512 variants from the same code
+ * base. The comments from the original file have been pasted above.
+ */
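+
+/*
+ * For orientation, a C-like sketch (an illustration, not part of the
+ * build) of the round the unrolled blocks below implement, written for
+ * the 64-bit SHA-512 flavor; SHA-256 is the same with 32-bit words and
+ * different shift/rotate counts. Note the branch-free forms of Ch and
+ * Maj used throughout:
+ *
+ *	Ch(e,f,g)  = ((f ^ g) & e) ^ g        equals (e & f) | (~e & g)
+ *	Maj(a,b,c) = ((a | c) & b) | (a & c)  equals majority of a, b, c
+ *
+ *	T1  = h + Sigma1(e) + Ch(e,f,g) + K[i] + W[i];
+ *	d  += T1;
+ *	h   = T1 + Sigma0(a) + Maj(a,b,c);
+ */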
+
+
+#if defined(lint) || defined(__lint)
+#include <sys/stdint.h>
+#include <sha2/sha2.h>
+
+/* ARGSUSED */
+void
+SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num)
+{
+}
+
+
+#else
+#define _ASM
+#include <sys/asm_linkage.h>
+
+ENTRY_NP(SHA512TransformBlocks)
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ mov %rsp,%rbp # copy %rsp
+ shl $4,%rdx # num*16
+ sub $16*8+4*8,%rsp
+ lea (%rsi,%rdx,8),%rdx # inp+num*16*8
+ and $-64,%rsp # align stack frame
+ add $8,%rdi # Skip OpenSolaris field, "algotype"
+ mov %rdi,16*8+0*8(%rsp) # save ctx, 1st arg
+ mov %rsi,16*8+1*8(%rsp) # save inp, 2nd arg
+ mov %rdx,16*8+2*8(%rsp) # save end pointer, "3rd" arg
+ mov %rbp,16*8+3*8(%rsp) # save copy of %rsp
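+	# Frame layout from here on: 0..127(%rsp) holds the 16-entry
+	# (8 bytes each) message schedule window; 128(%rsp) ctx,
+	# 136(%rsp) inp, 144(%rsp) end pointer, 152(%rsp) saved %rsp.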
+
+ #.picmeup %rbp
+ # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts
+ # the address of the "next" instruction into the target register
+ # (%rbp). This generates these 2 instructions:
+ lea .Llea(%rip),%rbp
+ #nop # .picmeup generates a nop for mod 8 alignment--not needed here
+
+.Llea:
+ lea K512-.(%rbp),%rbp
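+	# Net effect: %rbp = &.Llea + (K512 - .Llea) = &K512, computed
+	# PC-relatively so this reference needs no run-time relocation.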
+
+ mov 8*0(%rdi),%rax
+ mov 8*1(%rdi),%rbx
+ mov 8*2(%rdi),%rcx
+ mov 8*3(%rdi),%rdx
+ mov 8*4(%rdi),%r8
+ mov 8*5(%rdi),%r9
+ mov 8*6(%rdi),%r10
+ mov 8*7(%rdi),%r11
+ jmp .Lloop
+
+.align 16
+.Lloop:
+ xor %rdi,%rdi
+ mov 8*0(%rsi),%r12
+ bswap %r12
+ mov %r8,%r13
+ mov %r8,%r14
+ mov %r9,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %r10,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %r8,%r15 # (f^g)&e
+ mov %r12,0(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %r11,%r12 # T1+=h
+
+ mov %rax,%r11
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %rax,%r13
+ mov %rax,%r14
+
+ ror $28,%r11
+ ror $34,%r13
+ mov %rax,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%r11
+ ror $5,%r13
+ or %rcx,%r14 # a|c
+
+ xor %r13,%r11 # h=Sigma0(a)
+ and %rcx,%r15 # a&c
+ add %r12,%rdx # d+=T1
+
+ and %rbx,%r14 # (a|c)&b
+ add %r12,%r11 # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%r11 # h+=Maj(a,b,c)
+ mov 8*1(%rsi),%r12
+ bswap %r12
+ mov %rdx,%r13
+ mov %rdx,%r14
+ mov %r8,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %r9,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %rdx,%r15 # (f^g)&e
+ mov %r12,8(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %r10,%r12 # T1+=h
+
+ mov %r11,%r10
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %r11,%r13
+ mov %r11,%r14
+
+ ror $28,%r10
+ ror $34,%r13
+ mov %r11,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%r10
+ ror $5,%r13
+ or %rbx,%r14 # a|c
+
+ xor %r13,%r10 # h=Sigma0(a)
+ and %rbx,%r15 # a&c
+ add %r12,%rcx # d+=T1
+
+ and %rax,%r14 # (a|c)&b
+ add %r12,%r10 # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%r10 # h+=Maj(a,b,c)
+ mov 8*2(%rsi),%r12
+ bswap %r12
+ mov %rcx,%r13
+ mov %rcx,%r14
+ mov %rdx,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %r8,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %rcx,%r15 # (f^g)&e
+ mov %r12,16(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %r9,%r12 # T1+=h
+
+ mov %r10,%r9
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %r10,%r13
+ mov %r10,%r14
+
+ ror $28,%r9
+ ror $34,%r13
+ mov %r10,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%r9
+ ror $5,%r13
+ or %rax,%r14 # a|c
+
+ xor %r13,%r9 # h=Sigma0(a)
+ and %rax,%r15 # a&c
+ add %r12,%rbx # d+=T1
+
+ and %r11,%r14 # (a|c)&b
+ add %r12,%r9 # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%r9 # h+=Maj(a,b,c)
+ mov 8*3(%rsi),%r12
+ bswap %r12
+ mov %rbx,%r13
+ mov %rbx,%r14
+ mov %rcx,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %rdx,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %rbx,%r15 # (f^g)&e
+ mov %r12,24(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %r8,%r12 # T1+=h
+
+ mov %r9,%r8
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %r9,%r13
+ mov %r9,%r14
+
+ ror $28,%r8
+ ror $34,%r13
+ mov %r9,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%r8
+ ror $5,%r13
+ or %r11,%r14 # a|c
+
+ xor %r13,%r8 # h=Sigma0(a)
+ and %r11,%r15 # a&c
+ add %r12,%rax # d+=T1
+
+ and %r10,%r14 # (a|c)&b
+ add %r12,%r8 # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%r8 # h+=Maj(a,b,c)
+ mov 8*4(%rsi),%r12
+ bswap %r12
+ mov %rax,%r13
+ mov %rax,%r14
+ mov %rbx,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %rcx,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %rax,%r15 # (f^g)&e
+ mov %r12,32(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %rdx,%r12 # T1+=h
+
+ mov %r8,%rdx
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %r8,%r13
+ mov %r8,%r14
+
+ ror $28,%rdx
+ ror $34,%r13
+ mov %r8,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%rdx
+ ror $5,%r13
+ or %r10,%r14 # a|c
+
+ xor %r13,%rdx # h=Sigma0(a)
+ and %r10,%r15 # a&c
+ add %r12,%r11 # d+=T1
+
+ and %r9,%r14 # (a|c)&b
+ add %r12,%rdx # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%rdx # h+=Maj(a,b,c)
+ mov 8*5(%rsi),%r12
+ bswap %r12
+ mov %r11,%r13
+ mov %r11,%r14
+ mov %rax,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %rbx,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %r11,%r15 # (f^g)&e
+ mov %r12,40(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %rcx,%r12 # T1+=h
+
+ mov %rdx,%rcx
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %rdx,%r13
+ mov %rdx,%r14
+
+ ror $28,%rcx
+ ror $34,%r13
+ mov %rdx,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%rcx
+ ror $5,%r13
+ or %r9,%r14 # a|c
+
+ xor %r13,%rcx # h=Sigma0(a)
+ and %r9,%r15 # a&c
+ add %r12,%r10 # d+=T1
+
+ and %r8,%r14 # (a|c)&b
+ add %r12,%rcx # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%rcx # h+=Maj(a,b,c)
+ mov 8*6(%rsi),%r12
+ bswap %r12
+ mov %r10,%r13
+ mov %r10,%r14
+ mov %r11,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %rax,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %r10,%r15 # (f^g)&e
+ mov %r12,48(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %rbx,%r12 # T1+=h
+
+ mov %rcx,%rbx
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %rcx,%r13
+ mov %rcx,%r14
+
+ ror $28,%rbx
+ ror $34,%r13
+ mov %rcx,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%rbx
+ ror $5,%r13
+ or %r8,%r14 # a|c
+
+ xor %r13,%rbx # h=Sigma0(a)
+ and %r8,%r15 # a&c
+ add %r12,%r9 # d+=T1
+
+ and %rdx,%r14 # (a|c)&b
+ add %r12,%rbx # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%rbx # h+=Maj(a,b,c)
+ mov 8*7(%rsi),%r12
+ bswap %r12
+ mov %r9,%r13
+ mov %r9,%r14
+ mov %r10,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %r11,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %r9,%r15 # (f^g)&e
+ mov %r12,56(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %rax,%r12 # T1+=h
+
+ mov %rbx,%rax
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %rbx,%r13
+ mov %rbx,%r14
+
+ ror $28,%rax
+ ror $34,%r13
+ mov %rbx,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%rax
+ ror $5,%r13
+ or %rdx,%r14 # a|c
+
+ xor %r13,%rax # h=Sigma0(a)
+ and %rdx,%r15 # a&c
+ add %r12,%r8 # d+=T1
+
+ and %rcx,%r14 # (a|c)&b
+ add %r12,%rax # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%rax # h+=Maj(a,b,c)
+ mov 8*8(%rsi),%r12
+ bswap %r12
+ mov %r8,%r13
+ mov %r8,%r14
+ mov %r9,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %r10,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %r8,%r15 # (f^g)&e
+ mov %r12,64(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %r11,%r12 # T1+=h
+
+ mov %rax,%r11
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %rax,%r13
+ mov %rax,%r14
+
+ ror $28,%r11
+ ror $34,%r13
+ mov %rax,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%r11
+ ror $5,%r13
+ or %rcx,%r14 # a|c
+
+ xor %r13,%r11 # h=Sigma0(a)
+ and %rcx,%r15 # a&c
+ add %r12,%rdx # d+=T1
+
+ and %rbx,%r14 # (a|c)&b
+ add %r12,%r11 # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%r11 # h+=Maj(a,b,c)
+ mov 8*9(%rsi),%r12
+ bswap %r12
+ mov %rdx,%r13
+ mov %rdx,%r14
+ mov %r8,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %r9,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %rdx,%r15 # (f^g)&e
+ mov %r12,72(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %r10,%r12 # T1+=h
+
+ mov %r11,%r10
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %r11,%r13
+ mov %r11,%r14
+
+ ror $28,%r10
+ ror $34,%r13
+ mov %r11,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%r10
+ ror $5,%r13
+ or %rbx,%r14 # a|c
+
+ xor %r13,%r10 # h=Sigma0(a)
+ and %rbx,%r15 # a&c
+ add %r12,%rcx # d+=T1
+
+ and %rax,%r14 # (a|c)&b
+ add %r12,%r10 # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%r10 # h+=Maj(a,b,c)
+ mov 8*10(%rsi),%r12
+ bswap %r12
+ mov %rcx,%r13
+ mov %rcx,%r14
+ mov %rdx,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %r8,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %rcx,%r15 # (f^g)&e
+ mov %r12,80(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %r9,%r12 # T1+=h
+
+ mov %r10,%r9
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %r10,%r13
+ mov %r10,%r14
+
+ ror $28,%r9
+ ror $34,%r13
+ mov %r10,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%r9
+ ror $5,%r13
+ or %rax,%r14 # a|c
+
+ xor %r13,%r9 # h=Sigma0(a)
+ and %rax,%r15 # a&c
+ add %r12,%rbx # d+=T1
+
+ and %r11,%r14 # (a|c)&b
+ add %r12,%r9 # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%r9 # h+=Maj(a,b,c)
+ mov 8*11(%rsi),%r12
+ bswap %r12
+ mov %rbx,%r13
+ mov %rbx,%r14
+ mov %rcx,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %rdx,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %rbx,%r15 # (f^g)&e
+ mov %r12,88(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %r8,%r12 # T1+=h
+
+ mov %r9,%r8
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %r9,%r13
+ mov %r9,%r14
+
+ ror $28,%r8
+ ror $34,%r13
+ mov %r9,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%r8
+ ror $5,%r13
+ or %r11,%r14 # a|c
+
+ xor %r13,%r8 # h=Sigma0(a)
+ and %r11,%r15 # a&c
+ add %r12,%rax # d+=T1
+
+ and %r10,%r14 # (a|c)&b
+ add %r12,%r8 # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%r8 # h+=Maj(a,b,c)
+ mov 8*12(%rsi),%r12
+ bswap %r12
+ mov %rax,%r13
+ mov %rax,%r14
+ mov %rbx,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %rcx,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %rax,%r15 # (f^g)&e
+ mov %r12,96(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %rdx,%r12 # T1+=h
+
+ mov %r8,%rdx
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %r8,%r13
+ mov %r8,%r14
+
+ ror $28,%rdx
+ ror $34,%r13
+ mov %r8,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%rdx
+ ror $5,%r13
+ or %r10,%r14 # a|c
+
+ xor %r13,%rdx # h=Sigma0(a)
+ and %r10,%r15 # a&c
+ add %r12,%r11 # d+=T1
+
+ and %r9,%r14 # (a|c)&b
+ add %r12,%rdx # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%rdx # h+=Maj(a,b,c)
+ mov 8*13(%rsi),%r12
+ bswap %r12
+ mov %r11,%r13
+ mov %r11,%r14
+ mov %rax,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %rbx,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %r11,%r15 # (f^g)&e
+ mov %r12,104(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %rcx,%r12 # T1+=h
+
+ mov %rdx,%rcx
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %rdx,%r13
+ mov %rdx,%r14
+
+ ror $28,%rcx
+ ror $34,%r13
+ mov %rdx,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%rcx
+ ror $5,%r13
+ or %r9,%r14 # a|c
+
+ xor %r13,%rcx # h=Sigma0(a)
+ and %r9,%r15 # a&c
+ add %r12,%r10 # d+=T1
+
+ and %r8,%r14 # (a|c)&b
+ add %r12,%rcx # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%rcx # h+=Maj(a,b,c)
+ mov 8*14(%rsi),%r12
+ bswap %r12
+ mov %r10,%r13
+ mov %r10,%r14
+ mov %r11,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %rax,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %r10,%r15 # (f^g)&e
+ mov %r12,112(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %rbx,%r12 # T1+=h
+
+ mov %rcx,%rbx
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %rcx,%r13
+ mov %rcx,%r14
+
+ ror $28,%rbx
+ ror $34,%r13
+ mov %rcx,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%rbx
+ ror $5,%r13
+ or %r8,%r14 # a|c
+
+ xor %r13,%rbx # h=Sigma0(a)
+ and %r8,%r15 # a&c
+ add %r12,%r9 # d+=T1
+
+ and %rdx,%r14 # (a|c)&b
+ add %r12,%rbx # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%rbx # h+=Maj(a,b,c)
+ mov 8*15(%rsi),%r12
+ bswap %r12
+ mov %r9,%r13
+ mov %r9,%r14
+ mov %r10,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %r11,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %r9,%r15 # (f^g)&e
+ mov %r12,120(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %rax,%r12 # T1+=h
+
+ mov %rbx,%rax
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %rbx,%r13
+ mov %rbx,%r14
+
+ ror $28,%rax
+ ror $34,%r13
+ mov %rbx,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%rax
+ ror $5,%r13
+ or %rdx,%r14 # a|c
+
+ xor %r13,%rax # h=Sigma0(a)
+ and %rdx,%r15 # a&c
+ add %r12,%r8 # d+=T1
+
+ and %rcx,%r14 # (a|c)&b
+ add %r12,%rax # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%rax # h+=Maj(a,b,c)
+ jmp .Lrounds_16_xx
+.align 16
+.Lrounds_16_xx:
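+	# Rounds 16..79: the message schedule is a 16-quadword circular
+	# window at 0..120(%rsp), updated in place as
+	# W[i&15] += sigma0(W[(i+1)&15]) + sigma1(W[(i+14)&15]) + W[(i+9)&15]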
+ mov 8(%rsp),%r13
+ mov 112(%rsp),%r12
+
+ mov %r13,%r15
+
+ shr $7,%r13
+ ror $1,%r15
+
+ xor %r15,%r13
+ ror $7,%r15
+
+ xor %r15,%r13 # sigma0(X[(i+1)&0xf])
+ mov %r12,%r14
+
+ shr $6,%r12
+ ror $19,%r14
+
+ xor %r14,%r12
+ ror $42,%r14
+
+ xor %r14,%r12 # sigma1(X[(i+14)&0xf])
+
+ add %r13,%r12
+
+ add 72(%rsp),%r12
+
+ add 0(%rsp),%r12
+ mov %r8,%r13
+ mov %r8,%r14
+ mov %r9,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %r10,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %r8,%r15 # (f^g)&e
+ mov %r12,0(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %r11,%r12 # T1+=h
+
+ mov %rax,%r11
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %rax,%r13
+ mov %rax,%r14
+
+ ror $28,%r11
+ ror $34,%r13
+ mov %rax,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%r11
+ ror $5,%r13
+ or %rcx,%r14 # a|c
+
+ xor %r13,%r11 # h=Sigma0(a)
+ and %rcx,%r15 # a&c
+ add %r12,%rdx # d+=T1
+
+ and %rbx,%r14 # (a|c)&b
+ add %r12,%r11 # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%r11 # h+=Maj(a,b,c)
+ mov 16(%rsp),%r13
+ mov 120(%rsp),%r12
+
+ mov %r13,%r15
+
+ shr $7,%r13
+ ror $1,%r15
+
+ xor %r15,%r13
+ ror $7,%r15
+
+ xor %r15,%r13 # sigma0(X[(i+1)&0xf])
+ mov %r12,%r14
+
+ shr $6,%r12
+ ror $19,%r14
+
+ xor %r14,%r12
+ ror $42,%r14
+
+ xor %r14,%r12 # sigma1(X[(i+14)&0xf])
+
+ add %r13,%r12
+
+ add 80(%rsp),%r12
+
+ add 8(%rsp),%r12
+ mov %rdx,%r13
+ mov %rdx,%r14
+ mov %r8,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %r9,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %rdx,%r15 # (f^g)&e
+ mov %r12,8(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %r10,%r12 # T1+=h
+
+ mov %r11,%r10
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %r11,%r13
+ mov %r11,%r14
+
+ ror $28,%r10
+ ror $34,%r13
+ mov %r11,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%r10
+ ror $5,%r13
+ or %rbx,%r14 # a|c
+
+ xor %r13,%r10 # h=Sigma0(a)
+ and %rbx,%r15 # a&c
+ add %r12,%rcx # d+=T1
+
+ and %rax,%r14 # (a|c)&b
+ add %r12,%r10 # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%r10 # h+=Maj(a,b,c)
+ mov 24(%rsp),%r13
+ mov 0(%rsp),%r12
+
+ mov %r13,%r15
+
+ shr $7,%r13
+ ror $1,%r15
+
+ xor %r15,%r13
+ ror $7,%r15
+
+ xor %r15,%r13 # sigma0(X[(i+1)&0xf])
+ mov %r12,%r14
+
+ shr $6,%r12
+ ror $19,%r14
+
+ xor %r14,%r12
+ ror $42,%r14
+
+ xor %r14,%r12 # sigma1(X[(i+14)&0xf])
+
+ add %r13,%r12
+
+ add 88(%rsp),%r12
+
+ add 16(%rsp),%r12
+ mov %rcx,%r13
+ mov %rcx,%r14
+ mov %rdx,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %r8,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %rcx,%r15 # (f^g)&e
+ mov %r12,16(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %r9,%r12 # T1+=h
+
+ mov %r10,%r9
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %r10,%r13
+ mov %r10,%r14
+
+ ror $28,%r9
+ ror $34,%r13
+ mov %r10,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%r9
+ ror $5,%r13
+ or %rax,%r14 # a|c
+
+ xor %r13,%r9 # h=Sigma0(a)
+ and %rax,%r15 # a&c
+ add %r12,%rbx # d+=T1
+
+ and %r11,%r14 # (a|c)&b
+ add %r12,%r9 # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%r9 # h+=Maj(a,b,c)
+ mov 32(%rsp),%r13
+ mov 8(%rsp),%r12
+
+ mov %r13,%r15
+
+ shr $7,%r13
+ ror $1,%r15
+
+ xor %r15,%r13
+ ror $7,%r15
+
+ xor %r15,%r13 # sigma0(X[(i+1)&0xf])
+ mov %r12,%r14
+
+ shr $6,%r12
+ ror $19,%r14
+
+ xor %r14,%r12
+ ror $42,%r14
+
+ xor %r14,%r12 # sigma1(X[(i+14)&0xf])
+
+ add %r13,%r12
+
+ add 96(%rsp),%r12
+
+ add 24(%rsp),%r12
+ mov %rbx,%r13
+ mov %rbx,%r14
+ mov %rcx,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %rdx,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %rbx,%r15 # (f^g)&e
+ mov %r12,24(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %r8,%r12 # T1+=h
+
+ mov %r9,%r8
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %r9,%r13
+ mov %r9,%r14
+
+ ror $28,%r8
+ ror $34,%r13
+ mov %r9,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%r8
+ ror $5,%r13
+ or %r11,%r14 # a|c
+
+ xor %r13,%r8 # h=Sigma0(a)
+ and %r11,%r15 # a&c
+ add %r12,%rax # d+=T1
+
+ and %r10,%r14 # (a|c)&b
+ add %r12,%r8 # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%r8 # h+=Maj(a,b,c)
+ mov 40(%rsp),%r13
+ mov 16(%rsp),%r12
+
+ mov %r13,%r15
+
+ shr $7,%r13
+ ror $1,%r15
+
+ xor %r15,%r13
+ ror $7,%r15
+
+ xor %r15,%r13 # sigma0(X[(i+1)&0xf])
+ mov %r12,%r14
+
+ shr $6,%r12
+ ror $19,%r14
+
+ xor %r14,%r12
+ ror $42,%r14
+
+ xor %r14,%r12 # sigma1(X[(i+14)&0xf])
+
+ add %r13,%r12
+
+ add 104(%rsp),%r12
+
+ add 32(%rsp),%r12
+ mov %rax,%r13
+ mov %rax,%r14
+ mov %rbx,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %rcx,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %rax,%r15 # (f^g)&e
+ mov %r12,32(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %rdx,%r12 # T1+=h
+
+ mov %r8,%rdx
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %r8,%r13
+ mov %r8,%r14
+
+ ror $28,%rdx
+ ror $34,%r13
+ mov %r8,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%rdx
+ ror $5,%r13
+ or %r10,%r14 # a|c
+
+ xor %r13,%rdx # h=Sigma0(a)
+ and %r10,%r15 # a&c
+ add %r12,%r11 # d+=T1
+
+ and %r9,%r14 # (a|c)&b
+ add %r12,%rdx # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%rdx # h+=Maj(a,b,c)
+ mov 48(%rsp),%r13
+ mov 24(%rsp),%r12
+
+ mov %r13,%r15
+
+ shr $7,%r13
+ ror $1,%r15
+
+ xor %r15,%r13
+ ror $7,%r15
+
+ xor %r15,%r13 # sigma0(X[(i+1)&0xf])
+ mov %r12,%r14
+
+ shr $6,%r12
+ ror $19,%r14
+
+ xor %r14,%r12
+ ror $42,%r14
+
+ xor %r14,%r12 # sigma1(X[(i+14)&0xf])
+
+ add %r13,%r12
+
+ add 112(%rsp),%r12
+
+ add 40(%rsp),%r12
+ mov %r11,%r13
+ mov %r11,%r14
+ mov %rax,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %rbx,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %r11,%r15 # (f^g)&e
+ mov %r12,40(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %rcx,%r12 # T1+=h
+
+ mov %rdx,%rcx
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %rdx,%r13
+ mov %rdx,%r14
+
+ ror $28,%rcx
+ ror $34,%r13
+ mov %rdx,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%rcx
+ ror $5,%r13
+ or %r9,%r14 # a|c
+
+ xor %r13,%rcx # h=Sigma0(a)
+ and %r9,%r15 # a&c
+ add %r12,%r10 # d+=T1
+
+ and %r8,%r14 # (a|c)&b
+ add %r12,%rcx # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%rcx # h+=Maj(a,b,c)
+ mov 56(%rsp),%r13
+ mov 32(%rsp),%r12
+
+ mov %r13,%r15
+
+ shr $7,%r13
+ ror $1,%r15
+
+ xor %r15,%r13
+ ror $7,%r15
+
+ xor %r15,%r13 # sigma0(X[(i+1)&0xf])
+ mov %r12,%r14
+
+ shr $6,%r12
+ ror $19,%r14
+
+ xor %r14,%r12
+ ror $42,%r14
+
+ xor %r14,%r12 # sigma1(X[(i+14)&0xf])
+
+ add %r13,%r12
+
+ add 120(%rsp),%r12
+
+ add 48(%rsp),%r12
+ mov %r10,%r13
+ mov %r10,%r14
+ mov %r11,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %rax,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %r10,%r15 # (f^g)&e
+ mov %r12,48(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %rbx,%r12 # T1+=h
+
+ mov %rcx,%rbx
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %rcx,%r13
+ mov %rcx,%r14
+
+ ror $28,%rbx
+ ror $34,%r13
+ mov %rcx,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%rbx
+ ror $5,%r13
+ or %r8,%r14 # a|c
+
+ xor %r13,%rbx # h=Sigma0(a)
+ and %r8,%r15 # a&c
+ add %r12,%r9 # d+=T1
+
+ and %rdx,%r14 # (a|c)&b
+ add %r12,%rbx # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%rbx # h+=Maj(a,b,c)
+ mov 64(%rsp),%r13
+ mov 40(%rsp),%r12
+
+ mov %r13,%r15
+
+ shr $7,%r13
+ ror $1,%r15
+
+ xor %r15,%r13
+ ror $7,%r15
+
+ xor %r15,%r13 # sigma0(X[(i+1)&0xf])
+ mov %r12,%r14
+
+ shr $6,%r12
+ ror $19,%r14
+
+ xor %r14,%r12
+ ror $42,%r14
+
+ xor %r14,%r12 # sigma1(X[(i+14)&0xf])
+
+ add %r13,%r12
+
+ add 0(%rsp),%r12
+
+ add 56(%rsp),%r12
+ mov %r9,%r13
+ mov %r9,%r14
+ mov %r10,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %r11,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %r9,%r15 # (f^g)&e
+ mov %r12,56(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %rax,%r12 # T1+=h
+
+ mov %rbx,%rax
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %rbx,%r13
+ mov %rbx,%r14
+
+ ror $28,%rax
+ ror $34,%r13
+ mov %rbx,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%rax
+ ror $5,%r13
+ or %rdx,%r14 # a|c
+
+ xor %r13,%rax # h=Sigma0(a)
+ and %rdx,%r15 # a&c
+ add %r12,%r8 # d+=T1
+
+ and %rcx,%r14 # (a|c)&b
+ add %r12,%rax # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%rax # h+=Maj(a,b,c)
+ mov 72(%rsp),%r13
+ mov 48(%rsp),%r12
+
+ mov %r13,%r15
+
+ shr $7,%r13
+ ror $1,%r15
+
+ xor %r15,%r13
+ ror $7,%r15
+
+ xor %r15,%r13 # sigma0(X[(i+1)&0xf])
+ mov %r12,%r14
+
+ shr $6,%r12
+ ror $19,%r14
+
+ xor %r14,%r12
+ ror $42,%r14
+
+ xor %r14,%r12 # sigma1(X[(i+14)&0xf])
+
+ add %r13,%r12
+
+ add 8(%rsp),%r12
+
+ add 64(%rsp),%r12
+ mov %r8,%r13
+ mov %r8,%r14
+ mov %r9,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %r10,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %r8,%r15 # (f^g)&e
+ mov %r12,64(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %r11,%r12 # T1+=h
+
+ mov %rax,%r11
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %rax,%r13
+ mov %rax,%r14
+
+ ror $28,%r11
+ ror $34,%r13
+ mov %rax,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%r11
+ ror $5,%r13
+ or %rcx,%r14 # a|c
+
+ xor %r13,%r11 # h=Sigma0(a)
+ and %rcx,%r15 # a&c
+ add %r12,%rdx # d+=T1
+
+ and %rbx,%r14 # (a|c)&b
+ add %r12,%r11 # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%r11 # h+=Maj(a,b,c)
+ mov 80(%rsp),%r13
+ mov 56(%rsp),%r12
+
+ mov %r13,%r15
+
+ shr $7,%r13
+ ror $1,%r15
+
+ xor %r15,%r13
+ ror $7,%r15
+
+ xor %r15,%r13 # sigma0(X[(i+1)&0xf])
+ mov %r12,%r14
+
+ shr $6,%r12
+ ror $19,%r14
+
+ xor %r14,%r12
+ ror $42,%r14
+
+ xor %r14,%r12 # sigma1(X[(i+14)&0xf])
+
+ add %r13,%r12
+
+ add 16(%rsp),%r12
+
+ add 72(%rsp),%r12
+ mov %rdx,%r13
+ mov %rdx,%r14
+ mov %r8,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %r9,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %rdx,%r15 # (f^g)&e
+ mov %r12,72(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %r10,%r12 # T1+=h
+
+ mov %r11,%r10
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %r11,%r13
+ mov %r11,%r14
+
+ ror $28,%r10
+ ror $34,%r13
+ mov %r11,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%r10
+ ror $5,%r13
+ or %rbx,%r14 # a|c
+
+ xor %r13,%r10 # h=Sigma0(a)
+ and %rbx,%r15 # a&c
+ add %r12,%rcx # d+=T1
+
+ and %rax,%r14 # (a|c)&b
+ add %r12,%r10 # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%r10 # h+=Maj(a,b,c)
+ mov 88(%rsp),%r13
+ mov 64(%rsp),%r12
+
+ mov %r13,%r15
+
+ shr $7,%r13
+ ror $1,%r15
+
+ xor %r15,%r13
+ ror $7,%r15
+
+ xor %r15,%r13 # sigma0(X[(i+1)&0xf])
+ mov %r12,%r14
+
+ shr $6,%r12
+ ror $19,%r14
+
+ xor %r14,%r12
+ ror $42,%r14
+
+ xor %r14,%r12 # sigma1(X[(i+14)&0xf])
+
+ add %r13,%r12
+
+ add 24(%rsp),%r12
+
+ add 80(%rsp),%r12
+ mov %rcx,%r13
+ mov %rcx,%r14
+ mov %rdx,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %r8,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %rcx,%r15 # (f^g)&e
+ mov %r12,80(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %r9,%r12 # T1+=h
+
+ mov %r10,%r9
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %r10,%r13
+ mov %r10,%r14
+
+ ror $28,%r9
+ ror $34,%r13
+ mov %r10,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%r9
+ ror $5,%r13
+ or %rax,%r14 # a|c
+
+ xor %r13,%r9 # h=Sigma0(a)
+ and %rax,%r15 # a&c
+ add %r12,%rbx # d+=T1
+
+ and %r11,%r14 # (a|c)&b
+ add %r12,%r9 # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%r9 # h+=Maj(a,b,c)
+ mov 96(%rsp),%r13
+ mov 72(%rsp),%r12
+
+ mov %r13,%r15
+
+ shr $7,%r13
+ ror $1,%r15
+
+ xor %r15,%r13
+ ror $7,%r15
+
+ xor %r15,%r13 # sigma0(X[(i+1)&0xf])
+ mov %r12,%r14
+
+ shr $6,%r12
+ ror $19,%r14
+
+ xor %r14,%r12
+ ror $42,%r14
+
+ xor %r14,%r12 # sigma1(X[(i+14)&0xf])
+
+ add %r13,%r12
+
+ add 32(%rsp),%r12
+
+ add 88(%rsp),%r12
+ mov %rbx,%r13
+ mov %rbx,%r14
+ mov %rcx,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %rdx,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %rbx,%r15 # (f^g)&e
+ mov %r12,88(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %r8,%r12 # T1+=h
+
+ mov %r9,%r8
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %r9,%r13
+ mov %r9,%r14
+
+ ror $28,%r8
+ ror $34,%r13
+ mov %r9,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%r8
+ ror $5,%r13
+ or %r11,%r14 # a|c
+
+ xor %r13,%r8 # h=Sigma0(a)
+ and %r11,%r15 # a&c
+ add %r12,%rax # d+=T1
+
+ and %r10,%r14 # (a|c)&b
+ add %r12,%r8 # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%r8 # h+=Maj(a,b,c)
+ mov 104(%rsp),%r13
+ mov 80(%rsp),%r12
+
+ mov %r13,%r15
+
+ shr $7,%r13
+ ror $1,%r15
+
+ xor %r15,%r13
+ ror $7,%r15
+
+ xor %r15,%r13 # sigma0(X[(i+1)&0xf])
+ mov %r12,%r14
+
+ shr $6,%r12
+ ror $19,%r14
+
+ xor %r14,%r12
+ ror $42,%r14
+
+ xor %r14,%r12 # sigma1(X[(i+14)&0xf])
+
+ add %r13,%r12
+
+ add 40(%rsp),%r12
+
+ add 96(%rsp),%r12
+ mov %rax,%r13
+ mov %rax,%r14
+ mov %rbx,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %rcx,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %rax,%r15 # (f^g)&e
+ mov %r12,96(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %rdx,%r12 # T1+=h
+
+ mov %r8,%rdx
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %r8,%r13
+ mov %r8,%r14
+
+ ror $28,%rdx
+ ror $34,%r13
+ mov %r8,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%rdx
+ ror $5,%r13
+ or %r10,%r14 # a|c
+
+ xor %r13,%rdx # h=Sigma0(a)
+ and %r10,%r15 # a&c
+ add %r12,%r11 # d+=T1
+
+ and %r9,%r14 # (a|c)&b
+ add %r12,%rdx # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%rdx # h+=Maj(a,b,c)
+ mov 112(%rsp),%r13
+ mov 88(%rsp),%r12
+
+ mov %r13,%r15
+
+ shr $7,%r13
+ ror $1,%r15
+
+ xor %r15,%r13
+ ror $7,%r15
+
+ xor %r15,%r13 # sigma0(X[(i+1)&0xf])
+ mov %r12,%r14
+
+ shr $6,%r12
+ ror $19,%r14
+
+ xor %r14,%r12
+ ror $42,%r14
+
+ xor %r14,%r12 # sigma1(X[(i+14)&0xf])
+
+ add %r13,%r12
+
+ add 48(%rsp),%r12
+
+ add 104(%rsp),%r12
+ mov %r11,%r13
+ mov %r11,%r14
+ mov %rax,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %rbx,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %r11,%r15 # (f^g)&e
+ mov %r12,104(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %rcx,%r12 # T1+=h
+
+ mov %rdx,%rcx
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %rdx,%r13
+ mov %rdx,%r14
+
+ ror $28,%rcx
+ ror $34,%r13
+ mov %rdx,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%rcx
+ ror $5,%r13
+ or %r9,%r14 # a|c
+
+ xor %r13,%rcx # h=Sigma0(a)
+ and %r9,%r15 # a&c
+ add %r12,%r10 # d+=T1
+
+ and %r8,%r14 # (a|c)&b
+ add %r12,%rcx # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%rcx # h+=Maj(a,b,c)
+ mov 120(%rsp),%r13
+ mov 96(%rsp),%r12
+
+ mov %r13,%r15
+
+ shr $7,%r13
+ ror $1,%r15
+
+ xor %r15,%r13
+ ror $7,%r15
+
+ xor %r15,%r13 # sigma0(X[(i+1)&0xf])
+ mov %r12,%r14
+
+ shr $6,%r12
+ ror $19,%r14
+
+ xor %r14,%r12
+ ror $42,%r14
+
+ xor %r14,%r12 # sigma1(X[(i+14)&0xf])
+
+ add %r13,%r12
+
+ add 56(%rsp),%r12
+
+ add 112(%rsp),%r12
+ mov %r10,%r13
+ mov %r10,%r14
+ mov %r11,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %rax,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %r10,%r15 # (f^g)&e
+ mov %r12,112(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %rbx,%r12 # T1+=h
+
+ mov %rcx,%rbx
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %rcx,%r13
+ mov %rcx,%r14
+
+ ror $28,%rbx
+ ror $34,%r13
+ mov %rcx,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%rbx
+ ror $5,%r13
+ or %r8,%r14 # a|c
+
+ xor %r13,%rbx # h=Sigma0(a)
+ and %r8,%r15 # a&c
+ add %r12,%r9 # d+=T1
+
+ and %rdx,%r14 # (a|c)&b
+ add %r12,%rbx # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%rbx # h+=Maj(a,b,c)
+ mov 0(%rsp),%r13
+ mov 104(%rsp),%r12
+
+ mov %r13,%r15
+
+ shr $7,%r13
+ ror $1,%r15
+
+ xor %r15,%r13
+ ror $7,%r15
+
+ xor %r15,%r13 # sigma0(X[(i+1)&0xf])
+ mov %r12,%r14
+
+ shr $6,%r12
+ ror $19,%r14
+
+ xor %r14,%r12
+ ror $42,%r14
+
+ xor %r14,%r12 # sigma1(X[(i+14)&0xf])
+
+ add %r13,%r12
+
+ add 64(%rsp),%r12
+
+ add 120(%rsp),%r12
+ mov %r9,%r13
+ mov %r9,%r14
+ mov %r10,%r15
+
+ ror $14,%r13
+ ror $18,%r14
+ xor %r11,%r15 # f^g
+
+ xor %r14,%r13
+ ror $23,%r14
+ and %r9,%r15 # (f^g)&e
+ mov %r12,120(%rsp)
+
+ xor %r14,%r13 # Sigma1(e)
+ xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g
+ add %rax,%r12 # T1+=h
+
+ mov %rbx,%rax
+ add %r13,%r12 # T1+=Sigma1(e)
+
+ add %r15,%r12 # T1+=Ch(e,f,g)
+ mov %rbx,%r13
+ mov %rbx,%r14
+
+ ror $28,%rax
+ ror $34,%r13
+ mov %rbx,%r15
+ add (%rbp,%rdi,8),%r12 # T1+=K[round]
+
+ xor %r13,%rax
+ ror $5,%r13
+ or %rdx,%r14 # a|c
+
+ xor %r13,%rax # h=Sigma0(a)
+ and %rdx,%r15 # a&c
+ add %r12,%r8 # d+=T1
+
+ and %rcx,%r14 # (a|c)&b
+ add %r12,%rax # h+=T1
+
+ or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c)
+ lea 1(%rdi),%rdi # round++
+
+ add %r14,%rax # h+=Maj(a,b,c)
+ cmp $80,%rdi
+ jb .Lrounds_16_xx
+
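+	# 80 rounds done for this 128-byte block: as in the SHA-256
+	# flavor, fold a..h back into the saved state and loop while
+	# input remains.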
+ mov 16*8+0*8(%rsp),%rdi
+ lea 16*8(%rsi),%rsi
+
+ add 8*0(%rdi),%rax
+ add 8*1(%rdi),%rbx
+ add 8*2(%rdi),%rcx
+ add 8*3(%rdi),%rdx
+ add 8*4(%rdi),%r8
+ add 8*5(%rdi),%r9
+ add 8*6(%rdi),%r10
+ add 8*7(%rdi),%r11
+
+ cmp 16*8+2*8(%rsp),%rsi
+
+ mov %rax,8*0(%rdi)
+ mov %rbx,8*1(%rdi)
+ mov %rcx,8*2(%rdi)
+ mov %rdx,8*3(%rdi)
+ mov %r8,8*4(%rdi)
+ mov %r9,8*5(%rdi)
+ mov %r10,8*6(%rdi)
+ mov %r11,8*7(%rdi)
+ jb .Lloop
+
+ mov 16*8+3*8(%rsp),%rsp
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+ ret
+SET_SIZE(SHA512TransformBlocks)
+
+.data
+.align 64
+.type K512,@object
+K512:
+ .quad 0x428a2f98d728ae22,0x7137449123ef65cd
+ .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+ .quad 0x3956c25bf348b538,0x59f111f1b605d019
+ .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+ .quad 0xd807aa98a3030242,0x12835b0145706fbe
+ .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+ .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+ .quad 0x9bdc06a725c71235,0xc19bf174cf692694
+ .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+ .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+ .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+ .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+ .quad 0x983e5152ee66dfab,0xa831c66d2db43210
+ .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+ .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+ .quad 0x06ca6351e003826f,0x142929670a0e6e70
+ .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+ .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+ .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+ .quad 0x81c2c92e47edaee6,0x92722c851482353b
+ .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+ .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+ .quad 0xd192e819d6ef5218,0xd69906245565a910
+ .quad 0xf40e35855771202a,0x106aa07032bbd1b8
+ .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+ .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+ .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+ .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+ .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+ .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+ .quad 0x90befffa23631e28,0xa4506cebde82bde9
+ .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+ .quad 0xca273eceea26619c,0xd186b8c721c0c207
+ .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+ .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+ .quad 0x113f9804bef90dae,0x1b710b35131c471b
+ .quad 0x28db77f523047d84,0x32caab7b40c72493
+ .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+ .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+ .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+#endif /* !lint && !__lint */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/zfs/module/icp/core/kcf_callprov.c b/zfs/module/icp/core/kcf_callprov.c
new file mode 100644
index 000000000000..fd2f7e1aac3d
--- /dev/null
+++ b/zfs/module/icp/core/kcf_callprov.c
@@ -0,0 +1,1567 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/sched_impl.h>
+
+static int kcf_emulate_dual(kcf_provider_desc_t *, crypto_ctx_t *,
+ kcf_req_params_t *);
+
+void
+kcf_free_triedlist(kcf_prov_tried_t *list)
+{
+ kcf_prov_tried_t *l;
+
+ while ((l = list) != NULL) {
+ list = list->pt_next;
+ KCF_PROV_REFRELE(l->pt_pd);
+ kmem_free(l, sizeof (kcf_prov_tried_t));
+ }
+}
+
+kcf_prov_tried_t *
+kcf_insert_triedlist(kcf_prov_tried_t **list, kcf_provider_desc_t *pd,
+ int kmflag)
+{
+ kcf_prov_tried_t *l;
+
+ l = kmem_alloc(sizeof (kcf_prov_tried_t), kmflag);
+ if (l == NULL)
+ return (NULL);
+
+ l->pt_pd = pd;
+ l->pt_next = *list;
+ *list = l;
+
+ return (l);
+}
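+
+/*
+ * A usage sketch (an illustration, not from the original source): a
+ * caller retrying a request across providers records each provider it
+ * has already tried so that it is not picked again; try_request() is
+ * hypothetical:
+ *
+ *	kcf_prov_tried_t *tried = NULL;
+ *	kcf_provider_desc_t *pd;
+ *
+ *	while ((pd = kcf_get_mech_provider(mech, &me, &err, tried,
+ *	    fg, B_FALSE, len)) != NULL) {
+ *		if (try_request(pd) == CRYPTO_SUCCESS)
+ *			break;
+ *		(void) kcf_insert_triedlist(&tried, pd, KM_SLEEP);
+ *	}
+ *	if (tried != NULL)
+ *		kcf_free_triedlist(tried);
+ *
+ * kcf_free_triedlist() releases the provider holds accumulated in the
+ * list, so each provider must be held when inserted (as it is when
+ * returned by kcf_get_mech_provider()).
+ */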
+
+static boolean_t
+is_in_triedlist(kcf_provider_desc_t *pd, kcf_prov_tried_t *triedl)
+{
+ while (triedl != NULL) {
+ if (triedl->pt_pd == pd)
+ return (B_TRUE);
+ triedl = triedl->pt_next;
+	}
+
+ return (B_FALSE);
+}
+
+/*
+ * Search a mech entry's hardware provider list for the specified
+ * provider. Return true if found.
+ */
+static boolean_t
+is_valid_provider_for_mech(kcf_provider_desc_t *pd, kcf_mech_entry_t *me,
+ crypto_func_group_t fg)
+{
+ kcf_prov_mech_desc_t *prov_chain;
+
+ prov_chain = me->me_hw_prov_chain;
+ if (prov_chain != NULL) {
+ ASSERT(me->me_num_hwprov > 0);
+ for (; prov_chain != NULL; prov_chain = prov_chain->pm_next) {
+ if (prov_chain->pm_prov_desc == pd &&
+ IS_FG_SUPPORTED(prov_chain, fg)) {
+ return (B_TRUE);
+ }
+ }
+ }
+ return (B_FALSE);
+}
+
+/*
+ * This routine, given a logical provider, returns the least loaded
+ * real provider belonging to that logical provider. The provider must
+ * be able to do the specified mechanism(s) for the given function
+ * group, i.e. we check that the mechanism hasn't been disabled for it,
+ * and restricted providers are skipped when call_restrict is set. On
+ * success the selected provider is held and returned through *new and
+ * CRYPTO_SUCCESS is returned; otherwise *new is set to NULL and an
+ * error code is returned.
+ */
+int
+kcf_get_hardware_provider(crypto_mech_type_t mech_type_1,
+ crypto_mech_type_t mech_type_2, boolean_t call_restrict,
+ kcf_provider_desc_t *old, kcf_provider_desc_t **new, crypto_func_group_t fg)
+{
+ kcf_provider_desc_t *provider, *real_pd = old;
+ kcf_provider_desc_t *gpd = NULL; /* good provider */
+ kcf_provider_desc_t *bpd = NULL; /* busy provider */
+ kcf_provider_list_t *p;
+ kcf_ops_class_t class;
+ kcf_mech_entry_t *me;
+ kcf_mech_entry_tab_t *me_tab;
+ int index, len, gqlen = INT_MAX, rv = CRYPTO_SUCCESS;
+
+ /* get the mech entry for the specified mechanism */
+ class = KCF_MECH2CLASS(mech_type_1);
+ if ((class < KCF_FIRST_OPSCLASS) || (class > KCF_LAST_OPSCLASS)) {
+ return (CRYPTO_MECHANISM_INVALID);
+ }
+
+ me_tab = &kcf_mech_tabs_tab[class];
+ index = KCF_MECH2INDEX(mech_type_1);
+ if ((index < 0) || (index >= me_tab->met_size)) {
+ return (CRYPTO_MECHANISM_INVALID);
+ }
+
+ me = &((me_tab->met_tab)[index]);
+ mutex_enter(&me->me_mutex);
+
+ /*
+ * We assume the provider descriptor will not go away because
+ * it is being held somewhere, i.e. its reference count has been
+ * incremented. In the case of the crypto module, the provider
+ * descriptor is held by the session structure.
+ */
+ if (old->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ if (old->pd_provider_list == NULL) {
+ real_pd = NULL;
+ rv = CRYPTO_DEVICE_ERROR;
+ goto out;
+ }
+ /*
+ * Find the least loaded real provider. KCF_PROV_LOAD gives
+ * the load (number of pending requests) of the provider.
+ */
+ mutex_enter(&old->pd_lock);
+ p = old->pd_provider_list;
+ while (p != NULL) {
+ provider = p->pl_provider;
+
+ ASSERT(provider->pd_prov_type !=
+ CRYPTO_LOGICAL_PROVIDER);
+
+ if (call_restrict &&
+ (provider->pd_flags & KCF_PROV_RESTRICTED)) {
+ p = p->pl_next;
+ continue;
+ }
+
+ if (!is_valid_provider_for_mech(provider, me, fg)) {
+ p = p->pl_next;
+ continue;
+ }
+
+ /* provider does second mech */
+ if (mech_type_2 != CRYPTO_MECH_INVALID) {
+ int i;
+
+ i = KCF_TO_PROV_MECH_INDX(provider,
+ mech_type_2);
+ if (i == KCF_INVALID_INDX) {
+ p = p->pl_next;
+ continue;
+ }
+ }
+
+ if (provider->pd_state != KCF_PROV_READY) {
+ /* choose BUSY if no READY providers */
+ if (provider->pd_state == KCF_PROV_BUSY)
+ bpd = provider;
+ p = p->pl_next;
+ continue;
+ }
+
+ len = KCF_PROV_LOAD(provider);
+ if (len < gqlen) {
+ gqlen = len;
+ gpd = provider;
+ }
+
+ p = p->pl_next;
+ }
+
+ if (gpd != NULL) {
+ real_pd = gpd;
+ KCF_PROV_REFHOLD(real_pd);
+ } else if (bpd != NULL) {
+ real_pd = bpd;
+ KCF_PROV_REFHOLD(real_pd);
+ } else {
+ /* can't find provider */
+ real_pd = NULL;
+ rv = CRYPTO_MECHANISM_INVALID;
+ }
+ mutex_exit(&old->pd_lock);
+
+ } else {
+ if (!KCF_IS_PROV_USABLE(old) ||
+ (call_restrict && (old->pd_flags & KCF_PROV_RESTRICTED))) {
+ real_pd = NULL;
+ rv = CRYPTO_DEVICE_ERROR;
+ goto out;
+ }
+
+ if (!is_valid_provider_for_mech(old, me, fg)) {
+ real_pd = NULL;
+ rv = CRYPTO_MECHANISM_INVALID;
+ goto out;
+ }
+
+ KCF_PROV_REFHOLD(real_pd);
+ }
+out:
+ mutex_exit(&me->me_mutex);
+ *new = real_pd;
+ return (rv);
+}
+
+/*
+ * Return the best provider for the specified mechanism. The provider
+ * is held and it is the caller's responsibility to release it when done.
+ * The fg input argument is used as a search criterion to pick a provider.
+ * A provider has to support this function group to be picked.
+ *
+ * Find the least loaded provider in the list of providers. We do a linear
+ * search to find one. This is fine as we assume there is only a small
+ * number of providers in this list. If this assumption ever changes,
+ * we should revisit this.
+ *
+ * call_restrict indicates that the caller is not allowed to use
+ * restricted providers.
+ */
+kcf_provider_desc_t *
+kcf_get_mech_provider(crypto_mech_type_t mech_type, kcf_mech_entry_t **mepp,
+ int *error, kcf_prov_tried_t *triedl, crypto_func_group_t fg,
+ boolean_t call_restrict, size_t data_size)
+{
+ kcf_provider_desc_t *pd = NULL, *gpd = NULL;
+ kcf_prov_mech_desc_t *prov_chain, *mdesc;
+ int len, gqlen = INT_MAX;
+ kcf_ops_class_t class;
+ int index;
+ kcf_mech_entry_t *me;
+ kcf_mech_entry_tab_t *me_tab;
+
+ class = KCF_MECH2CLASS(mech_type);
+ if ((class < KCF_FIRST_OPSCLASS) || (class > KCF_LAST_OPSCLASS)) {
+ *error = CRYPTO_MECHANISM_INVALID;
+ return (NULL);
+ }
+
+ me_tab = &kcf_mech_tabs_tab[class];
+ index = KCF_MECH2INDEX(mech_type);
+ if ((index < 0) || (index >= me_tab->met_size)) {
+ *error = CRYPTO_MECHANISM_INVALID;
+ return (NULL);
+ }
+
+ me = &((me_tab->met_tab)[index]);
+ if (mepp != NULL)
+ *mepp = me;
+
+ mutex_enter(&me->me_mutex);
+
+ prov_chain = me->me_hw_prov_chain;
+
+ /*
+	 * We check the data size against the threshold for using a
+	 * hardware provider. If there is no software provider available
+ * for the mechanism, then the threshold is ignored.
+ */
+ if ((prov_chain != NULL) &&
+ ((data_size == 0) || (me->me_threshold == 0) ||
+ (data_size >= me->me_threshold) ||
+ ((mdesc = me->me_sw_prov) == NULL) ||
+ (!IS_FG_SUPPORTED(mdesc, fg)) ||
+ (!KCF_IS_PROV_USABLE(mdesc->pm_prov_desc)))) {
+ ASSERT(me->me_num_hwprov > 0);
+ /* there is at least one provider */
+
+ /*
+ * Find the least loaded real provider. KCF_PROV_LOAD gives
+ * the load (number of pending requests) of the provider.
+ */
+ while (prov_chain != NULL) {
+ pd = prov_chain->pm_prov_desc;
+
+ if (!IS_FG_SUPPORTED(prov_chain, fg) ||
+ !KCF_IS_PROV_USABLE(pd) ||
+ IS_PROVIDER_TRIED(pd, triedl) ||
+ (call_restrict &&
+ (pd->pd_flags & KCF_PROV_RESTRICTED))) {
+ prov_chain = prov_chain->pm_next;
+ continue;
+ }
+
+ if ((len = KCF_PROV_LOAD(pd)) < gqlen) {
+ gqlen = len;
+ gpd = pd;
+ }
+
+ prov_chain = prov_chain->pm_next;
+ }
+
+ pd = gpd;
+ }
+
+ /* No HW provider for this mech, is there a SW provider? */
+ if (pd == NULL && (mdesc = me->me_sw_prov) != NULL) {
+ pd = mdesc->pm_prov_desc;
+ if (!IS_FG_SUPPORTED(mdesc, fg) ||
+ !KCF_IS_PROV_USABLE(pd) ||
+ IS_PROVIDER_TRIED(pd, triedl) ||
+ (call_restrict && (pd->pd_flags & KCF_PROV_RESTRICTED)))
+ pd = NULL;
+ }
+
+ if (pd == NULL) {
+ /*
+		 * We do not want to report CRYPTO_MECH_NOT_SUPPORTED when
+		 * we are in the "fallback to the next provider" case. Rather,
+ * we preserve the error, so that the client gets the right
+ * error code.
+ */
+ if (triedl == NULL)
+ *error = CRYPTO_MECH_NOT_SUPPORTED;
+ } else
+ KCF_PROV_REFHOLD(pd);
+
+ mutex_exit(&me->me_mutex);
+ return (pd);
+}
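+
+/*
+ * Illustrative sketch only (kept out of the build with #if 0): a typical
+ * consumer pairs kcf_get_mech_provider() with the tried list above to
+ * retry a failed request on another provider. mech_type and
+ * do_operation() are assumed to exist in the caller; they are not part
+ * of this file.
+ */
+#if 0
+	kcf_prov_tried_t *list = NULL;
+	kcf_provider_desc_t *pd;
+	int error;
+
+	while ((pd = kcf_get_mech_provider(mech_type, NULL, &error, list,
+	    CRYPTO_FG_DIGEST_ATOMIC, B_FALSE, 0)) != NULL) {
+		if (do_operation(pd) == CRYPTO_SUCCESS)
+			break;
+		/* the list takes over the reference held on pd */
+		if (kcf_insert_triedlist(&list, pd, KM_SLEEP) == NULL)
+			break;
+	}
+	if (list != NULL)
+		kcf_free_triedlist(list);	/* drops the held references */
+#endif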
+
+/*
+ * Very similar to kcf_get_mech_provider(). Finds the best provider capable of
+ * a dual operation with both me1 and me2.
+ * When no dual-ops capable provider is available, returns the best provider
+ * for me1 only and sets *prov_mt2 to CRYPTO_MECH_INVALID.
+ * We assume/expect that a slower HW provider capable of the dual operation
+ * is still faster than the two fastest providers capable of the individual
+ * operations separately.
+ */
+kcf_provider_desc_t *
+kcf_get_dual_provider(crypto_mechanism_t *mech1, crypto_mechanism_t *mech2,
+ kcf_mech_entry_t **mepp, crypto_mech_type_t *prov_mt1,
+ crypto_mech_type_t *prov_mt2, int *error, kcf_prov_tried_t *triedl,
+ crypto_func_group_t fg1, crypto_func_group_t fg2, boolean_t call_restrict,
+ size_t data_size)
+{
+ kcf_provider_desc_t *pd = NULL, *pdm1 = NULL, *pdm1m2 = NULL;
+ kcf_prov_mech_desc_t *prov_chain, *mdesc;
+ int len, gqlen = INT_MAX, dgqlen = INT_MAX;
+ crypto_mech_info_list_t *mil;
+ crypto_mech_type_t m2id = mech2->cm_type;
+ kcf_mech_entry_t *me;
+
+ /* when mech is a valid mechanism, me will be its mech_entry */
+ if (kcf_get_mech_entry(mech1->cm_type, &me) != KCF_SUCCESS) {
+ *error = CRYPTO_MECHANISM_INVALID;
+ return (NULL);
+ }
+
+ *prov_mt2 = CRYPTO_MECH_INVALID;
+
+ if (mepp != NULL)
+ *mepp = me;
+ mutex_enter(&me->me_mutex);
+
+ prov_chain = me->me_hw_prov_chain;
+ /*
+ * We check the threshold for using a hardware provider for
+ * this amount of data. If there is no software provider available
+ * for the first mechanism, then the threshold is ignored.
+ */
+ if ((prov_chain != NULL) &&
+ ((data_size == 0) || (me->me_threshold == 0) ||
+ (data_size >= me->me_threshold) ||
+ ((mdesc = me->me_sw_prov) == NULL) ||
+ (!IS_FG_SUPPORTED(mdesc, fg1)) ||
+ (!KCF_IS_PROV_USABLE(mdesc->pm_prov_desc)))) {
+ /* there is at least one provider */
+ ASSERT(me->me_num_hwprov > 0);
+
+ /*
+ * Find the least loaded provider capable of the combo
+ * me1 + me2, and save a pointer to the least loaded
+ * provider capable of me1 only.
+ */
+ while (prov_chain != NULL) {
+ pd = prov_chain->pm_prov_desc;
+ len = KCF_PROV_LOAD(pd);
+
+ if (!IS_FG_SUPPORTED(prov_chain, fg1) ||
+ !KCF_IS_PROV_USABLE(pd) ||
+ IS_PROVIDER_TRIED(pd, triedl) ||
+ (call_restrict &&
+ (pd->pd_flags & KCF_PROV_RESTRICTED))) {
+ prov_chain = prov_chain->pm_next;
+ continue;
+ }
+
+ /* Save the best provider capable of m1 */
+ if (len < gqlen) {
+ *prov_mt1 =
+ prov_chain->pm_mech_info.cm_mech_number;
+ gqlen = len;
+ pdm1 = pd;
+ }
+
+ /* See if pd can do me2 too */
+ for (mil = prov_chain->pm_mi_list;
+ mil != NULL; mil = mil->ml_next) {
+ if ((mil->ml_mech_info.cm_func_group_mask &
+ fg2) == 0)
+ continue;
+
+ if ((mil->ml_kcf_mechid == m2id) &&
+ (len < dgqlen)) {
+ /* Bingo! */
+ dgqlen = len;
+ pdm1m2 = pd;
+ *prov_mt2 =
+ mil->ml_mech_info.cm_mech_number;
+ *prov_mt1 = prov_chain->
+ pm_mech_info.cm_mech_number;
+ break;
+ }
+ }
+
+ prov_chain = prov_chain->pm_next;
+ }
+
+ pd = (pdm1m2 != NULL) ? pdm1m2 : pdm1;
+ }
+
+ /* no HW provider for this mech, is there a SW provider? */
+ if (pd == NULL && (mdesc = me->me_sw_prov) != NULL) {
+ pd = mdesc->pm_prov_desc;
+ if (!IS_FG_SUPPORTED(mdesc, fg1) ||
+ !KCF_IS_PROV_USABLE(pd) ||
+ IS_PROVIDER_TRIED(pd, triedl) ||
+ (call_restrict && (pd->pd_flags & KCF_PROV_RESTRICTED)))
+ pd = NULL;
+ else {
+ /* See if pd can do me2 too */
+ for (mil = me->me_sw_prov->pm_mi_list;
+ mil != NULL; mil = mil->ml_next) {
+ if ((mil->ml_mech_info.cm_func_group_mask &
+ fg2) == 0)
+ continue;
+
+ if (mil->ml_kcf_mechid == m2id) {
+ /* Bingo! */
+ *prov_mt2 =
+ mil->ml_mech_info.cm_mech_number;
+ break;
+ }
+ }
+ *prov_mt1 = me->me_sw_prov->pm_mech_info.cm_mech_number;
+ }
+ }
+
+ if (pd == NULL)
+ *error = CRYPTO_MECH_NOT_SUPPORTED;
+ else
+ KCF_PROV_REFHOLD(pd);
+
+ mutex_exit(&me->me_mutex);
+ return (pd);
+}
+
+/*
+ * Do the actual work of calling the provider routines.
+ *
+ * pd - Provider structure
+ * ctx - Context for this operation
+ * params - Parameters for this operation
+ * rhndl - Request handle to use for notification
+ *
+ * The return values are the same as that of the respective SPI.
+ */
+int
+common_submit_request(kcf_provider_desc_t *pd, crypto_ctx_t *ctx,
+ kcf_req_params_t *params, crypto_req_handle_t rhndl)
+{
+ int err = CRYPTO_ARGUMENTS_BAD;
+ kcf_op_type_t optype;
+
+ optype = params->rp_optype;
+
+ switch (params->rp_opgrp) {
+ case KCF_OG_DIGEST: {
+ kcf_digest_ops_params_t *dops = ¶ms->rp_u.digest_params;
+
+ switch (optype) {
+ case KCF_OP_INIT:
+ /*
+ * We should do this only here and not in KCF_WRAP_*
+ * macros. This is because we may want to try other
+ * providers, in case we recover from a failure.
+ */
+ KCF_SET_PROVIDER_MECHNUM(dops->do_framework_mechtype,
+ pd, &dops->do_mech);
+
+ err = KCF_PROV_DIGEST_INIT(pd, ctx, &dops->do_mech,
+ rhndl);
+ break;
+
+ case KCF_OP_SINGLE:
+ err = KCF_PROV_DIGEST(pd, ctx, dops->do_data,
+ dops->do_digest, rhndl);
+ break;
+
+ case KCF_OP_UPDATE:
+ err = KCF_PROV_DIGEST_UPDATE(pd, ctx,
+ dops->do_data, rhndl);
+ break;
+
+ case KCF_OP_FINAL:
+ err = KCF_PROV_DIGEST_FINAL(pd, ctx,
+ dops->do_digest, rhndl);
+ break;
+
+ case KCF_OP_ATOMIC:
+ ASSERT(ctx == NULL);
+ KCF_SET_PROVIDER_MECHNUM(dops->do_framework_mechtype,
+ pd, &dops->do_mech);
+ err = KCF_PROV_DIGEST_ATOMIC(pd, dops->do_sid,
+ &dops->do_mech, dops->do_data, dops->do_digest,
+ rhndl);
+ break;
+
+ case KCF_OP_DIGEST_KEY:
+ err = KCF_PROV_DIGEST_KEY(pd, ctx, dops->do_digest_key,
+ rhndl);
+ break;
+
+ default:
+ break;
+ }
+ break;
+ }
+
+ case KCF_OG_MAC: {
+ kcf_mac_ops_params_t *mops = ¶ms->rp_u.mac_params;
+
+ switch (optype) {
+ case KCF_OP_INIT:
+ KCF_SET_PROVIDER_MECHNUM(mops->mo_framework_mechtype,
+ pd, &mops->mo_mech);
+
+ err = KCF_PROV_MAC_INIT(pd, ctx, &mops->mo_mech,
+ mops->mo_key, mops->mo_templ, rhndl);
+ break;
+
+ case KCF_OP_SINGLE:
+ err = KCF_PROV_MAC(pd, ctx, mops->mo_data,
+ mops->mo_mac, rhndl);
+ break;
+
+ case KCF_OP_UPDATE:
+ err = KCF_PROV_MAC_UPDATE(pd, ctx, mops->mo_data,
+ rhndl);
+ break;
+
+ case KCF_OP_FINAL:
+ err = KCF_PROV_MAC_FINAL(pd, ctx, mops->mo_mac, rhndl);
+ break;
+
+ case KCF_OP_ATOMIC:
+ ASSERT(ctx == NULL);
+ KCF_SET_PROVIDER_MECHNUM(mops->mo_framework_mechtype,
+ pd, &mops->mo_mech);
+
+ err = KCF_PROV_MAC_ATOMIC(pd, mops->mo_sid,
+ &mops->mo_mech, mops->mo_key, mops->mo_data,
+ mops->mo_mac, mops->mo_templ, rhndl);
+ break;
+
+ case KCF_OP_MAC_VERIFY_ATOMIC:
+ ASSERT(ctx == NULL);
+ KCF_SET_PROVIDER_MECHNUM(mops->mo_framework_mechtype,
+ pd, &mops->mo_mech);
+
+ err = KCF_PROV_MAC_VERIFY_ATOMIC(pd, mops->mo_sid,
+ &mops->mo_mech, mops->mo_key, mops->mo_data,
+ mops->mo_mac, mops->mo_templ, rhndl);
+ break;
+
+ default:
+ break;
+ }
+ break;
+ }
+
+ case KCF_OG_ENCRYPT: {
+ kcf_encrypt_ops_params_t *eops = ¶ms->rp_u.encrypt_params;
+
+ switch (optype) {
+ case KCF_OP_INIT:
+ KCF_SET_PROVIDER_MECHNUM(eops->eo_framework_mechtype,
+ pd, &eops->eo_mech);
+
+ err = KCF_PROV_ENCRYPT_INIT(pd, ctx, &eops->eo_mech,
+ eops->eo_key, eops->eo_templ, rhndl);
+ break;
+
+ case KCF_OP_SINGLE:
+ err = KCF_PROV_ENCRYPT(pd, ctx, eops->eo_plaintext,
+ eops->eo_ciphertext, rhndl);
+ break;
+
+ case KCF_OP_UPDATE:
+ err = KCF_PROV_ENCRYPT_UPDATE(pd, ctx,
+ eops->eo_plaintext, eops->eo_ciphertext, rhndl);
+ break;
+
+ case KCF_OP_FINAL:
+ err = KCF_PROV_ENCRYPT_FINAL(pd, ctx,
+ eops->eo_ciphertext, rhndl);
+ break;
+
+ case KCF_OP_ATOMIC:
+ ASSERT(ctx == NULL);
+ KCF_SET_PROVIDER_MECHNUM(eops->eo_framework_mechtype,
+ pd, &eops->eo_mech);
+
+ err = KCF_PROV_ENCRYPT_ATOMIC(pd, eops->eo_sid,
+ &eops->eo_mech, eops->eo_key, eops->eo_plaintext,
+ eops->eo_ciphertext, eops->eo_templ, rhndl);
+ break;
+
+ default:
+ break;
+ }
+ break;
+ }
+
+ case KCF_OG_DECRYPT: {
+ kcf_decrypt_ops_params_t *dcrops = ¶ms->rp_u.decrypt_params;
+
+ switch (optype) {
+ case KCF_OP_INIT:
+ KCF_SET_PROVIDER_MECHNUM(dcrops->dop_framework_mechtype,
+ pd, &dcrops->dop_mech);
+
+ err = KCF_PROV_DECRYPT_INIT(pd, ctx, &dcrops->dop_mech,
+ dcrops->dop_key, dcrops->dop_templ, rhndl);
+ break;
+
+ case KCF_OP_SINGLE:
+ err = KCF_PROV_DECRYPT(pd, ctx, dcrops->dop_ciphertext,
+ dcrops->dop_plaintext, rhndl);
+ break;
+
+ case KCF_OP_UPDATE:
+ err = KCF_PROV_DECRYPT_UPDATE(pd, ctx,
+ dcrops->dop_ciphertext, dcrops->dop_plaintext,
+ rhndl);
+ break;
+
+ case KCF_OP_FINAL:
+ err = KCF_PROV_DECRYPT_FINAL(pd, ctx,
+ dcrops->dop_plaintext, rhndl);
+ break;
+
+ case KCF_OP_ATOMIC:
+ ASSERT(ctx == NULL);
+ KCF_SET_PROVIDER_MECHNUM(dcrops->dop_framework_mechtype,
+ pd, &dcrops->dop_mech);
+
+ err = KCF_PROV_DECRYPT_ATOMIC(pd, dcrops->dop_sid,
+ &dcrops->dop_mech, dcrops->dop_key,
+ dcrops->dop_ciphertext, dcrops->dop_plaintext,
+ dcrops->dop_templ, rhndl);
+ break;
+
+ default:
+ break;
+ }
+ break;
+ }
+
+ case KCF_OG_SIGN: {
+ kcf_sign_ops_params_t *sops = ¶ms->rp_u.sign_params;
+
+ switch (optype) {
+ case KCF_OP_INIT:
+ KCF_SET_PROVIDER_MECHNUM(sops->so_framework_mechtype,
+ pd, &sops->so_mech);
+
+ err = KCF_PROV_SIGN_INIT(pd, ctx, &sops->so_mech,
+ sops->so_key, sops->so_templ, rhndl);
+ break;
+
+ case KCF_OP_SIGN_RECOVER_INIT:
+ KCF_SET_PROVIDER_MECHNUM(sops->so_framework_mechtype,
+ pd, &sops->so_mech);
+
+ err = KCF_PROV_SIGN_RECOVER_INIT(pd, ctx,
+ &sops->so_mech, sops->so_key, sops->so_templ,
+ rhndl);
+ break;
+
+ case KCF_OP_SINGLE:
+ err = KCF_PROV_SIGN(pd, ctx, sops->so_data,
+ sops->so_signature, rhndl);
+ break;
+
+ case KCF_OP_SIGN_RECOVER:
+ err = KCF_PROV_SIGN_RECOVER(pd, ctx,
+ sops->so_data, sops->so_signature, rhndl);
+ break;
+
+ case KCF_OP_UPDATE:
+ err = KCF_PROV_SIGN_UPDATE(pd, ctx, sops->so_data,
+ rhndl);
+ break;
+
+ case KCF_OP_FINAL:
+ err = KCF_PROV_SIGN_FINAL(pd, ctx, sops->so_signature,
+ rhndl);
+ break;
+
+ case KCF_OP_ATOMIC:
+ ASSERT(ctx == NULL);
+ KCF_SET_PROVIDER_MECHNUM(sops->so_framework_mechtype,
+ pd, &sops->so_mech);
+
+ err = KCF_PROV_SIGN_ATOMIC(pd, sops->so_sid,
+ &sops->so_mech, sops->so_key, sops->so_data,
+ sops->so_templ, sops->so_signature, rhndl);
+ break;
+
+ case KCF_OP_SIGN_RECOVER_ATOMIC:
+ ASSERT(ctx == NULL);
+ KCF_SET_PROVIDER_MECHNUM(sops->so_framework_mechtype,
+ pd, &sops->so_mech);
+
+ err = KCF_PROV_SIGN_RECOVER_ATOMIC(pd, sops->so_sid,
+ &sops->so_mech, sops->so_key, sops->so_data,
+ sops->so_templ, sops->so_signature, rhndl);
+ break;
+
+ default:
+ break;
+ }
+ break;
+ }
+
+ case KCF_OG_VERIFY: {
+ kcf_verify_ops_params_t *vops = ¶ms->rp_u.verify_params;
+
+ switch (optype) {
+ case KCF_OP_INIT:
+ KCF_SET_PROVIDER_MECHNUM(vops->vo_framework_mechtype,
+ pd, &vops->vo_mech);
+
+ err = KCF_PROV_VERIFY_INIT(pd, ctx, &vops->vo_mech,
+ vops->vo_key, vops->vo_templ, rhndl);
+ break;
+
+ case KCF_OP_VERIFY_RECOVER_INIT:
+ KCF_SET_PROVIDER_MECHNUM(vops->vo_framework_mechtype,
+ pd, &vops->vo_mech);
+
+ err = KCF_PROV_VERIFY_RECOVER_INIT(pd, ctx,
+ &vops->vo_mech, vops->vo_key, vops->vo_templ,
+ rhndl);
+ break;
+
+ case KCF_OP_SINGLE:
+ err = KCF_PROV_VERIFY(pd, ctx, vops->vo_data,
+ vops->vo_signature, rhndl);
+ break;
+
+ case KCF_OP_VERIFY_RECOVER:
+ err = KCF_PROV_VERIFY_RECOVER(pd, ctx,
+ vops->vo_signature, vops->vo_data, rhndl);
+ break;
+
+ case KCF_OP_UPDATE:
+ err = KCF_PROV_VERIFY_UPDATE(pd, ctx, vops->vo_data,
+ rhndl);
+ break;
+
+ case KCF_OP_FINAL:
+ err = KCF_PROV_VERIFY_FINAL(pd, ctx, vops->vo_signature,
+ rhndl);
+ break;
+
+ case KCF_OP_ATOMIC:
+ ASSERT(ctx == NULL);
+ KCF_SET_PROVIDER_MECHNUM(vops->vo_framework_mechtype,
+ pd, &vops->vo_mech);
+
+ err = KCF_PROV_VERIFY_ATOMIC(pd, vops->vo_sid,
+ &vops->vo_mech, vops->vo_key, vops->vo_data,
+ vops->vo_templ, vops->vo_signature, rhndl);
+ break;
+
+ case KCF_OP_VERIFY_RECOVER_ATOMIC:
+ ASSERT(ctx == NULL);
+ KCF_SET_PROVIDER_MECHNUM(vops->vo_framework_mechtype,
+ pd, &vops->vo_mech);
+
+ err = KCF_PROV_VERIFY_RECOVER_ATOMIC(pd, vops->vo_sid,
+ &vops->vo_mech, vops->vo_key, vops->vo_signature,
+ vops->vo_templ, vops->vo_data, rhndl);
+ break;
+
+ default:
+ break;
+ }
+ break;
+ }
+
+ case KCF_OG_ENCRYPT_MAC: {
+ kcf_encrypt_mac_ops_params_t *eops =
+ ¶ms->rp_u.encrypt_mac_params;
+ kcf_context_t *kcf_secondctx;
+
+ switch (optype) {
+ case KCF_OP_INIT:
+ kcf_secondctx = ((kcf_context_t *)
+ (ctx->cc_framework_private))->kc_secondctx;
+
+ if (kcf_secondctx != NULL) {
+ err = kcf_emulate_dual(pd, ctx, params);
+ break;
+ }
+ KCF_SET_PROVIDER_MECHNUM(
+ eops->em_framework_encr_mechtype,
+ pd, &eops->em_encr_mech);
+
+ KCF_SET_PROVIDER_MECHNUM(
+ eops->em_framework_mac_mechtype,
+ pd, &eops->em_mac_mech);
+
+ err = KCF_PROV_ENCRYPT_MAC_INIT(pd, ctx,
+ &eops->em_encr_mech, eops->em_encr_key,
+ &eops->em_mac_mech, eops->em_mac_key,
+ eops->em_encr_templ, eops->em_mac_templ,
+ rhndl);
+
+ break;
+
+ case KCF_OP_SINGLE:
+ err = KCF_PROV_ENCRYPT_MAC(pd, ctx,
+ eops->em_plaintext, eops->em_ciphertext,
+ eops->em_mac, rhndl);
+ break;
+
+ case KCF_OP_UPDATE:
+ kcf_secondctx = ((kcf_context_t *)
+ (ctx->cc_framework_private))->kc_secondctx;
+ if (kcf_secondctx != NULL) {
+ err = kcf_emulate_dual(pd, ctx, params);
+ break;
+ }
+ err = KCF_PROV_ENCRYPT_MAC_UPDATE(pd, ctx,
+ eops->em_plaintext, eops->em_ciphertext, rhndl);
+ break;
+
+ case KCF_OP_FINAL:
+ kcf_secondctx = ((kcf_context_t *)
+ (ctx->cc_framework_private))->kc_secondctx;
+ if (kcf_secondctx != NULL) {
+ err = kcf_emulate_dual(pd, ctx, params);
+ break;
+ }
+ err = KCF_PROV_ENCRYPT_MAC_FINAL(pd, ctx,
+ eops->em_ciphertext, eops->em_mac, rhndl);
+ break;
+
+ case KCF_OP_ATOMIC:
+ ASSERT(ctx == NULL);
+
+ KCF_SET_PROVIDER_MECHNUM(
+ eops->em_framework_encr_mechtype,
+ pd, &eops->em_encr_mech);
+
+ KCF_SET_PROVIDER_MECHNUM(
+ eops->em_framework_mac_mechtype,
+ pd, &eops->em_mac_mech);
+
+ err = KCF_PROV_ENCRYPT_MAC_ATOMIC(pd, eops->em_sid,
+ &eops->em_encr_mech, eops->em_encr_key,
+ &eops->em_mac_mech, eops->em_mac_key,
+ eops->em_plaintext, eops->em_ciphertext,
+ eops->em_mac,
+ eops->em_encr_templ, eops->em_mac_templ,
+ rhndl);
+
+ break;
+
+ default:
+ break;
+ }
+ break;
+ }
+
+ case KCF_OG_MAC_DECRYPT: {
+ kcf_mac_decrypt_ops_params_t *dops =
+ ¶ms->rp_u.mac_decrypt_params;
+ kcf_context_t *kcf_secondctx;
+
+ switch (optype) {
+ case KCF_OP_INIT:
+ kcf_secondctx = ((kcf_context_t *)
+ (ctx->cc_framework_private))->kc_secondctx;
+
+ if (kcf_secondctx != NULL) {
+ err = kcf_emulate_dual(pd, ctx, params);
+ break;
+ }
+ KCF_SET_PROVIDER_MECHNUM(
+ dops->md_framework_mac_mechtype,
+ pd, &dops->md_mac_mech);
+
+ KCF_SET_PROVIDER_MECHNUM(
+ dops->md_framework_decr_mechtype,
+ pd, &dops->md_decr_mech);
+
+ err = KCF_PROV_MAC_DECRYPT_INIT(pd, ctx,
+ &dops->md_mac_mech, dops->md_mac_key,
+ &dops->md_decr_mech, dops->md_decr_key,
+ dops->md_mac_templ, dops->md_decr_templ,
+ rhndl);
+
+ break;
+
+ case KCF_OP_SINGLE:
+ err = KCF_PROV_MAC_DECRYPT(pd, ctx,
+ dops->md_ciphertext, dops->md_mac,
+ dops->md_plaintext, rhndl);
+ break;
+
+ case KCF_OP_UPDATE:
+ kcf_secondctx = ((kcf_context_t *)
+ (ctx->cc_framework_private))->kc_secondctx;
+ if (kcf_secondctx != NULL) {
+ err = kcf_emulate_dual(pd, ctx, params);
+ break;
+ }
+ err = KCF_PROV_MAC_DECRYPT_UPDATE(pd, ctx,
+ dops->md_ciphertext, dops->md_plaintext, rhndl);
+ break;
+
+ case KCF_OP_FINAL:
+ kcf_secondctx = ((kcf_context_t *)
+ (ctx->cc_framework_private))->kc_secondctx;
+ if (kcf_secondctx != NULL) {
+ err = kcf_emulate_dual(pd, ctx, params);
+ break;
+ }
+ err = KCF_PROV_MAC_DECRYPT_FINAL(pd, ctx,
+ dops->md_mac, dops->md_plaintext, rhndl);
+ break;
+
+ case KCF_OP_ATOMIC:
+ ASSERT(ctx == NULL);
+
+ KCF_SET_PROVIDER_MECHNUM(
+ dops->md_framework_mac_mechtype,
+ pd, &dops->md_mac_mech);
+
+ KCF_SET_PROVIDER_MECHNUM(
+ dops->md_framework_decr_mechtype,
+ pd, &dops->md_decr_mech);
+
+ err = KCF_PROV_MAC_DECRYPT_ATOMIC(pd, dops->md_sid,
+ &dops->md_mac_mech, dops->md_mac_key,
+ &dops->md_decr_mech, dops->md_decr_key,
+ dops->md_ciphertext, dops->md_mac,
+ dops->md_plaintext,
+ dops->md_mac_templ, dops->md_decr_templ,
+ rhndl);
+
+ break;
+
+ case KCF_OP_MAC_VERIFY_DECRYPT_ATOMIC:
+ ASSERT(ctx == NULL);
+
+ KCF_SET_PROVIDER_MECHNUM(
+ dops->md_framework_mac_mechtype,
+ pd, &dops->md_mac_mech);
+
+ KCF_SET_PROVIDER_MECHNUM(
+ dops->md_framework_decr_mechtype,
+ pd, &dops->md_decr_mech);
+
+ err = KCF_PROV_MAC_VERIFY_DECRYPT_ATOMIC(pd,
+ dops->md_sid, &dops->md_mac_mech, dops->md_mac_key,
+ &dops->md_decr_mech, dops->md_decr_key,
+ dops->md_ciphertext, dops->md_mac,
+ dops->md_plaintext,
+ dops->md_mac_templ, dops->md_decr_templ,
+ rhndl);
+
+ break;
+
+ default:
+ break;
+ }
+ break;
+ }
+
+ case KCF_OG_KEY: {
+ kcf_key_ops_params_t *kops = ¶ms->rp_u.key_params;
+
+ ASSERT(ctx == NULL);
+ KCF_SET_PROVIDER_MECHNUM(kops->ko_framework_mechtype, pd,
+ &kops->ko_mech);
+
+ switch (optype) {
+ case KCF_OP_KEY_GENERATE:
+ err = KCF_PROV_KEY_GENERATE(pd, kops->ko_sid,
+ &kops->ko_mech,
+ kops->ko_key_template, kops->ko_key_attribute_count,
+ kops->ko_key_object_id_ptr, rhndl);
+ break;
+
+ case KCF_OP_KEY_GENERATE_PAIR:
+ err = KCF_PROV_KEY_GENERATE_PAIR(pd, kops->ko_sid,
+ &kops->ko_mech,
+ kops->ko_key_template, kops->ko_key_attribute_count,
+ kops->ko_private_key_template,
+ kops->ko_private_key_attribute_count,
+ kops->ko_key_object_id_ptr,
+ kops->ko_private_key_object_id_ptr, rhndl);
+ break;
+
+ case KCF_OP_KEY_WRAP:
+ err = KCF_PROV_KEY_WRAP(pd, kops->ko_sid,
+ &kops->ko_mech,
+ kops->ko_key, kops->ko_key_object_id_ptr,
+ kops->ko_wrapped_key, kops->ko_wrapped_key_len_ptr,
+ rhndl);
+ break;
+
+ case KCF_OP_KEY_UNWRAP:
+ err = KCF_PROV_KEY_UNWRAP(pd, kops->ko_sid,
+ &kops->ko_mech,
+ kops->ko_key, kops->ko_wrapped_key,
+ kops->ko_wrapped_key_len_ptr,
+ kops->ko_key_template, kops->ko_key_attribute_count,
+ kops->ko_key_object_id_ptr, rhndl);
+ break;
+
+ case KCF_OP_KEY_DERIVE:
+ err = KCF_PROV_KEY_DERIVE(pd, kops->ko_sid,
+ &kops->ko_mech,
+ kops->ko_key, kops->ko_key_template,
+ kops->ko_key_attribute_count,
+ kops->ko_key_object_id_ptr, rhndl);
+ break;
+
+ default:
+ break;
+ }
+ break;
+ }
+
+ case KCF_OG_RANDOM: {
+ kcf_random_number_ops_params_t *rops =
+ ¶ms->rp_u.random_number_params;
+
+ ASSERT(ctx == NULL);
+
+ switch (optype) {
+ case KCF_OP_RANDOM_SEED:
+ err = KCF_PROV_SEED_RANDOM(pd, rops->rn_sid,
+ rops->rn_buf, rops->rn_buflen, rops->rn_entropy_est,
+ rops->rn_flags, rhndl);
+ break;
+
+ case KCF_OP_RANDOM_GENERATE:
+ err = KCF_PROV_GENERATE_RANDOM(pd, rops->rn_sid,
+ rops->rn_buf, rops->rn_buflen, rhndl);
+ break;
+
+ default:
+ break;
+ }
+ break;
+ }
+
+ case KCF_OG_SESSION: {
+ kcf_session_ops_params_t *sops = ¶ms->rp_u.session_params;
+
+ ASSERT(ctx == NULL);
+ switch (optype) {
+ case KCF_OP_SESSION_OPEN:
+ /*
+ * so_pd may be a logical provider, in which case
+ * we need to check whether it has been removed.
+ */
+ if (KCF_IS_PROV_REMOVED(sops->so_pd)) {
+ err = CRYPTO_DEVICE_ERROR;
+ break;
+ }
+ err = KCF_PROV_SESSION_OPEN(pd, sops->so_sid_ptr,
+ rhndl, sops->so_pd);
+ break;
+
+ case KCF_OP_SESSION_CLOSE:
+ /*
+ * so_pd may be a logical provider, in which case
+ * we need to check whether it has been removed.
+ */
+ if (KCF_IS_PROV_REMOVED(sops->so_pd)) {
+ err = CRYPTO_DEVICE_ERROR;
+ break;
+ }
+ err = KCF_PROV_SESSION_CLOSE(pd, sops->so_sid,
+ rhndl, sops->so_pd);
+ break;
+
+ case KCF_OP_SESSION_LOGIN:
+ err = KCF_PROV_SESSION_LOGIN(pd, sops->so_sid,
+ sops->so_user_type, sops->so_pin,
+ sops->so_pin_len, rhndl);
+ break;
+
+ case KCF_OP_SESSION_LOGOUT:
+ err = KCF_PROV_SESSION_LOGOUT(pd, sops->so_sid, rhndl);
+ break;
+
+ default:
+ break;
+ }
+ break;
+ }
+
+ case KCF_OG_OBJECT: {
+ kcf_object_ops_params_t *jops = ¶ms->rp_u.object_params;
+
+ ASSERT(ctx == NULL);
+ switch (optype) {
+ case KCF_OP_OBJECT_CREATE:
+ err = KCF_PROV_OBJECT_CREATE(pd, jops->oo_sid,
+ jops->oo_template, jops->oo_attribute_count,
+ jops->oo_object_id_ptr, rhndl);
+ break;
+
+ case KCF_OP_OBJECT_COPY:
+ err = KCF_PROV_OBJECT_COPY(pd, jops->oo_sid,
+ jops->oo_object_id,
+ jops->oo_template, jops->oo_attribute_count,
+ jops->oo_object_id_ptr, rhndl);
+ break;
+
+ case KCF_OP_OBJECT_DESTROY:
+ err = KCF_PROV_OBJECT_DESTROY(pd, jops->oo_sid,
+ jops->oo_object_id, rhndl);
+ break;
+
+ case KCF_OP_OBJECT_GET_SIZE:
+ err = KCF_PROV_OBJECT_GET_SIZE(pd, jops->oo_sid,
+ jops->oo_object_id, jops->oo_object_size, rhndl);
+ break;
+
+ case KCF_OP_OBJECT_GET_ATTRIBUTE_VALUE:
+ err = KCF_PROV_OBJECT_GET_ATTRIBUTE_VALUE(pd,
+ jops->oo_sid, jops->oo_object_id,
+ jops->oo_template, jops->oo_attribute_count, rhndl);
+ break;
+
+ case KCF_OP_OBJECT_SET_ATTRIBUTE_VALUE:
+ err = KCF_PROV_OBJECT_SET_ATTRIBUTE_VALUE(pd,
+ jops->oo_sid, jops->oo_object_id,
+ jops->oo_template, jops->oo_attribute_count, rhndl);
+ break;
+
+ case KCF_OP_OBJECT_FIND_INIT:
+ err = KCF_PROV_OBJECT_FIND_INIT(pd, jops->oo_sid,
+ jops->oo_template, jops->oo_attribute_count,
+ jops->oo_find_init_pp_ptr, rhndl);
+ break;
+
+ case KCF_OP_OBJECT_FIND:
+ err = KCF_PROV_OBJECT_FIND(pd, jops->oo_find_pp,
+ jops->oo_object_id_ptr, jops->oo_max_object_count,
+ jops->oo_object_count_ptr, rhndl);
+ break;
+
+ case KCF_OP_OBJECT_FIND_FINAL:
+ err = KCF_PROV_OBJECT_FIND_FINAL(pd, jops->oo_find_pp,
+ rhndl);
+ break;
+
+ default:
+ break;
+ }
+ break;
+ }
+
+ case KCF_OG_PROVMGMT: {
+ kcf_provmgmt_ops_params_t *pops = ¶ms->rp_u.provmgmt_params;
+
+ ASSERT(ctx == NULL);
+ switch (optype) {
+ case KCF_OP_MGMT_EXTINFO:
+ /*
+ * po_pd may be a logical provider, in which case
+ * we need to check whether it has been removed.
+ */
+ if (KCF_IS_PROV_REMOVED(pops->po_pd)) {
+ err = CRYPTO_DEVICE_ERROR;
+ break;
+ }
+ err = KCF_PROV_EXT_INFO(pd, pops->po_ext_info, rhndl,
+ pops->po_pd);
+ break;
+
+ case KCF_OP_MGMT_INITTOKEN:
+ err = KCF_PROV_INIT_TOKEN(pd, pops->po_pin,
+ pops->po_pin_len, pops->po_label, rhndl);
+ break;
+
+ case KCF_OP_MGMT_INITPIN:
+ err = KCF_PROV_INIT_PIN(pd, pops->po_sid, pops->po_pin,
+ pops->po_pin_len, rhndl);
+ break;
+
+ case KCF_OP_MGMT_SETPIN:
+ err = KCF_PROV_SET_PIN(pd, pops->po_sid,
+ pops->po_old_pin, pops->po_old_pin_len,
+ pops->po_pin, pops->po_pin_len, rhndl);
+ break;
+
+ default:
+ break;
+ }
+ break;
+ }
+
+ case KCF_OG_NOSTORE_KEY: {
+ kcf_key_ops_params_t *kops = ¶ms->rp_u.key_params;
+
+ ASSERT(ctx == NULL);
+ KCF_SET_PROVIDER_MECHNUM(kops->ko_framework_mechtype, pd,
+ &kops->ko_mech);
+
+ switch (optype) {
+ case KCF_OP_KEY_GENERATE:
+ err = KCF_PROV_NOSTORE_KEY_GENERATE(pd, kops->ko_sid,
+ &kops->ko_mech, kops->ko_key_template,
+ kops->ko_key_attribute_count,
+ kops->ko_out_template1,
+ kops->ko_out_attribute_count1, rhndl);
+ break;
+
+ case KCF_OP_KEY_GENERATE_PAIR:
+ err = KCF_PROV_NOSTORE_KEY_GENERATE_PAIR(pd,
+ kops->ko_sid, &kops->ko_mech,
+ kops->ko_key_template, kops->ko_key_attribute_count,
+ kops->ko_private_key_template,
+ kops->ko_private_key_attribute_count,
+ kops->ko_out_template1,
+ kops->ko_out_attribute_count1,
+ kops->ko_out_template2,
+ kops->ko_out_attribute_count2,
+ rhndl);
+ break;
+
+ case KCF_OP_KEY_DERIVE:
+ err = KCF_PROV_NOSTORE_KEY_DERIVE(pd, kops->ko_sid,
+ &kops->ko_mech, kops->ko_key,
+ kops->ko_key_template,
+ kops->ko_key_attribute_count,
+ kops->ko_out_template1,
+ kops->ko_out_attribute_count1, rhndl);
+ break;
+
+ default:
+ break;
+ }
+ break;
+ }
+ default:
+ break;
+ } /* end of switch(params->rp_opgrp) */
+
+ KCF_PROV_INCRSTATS(pd, err);
+ return (err);
+}
+
+
+/*
+ * Emulate a multi-part dual operation as two single operations.
+ * This routine is always called in the context of a worker thread
+ * running kcf_svc_do_run().
+ * The single steps are submitted synchronously (blocking).
+ * When this routine returns, kcf_svc_do_run() will call kcf_aop_done()
+ * so the originating consumer's callback gets invoked. kcf_aop_done()
+ * takes care of freeing the operation context. So, this routine does
+ * not free the operation context.
+ *
+ * The provider descriptor is assumed to be held by the caller.
+ */
+static int
+kcf_emulate_dual(kcf_provider_desc_t *pd, crypto_ctx_t *ctx,
+ kcf_req_params_t *params)
+{
+ int err = CRYPTO_ARGUMENTS_BAD;
+ kcf_op_type_t optype;
+ size_t save_len;
+ off_t save_offset;
+
+ optype = params->rp_optype;
+
+ switch (params->rp_opgrp) {
+ case KCF_OG_ENCRYPT_MAC: {
+ kcf_encrypt_mac_ops_params_t *cmops =
+ ¶ms->rp_u.encrypt_mac_params;
+ kcf_context_t *encr_kcf_ctx;
+ crypto_ctx_t *mac_ctx;
+ kcf_req_params_t encr_params;
+
+ encr_kcf_ctx = (kcf_context_t *)(ctx->cc_framework_private);
+
+ switch (optype) {
+ case KCF_OP_INIT: {
+ encr_kcf_ctx->kc_secondctx = NULL;
+
+ KCF_WRAP_ENCRYPT_OPS_PARAMS(&encr_params, KCF_OP_INIT,
+ pd->pd_sid, &cmops->em_encr_mech,
+ cmops->em_encr_key, NULL, NULL,
+ cmops->em_encr_templ);
+
+ err = kcf_submit_request(pd, ctx, NULL, &encr_params,
+ B_FALSE);
+
+ /* It can't be CRYPTO_QUEUED */
+ if (err != CRYPTO_SUCCESS) {
+ break;
+ }
+
+ err = crypto_mac_init(&cmops->em_mac_mech,
+ cmops->em_mac_key, cmops->em_mac_templ,
+ (crypto_context_t *)&mac_ctx, NULL);
+
+ if (err == CRYPTO_SUCCESS) {
+ encr_kcf_ctx->kc_secondctx = (kcf_context_t *)
+ mac_ctx->cc_framework_private;
+ KCF_CONTEXT_REFHOLD((kcf_context_t *)
+ mac_ctx->cc_framework_private);
+ }
+
+ break;
+
+ }
+ case KCF_OP_UPDATE: {
+ crypto_dual_data_t *ct = cmops->em_ciphertext;
+ crypto_data_t *pt = cmops->em_plaintext;
+ kcf_context_t *mac_kcf_ctx = encr_kcf_ctx->kc_secondctx;
+ crypto_ctx_t *mac_ctx = &mac_kcf_ctx->kc_glbl_ctx;
+
+ KCF_WRAP_ENCRYPT_OPS_PARAMS(&encr_params, KCF_OP_UPDATE,
+ pd->pd_sid, NULL, NULL, pt, (crypto_data_t *)ct,
+ NULL);
+
+ err = kcf_submit_request(pd, ctx, NULL, &encr_params,
+ B_FALSE);
+
+ /* It can't be CRYPTO_QUEUED */
+ if (err != CRYPTO_SUCCESS) {
+ break;
+ }
+
+ save_offset = ct->dd_offset1;
+ save_len = ct->dd_len1;
+ if (ct->dd_len2 == 0) {
+ /*
+ * The previous encrypt step was an
+ * accumulation only and didn't produce any
+ * partial output
+ */
+ if (ct->dd_len1 == 0)
+ break;
+
+ } else {
+ ct->dd_offset1 = ct->dd_offset2;
+ ct->dd_len1 = ct->dd_len2;
+ }
+ err = crypto_mac_update((crypto_context_t)mac_ctx,
+ (crypto_data_t *)ct, NULL);
+
+ ct->dd_offset1 = save_offset;
+ ct->dd_len1 = save_len;
+
+ break;
+ }
+ case KCF_OP_FINAL: {
+ crypto_dual_data_t *ct = cmops->em_ciphertext;
+ crypto_data_t *mac = cmops->em_mac;
+ kcf_context_t *mac_kcf_ctx = encr_kcf_ctx->kc_secondctx;
+ crypto_ctx_t *mac_ctx = &mac_kcf_ctx->kc_glbl_ctx;
+ crypto_context_t mac_context = mac_ctx;
+
+ KCF_WRAP_ENCRYPT_OPS_PARAMS(&encr_params, KCF_OP_FINAL,
+ pd->pd_sid, NULL, NULL, NULL, (crypto_data_t *)ct,
+ NULL);
+
+ err = kcf_submit_request(pd, ctx, NULL, &encr_params,
+ B_FALSE);
+
+ /* It can't be CRYPTO_QUEUED */
+ if (err != CRYPTO_SUCCESS) {
+ crypto_cancel_ctx(mac_context);
+ break;
+ }
+
+ if (ct->dd_len2 > 0) {
+ save_offset = ct->dd_offset1;
+ save_len = ct->dd_len1;
+ ct->dd_offset1 = ct->dd_offset2;
+ ct->dd_len1 = ct->dd_len2;
+
+ err = crypto_mac_update(mac_context,
+ (crypto_data_t *)ct, NULL);
+
+ ct->dd_offset1 = save_offset;
+ ct->dd_len1 = save_len;
+
+ if (err != CRYPTO_SUCCESS) {
+ crypto_cancel_ctx(mac_context);
+ return (err);
+ }
+ }
+
+ /* and finally, collect the MAC */
+ err = crypto_mac_final(mac_context, mac, NULL);
+ break;
+ }
+
+ default:
+ break;
+ }
+ KCF_PROV_INCRSTATS(pd, err);
+ break;
+ }
+ case KCF_OG_MAC_DECRYPT: {
+ kcf_mac_decrypt_ops_params_t *mdops =
+ ¶ms->rp_u.mac_decrypt_params;
+ kcf_context_t *decr_kcf_ctx;
+ crypto_ctx_t *mac_ctx;
+ kcf_req_params_t decr_params;
+
+ decr_kcf_ctx = (kcf_context_t *)(ctx->cc_framework_private);
+
+ switch (optype) {
+ case KCF_OP_INIT: {
+ decr_kcf_ctx->kc_secondctx = NULL;
+
+ err = crypto_mac_init(&mdops->md_mac_mech,
+ mdops->md_mac_key, mdops->md_mac_templ,
+ (crypto_context_t *)&mac_ctx, NULL);
+
+ /* It can't be CRYPTO_QUEUED */
+ if (err != CRYPTO_SUCCESS) {
+ break;
+ }
+
+ KCF_WRAP_DECRYPT_OPS_PARAMS(&decr_params, KCF_OP_INIT,
+ pd->pd_sid, &mdops->md_decr_mech,
+ mdops->md_decr_key, NULL, NULL,
+ mdops->md_decr_templ);
+
+ err = kcf_submit_request(pd, ctx, NULL, &decr_params,
+ B_FALSE);
+
+ /* It can't be CRYPTO_QUEUED */
+ if (err != CRYPTO_SUCCESS) {
+ crypto_cancel_ctx((crypto_context_t)mac_ctx);
+ break;
+ }
+
+ decr_kcf_ctx->kc_secondctx = (kcf_context_t *)
+ mac_ctx->cc_framework_private;
+ KCF_CONTEXT_REFHOLD((kcf_context_t *)
+ mac_ctx->cc_framework_private);
+
+ break;
+		}
+ case KCF_OP_UPDATE: {
+ crypto_dual_data_t *ct = mdops->md_ciphertext;
+ crypto_data_t *pt = mdops->md_plaintext;
+ kcf_context_t *mac_kcf_ctx = decr_kcf_ctx->kc_secondctx;
+ crypto_ctx_t *mac_ctx = &mac_kcf_ctx->kc_glbl_ctx;
+
+ err = crypto_mac_update((crypto_context_t)mac_ctx,
+ (crypto_data_t *)ct, NULL);
+
+ if (err != CRYPTO_SUCCESS)
+ break;
+
+ save_offset = ct->dd_offset1;
+ save_len = ct->dd_len1;
+
+ /* zero ct->dd_len2 means decrypt everything */
+ if (ct->dd_len2 > 0) {
+ ct->dd_offset1 = ct->dd_offset2;
+ ct->dd_len1 = ct->dd_len2;
+ }
+
+ err = crypto_decrypt_update((crypto_context_t)ctx,
+ (crypto_data_t *)ct, pt, NULL);
+
+ ct->dd_offset1 = save_offset;
+ ct->dd_len1 = save_len;
+
+ break;
+ }
+ case KCF_OP_FINAL: {
+ crypto_data_t *pt = mdops->md_plaintext;
+ crypto_data_t *mac = mdops->md_mac;
+ kcf_context_t *mac_kcf_ctx = decr_kcf_ctx->kc_secondctx;
+ crypto_ctx_t *mac_ctx = &mac_kcf_ctx->kc_glbl_ctx;
+
+ err = crypto_mac_final((crypto_context_t)mac_ctx,
+ mac, NULL);
+
+ if (err != CRYPTO_SUCCESS) {
+ crypto_cancel_ctx(ctx);
+ break;
+ }
+
+ /* Get the last chunk of plaintext */
+ KCF_CONTEXT_REFHOLD(decr_kcf_ctx);
+ err = crypto_decrypt_final((crypto_context_t)ctx, pt,
+ NULL);
+
+ break;
+ }
+		default:
+			break;
+		}
+ break;
+ }
+ default:
+
+ break;
+ } /* end of switch(params->rp_opgrp) */
+
+ return (err);
+}
diff --git a/zfs/module/icp/core/kcf_mech_tabs.c b/zfs/module/icp/core/kcf_mech_tabs.c
new file mode 100644
index 000000000000..723bfdb601a4
--- /dev/null
+++ b/zfs/module/icp/core/kcf_mech_tabs.c
@@ -0,0 +1,791 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/api.h>
+#include <sys/crypto/impl.h>
+#include <sys/modhash.h>
+
+/* Cryptographic mechanisms tables and their access functions */
+
+/*
+ * Internal numbers assigned to mechanisms are coded as follows:
+ *
+ * +----------------+----------------+
+ * | mech. class | mech. index |
+ * <--- 32-bits --->+<--- 32-bits --->
+ *
+ * the mech_class identifies the table the mechanism belongs to.
+ * mech_index is the index for that mechanism in the table.
+ * A mechanism belongs to exactly 1 table.
+ * The tables are:
+ * . digest_mechs_tab[] for the msg digest mechs.
+ * . cipher_mechs_tab[] for encrypt/decrypt and wrap/unwrap mechs.
+ * . mac_mechs_tab[] for MAC mechs.
+ * . sign_mechs_tab[] for sign & verify mechs.
+ * . keyops_mechs_tab[] for key/key pair generation, and key derivation.
+ * . misc_mechs_tab[] for mechs that don't belong to any of the above.
+ *
+ * There are no holes in the tables.
+ */
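+
+/*
+ * Illustrative sketch only (kept out of the build with #if 0): the
+ * class/index packing described above is likely expressed along these
+ * lines; the authoritative KCF_MECHID, KCF_MECH2CLASS and
+ * KCF_MECH2INDEX definitions live in the framework headers.
+ */
+#if 0
+#define	EXAMPLE_MECHID(class, index)	\
+	(((uint64_t)(class) << 32) | ((uint64_t)(index) & 0xFFFFFFFFULL))
+#define	EXAMPLE_MECH2CLASS(mech_type)	\
+	((kcf_ops_class_t)((mech_type) >> 32))
+#define	EXAMPLE_MECH2INDEX(mech_type)	((int)(mech_type))
+#endif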
+
+/*
+ * Locking conventions:
+ * --------------------
+ * A global mutex, kcf_mech_tabs_lock, serializes writes to the
+ * mechanism table via kcf_create_mech_entry().
+ *
+ * A mutex is associated with every entry of the tables.
+ * The mutex is acquired whenever the entry is accessed for
+ * 1) retrieving the mech_id (comparing the mech name)
+ * 2) finding a provider for an xxx_init() or atomic operation.
+ * 3) altering the mechs entry to add or remove a provider.
+ *
+ * In 2), after a provider is chosen, its prov_desc is held and the
+ * entry's mutex must be dropped. The provider's working function (SPI) is
+ * called outside the mech_entry's mutex.
+ *
+ * The list of providers for a particular mechanism is not expected to be
+ * long enough to justify the cost of using rwlocks, so the per-mechanism
+ * entry mutex won't be very *hot*.
+ *
+ * When both kcf_mech_tabs_lock and a mech_entry mutex need to be held,
+ * kcf_mech_tabs_lock must always be acquired first.
+ *
+ */
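+
+/*
+ * Illustrative fragment only (kept out of the build with #if 0): the
+ * lock ordering stated above, assuming 'me' points at a mech entry.
+ */
+#if 0
+	mutex_enter(&kcf_mech_tabs_lock);	/* global table lock first */
+	mutex_enter(&me->me_mutex);		/* then the entry's mutex */
+	/* ... alter the mech entry ... */
+	mutex_exit(&me->me_mutex);
+	mutex_exit(&kcf_mech_tabs_lock);
+#endif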
+
+ /* Mechanisms tables */
+
+
+/* RFE 4687834 Will deal with the extensibility of these tables later */
+
+kcf_mech_entry_t kcf_digest_mechs_tab[KCF_MAXDIGEST];
+kcf_mech_entry_t kcf_cipher_mechs_tab[KCF_MAXCIPHER];
+kcf_mech_entry_t kcf_mac_mechs_tab[KCF_MAXMAC];
+kcf_mech_entry_t kcf_sign_mechs_tab[KCF_MAXSIGN];
+kcf_mech_entry_t kcf_keyops_mechs_tab[KCF_MAXKEYOPS];
+kcf_mech_entry_t kcf_misc_mechs_tab[KCF_MAXMISC];
+
+kcf_mech_entry_tab_t kcf_mech_tabs_tab[KCF_LAST_OPSCLASS + 1] = {
+ {0, NULL}, /* No class zero */
+ {KCF_MAXDIGEST, kcf_digest_mechs_tab},
+ {KCF_MAXCIPHER, kcf_cipher_mechs_tab},
+ {KCF_MAXMAC, kcf_mac_mechs_tab},
+ {KCF_MAXSIGN, kcf_sign_mechs_tab},
+ {KCF_MAXKEYOPS, kcf_keyops_mechs_tab},
+ {KCF_MAXMISC, kcf_misc_mechs_tab}
+};
+
+/*
+ * Per-algorithm internal thresholds for the minimum input size before
+ * offloading to a hardware provider.
+ * Dispatching a crypto operation to a hardware provider entails paying the
+ * cost of an additional context switch. Measurements with the Sun
+ * Accelerator 4000 show that jobs of 512 bytes or smaller are better
+ * handled in software. There is room for refinement here.
+ */
+int kcf_md5_threshold = 512;
+int kcf_sha1_threshold = 512;
+int kcf_des_threshold = 512;
+int kcf_des3_threshold = 512;
+int kcf_aes_threshold = 512;
+int kcf_bf_threshold = 512;
+int kcf_rc4_threshold = 512;
+
+kmutex_t kcf_mech_tabs_lock;
+static uint32_t kcf_gen_swprov = 0;
+
+int kcf_mech_hash_size = 256;
+mod_hash_t *kcf_mech_hash; /* mech name to id hash */
+
+static crypto_mech_type_t
+kcf_mech_hash_find(char *mechname)
+{
+ mod_hash_val_t hv;
+ crypto_mech_type_t mt;
+
+ mt = CRYPTO_MECH_INVALID;
+ if (mod_hash_find(kcf_mech_hash, (mod_hash_key_t)mechname, &hv) == 0) {
+ mt = *(crypto_mech_type_t *)hv;
+ ASSERT(mt != CRYPTO_MECH_INVALID);
+ }
+
+ return (mt);
+}
+
+void
+kcf_destroy_mech_tabs(void)
+{
+ int i, max;
+ kcf_ops_class_t class;
+ kcf_mech_entry_t *me_tab;
+
+ if (kcf_mech_hash)
+ mod_hash_destroy_hash(kcf_mech_hash);
+
+ mutex_destroy(&kcf_mech_tabs_lock);
+
+ for (class = KCF_FIRST_OPSCLASS; class <= KCF_LAST_OPSCLASS; class++) {
+ max = kcf_mech_tabs_tab[class].met_size;
+ me_tab = kcf_mech_tabs_tab[class].met_tab;
+ for (i = 0; i < max; i++)
+ mutex_destroy(&(me_tab[i].me_mutex));
+ }
+}
+
+/*
+ * kcf_init_mech_tabs()
+ *
+ * Called by the misc/kcf's _init() routine to initialize the tables
+ * of mech_entry's.
+ */
+void
+kcf_init_mech_tabs(void)
+{
+ int i, max;
+ kcf_ops_class_t class;
+ kcf_mech_entry_t *me_tab;
+
+ /* Initializes the mutex locks. */
+
+ mutex_init(&kcf_mech_tabs_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ /* Then the pre-defined mechanism entries */
+
+ /* Two digests */
+ (void) strncpy(kcf_digest_mechs_tab[0].me_name, SUN_CKM_MD5,
+ CRYPTO_MAX_MECH_NAME);
+ kcf_digest_mechs_tab[0].me_threshold = kcf_md5_threshold;
+
+ (void) strncpy(kcf_digest_mechs_tab[1].me_name, SUN_CKM_SHA1,
+ CRYPTO_MAX_MECH_NAME);
+ kcf_digest_mechs_tab[1].me_threshold = kcf_sha1_threshold;
+
+ /* The symmetric ciphers in various modes */
+ (void) strncpy(kcf_cipher_mechs_tab[0].me_name, SUN_CKM_DES_CBC,
+ CRYPTO_MAX_MECH_NAME);
+ kcf_cipher_mechs_tab[0].me_threshold = kcf_des_threshold;
+
+ (void) strncpy(kcf_cipher_mechs_tab[1].me_name, SUN_CKM_DES3_CBC,
+ CRYPTO_MAX_MECH_NAME);
+ kcf_cipher_mechs_tab[1].me_threshold = kcf_des3_threshold;
+
+ (void) strncpy(kcf_cipher_mechs_tab[2].me_name, SUN_CKM_DES_ECB,
+ CRYPTO_MAX_MECH_NAME);
+ kcf_cipher_mechs_tab[2].me_threshold = kcf_des_threshold;
+
+ (void) strncpy(kcf_cipher_mechs_tab[3].me_name, SUN_CKM_DES3_ECB,
+ CRYPTO_MAX_MECH_NAME);
+ kcf_cipher_mechs_tab[3].me_threshold = kcf_des3_threshold;
+
+ (void) strncpy(kcf_cipher_mechs_tab[4].me_name, SUN_CKM_BLOWFISH_CBC,
+ CRYPTO_MAX_MECH_NAME);
+ kcf_cipher_mechs_tab[4].me_threshold = kcf_bf_threshold;
+
+ (void) strncpy(kcf_cipher_mechs_tab[5].me_name, SUN_CKM_BLOWFISH_ECB,
+ CRYPTO_MAX_MECH_NAME);
+ kcf_cipher_mechs_tab[5].me_threshold = kcf_bf_threshold;
+
+ (void) strncpy(kcf_cipher_mechs_tab[6].me_name, SUN_CKM_AES_CBC,
+ CRYPTO_MAX_MECH_NAME);
+ kcf_cipher_mechs_tab[6].me_threshold = kcf_aes_threshold;
+
+ (void) strncpy(kcf_cipher_mechs_tab[7].me_name, SUN_CKM_AES_ECB,
+ CRYPTO_MAX_MECH_NAME);
+ kcf_cipher_mechs_tab[7].me_threshold = kcf_aes_threshold;
+
+ (void) strncpy(kcf_cipher_mechs_tab[8].me_name, SUN_CKM_RC4,
+ CRYPTO_MAX_MECH_NAME);
+ kcf_cipher_mechs_tab[8].me_threshold = kcf_rc4_threshold;
+
+
+ /* 4 HMACs */
+ (void) strncpy(kcf_mac_mechs_tab[0].me_name, SUN_CKM_MD5_HMAC,
+ CRYPTO_MAX_MECH_NAME);
+ kcf_mac_mechs_tab[0].me_threshold = kcf_md5_threshold;
+
+ (void) strncpy(kcf_mac_mechs_tab[1].me_name, SUN_CKM_MD5_HMAC_GENERAL,
+ CRYPTO_MAX_MECH_NAME);
+ kcf_mac_mechs_tab[1].me_threshold = kcf_md5_threshold;
+
+ (void) strncpy(kcf_mac_mechs_tab[2].me_name, SUN_CKM_SHA1_HMAC,
+ CRYPTO_MAX_MECH_NAME);
+ kcf_mac_mechs_tab[2].me_threshold = kcf_sha1_threshold;
+
+ (void) strncpy(kcf_mac_mechs_tab[3].me_name, SUN_CKM_SHA1_HMAC_GENERAL,
+ CRYPTO_MAX_MECH_NAME);
+ kcf_mac_mechs_tab[3].me_threshold = kcf_sha1_threshold;
+
+
+ /* 1 random number generation pseudo mechanism */
+ (void) strncpy(kcf_misc_mechs_tab[0].me_name, SUN_RANDOM,
+ CRYPTO_MAX_MECH_NAME);
+
+ kcf_mech_hash = mod_hash_create_strhash_nodtr("kcf mech2id hash",
+ kcf_mech_hash_size, mod_hash_null_valdtor);
+
+ for (class = KCF_FIRST_OPSCLASS; class <= KCF_LAST_OPSCLASS; class++) {
+ max = kcf_mech_tabs_tab[class].met_size;
+ me_tab = kcf_mech_tabs_tab[class].met_tab;
+ for (i = 0; i < max; i++) {
+ mutex_init(&(me_tab[i].me_mutex), NULL,
+ MUTEX_DEFAULT, NULL);
+ if (me_tab[i].me_name[0] != 0) {
+ me_tab[i].me_mechid = KCF_MECHID(class, i);
+ (void) mod_hash_insert(kcf_mech_hash,
+ (mod_hash_key_t)me_tab[i].me_name,
+ (mod_hash_val_t)&(me_tab[i].me_mechid));
+ }
+ }
+ }
+}
+
+/*
+ * kcf_create_mech_entry()
+ *
+ * Arguments:
+ * . The class of mechanism.
+ * . The name of the new mechanism.
+ *
+ * Description:
+ * Creates a new mech_entry for a mechanism not yet known to the
+ * framework.
+ * This routine is called by kcf_add_mech_provider, which is
+ * in turn invoked for each mechanism supported by a provider.
+ * The 'class' argument depends on the crypto_func_group_t bitmask
+ * in the registering provider's mech_info struct for this mechanism.
+ * When there is ambiguity in the mapping between the crypto_func_group_t
+ * and a class (dual ops, ...) the KCF_MISC_CLASS should be used.
+ *
+ * Context:
+ * User context only.
+ *
+ * Returns:
+ * KCF_INVALID_MECH_CLASS or KCF_INVALID_MECH_NAME if the class or
+ * the mechname is bogus.
+ * KCF_MECH_TAB_FULL when there is no room left in the mech. tabs.
+ * KCF_SUCCESS otherwise.
+ */
+static int
+kcf_create_mech_entry(kcf_ops_class_t class, char *mechname)
+{
+ crypto_mech_type_t mt;
+ kcf_mech_entry_t *me_tab;
+ int i = 0, size;
+
+ if ((class < KCF_FIRST_OPSCLASS) || (class > KCF_LAST_OPSCLASS))
+ return (KCF_INVALID_MECH_CLASS);
+
+ if ((mechname == NULL) || (mechname[0] == 0))
+ return (KCF_INVALID_MECH_NAME);
+ /*
+ * First check if the mechanism is already in one of the tables.
+ * The mech_entry could be in another class.
+ */
+ mutex_enter(&kcf_mech_tabs_lock);
+ mt = kcf_mech_hash_find(mechname);
+ if (mt != CRYPTO_MECH_INVALID) {
+		/* Nothing to do, regardless of the suggested class. */
+ mutex_exit(&kcf_mech_tabs_lock);
+ return (KCF_SUCCESS);
+ }
+ /* Now take the next unused mech entry in the class's tab */
+ me_tab = kcf_mech_tabs_tab[class].met_tab;
+ size = kcf_mech_tabs_tab[class].met_size;
+
+ while (i < size) {
+ mutex_enter(&(me_tab[i].me_mutex));
+ if (me_tab[i].me_name[0] == 0) {
+ /* Found an empty spot */
+ (void) strncpy(me_tab[i].me_name, mechname,
+ CRYPTO_MAX_MECH_NAME);
+ me_tab[i].me_name[CRYPTO_MAX_MECH_NAME-1] = '\0';
+ me_tab[i].me_mechid = KCF_MECHID(class, i);
+ /*
+			 * No a priori information about the new mechanism, so
+ * the threshold is set to zero.
+ */
+ me_tab[i].me_threshold = 0;
+
+ mutex_exit(&(me_tab[i].me_mutex));
+ /* Add the new mechanism to the hash table */
+ (void) mod_hash_insert(kcf_mech_hash,
+ (mod_hash_key_t)me_tab[i].me_name,
+ (mod_hash_val_t)&(me_tab[i].me_mechid));
+ break;
+ }
+ mutex_exit(&(me_tab[i].me_mutex));
+ i++;
+ }
+
+ mutex_exit(&kcf_mech_tabs_lock);
+
+ if (i == size) {
+ return (KCF_MECH_TAB_FULL);
+ }
+
+ return (KCF_SUCCESS);
+}
+
+/*
+ * kcf_add_mech_provider()
+ *
+ * Arguments:
+ * . An index into the provider mechanism array.
+ * . A pointer to the provider descriptor.
+ * . Storage for the pointer to the kcf_prov_mech_desc_t that was added.
+ *
+ * Description:
+ * Adds a new provider of a mechanism to the mechanism's mech_entry
+ * chain.
+ *
+ * Context:
+ * User context only.
+ *
+ * Returns:
+ *	KCF_SUCCESS on success.
+ *	KCF_MECH_TAB_FULL otherwise.
+ */
+int
+kcf_add_mech_provider(short mech_indx,
+ kcf_provider_desc_t *prov_desc, kcf_prov_mech_desc_t **pmdpp)
+{
+ int error;
+ kcf_mech_entry_t *mech_entry = NULL;
+ crypto_mech_info_t *mech_info;
+ crypto_mech_type_t kcf_mech_type, mt;
+ kcf_prov_mech_desc_t *prov_mech, *prov_mech2;
+ crypto_func_group_t simple_fg_mask, dual_fg_mask;
+ crypto_mech_info_t *dmi;
+ crypto_mech_info_list_t *mil, *mil2;
+ kcf_mech_entry_t *me;
+ int i;
+
+ ASSERT(prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ mech_info = &prov_desc->pd_mechanisms[mech_indx];
+
+ /*
+ * A mechanism belongs to exactly one mechanism table.
+ * Find the class corresponding to the function group flag of
+ * the mechanism.
+ */
+ kcf_mech_type = kcf_mech_hash_find(mech_info->cm_mech_name);
+ if (kcf_mech_type == CRYPTO_MECH_INVALID) {
+ crypto_func_group_t fg = mech_info->cm_func_group_mask;
+ kcf_ops_class_t class;
+
+ if (fg & CRYPTO_FG_DIGEST || fg & CRYPTO_FG_DIGEST_ATOMIC)
+ class = KCF_DIGEST_CLASS;
+ else if (fg & CRYPTO_FG_ENCRYPT || fg & CRYPTO_FG_DECRYPT ||
+ fg & CRYPTO_FG_ENCRYPT_ATOMIC ||
+ fg & CRYPTO_FG_DECRYPT_ATOMIC)
+ class = KCF_CIPHER_CLASS;
+ else if (fg & CRYPTO_FG_MAC || fg & CRYPTO_FG_MAC_ATOMIC)
+ class = KCF_MAC_CLASS;
+ else if (fg & CRYPTO_FG_SIGN || fg & CRYPTO_FG_VERIFY ||
+ fg & CRYPTO_FG_SIGN_ATOMIC ||
+ fg & CRYPTO_FG_VERIFY_ATOMIC ||
+ fg & CRYPTO_FG_SIGN_RECOVER ||
+ fg & CRYPTO_FG_VERIFY_RECOVER)
+ class = KCF_SIGN_CLASS;
+ else if (fg & CRYPTO_FG_GENERATE ||
+ fg & CRYPTO_FG_GENERATE_KEY_PAIR ||
+ fg & CRYPTO_FG_WRAP || fg & CRYPTO_FG_UNWRAP ||
+ fg & CRYPTO_FG_DERIVE)
+ class = KCF_KEYOPS_CLASS;
+ else
+ class = KCF_MISC_CLASS;
+
+ /*
+ * Attempt to create a new mech_entry for the specified
+ * mechanism. kcf_create_mech_entry() can handle the case
+ * where such an entry already exists.
+ */
+ if ((error = kcf_create_mech_entry(class,
+ mech_info->cm_mech_name)) != KCF_SUCCESS) {
+ return (error);
+ }
+ /* get the KCF mech type that was assigned to the mechanism */
+ kcf_mech_type = kcf_mech_hash_find(mech_info->cm_mech_name);
+ ASSERT(kcf_mech_type != CRYPTO_MECH_INVALID);
+ }
+
+ error = kcf_get_mech_entry(kcf_mech_type, &mech_entry);
+ ASSERT(error == KCF_SUCCESS);
+
+ /* allocate and initialize new kcf_prov_mech_desc */
+ prov_mech = kmem_zalloc(sizeof (kcf_prov_mech_desc_t), KM_SLEEP);
+ bcopy(mech_info, &prov_mech->pm_mech_info, sizeof (crypto_mech_info_t));
+ prov_mech->pm_prov_desc = prov_desc;
+ prov_desc->pd_mech_indx[KCF_MECH2CLASS(kcf_mech_type)]
+ [KCF_MECH2INDEX(kcf_mech_type)] = mech_indx;
+
+ KCF_PROV_REFHOLD(prov_desc);
+ KCF_PROV_IREFHOLD(prov_desc);
+
+ dual_fg_mask = mech_info->cm_func_group_mask & CRYPTO_FG_DUAL_MASK;
+
+ if (dual_fg_mask == ((crypto_func_group_t)0))
+ goto add_entry;
+
+ simple_fg_mask = (mech_info->cm_func_group_mask &
+ CRYPTO_FG_SIMPLEOP_MASK) | CRYPTO_FG_RANDOM;
+
+ for (i = 0; i < prov_desc->pd_mech_list_count; i++) {
+ dmi = &prov_desc->pd_mechanisms[i];
+
+ /* skip self */
+ if (dmi->cm_mech_number == mech_info->cm_mech_number)
+ continue;
+
+ /* skip if not a dual operation mechanism */
+ if (!(dmi->cm_func_group_mask & dual_fg_mask) ||
+ (dmi->cm_func_group_mask & simple_fg_mask))
+ continue;
+
+ mt = kcf_mech_hash_find(dmi->cm_mech_name);
+ if (mt == CRYPTO_MECH_INVALID)
+ continue;
+
+ if (kcf_get_mech_entry(mt, &me) != KCF_SUCCESS)
+ continue;
+
+ mil = kmem_zalloc(sizeof (*mil), KM_SLEEP);
+ mil2 = kmem_zalloc(sizeof (*mil2), KM_SLEEP);
+
+ /*
+ * Ignore hard-coded entries in the mech table
+ * if the provider hasn't registered.
+ */
+ mutex_enter(&me->me_mutex);
+ if (me->me_hw_prov_chain == NULL && me->me_sw_prov == NULL) {
+ mutex_exit(&me->me_mutex);
+ kmem_free(mil, sizeof (*mil));
+ kmem_free(mil2, sizeof (*mil2));
+ continue;
+ }
+
+ /*
+ * Add other dual mechanisms that have registered
+ * with the framework to this mechanism's
+ * cross-reference list.
+ */
+ mil->ml_mech_info = *dmi; /* struct assignment */
+ mil->ml_kcf_mechid = mt;
+
+ /* add to head of list */
+ mil->ml_next = prov_mech->pm_mi_list;
+ prov_mech->pm_mi_list = mil;
+
+ if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER)
+ prov_mech2 = me->me_hw_prov_chain;
+ else
+ prov_mech2 = me->me_sw_prov;
+
+ if (prov_mech2 == NULL) {
+ kmem_free(mil2, sizeof (*mil2));
+ mutex_exit(&me->me_mutex);
+ continue;
+ }
+
+ /*
+ * Update all other cross-reference lists by
+ * adding this new mechanism.
+ */
+ while (prov_mech2 != NULL) {
+ if (prov_mech2->pm_prov_desc == prov_desc) {
+ /* struct assignment */
+ mil2->ml_mech_info = *mech_info;
+ mil2->ml_kcf_mechid = kcf_mech_type;
+
+ /* add to head of list */
+ mil2->ml_next = prov_mech2->pm_mi_list;
+ prov_mech2->pm_mi_list = mil2;
+ break;
+ }
+ prov_mech2 = prov_mech2->pm_next;
+ }
+ if (prov_mech2 == NULL)
+ kmem_free(mil2, sizeof (*mil2));
+
+ mutex_exit(&me->me_mutex);
+ }
+
+add_entry:
+ /*
+ * Add new kcf_prov_mech_desc at the front of HW providers
+ * chain.
+ */
+ switch (prov_desc->pd_prov_type) {
+
+ case CRYPTO_HW_PROVIDER:
+ mutex_enter(&mech_entry->me_mutex);
+ prov_mech->pm_me = mech_entry;
+ prov_mech->pm_next = mech_entry->me_hw_prov_chain;
+ mech_entry->me_hw_prov_chain = prov_mech;
+ mech_entry->me_num_hwprov++;
+ mutex_exit(&mech_entry->me_mutex);
+ break;
+
+ case CRYPTO_SW_PROVIDER:
+ mutex_enter(&mech_entry->me_mutex);
+ if (mech_entry->me_sw_prov != NULL) {
+ /*
+ * There is already a SW provider for this mechanism.
+ * Since we allow only one SW provider per mechanism,
+ * report this condition.
+ */
+ cmn_err(CE_WARN, "The cryptographic software provider "
+ "\"%s\" will not be used for %s. The provider "
+ "\"%s\" will be used for this mechanism "
+ "instead.", prov_desc->pd_description,
+ mech_info->cm_mech_name,
+ mech_entry->me_sw_prov->pm_prov_desc->
+ pd_description);
+ KCF_PROV_REFRELE(prov_desc);
+ kmem_free(prov_mech, sizeof (kcf_prov_mech_desc_t));
+ prov_mech = NULL;
+ } else {
+ /*
+ * Set the provider as the software provider for
+ * this mechanism.
+ */
+ mech_entry->me_sw_prov = prov_mech;
+
+ /* We'll wrap around after 4 billion registrations! */
+ mech_entry->me_gen_swprov = kcf_gen_swprov++;
+ }
+ mutex_exit(&mech_entry->me_mutex);
+ break;
+ default:
+ break;
+ }
+
+ *pmdpp = prov_mech;
+
+ return (KCF_SUCCESS);
+}
+
+/*
+ * kcf_remove_mech_provider()
+ *
+ * Arguments:
+ * . mech_name: the name of the mechanism.
+ * . prov_desc: The provider descriptor
+ *
+ * Description:
+ * Removes a provider from chain of provider descriptors.
+ * The provider is made unavailable to kernel consumers for the specified
+ * mechanism.
+ *
+ * Context:
+ * User context only.
+ */
+void
+kcf_remove_mech_provider(char *mech_name, kcf_provider_desc_t *prov_desc)
+{
+ crypto_mech_type_t mech_type;
+ kcf_prov_mech_desc_t *prov_mech = NULL, *prov_chain;
+ kcf_prov_mech_desc_t **prev_entry_next;
+ kcf_mech_entry_t *mech_entry;
+ crypto_mech_info_list_t *mil, *mil2, *next, **prev_next;
+
+ ASSERT(prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER);
+
+ /* get the KCF mech type that was assigned to the mechanism */
+ if ((mech_type = kcf_mech_hash_find(mech_name)) ==
+ CRYPTO_MECH_INVALID) {
+ /*
+ * Provider was not allowed for this mech due to policy or
+ * configuration.
+ */
+ return;
+ }
+
+ /* get a ptr to the mech_entry that was created */
+ if (kcf_get_mech_entry(mech_type, &mech_entry) != KCF_SUCCESS) {
+ /*
+ * Provider was not allowed for this mech due to policy or
+ * configuration.
+ */
+ return;
+ }
+
+ mutex_enter(&mech_entry->me_mutex);
+
+ switch (prov_desc->pd_prov_type) {
+
+ case CRYPTO_HW_PROVIDER:
+ /* find the provider in the mech_entry chain */
+ prev_entry_next = &mech_entry->me_hw_prov_chain;
+ prov_mech = mech_entry->me_hw_prov_chain;
+ while (prov_mech != NULL &&
+ prov_mech->pm_prov_desc != prov_desc) {
+ prev_entry_next = &prov_mech->pm_next;
+ prov_mech = prov_mech->pm_next;
+ }
+
+ if (prov_mech == NULL) {
+ /* entry not found, simply return */
+ mutex_exit(&mech_entry->me_mutex);
+ return;
+ }
+
+ /* remove provider entry from mech_entry chain */
+ *prev_entry_next = prov_mech->pm_next;
+ ASSERT(mech_entry->me_num_hwprov > 0);
+ mech_entry->me_num_hwprov--;
+ break;
+
+ case CRYPTO_SW_PROVIDER:
+ if (mech_entry->me_sw_prov == NULL ||
+ mech_entry->me_sw_prov->pm_prov_desc != prov_desc) {
+ /* not the software provider for this mechanism */
+ mutex_exit(&mech_entry->me_mutex);
+ return;
+ }
+ prov_mech = mech_entry->me_sw_prov;
+ mech_entry->me_sw_prov = NULL;
+ break;
+ default:
+ /* unexpected crypto_provider_type_t */
+ mutex_exit(&mech_entry->me_mutex);
+ return;
+ }
+
+ mutex_exit(&mech_entry->me_mutex);
+
+ /* Free the dual ops cross-reference lists */
+ mil = prov_mech->pm_mi_list;
+ while (mil != NULL) {
+ next = mil->ml_next;
+ if (kcf_get_mech_entry(mil->ml_kcf_mechid,
+ &mech_entry) != KCF_SUCCESS) {
+ mil = next;
+ continue;
+ }
+
+ mutex_enter(&mech_entry->me_mutex);
+ if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER)
+ prov_chain = mech_entry->me_hw_prov_chain;
+ else
+ prov_chain = mech_entry->me_sw_prov;
+
+ while (prov_chain != NULL) {
+ if (prov_chain->pm_prov_desc == prov_desc) {
+ prev_next = &prov_chain->pm_mi_list;
+ mil2 = prov_chain->pm_mi_list;
+ while (mil2 != NULL &&
+ mil2->ml_kcf_mechid != mech_type) {
+ prev_next = &mil2->ml_next;
+ mil2 = mil2->ml_next;
+ }
+ if (mil2 != NULL) {
+ *prev_next = mil2->ml_next;
+ kmem_free(mil2, sizeof (*mil2));
+ }
+ break;
+ }
+ prov_chain = prov_chain->pm_next;
+ }
+
+ mutex_exit(&mech_entry->me_mutex);
+ kmem_free(mil, sizeof (crypto_mech_info_list_t));
+ mil = next;
+ }
+
+ /* free entry */
+ KCF_PROV_REFRELE(prov_mech->pm_prov_desc);
+ KCF_PROV_IREFRELE(prov_mech->pm_prov_desc);
+ kmem_free(prov_mech, sizeof (kcf_prov_mech_desc_t));
+}
+
+/*
+ * kcf_get_mech_entry()
+ *
+ * Arguments:
+ * . The framework mechanism type
+ * . Storage for the mechanism entry
+ *
+ * Description:
+ * Retrieves the mechanism entry for the mech.
+ *
+ * Context:
+ * User and interrupt contexts.
+ *
+ * Returns:
+ * KCF_MECHANISM_XXX appropriate error code.
+ * KCF_SUCCESS otherwise.
+ */
+int
+kcf_get_mech_entry(crypto_mech_type_t mech_type, kcf_mech_entry_t **mep)
+{
+ kcf_ops_class_t class;
+ int index;
+ kcf_mech_entry_tab_t *me_tab;
+
+ ASSERT(mep != NULL);
+
+ class = KCF_MECH2CLASS(mech_type);
+
+ if ((class < KCF_FIRST_OPSCLASS) || (class > KCF_LAST_OPSCLASS)) {
+ /* the caller won't need to know it's an invalid class */
+ return (KCF_INVALID_MECH_NUMBER);
+ }
+
+ me_tab = &kcf_mech_tabs_tab[class];
+ index = KCF_MECH2INDEX(mech_type);
+
+ if ((index < 0) || (index >= me_tab->met_size)) {
+ return (KCF_INVALID_MECH_NUMBER);
+ }
+
+ *mep = &((me_tab->met_tab)[index]);
+
+ return (KCF_SUCCESS);
+}
+
+/* CURRENTLY UNSUPPORTED: attempting to load the module if it isn't found */
+/*
+ * Lookup the hash table for an entry that matches the mechname.
+ * If there are no hardware or software providers for the mechanism,
+ * but there is an unloaded software provider, this routine will attempt
+ * to load it.
+ *
+ * If the MOD_NOAUTOUNLOAD flag is not set, a software provider is
+ * in constant danger of being unloaded. For consumers that call
+ * crypto_mech2id() only once, the provider will not be reloaded
+ * if it becomes unloaded. If a provider gets loaded elsewhere
+ * without the MOD_NOAUTOUNLOAD flag being set, we set it now.
+ */
+crypto_mech_type_t
+crypto_mech2id_common(char *mechname, boolean_t load_module)
+{
+ crypto_mech_type_t mt = kcf_mech_hash_find(mechname);
+ return (mt);
+}
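+
+/*
+ * Example (illustrative sketch): a kernel consumer would typically
+ * resolve a mechanism name once, up front, and cache the resulting
+ * type:
+ *
+ *     crypto_mech_type_t mt;
+ *
+ *     mt = crypto_mech2id_common(SUN_CKM_SHA256, B_TRUE);
+ *     if (mt == CRYPTO_MECH_INVALID)
+ *             return (CRYPTO_MECH_NOT_SUPPORTED);
+ *
+ * SUN_CKM_SHA256 is one of the mechanism name constants from
+ * sys/crypto/common.h; any registered mechanism name works here.
+ */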
diff --git a/zfs/module/icp/core/kcf_prov_lib.c b/zfs/module/icp/core/kcf_prov_lib.c
new file mode 100644
index 000000000000..dd4cd086d21f
--- /dev/null
+++ b/zfs/module/icp/core/kcf_prov_lib.c
@@ -0,0 +1,229 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <modes/modes.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+
+/*
+ * Utility routine to apply the command, 'cmd', to the
+ * data in the uio structure.
+ */
+int
+crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd,
+ void *digest_ctx, void (*update)(void))
+{
+ uio_t *uiop = data->cd_uio;
+ off_t offset = data->cd_offset;
+ size_t length = len;
+ uint_t vec_idx;
+ size_t cur_len;
+ uchar_t *datap;
+
+ ASSERT(data->cd_format == CRYPTO_DATA_UIO);
+ if (uiop->uio_segflg != UIO_SYSSPACE) {
+ return (CRYPTO_ARGUMENTS_BAD);
+ }
+
+ /*
+ * Jump to the first iovec containing data to be
+ * processed.
+ */
+ for (vec_idx = 0; vec_idx < uiop->uio_iovcnt &&
+ offset >= uiop->uio_iov[vec_idx].iov_len;
+ offset -= uiop->uio_iov[vec_idx++].iov_len)
+ ;
+
+ if (vec_idx == uiop->uio_iovcnt) {
+ /*
+ * The caller specified an offset that is larger than
+ * the total size of the buffers it provided.
+ */
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ while (vec_idx < uiop->uio_iovcnt && length > 0) {
+ cur_len = MIN(uiop->uio_iov[vec_idx].iov_len -
+ offset, length);
+
+ datap = (uchar_t *)(uiop->uio_iov[vec_idx].iov_base +
+ offset);
+ switch (cmd) {
+ case COPY_FROM_DATA:
+ bcopy(datap, buf, cur_len);
+ buf += cur_len;
+ break;
+ case COPY_TO_DATA:
+ bcopy(buf, datap, cur_len);
+ buf += cur_len;
+ break;
+ case COMPARE_TO_DATA:
+ if (bcmp(datap, buf, cur_len))
+ return (CRYPTO_SIGNATURE_INVALID);
+ buf += cur_len;
+ break;
+ case MD5_DIGEST_DATA:
+ case SHA1_DIGEST_DATA:
+ case SHA2_DIGEST_DATA:
+ case GHASH_DATA:
+ return (CRYPTO_ARGUMENTS_BAD);
+ }
+
+ length -= cur_len;
+ vec_idx++;
+ offset = 0;
+ }
+
+ if (vec_idx == uiop->uio_iovcnt && length > 0) {
+ /*
+ * The end of the specified iovecs was reached but
+ * the requested length could not be processed.
+ */
+ switch (cmd) {
+ case COPY_TO_DATA:
+ data->cd_length = len;
+ return (CRYPTO_BUFFER_TOO_SMALL);
+ default:
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+ }
+
+ return (CRYPTO_SUCCESS);
+}
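+
+/*
+ * Usage sketch (illustrative): copying a computed MAC out of a
+ * uio-based crypto_data_t. The helper walks the iovecs, so the
+ * caller only supplies a flat buffer:
+ *
+ *     uchar_t mac[16];
+ *     int rv;
+ *
+ *     rv = crypto_uio_data(data, mac, sizeof (mac), COPY_TO_DATA,
+ *         NULL, NULL);
+ *
+ * On CRYPTO_BUFFER_TOO_SMALL, data->cd_length is set to the required
+ * length, mirroring the convention used by crypto_put_output_data()
+ * below.
+ */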
+
+int
+crypto_put_output_data(uchar_t *buf, crypto_data_t *output, int len)
+{
+ switch (output->cd_format) {
+ case CRYPTO_DATA_RAW:
+ if (output->cd_raw.iov_len < len) {
+ output->cd_length = len;
+ return (CRYPTO_BUFFER_TOO_SMALL);
+ }
+ bcopy(buf, (uchar_t *)(output->cd_raw.iov_base +
+ output->cd_offset), len);
+ break;
+
+ case CRYPTO_DATA_UIO:
+ return (crypto_uio_data(output, buf, len,
+ COPY_TO_DATA, NULL, NULL));
+ default:
+ return (CRYPTO_ARGUMENTS_BAD);
+ }
+
+ return (CRYPTO_SUCCESS);
+}
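+
+/*
+ * A provider's final/atomic digest entry point would typically end
+ * with a call like the following (illustrative sketch):
+ *
+ *     rv = crypto_put_output_data(digest_buf, output, digest_len);
+ *     if (rv == CRYPTO_SUCCESS)
+ *             output->cd_length = digest_len;
+ *
+ * i.e. cd_length is only updated to the number of bytes actually
+ * produced once the copy-out has succeeded; on
+ * CRYPTO_BUFFER_TOO_SMALL it already holds the required length.
+ */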
+
+int
+crypto_update_iov(void *ctx, crypto_data_t *input, crypto_data_t *output,
+ int (*cipher)(void *, caddr_t, size_t, crypto_data_t *),
+ void (*copy_block)(uint8_t *, uint64_t *))
+{
+ common_ctx_t *common_ctx = ctx;
+ int rv;
+
+ if (input->cd_miscdata != NULL) {
+ copy_block((uint8_t *)input->cd_miscdata,
+ &common_ctx->cc_iv[0]);
+ }
+
+ if (input->cd_raw.iov_len < input->cd_length)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ rv = (cipher)(ctx, input->cd_raw.iov_base + input->cd_offset,
+ input->cd_length, (input == output) ? NULL : output);
+
+ return (rv);
+}
+
+int
+crypto_update_uio(void *ctx, crypto_data_t *input, crypto_data_t *output,
+ int (*cipher)(void *, caddr_t, size_t, crypto_data_t *),
+ void (*copy_block)(uint8_t *, uint64_t *))
+{
+ common_ctx_t *common_ctx = ctx;
+ uio_t *uiop = input->cd_uio;
+ off_t offset = input->cd_offset;
+ size_t length = input->cd_length;
+ uint_t vec_idx;
+ size_t cur_len;
+
+ if (input->cd_miscdata != NULL) {
+ copy_block((uint8_t *)input->cd_miscdata,
+ &common_ctx->cc_iv[0]);
+ }
+
+ if (input->cd_uio->uio_segflg != UIO_SYSSPACE) {
+ return (CRYPTO_ARGUMENTS_BAD);
+ }
+
+ /*
+ * Jump to the first iovec containing data to be
+ * processed.
+ */
+ for (vec_idx = 0; vec_idx < uiop->uio_iovcnt &&
+ offset >= uiop->uio_iov[vec_idx].iov_len;
+ offset -= uiop->uio_iov[vec_idx++].iov_len)
+ ;
+ if (vec_idx == uiop->uio_iovcnt) {
+ /*
+ * The caller specified an offset that is larger than the
+ * total size of the buffers it provided.
+ */
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ /*
+ * Now process the iovecs.
+ */
+ while (vec_idx < uiop->uio_iovcnt && length > 0) {
+ cur_len = MIN(uiop->uio_iov[vec_idx].iov_len -
+ offset, length);
+
+ (cipher)(ctx, uiop->uio_iov[vec_idx].iov_base + offset,
+ cur_len, (input == output) ? NULL : output);
+
+ length -= cur_len;
+ vec_idx++;
+ offset = 0;
+ }
+
+ if (vec_idx == uiop->uio_iovcnt && length > 0) {
+ /*
+ * The end of the specified iovecs was reached but the
+ * requested length could not be processed, i.e. the
+ * caller asked to digest more data than it provided.
+ */
+
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ return (CRYPTO_SUCCESS);
+}
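+
+/*
+ * Note the (input == output) ? NULL : output convention above: a NULL
+ * crypto_data_t passed to the cipher callback signals an in-place
+ * operation. A mode implementation might honor it like this
+ * (illustrative sketch; my_mode_cipher and its helpers are
+ * hypothetical):
+ *
+ *     static int
+ *     my_mode_cipher(void *ctx, caddr_t data, size_t len,
+ *         crypto_data_t *out)
+ *     {
+ *             if (out == NULL)
+ *                     return (cipher_in_place(ctx, data, len));
+ *             return (cipher_to_output(ctx, data, len, out));
+ *     }
+ */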
diff --git a/zfs/module/icp/core/kcf_prov_tabs.c b/zfs/module/icp/core/kcf_prov_tabs.c
new file mode 100644
index 000000000000..94e6937bcd76
--- /dev/null
+++ b/zfs/module/icp/core/kcf_prov_tabs.c
@@ -0,0 +1,645 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * This file is part of the core Kernel Cryptographic Framework.
+ * It implements the management of tables of providers. Entries are
+ * added and removed when cryptographic providers register with
+ * and unregister from the framework, respectively. The KCF scheduler
+ * and ioctl pseudo driver call these routines to obtain the list
+ * of available providers.
+ *
+ * The provider table is indexed by crypto_provider_id_t. Each
+ * element of the table contains a pointer to a provider descriptor,
+ * or NULL if the entry is free.
+ *
+ * This file also implements helper functions to allocate and free
+ * provider descriptors.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/sched_impl.h>
+#include <sys/crypto/spi.h>
+
+#define KCF_MAX_PROVIDERS 512 /* max number of providers */
+
+/*
+ * Prov_tab is an array of providers which is updated when
+ * a crypto provider registers with kcf. The provider calls the
+ * SPI routine, crypto_register_provider(), which in turn calls
+ * kcf_prov_tab_add_provider().
+ *
+ * A provider unregisters by calling crypto_unregister_provider()
+ * which triggers the removal of the prov_tab entry.
+ * It also calls kcf_remove_mech_provider().
+ *
+ * prov_tab entries are not updated from kcf.conf or by cryptoadm(1M).
+ */
+static kcf_provider_desc_t **prov_tab = NULL;
+static kmutex_t prov_tab_mutex; /* ensure exclusive access to the table */
+static uint_t prov_tab_num = 0; /* number of providers in table */
+static uint_t prov_tab_max = KCF_MAX_PROVIDERS;
+
+void
+kcf_prov_tab_destroy(void)
+{
+ mutex_destroy(&prov_tab_mutex);
+
+ if (prov_tab)
+ kmem_free(prov_tab, prov_tab_max *
+ sizeof (kcf_provider_desc_t *));
+}
+
+/*
+ * Initialize a mutex and the KCF providers table, prov_tab.
+ * The providers table is dynamically allocated with prov_tab_max entries.
+ * Called from kcf module _init().
+ */
+void
+kcf_prov_tab_init(void)
+{
+ mutex_init(&prov_tab_mutex, NULL, MUTEX_DEFAULT, NULL);
+
+ prov_tab = kmem_zalloc(prov_tab_max * sizeof (kcf_provider_desc_t *),
+ KM_SLEEP);
+}
+
+/*
+ * Add a provider to the provider table. If no free entry can be found
+ * for the new provider, returns CRYPTO_HOST_MEMORY. Otherwise, add
+ * the provider to the table, initialize the pd_prov_id field
+ * of the specified provider descriptor to the index in that table,
+ * and return CRYPTO_SUCCESS. Note that a REFHOLD is done on the
+ * provider while it is pointed to by a table entry.
+ */
+int
+kcf_prov_tab_add_provider(kcf_provider_desc_t *prov_desc)
+{
+ uint_t i;
+
+ ASSERT(prov_tab != NULL);
+
+ mutex_enter(&prov_tab_mutex);
+
+ /* find free slot in providers table */
+ for (i = 1; i < KCF_MAX_PROVIDERS && prov_tab[i] != NULL; i++)
+ ;
+ if (i == KCF_MAX_PROVIDERS) {
+ /* ran out of provider table entries */
+ mutex_exit(&prov_tab_mutex);
+ cmn_err(CE_WARN, "out of provider table entries");
+ return (CRYPTO_HOST_MEMORY);
+ }
+
+ /* initialize entry */
+ prov_tab[i] = prov_desc;
+ KCF_PROV_REFHOLD(prov_desc);
+ KCF_PROV_IREFHOLD(prov_desc);
+ prov_tab_num++;
+
+ mutex_exit(&prov_tab_mutex);
+
+ /* update provider descriptor */
+ prov_desc->pd_prov_id = i;
+
+ /*
+ * The KCF-private provider handle is defined as the internal
+ * provider id.
+ */
+ prov_desc->pd_kcf_prov_handle =
+ (crypto_kcf_provider_handle_t)prov_desc->pd_prov_id;
+
+ return (CRYPTO_SUCCESS);
+}
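+
+/*
+ * For reference, the registration path looks roughly like this
+ * (illustrative sketch): a provider fills in a crypto_provider_info_t
+ * and calls crypto_register_provider(), which allocates a descriptor
+ * via kcf_alloc_provider_desc() and then slots it in here:
+ *
+ *     if (kcf_prov_tab_add_provider(prov_desc) != CRYPTO_SUCCESS) {
+ *             kcf_free_provider_desc(prov_desc);
+ *             return (CRYPTO_HOST_MEMORY);
+ *     }
+ *
+ * After this call, prov_desc->pd_prov_id indexes prov_tab[], and the
+ * table entry holds both a reference (REFHOLD) and an internal
+ * reference (IREFHOLD) on the descriptor.
+ */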
+
+/*
+ * Remove the provider specified by its id. A REFRELE is done on the
+ * corresponding provider descriptor before this function returns.
+ * Returns CRYPTO_UNKNOWN_PROVIDER if the provider id is not valid.
+ */
+int
+kcf_prov_tab_rem_provider(crypto_provider_id_t prov_id)
+{
+ kcf_provider_desc_t *prov_desc;
+
+ ASSERT(prov_tab != NULL);
+ ASSERT(prov_tab_num >= 0);
+
+ /*
+ * Validate provider id, since it can be specified by a 3rd-party
+ * provider.
+ */
+
+ mutex_enter(&prov_tab_mutex);
+ if (prov_id >= KCF_MAX_PROVIDERS ||
+ ((prov_desc = prov_tab[prov_id]) == NULL)) {
+ mutex_exit(&prov_tab_mutex);
+ return (CRYPTO_INVALID_PROVIDER_ID);
+ }
+ mutex_exit(&prov_tab_mutex);
+
+ /*
+ * The provider id must remain valid until the associated provider
+ * descriptor is freed. For this reason, we simply release our
+ * reference to the descriptor here. When the reference count
+ * reaches zero, kcf_free_provider_desc() will be invoked and
+ * the associated entry in the providers table will be released
+ * at that time.
+ */
+
+ KCF_PROV_REFRELE(prov_desc);
+ KCF_PROV_IREFRELE(prov_desc);
+
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * Returns the provider descriptor corresponding to the specified
+ * provider id. A REFHOLD is done on the descriptor before it is
+ * returned to the caller. It is the responsibility of the caller
+ * to do a REFRELE once it is done with the provider descriptor.
+ */
+kcf_provider_desc_t *
+kcf_prov_tab_lookup(crypto_provider_id_t prov_id)
+{
+ kcf_provider_desc_t *prov_desc;
+
+ mutex_enter(&prov_tab_mutex);
+
+ prov_desc = prov_tab[prov_id];
+
+ if (prov_desc == NULL) {
+ mutex_exit(&prov_tab_mutex);
+ return (NULL);
+ }
+
+ KCF_PROV_REFHOLD(prov_desc);
+
+ mutex_exit(&prov_tab_mutex);
+
+ return (prov_desc);
+}
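+
+/*
+ * The canonical calling pattern (illustrative sketch):
+ *
+ *     kcf_provider_desc_t *pd;
+ *
+ *     if ((pd = kcf_prov_tab_lookup(prov_id)) == NULL)
+ *             return (CRYPTO_UNKNOWN_PROVIDER);
+ *     ... use pd ...
+ *     KCF_PROV_REFRELE(pd);
+ */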
+
+static void
+allocate_ops_v1(crypto_ops_t *src, crypto_ops_t *dst, uint_t *mech_list_count)
+{
+ if (src->co_control_ops != NULL)
+ dst->co_control_ops = kmem_alloc(sizeof (crypto_control_ops_t),
+ KM_SLEEP);
+
+ if (src->co_digest_ops != NULL)
+ dst->co_digest_ops = kmem_alloc(sizeof (crypto_digest_ops_t),
+ KM_SLEEP);
+
+ if (src->co_cipher_ops != NULL)
+ dst->co_cipher_ops = kmem_alloc(sizeof (crypto_cipher_ops_t),
+ KM_SLEEP);
+
+ if (src->co_mac_ops != NULL)
+ dst->co_mac_ops = kmem_alloc(sizeof (crypto_mac_ops_t),
+ KM_SLEEP);
+
+ if (src->co_sign_ops != NULL)
+ dst->co_sign_ops = kmem_alloc(sizeof (crypto_sign_ops_t),
+ KM_SLEEP);
+
+ if (src->co_verify_ops != NULL)
+ dst->co_verify_ops = kmem_alloc(sizeof (crypto_verify_ops_t),
+ KM_SLEEP);
+
+ if (src->co_dual_ops != NULL)
+ dst->co_dual_ops = kmem_alloc(sizeof (crypto_dual_ops_t),
+ KM_SLEEP);
+
+ if (src->co_dual_cipher_mac_ops != NULL)
+ dst->co_dual_cipher_mac_ops = kmem_alloc(
+ sizeof (crypto_dual_cipher_mac_ops_t), KM_SLEEP);
+
+ if (src->co_random_ops != NULL) {
+ dst->co_random_ops = kmem_alloc(
+ sizeof (crypto_random_number_ops_t), KM_SLEEP);
+
+ /*
+ * Allocate storage to store the array of supported mechanisms
+ * specified by provider. We allocate extra mechanism storage
+ * if the provider has random_ops since we keep an internal
+ * mechanism, SUN_RANDOM, in this case.
+ */
+ (*mech_list_count)++;
+ }
+
+ if (src->co_session_ops != NULL)
+ dst->co_session_ops = kmem_alloc(sizeof (crypto_session_ops_t),
+ KM_SLEEP);
+
+ if (src->co_object_ops != NULL)
+ dst->co_object_ops = kmem_alloc(sizeof (crypto_object_ops_t),
+ KM_SLEEP);
+
+ if (src->co_key_ops != NULL)
+ dst->co_key_ops = kmem_alloc(sizeof (crypto_key_ops_t),
+ KM_SLEEP);
+
+ if (src->co_provider_ops != NULL)
+ dst->co_provider_ops = kmem_alloc(
+ sizeof (crypto_provider_management_ops_t), KM_SLEEP);
+
+ if (src->co_ctx_ops != NULL)
+ dst->co_ctx_ops = kmem_alloc(sizeof (crypto_ctx_ops_t),
+ KM_SLEEP);
+}
+
+static void
+allocate_ops_v2(crypto_ops_t *src, crypto_ops_t *dst)
+{
+ if (src->co_mech_ops != NULL)
+ dst->co_mech_ops = kmem_alloc(sizeof (crypto_mech_ops_t),
+ KM_SLEEP);
+}
+
+static void
+allocate_ops_v3(crypto_ops_t *src, crypto_ops_t *dst)
+{
+ if (src->co_nostore_key_ops != NULL)
+ dst->co_nostore_key_ops =
+ kmem_alloc(sizeof (crypto_nostore_key_ops_t), KM_SLEEP);
+}
+
+/*
+ * Allocate a provider descriptor. mech_list_count specifies the
+ * number of mechanisms supported by the providers, and is used
+ * to allocate storage for the mechanism table.
+ * This function may sleep while allocating memory, which is OK
+ * since it is invoked from user context during provider registration.
+ */
+kcf_provider_desc_t *
+kcf_alloc_provider_desc(crypto_provider_info_t *info)
+{
+ int i, j;
+ kcf_provider_desc_t *desc;
+ uint_t mech_list_count = info->pi_mech_list_count;
+ crypto_ops_t *src_ops = info->pi_ops_vector;
+
+ desc = kmem_zalloc(sizeof (kcf_provider_desc_t), KM_SLEEP);
+
+ /*
+ * pd_description serves two purposes
+ * - Appears as a blank padded PKCS#11 style string, that will be
+ * returned to applications in CK_SLOT_INFO.slotDescription.
+ * This means that we should not have a null character in the
+ * first CRYPTO_PROVIDER_DESCR_MAX_LEN bytes.
+ * - Appears as a null-terminated string that can be used by
+ * other kcf routines.
+ *
+ * So, we allocate enough room for one extra null terminator,
+ * which keeps everyone happy.
+ */
+ desc->pd_description = kmem_alloc(CRYPTO_PROVIDER_DESCR_MAX_LEN + 1,
+ KM_SLEEP);
+ (void) memset(desc->pd_description, ' ',
+ CRYPTO_PROVIDER_DESCR_MAX_LEN);
+ desc->pd_description[CRYPTO_PROVIDER_DESCR_MAX_LEN] = '\0';
+
+ /*
+ * Since the framework does not require the ops vector specified
+ * by a provider during registration to be persistent, KCF
+ * allocates its own storage into which the ops vectors are
+ * copied.
+ */
+ desc->pd_ops_vector = kmem_zalloc(sizeof (crypto_ops_t), KM_SLEEP);
+
+ if (info->pi_provider_type != CRYPTO_LOGICAL_PROVIDER) {
+ allocate_ops_v1(src_ops, desc->pd_ops_vector, &mech_list_count);
+ if (info->pi_interface_version >= CRYPTO_SPI_VERSION_2)
+ allocate_ops_v2(src_ops, desc->pd_ops_vector);
+ if (info->pi_interface_version == CRYPTO_SPI_VERSION_3)
+ allocate_ops_v3(src_ops, desc->pd_ops_vector);
+ }
+
+ desc->pd_mech_list_count = mech_list_count;
+ desc->pd_mechanisms = kmem_zalloc(sizeof (crypto_mech_info_t) *
+ mech_list_count, KM_SLEEP);
+ for (i = 0; i < KCF_OPS_CLASSSIZE; i++)
+ for (j = 0; j < KCF_MAXMECHTAB; j++)
+ desc->pd_mech_indx[i][j] = KCF_INVALID_INDX;
+
+ desc->pd_prov_id = KCF_PROVID_INVALID;
+ desc->pd_state = KCF_PROV_ALLOCATED;
+
+ mutex_init(&desc->pd_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&desc->pd_resume_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&desc->pd_remove_cv, NULL, CV_DEFAULT, NULL);
+
+ return (desc);
+}
+
+/*
+ * Called by KCF_PROV_REFRELE when a provider's reference count drops
+ * to zero. We free the descriptor when the last reference is released.
+ * However, for software providers, we do not free it when there is an
+ * unregister thread waiting. We signal that thread in this case and
+ * that thread is responsible for freeing the descriptor.
+ */
+void
+kcf_provider_zero_refcnt(kcf_provider_desc_t *desc)
+{
+ mutex_enter(&desc->pd_lock);
+ switch (desc->pd_prov_type) {
+ case CRYPTO_SW_PROVIDER:
+ if (desc->pd_state == KCF_PROV_REMOVED ||
+ desc->pd_state == KCF_PROV_DISABLED) {
+ desc->pd_state = KCF_PROV_FREED;
+ cv_broadcast(&desc->pd_remove_cv);
+ mutex_exit(&desc->pd_lock);
+ break;
+ }
+ /* FALLTHRU */
+
+ case CRYPTO_HW_PROVIDER:
+ case CRYPTO_LOGICAL_PROVIDER:
+ mutex_exit(&desc->pd_lock);
+ kcf_free_provider_desc(desc);
+ }
+}
+
+/*
+ * Free a provider descriptor.
+ */
+void
+kcf_free_provider_desc(kcf_provider_desc_t *desc)
+{
+ if (desc == NULL)
+ return;
+
+ mutex_enter(&prov_tab_mutex);
+ if (desc->pd_prov_id != KCF_PROVID_INVALID) {
+ /* release the associated providers table entry */
+ ASSERT(prov_tab[desc->pd_prov_id] != NULL);
+ prov_tab[desc->pd_prov_id] = NULL;
+ prov_tab_num--;
+ }
+ mutex_exit(&prov_tab_mutex);
+
+ /* free the kernel memory associated with the provider descriptor */
+
+ if (desc->pd_description != NULL)
+ kmem_free(desc->pd_description,
+ CRYPTO_PROVIDER_DESCR_MAX_LEN + 1);
+
+ if (desc->pd_ops_vector != NULL) {
+
+ if (desc->pd_ops_vector->co_control_ops != NULL)
+ kmem_free(desc->pd_ops_vector->co_control_ops,
+ sizeof (crypto_control_ops_t));
+
+ if (desc->pd_ops_vector->co_digest_ops != NULL)
+ kmem_free(desc->pd_ops_vector->co_digest_ops,
+ sizeof (crypto_digest_ops_t));
+
+ if (desc->pd_ops_vector->co_cipher_ops != NULL)
+ kmem_free(desc->pd_ops_vector->co_cipher_ops,
+ sizeof (crypto_cipher_ops_t));
+
+ if (desc->pd_ops_vector->co_mac_ops != NULL)
+ kmem_free(desc->pd_ops_vector->co_mac_ops,
+ sizeof (crypto_mac_ops_t));
+
+ if (desc->pd_ops_vector->co_sign_ops != NULL)
+ kmem_free(desc->pd_ops_vector->co_sign_ops,
+ sizeof (crypto_sign_ops_t));
+
+ if (desc->pd_ops_vector->co_verify_ops != NULL)
+ kmem_free(desc->pd_ops_vector->co_verify_ops,
+ sizeof (crypto_verify_ops_t));
+
+ if (desc->pd_ops_vector->co_dual_ops != NULL)
+ kmem_free(desc->pd_ops_vector->co_dual_ops,
+ sizeof (crypto_dual_ops_t));
+
+ if (desc->pd_ops_vector->co_dual_cipher_mac_ops != NULL)
+ kmem_free(desc->pd_ops_vector->co_dual_cipher_mac_ops,
+ sizeof (crypto_dual_cipher_mac_ops_t));
+
+ if (desc->pd_ops_vector->co_random_ops != NULL)
+ kmem_free(desc->pd_ops_vector->co_random_ops,
+ sizeof (crypto_random_number_ops_t));
+
+ if (desc->pd_ops_vector->co_session_ops != NULL)
+ kmem_free(desc->pd_ops_vector->co_session_ops,
+ sizeof (crypto_session_ops_t));
+
+ if (desc->pd_ops_vector->co_object_ops != NULL)
+ kmem_free(desc->pd_ops_vector->co_object_ops,
+ sizeof (crypto_object_ops_t));
+
+ if (desc->pd_ops_vector->co_key_ops != NULL)
+ kmem_free(desc->pd_ops_vector->co_key_ops,
+ sizeof (crypto_key_ops_t));
+
+ if (desc->pd_ops_vector->co_provider_ops != NULL)
+ kmem_free(desc->pd_ops_vector->co_provider_ops,
+ sizeof (crypto_provider_management_ops_t));
+
+ if (desc->pd_ops_vector->co_ctx_ops != NULL)
+ kmem_free(desc->pd_ops_vector->co_ctx_ops,
+ sizeof (crypto_ctx_ops_t));
+
+ if (desc->pd_ops_vector->co_mech_ops != NULL)
+ kmem_free(desc->pd_ops_vector->co_mech_ops,
+ sizeof (crypto_mech_ops_t));
+
+ if (desc->pd_ops_vector->co_nostore_key_ops != NULL)
+ kmem_free(desc->pd_ops_vector->co_nostore_key_ops,
+ sizeof (crypto_nostore_key_ops_t));
+
+ kmem_free(desc->pd_ops_vector, sizeof (crypto_ops_t));
+ }
+
+ if (desc->pd_mechanisms != NULL)
+ /* free the memory associated with the mechanism info structures */
+ kmem_free(desc->pd_mechanisms, sizeof (crypto_mech_info_t) *
+ desc->pd_mech_list_count);
+
+ if (desc->pd_sched_info.ks_taskq != NULL)
+ taskq_destroy(desc->pd_sched_info.ks_taskq);
+
+ mutex_destroy(&desc->pd_lock);
+ cv_destroy(&desc->pd_resume_cv);
+ cv_destroy(&desc->pd_remove_cv);
+
+ kmem_free(desc, sizeof (kcf_provider_desc_t));
+}
+
+/*
+ * Returns an array of hardware and logical provider descriptors,
+ * a.k.a the PKCS#11 slot list. A REFHOLD is done on each descriptor
+ * before the array is returned. The entire table can be freed by
+ * calling kcf_free_provider_tab().
+ */
+int
+kcf_get_slot_list(uint_t *count, kcf_provider_desc_t ***array,
+ boolean_t unverified)
+{
+ kcf_provider_desc_t *prov_desc;
+ kcf_provider_desc_t **p = NULL;
+ char *last;
+ uint_t cnt = 0;
+ uint_t i, j;
+ int rval = CRYPTO_SUCCESS;
+ size_t n, final_size;
+
+ /* count the providers */
+ mutex_enter(&prov_tab_mutex);
+ for (i = 0; i < KCF_MAX_PROVIDERS; i++) {
+ if ((prov_desc = prov_tab[i]) != NULL &&
+ ((prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER &&
+ (prov_desc->pd_flags & CRYPTO_HIDE_PROVIDER) == 0) ||
+ prov_desc->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)) {
+ if (KCF_IS_PROV_USABLE(prov_desc) ||
+ (unverified && KCF_IS_PROV_UNVERIFIED(prov_desc))) {
+ cnt++;
+ }
+ }
+ }
+ mutex_exit(&prov_tab_mutex);
+
+ if (cnt == 0)
+ goto out;
+
+ n = cnt * sizeof (kcf_provider_desc_t *);
+again:
+ p = kmem_zalloc(n, KM_SLEEP);
+
+ /* pointer to last entry in the array */
+ last = (char *)&p[cnt-1];
+
+ mutex_enter(&prov_tab_mutex);
+ /* fill the slot list */
+ for (i = 0, j = 0; i < KCF_MAX_PROVIDERS; i++) {
+ if ((prov_desc = prov_tab[i]) != NULL &&
+ ((prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER &&
+ (prov_desc->pd_flags & CRYPTO_HIDE_PROVIDER) == 0) ||
+ prov_desc->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)) {
+ if (KCF_IS_PROV_USABLE(prov_desc) ||
+ (unverified && KCF_IS_PROV_UNVERIFIED(prov_desc))) {
+ if ((char *)&p[j] > last) {
+ mutex_exit(&prov_tab_mutex);
+ kcf_free_provider_tab(cnt, p);
+ n = n << 1;
+ cnt = cnt << 1;
+ goto again;
+ }
+ p[j++] = prov_desc;
+ KCF_PROV_REFHOLD(prov_desc);
+ }
+ }
+ }
+ mutex_exit(&prov_tab_mutex);
+
+ final_size = j * sizeof (kcf_provider_desc_t *);
+ cnt = j;
+ ASSERT(final_size <= n);
+
+ /* check if buffer we allocated is too large */
+ if (final_size < n) {
+ char *final_buffer = NULL;
+
+ if (final_size > 0) {
+ final_buffer = kmem_alloc(final_size, KM_SLEEP);
+ bcopy(p, final_buffer, final_size);
+ }
+ kmem_free(p, n);
+ p = (kcf_provider_desc_t **)final_buffer;
+ }
+out:
+ *count = cnt;
+ *array = p;
+ return (rval);
+}
+
+/*
+ * Free an array of hardware provider descriptors. A REFRELE
+ * is done on each descriptor before the table is freed.
+ */
+void
+kcf_free_provider_tab(uint_t count, kcf_provider_desc_t **array)
+{
+ kcf_provider_desc_t *prov_desc;
+ int i;
+
+ for (i = 0; i < count; i++) {
+ if ((prov_desc = array[i]) != NULL) {
+ KCF_PROV_REFRELE(prov_desc);
+ }
+ }
+ kmem_free(array, count * sizeof (kcf_provider_desc_t *));
+}
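+
+/*
+ * Together, the two routines above give the PKCS#11-style slot list
+ * pattern (illustrative sketch):
+ *
+ *     uint_t count;
+ *     kcf_provider_desc_t **slots;
+ *
+ *     if (kcf_get_slot_list(&count, &slots, B_FALSE) ==
+ *         CRYPTO_SUCCESS && slots != NULL) {
+ *             ... walk slots[0 .. count - 1] ...
+ *             kcf_free_provider_tab(count, slots);
+ *     }
+ */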
+
+/*
+ * Returns in the location pointed to by pd a pointer to the descriptor
+ * for the software provider for the specified mechanism.
+ * The provider descriptor is returned held and it is the caller's
+ * responsibility to release it when done. The mechanism entry
+ * is returned if the optional argument mep is non-NULL.
+ *
+ * Returns one of the CRYPTO_* error codes on failure, and
+ * CRYPTO_SUCCESS on success.
+ */
+int
+kcf_get_sw_prov(crypto_mech_type_t mech_type, kcf_provider_desc_t **pd,
+ kcf_mech_entry_t **mep, boolean_t log_warn)
+{
+ kcf_mech_entry_t *me;
+
+ /* get the mechanism entry for this mechanism */
+ if (kcf_get_mech_entry(mech_type, &me) != KCF_SUCCESS)
+ return (CRYPTO_MECHANISM_INVALID);
+
+ /*
+ * Get the software provider for this mechanism.
+ * Lock the mech_entry until we grab the 'pd'.
+ */
+ mutex_enter(&me->me_mutex);
+
+ if (me->me_sw_prov == NULL ||
+ (*pd = me->me_sw_prov->pm_prov_desc) == NULL) {
+ /* no SW provider for this mechanism */
+ if (log_warn)
+ cmn_err(CE_WARN, "no SW provider for \"%s\"\n",
+ me->me_name);
+ mutex_exit(&me->me_mutex);
+ return (CRYPTO_MECH_NOT_SUPPORTED);
+ }
+
+ KCF_PROV_REFHOLD(*pd);
+ mutex_exit(&me->me_mutex);
+
+ if (mep != NULL)
+ *mep = me;
+
+ return (CRYPTO_SUCCESS);
+}
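+
+/*
+ * Typical use (illustrative sketch): dispatch paths fall back to the
+ * software provider for a mechanism when no usable hardware provider
+ * is found:
+ *
+ *     kcf_provider_desc_t *pd;
+ *     kcf_mech_entry_t *me;
+ *
+ *     if (kcf_get_sw_prov(mech_type, &pd, &me, B_TRUE) !=
+ *         CRYPTO_SUCCESS)
+ *             return (CRYPTO_MECH_NOT_SUPPORTED);
+ *     ... submit the request to pd ...
+ *     KCF_PROV_REFRELE(pd);
+ */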
diff --git a/zfs/module/icp/core/kcf_sched.c b/zfs/module/icp/core/kcf_sched.c
new file mode 100644
index 000000000000..da2346f7ec21
--- /dev/null
+++ b/zfs/module/icp/core/kcf_sched.c
@@ -0,0 +1,1782 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * This file contains the core framework routines for the
+ * kernel cryptographic framework. These routines sit at the
+ * layer between the kernel API/ioctls and the SPI.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/sched_impl.h>
+#include <sys/crypto/api.h>
+
+kcf_global_swq_t *gswq; /* Global software queue */
+
+/* Thread pool related variables */
+static kcf_pool_t *kcfpool; /* Thread pool of kcfd LWPs */
+int kcf_maxthreads = 2;
+int kcf_minthreads = 1;
+int kcf_thr_multiple = 2; /* Boot-time tunable for experimentation */
+static ulong_t kcf_idlethr_timeout;
+#define KCF_DEFAULT_THRTIMEOUT 60000000 /* 60 seconds */
+
+/* kmem caches used by the scheduler */
+static kmem_cache_t *kcf_sreq_cache;
+static kmem_cache_t *kcf_areq_cache;
+static kmem_cache_t *kcf_context_cache;
+
+/* Global request ID table */
+static kcf_reqid_table_t *kcf_reqid_table[REQID_TABLES];
+
+/* KCF stats. Not protected. */
+static kcf_stats_t kcf_ksdata = {
+ { "total threads in pool", KSTAT_DATA_UINT32},
+ { "idle threads in pool", KSTAT_DATA_UINT32},
+ { "min threads in pool", KSTAT_DATA_UINT32},
+ { "max threads in pool", KSTAT_DATA_UINT32},
+ { "requests in gswq", KSTAT_DATA_UINT32},
+ { "max requests in gswq", KSTAT_DATA_UINT32},
+ { "threads for HW taskq", KSTAT_DATA_UINT32},
+ { "minalloc for HW taskq", KSTAT_DATA_UINT32},
+ { "maxalloc for HW taskq", KSTAT_DATA_UINT32}
+};
+
+static kstat_t *kcf_misc_kstat = NULL;
+ulong_t kcf_swprov_hndl = 0;
+
+static kcf_areq_node_t *kcf_areqnode_alloc(kcf_provider_desc_t *,
+ kcf_context_t *, crypto_call_req_t *, kcf_req_params_t *, boolean_t);
+static int kcf_disp_sw_request(kcf_areq_node_t *);
+static void process_req_hwp(void *);
+static int kcf_enqueue(kcf_areq_node_t *);
+static void kcfpool_alloc(void);
+static void kcf_reqid_delete(kcf_areq_node_t *areq);
+static crypto_req_id_t kcf_reqid_insert(kcf_areq_node_t *areq);
+static int kcf_misc_kstat_update(kstat_t *ksp, int rw);
+
+/*
+ * Create a new context.
+ */
+crypto_ctx_t *
+kcf_new_ctx(crypto_call_req_t *crq, kcf_provider_desc_t *pd,
+ crypto_session_id_t sid)
+{
+ crypto_ctx_t *ctx;
+ kcf_context_t *kcf_ctx;
+
+ kcf_ctx = kmem_cache_alloc(kcf_context_cache,
+ (crq == NULL) ? KM_SLEEP : KM_NOSLEEP);
+ if (kcf_ctx == NULL)
+ return (NULL);
+
+ /* initialize the context for the consumer */
+ kcf_ctx->kc_refcnt = 1;
+ kcf_ctx->kc_req_chain_first = NULL;
+ kcf_ctx->kc_req_chain_last = NULL;
+ kcf_ctx->kc_secondctx = NULL;
+ KCF_PROV_REFHOLD(pd);
+ kcf_ctx->kc_prov_desc = pd;
+ kcf_ctx->kc_sw_prov_desc = NULL;
+ kcf_ctx->kc_mech = NULL;
+
+ ctx = &kcf_ctx->kc_glbl_ctx;
+ ctx->cc_provider = pd->pd_prov_handle;
+ ctx->cc_session = sid;
+ ctx->cc_provider_private = NULL;
+ ctx->cc_framework_private = (void *)kcf_ctx;
+ ctx->cc_flags = 0;
+ ctx->cc_opstate = NULL;
+
+ return (ctx);
+}
+
+/*
+ * Allocate a new async request node.
+ *
+ * ictx - Framework private context pointer
+ * crq - Has the callback function and argument. Should be non-NULL.
+ * req - The parameters to pass to the SPI
+ */
+static kcf_areq_node_t *
+kcf_areqnode_alloc(kcf_provider_desc_t *pd, kcf_context_t *ictx,
+ crypto_call_req_t *crq, kcf_req_params_t *req, boolean_t isdual)
+{
+ kcf_areq_node_t *arptr, *areq;
+
+ ASSERT(crq != NULL);
+ arptr = kmem_cache_alloc(kcf_areq_cache, KM_NOSLEEP);
+ if (arptr == NULL)
+ return (NULL);
+
+ arptr->an_state = REQ_ALLOCATED;
+ arptr->an_reqarg = *crq;
+ arptr->an_params = *req;
+ arptr->an_context = ictx;
+ arptr->an_isdual = isdual;
+
+ arptr->an_next = arptr->an_prev = NULL;
+ KCF_PROV_REFHOLD(pd);
+ arptr->an_provider = pd;
+ arptr->an_tried_plist = NULL;
+ arptr->an_refcnt = 1;
+ arptr->an_idnext = arptr->an_idprev = NULL;
+
+ /*
+ * Requests for context-less operations do not use the
+ * an_is_my_turn and an_ctxchain_next fields.
+ */
+ if (ictx == NULL)
+ return (arptr);
+
+ KCF_CONTEXT_REFHOLD(ictx);
+ /*
+ * Chain this request to the context.
+ */
+ mutex_enter(&ictx->kc_in_use_lock);
+ arptr->an_ctxchain_next = NULL;
+ if ((areq = ictx->kc_req_chain_last) == NULL) {
+ arptr->an_is_my_turn = B_TRUE;
+ ictx->kc_req_chain_last =
+ ictx->kc_req_chain_first = arptr;
+ } else {
+ ASSERT(ictx->kc_req_chain_first != NULL);
+ arptr->an_is_my_turn = B_FALSE;
+ /* Insert the new request to the end of the chain. */
+ areq->an_ctxchain_next = arptr;
+ ictx->kc_req_chain_last = arptr;
+ }
+ mutex_exit(&ictx->kc_in_use_lock);
+
+ return (arptr);
+}
+
+/*
+ * Queue the request node and do one of the following:
+ * - If there is an idle thread signal it to run.
+ * - If there is no idle thread and max running threads is not
+ * reached, signal the creator thread for more threads.
+ *
+ * If the two conditions above are not met, we don't need to do
+ * anything. The request will be picked up by one of the
+ * worker threads when it becomes available.
+ */
+static int
+kcf_disp_sw_request(kcf_areq_node_t *areq)
+{
+ int err;
+ int cnt = 0;
+
+ if ((err = kcf_enqueue(areq)) != 0)
+ return (err);
+
+ if (kcfpool->kp_idlethreads > 0) {
+ /* Signal an idle thread to run */
+ mutex_enter(&gswq->gs_lock);
+ cv_signal(&gswq->gs_cv);
+ mutex_exit(&gswq->gs_lock);
+
+ return (CRYPTO_QUEUED);
+ }
+
+ /*
+ * We try to keep the number of running threads at
+ * kcf_minthreads to reduce gs_lock contention.
+ */
+ cnt = kcf_minthreads -
+ (kcfpool->kp_threads - kcfpool->kp_blockedthreads);
+ if (cnt > 0) {
+ /*
+ * The following ensures the number of threads in pool
+ * does not exceed kcf_maxthreads.
+ */
+ cnt = MIN(cnt, kcf_maxthreads - (int)kcfpool->kp_threads);
+ if (cnt > 0) {
+ /* Signal the creator thread for more threads */
+ mutex_enter(&kcfpool->kp_user_lock);
+ if (!kcfpool->kp_signal_create_thread) {
+ kcfpool->kp_signal_create_thread = B_TRUE;
+ kcfpool->kp_nthrs = cnt;
+ cv_signal(&kcfpool->kp_user_cv);
+ }
+ mutex_exit(&kcfpool->kp_user_lock);
+ }
+ }
+
+ return (CRYPTO_QUEUED);
+}
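+
+/*
+ * Worked example of the arithmetic above: with kcf_minthreads = 1,
+ * kcf_maxthreads = 2, kp_threads = 2 and kp_blockedthreads = 2,
+ * cnt starts at 1 - (2 - 2) = 1 but is then clamped to
+ * MIN(1, 2 - 2) = 0. No creator-thread signal is sent because the
+ * pool is already at kcf_maxthreads; the request simply waits on the
+ * queue for a worker to unblock.
+ */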
+
+/*
+ * This routine is called by the taskq associated with
+ * each hardware provider. We notify the kernel consumer
+ * via the callback routine in case of CRYPTO_SUCCESS or
+ * a failure.
+ *
+ * A request can be of type kcf_areq_node_t or of type
+ * kcf_sreq_node_t.
+ */
+static void
+process_req_hwp(void *ireq)
+{
+ int error = 0;
+ crypto_ctx_t *ctx;
+ kcf_call_type_t ctype;
+ kcf_provider_desc_t *pd;
+ kcf_areq_node_t *areq = (kcf_areq_node_t *)ireq;
+ kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)ireq;
+
+ pd = ((ctype = GET_REQ_TYPE(ireq)) == CRYPTO_SYNCH) ?
+ sreq->sn_provider : areq->an_provider;
+
+ /*
+ * Wait if flow control is in effect for the provider. A
+ * CRYPTO_PROVIDER_READY or CRYPTO_PROVIDER_FAILED
+ * notification will signal us. We also get signaled if
+ * the provider is unregistering.
+ */
+ if (pd->pd_state == KCF_PROV_BUSY) {
+ mutex_enter(&pd->pd_lock);
+ while (pd->pd_state == KCF_PROV_BUSY)
+ cv_wait(&pd->pd_resume_cv, &pd->pd_lock);
+ mutex_exit(&pd->pd_lock);
+ }
+
+ /*
+ * Bump the internal reference count while the request is being
+ * processed. This is how we know when it's safe to unregister
+ * a provider. This step must precede the pd_state check below.
+ */
+ KCF_PROV_IREFHOLD(pd);
+
+ /*
+ * Fail the request if the provider has failed. We return a
+ * recoverable error and the notified clients attempt any
+ * recovery. For async clients this is done in kcf_aop_done()
+ * and for sync clients it is done in the k-api routines.
+ */
+ if (pd->pd_state >= KCF_PROV_FAILED) {
+ error = CRYPTO_DEVICE_ERROR;
+ goto bail;
+ }
+
+ if (ctype == CRYPTO_SYNCH) {
+ mutex_enter(&sreq->sn_lock);
+ sreq->sn_state = REQ_INPROGRESS;
+ mutex_exit(&sreq->sn_lock);
+
+ ctx = sreq->sn_context ? &sreq->sn_context->kc_glbl_ctx : NULL;
+ error = common_submit_request(sreq->sn_provider, ctx,
+ sreq->sn_params, sreq);
+ } else {
+ kcf_context_t *ictx;
+ ASSERT(ctype == CRYPTO_ASYNCH);
+
+ /*
+ * We are in the per-hardware provider thread context and
+ * hence can sleep. Note that the caller would have done
+ * a taskq_dispatch(..., TQ_NOSLEEP) and would have returned.
+ */
+ ctx = (ictx = areq->an_context) ? &ictx->kc_glbl_ctx : NULL;
+
+ mutex_enter(&areq->an_lock);
+ /*
+ * We need to maintain ordering for multi-part requests.
+ * an_is_my_turn is set to B_TRUE initially for a request
+ * when it is enqueued and there are no other requests
+ * for that context. It is set later from kcf_aop_done() when
+ * the request before us in the chain of requests for the
+ * context completes. We get signaled at that point.
+ */
+ if (ictx != NULL) {
+ ASSERT(ictx->kc_prov_desc == areq->an_provider);
+
+ while (areq->an_is_my_turn == B_FALSE) {
+ cv_wait(&areq->an_turn_cv, &areq->an_lock);
+ }
+ }
+ areq->an_state = REQ_INPROGRESS;
+ mutex_exit(&areq->an_lock);
+
+ error = common_submit_request(areq->an_provider, ctx,
+ &areq->an_params, areq);
+ }
+
+bail:
+ if (error == CRYPTO_QUEUED) {
+ /*
+ * The request is queued by the provider and we should
+ * get a crypto_op_notification() from the provider later.
+ * We notify the consumer at that time.
+ */
+ return;
+ } else { /* CRYPTO_SUCCESS or other failure */
+ KCF_PROV_IREFRELE(pd);
+ if (ctype == CRYPTO_SYNCH)
+ kcf_sop_done(sreq, error);
+ else
+ kcf_aop_done(areq, error);
+ }
+}
+
+/*
+ * This routine checks if a request can be retried on another
+ * provider. If true, mech1 is initialized to point to the mechanism
+ * structure. mech2 is also initialized in case of a dual operation. fg
+ * is initialized to the correct crypto_func_group_t bit flag. They are
+ * initialized by this routine, so that the caller can pass them to a
+ * kcf_get_mech_provider() or kcf_get_dual_provider() with no further change.
+ *
+ * We check that the request is for an init or atomic routine and that
+ * it is for one of the operation groups used from the k-api.
+ */
+static boolean_t
+can_resubmit(kcf_areq_node_t *areq, crypto_mechanism_t **mech1,
+ crypto_mechanism_t **mech2, crypto_func_group_t *fg)
+{
+ kcf_req_params_t *params;
+ kcf_op_type_t optype;
+
+ params = &areq->an_params;
+ optype = params->rp_optype;
+
+ if (!(IS_INIT_OP(optype) || IS_ATOMIC_OP(optype)))
+ return (B_FALSE);
+
+ switch (params->rp_opgrp) {
+ case KCF_OG_DIGEST: {
+ kcf_digest_ops_params_t *dops = ¶ms->rp_u.digest_params;
+
+ dops->do_mech.cm_type = dops->do_framework_mechtype;
+ *mech1 = &dops->do_mech;
+ *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_DIGEST :
+ CRYPTO_FG_DIGEST_ATOMIC;
+ break;
+ }
+
+ case KCF_OG_MAC: {
+ kcf_mac_ops_params_t *mops = ¶ms->rp_u.mac_params;
+
+ mops->mo_mech.cm_type = mops->mo_framework_mechtype;
+ *mech1 = &mops->mo_mech;
+ *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_MAC :
+ CRYPTO_FG_MAC_ATOMIC;
+ break;
+ }
+
+ case KCF_OG_SIGN: {
+ kcf_sign_ops_params_t *sops = ¶ms->rp_u.sign_params;
+
+ sops->so_mech.cm_type = sops->so_framework_mechtype;
+ *mech1 = &sops->so_mech;
+ switch (optype) {
+ case KCF_OP_INIT:
+ *fg = CRYPTO_FG_SIGN;
+ break;
+ case KCF_OP_ATOMIC:
+ *fg = CRYPTO_FG_SIGN_ATOMIC;
+ break;
+ default:
+ ASSERT(optype == KCF_OP_SIGN_RECOVER_ATOMIC);
+ *fg = CRYPTO_FG_SIGN_RECOVER_ATOMIC;
+ }
+ break;
+ }
+
+ case KCF_OG_VERIFY: {
+ kcf_verify_ops_params_t *vops = ¶ms->rp_u.verify_params;
+
+ vops->vo_mech.cm_type = vops->vo_framework_mechtype;
+ *mech1 = &vops->vo_mech;
+ switch (optype) {
+ case KCF_OP_INIT:
+ *fg = CRYPTO_FG_VERIFY;
+ break;
+ case KCF_OP_ATOMIC:
+ *fg = CRYPTO_FG_VERIFY_ATOMIC;
+ break;
+ default:
+ ASSERT(optype == KCF_OP_VERIFY_RECOVER_ATOMIC);
+ *fg = CRYPTO_FG_VERIFY_RECOVER_ATOMIC;
+ }
+ break;
+ }
+
+ case KCF_OG_ENCRYPT: {
+ kcf_encrypt_ops_params_t *eops = ¶ms->rp_u.encrypt_params;
+
+ eops->eo_mech.cm_type = eops->eo_framework_mechtype;
+ *mech1 = &eops->eo_mech;
+ *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_ENCRYPT :
+ CRYPTO_FG_ENCRYPT_ATOMIC;
+ break;
+ }
+
+ case KCF_OG_DECRYPT: {
+ kcf_decrypt_ops_params_t *dcrops = ¶ms->rp_u.decrypt_params;
+
+ dcrops->dop_mech.cm_type = dcrops->dop_framework_mechtype;
+ *mech1 = &dcrops->dop_mech;
+ *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_DECRYPT :
+ CRYPTO_FG_DECRYPT_ATOMIC;
+ break;
+ }
+
+ case KCF_OG_ENCRYPT_MAC: {
+ kcf_encrypt_mac_ops_params_t *eops =
+ ¶ms->rp_u.encrypt_mac_params;
+
+ eops->em_encr_mech.cm_type = eops->em_framework_encr_mechtype;
+ *mech1 = &eops->em_encr_mech;
+ eops->em_mac_mech.cm_type = eops->em_framework_mac_mechtype;
+ *mech2 = &eops->em_mac_mech;
+ *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_ENCRYPT_MAC :
+ CRYPTO_FG_ENCRYPT_MAC_ATOMIC;
+ break;
+ }
+
+ case KCF_OG_MAC_DECRYPT: {
+ kcf_mac_decrypt_ops_params_t *dops =
+ ¶ms->rp_u.mac_decrypt_params;
+
+ dops->md_mac_mech.cm_type = dops->md_framework_mac_mechtype;
+ *mech1 = &dops->md_mac_mech;
+ dops->md_decr_mech.cm_type = dops->md_framework_decr_mechtype;
+ *mech2 = &dops->md_decr_mech;
+ *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_MAC_DECRYPT :
+ CRYPTO_FG_MAC_DECRYPT_ATOMIC;
+ break;
+ }
+
+ default:
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
+
+/*
+ * This routine is called when a request to a provider has failed
+ * with a recoverable error. This routine tries to find another provider
+ * and dispatches the request to the new provider, if one is available.
+ * We reuse the request structure.
+ *
+ * A return value of NULL from kcf_get_mech_provider() indicates
+ * we have tried the last provider.
+ */
+static int
+kcf_resubmit_request(kcf_areq_node_t *areq)
+{
+ int error = CRYPTO_FAILED;
+ kcf_context_t *ictx;
+ kcf_provider_desc_t *old_pd;
+ kcf_provider_desc_t *new_pd;
+ crypto_mechanism_t *mech1 = NULL, *mech2 = NULL;
+ crypto_mech_type_t prov_mt1, prov_mt2;
+ crypto_func_group_t fg = 0;
+
+ if (!can_resubmit(areq, &mech1, &mech2, &fg))
+ return (error);
+
+ old_pd = areq->an_provider;
+ /*
+ * Add old_pd to the list of providers already tried. We release
+ * the hold on old_pd (from the earlier kcf_get_mech_provider()) in
+ * kcf_free_triedlist().
+ */
+ if (kcf_insert_triedlist(&areq->an_tried_plist, old_pd,
+ KM_NOSLEEP) == NULL)
+ return (error);
+
+ if (mech1 && !mech2) {
+ new_pd = kcf_get_mech_provider(mech1->cm_type, NULL, &error,
+ areq->an_tried_plist, fg,
+ (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), 0);
+ } else {
+ ASSERT(mech1 != NULL && mech2 != NULL);
+
+ new_pd = kcf_get_dual_provider(mech1, mech2, NULL, &prov_mt1,
+ &prov_mt2, &error, areq->an_tried_plist, fg, fg,
+ (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), 0);
+ }
+
+ if (new_pd == NULL)
+ return (error);
+
+ /*
+ * We reuse the old context by resetting provider specific
+ * fields in it.
+ */
+ if ((ictx = areq->an_context) != NULL) {
+ crypto_ctx_t *ctx;
+
+ ASSERT(old_pd == ictx->kc_prov_desc);
+ KCF_PROV_REFRELE(ictx->kc_prov_desc);
+ KCF_PROV_REFHOLD(new_pd);
+ ictx->kc_prov_desc = new_pd;
+
+ ctx = &ictx->kc_glbl_ctx;
+ ctx->cc_provider = new_pd->pd_prov_handle;
+ ctx->cc_session = new_pd->pd_sid;
+ ctx->cc_provider_private = NULL;
+ }
+
+ /* We reuse areq by resetting the provider and context fields. */
+ KCF_PROV_REFRELE(old_pd);
+ KCF_PROV_REFHOLD(new_pd);
+ areq->an_provider = new_pd;
+ mutex_enter(&areq->an_lock);
+ areq->an_state = REQ_WAITING;
+ mutex_exit(&areq->an_lock);
+
+ switch (new_pd->pd_prov_type) {
+ case CRYPTO_SW_PROVIDER:
+ error = kcf_disp_sw_request(areq);
+ break;
+
+ case CRYPTO_HW_PROVIDER: {
+ taskq_t *taskq = new_pd->pd_sched_info.ks_taskq;
+
+ if (taskq_dispatch(taskq, process_req_hwp, areq, TQ_NOSLEEP) ==
+ TASKQID_INVALID) {
+ error = CRYPTO_HOST_MEMORY;
+ } else {
+ error = CRYPTO_QUEUED;
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ return (error);
+}
+
+static inline int
+EMPTY_TASKQ(taskq_t *tq)
+{
+#ifdef _KERNEL
+ return (tq->tq_lowest_id == tq->tq_next_id);
+#else
+ return (tq->tq_task.tqent_next == &tq->tq_task || tq->tq_active == 0);
+#endif
+}
+
+/*
+ * Routine called by both ioctl and k-api. The consumer should
+ * bundle the parameters into a kcf_req_params_t structure. A bunch
+ * of macros are available in ops_impl.h for this bundling. They are:
+ *
+ * KCF_WRAP_DIGEST_OPS_PARAMS()
+ * KCF_WRAP_MAC_OPS_PARAMS()
+ * KCF_WRAP_ENCRYPT_OPS_PARAMS()
+ * KCF_WRAP_DECRYPT_OPS_PARAMS() ... etc.
+ *
+ * It is the caller's responsibility to free the ctx argument when
+ * appropriate. See the KCF_CONTEXT_COND_RELEASE macro for details.
+ */
+int
+kcf_submit_request(kcf_provider_desc_t *pd, crypto_ctx_t *ctx,
+ crypto_call_req_t *crq, kcf_req_params_t *params, boolean_t cont)
+{
+ int error = CRYPTO_SUCCESS;
+ kcf_areq_node_t *areq;
+ kcf_sreq_node_t *sreq;
+ kcf_context_t *kcf_ctx;
+ taskq_t *taskq = pd->pd_sched_info.ks_taskq;
+
+ kcf_ctx = ctx ? (kcf_context_t *)ctx->cc_framework_private : NULL;
+
+ /* Synchronous cases */
+ if (crq == NULL) {
+ switch (pd->pd_prov_type) {
+ case CRYPTO_SW_PROVIDER:
+ error = common_submit_request(pd, ctx, params,
+ KCF_RHNDL(KM_SLEEP));
+ break;
+
+ case CRYPTO_HW_PROVIDER:
+ /*
+ * Special case for CRYPTO_SYNCHRONOUS providers that
+ * never return a CRYPTO_QUEUED error. We skip any
+ * request allocation and call the SPI directly.
+ */
+ if ((pd->pd_flags & CRYPTO_SYNCHRONOUS) &&
+ EMPTY_TASKQ(taskq)) {
+ KCF_PROV_IREFHOLD(pd);
+ if (pd->pd_state == KCF_PROV_READY) {
+ error = common_submit_request(pd, ctx,
+ params, KCF_RHNDL(KM_SLEEP));
+ KCF_PROV_IREFRELE(pd);
+ ASSERT(error != CRYPTO_QUEUED);
+ break;
+ }
+ KCF_PROV_IREFRELE(pd);
+ }
+
+ sreq = kmem_cache_alloc(kcf_sreq_cache, KM_SLEEP);
+ sreq->sn_state = REQ_ALLOCATED;
+ sreq->sn_rv = CRYPTO_FAILED;
+ sreq->sn_params = params;
+
+ /*
+ * Note that we do not need to hold the context
+ * for synchronous case as the context will never
+ * become invalid underneath us. We do not need to hold
+ * the provider here either as the caller has a hold.
+ */
+ sreq->sn_context = kcf_ctx;
+ ASSERT(KCF_PROV_REFHELD(pd));
+ sreq->sn_provider = pd;
+
+ ASSERT(taskq != NULL);
+ /*
+ * Call the SPI directly if the taskq is empty and the
+ * provider is not busy, else dispatch to the taskq.
+ * Calling directly is fine as this is the synchronous
+ * case. This is unlike the asynchronous case where we
+ * must always dispatch to the taskq.
+ */
+ if (EMPTY_TASKQ(taskq) &&
+ pd->pd_state == KCF_PROV_READY) {
+ process_req_hwp(sreq);
+ } else {
+ /*
+ * We cannot tell from the taskq_dispatch() return
+ * value if we exceeded maxalloc. Hence the
+ * check here. Since we are allowed to wait in
+ * the synchronous case, we wait for the taskq
+ * to become empty.
+ */
+ if (taskq->tq_nalloc >= crypto_taskq_maxalloc) {
+ taskq_wait(taskq);
+ }
+
+ (void) taskq_dispatch(taskq, process_req_hwp,
+ sreq, TQ_SLEEP);
+ }
+
+ /*
+ * Wait for the notification to arrive,
+ * if the operation is not done yet.
+ * Bug# 4722589 will make the wait a cv_wait_sig().
+ */
+ mutex_enter(&sreq->sn_lock);
+ while (sreq->sn_state < REQ_DONE)
+ cv_wait(&sreq->sn_cv, &sreq->sn_lock);
+ mutex_exit(&sreq->sn_lock);
+
+ error = sreq->sn_rv;
+ kmem_cache_free(kcf_sreq_cache, sreq);
+
+ break;
+
+ default:
+ error = CRYPTO_FAILED;
+ break;
+ }
+
+ } else { /* Asynchronous cases */
+ switch (pd->pd_prov_type) {
+ case CRYPTO_SW_PROVIDER:
+ if (!(crq->cr_flag & CRYPTO_ALWAYS_QUEUE)) {
+ /*
+ * This case has less overhead since there is
+ * no switching of context.
+ */
+ error = common_submit_request(pd, ctx, params,
+ KCF_RHNDL(KM_NOSLEEP));
+ } else {
+ /*
+ * CRYPTO_ALWAYS_QUEUE is set. We need to
+ * queue the request and return.
+ */
+ areq = kcf_areqnode_alloc(pd, kcf_ctx, crq,
+ params, cont);
+ if (areq == NULL)
+ error = CRYPTO_HOST_MEMORY;
+ else {
+ if (!(crq->cr_flag
+ & CRYPTO_SKIP_REQID)) {
+ /*
+ * Set the request handle. This handle
+ * is used for any crypto_cancel_req(9f)
+ * calls from the consumer. We have to
+ * do this before dispatching the
+ * request.
+ */
+ crq->cr_reqid = kcf_reqid_insert(areq);
+ }
+
+ error = kcf_disp_sw_request(areq);
+ /*
+ * There is an error processing this
+ * request. Remove the handle and
+ * release the request structure.
+ */
+ if (error != CRYPTO_QUEUED) {
+ if (!(crq->cr_flag
+ & CRYPTO_SKIP_REQID))
+ kcf_reqid_delete(areq);
+ KCF_AREQ_REFRELE(areq);
+ }
+ }
+ }
+ break;
+
+ case CRYPTO_HW_PROVIDER:
+ /*
+ * We need to queue the request and return.
+ */
+ areq = kcf_areqnode_alloc(pd, kcf_ctx, crq, params,
+ cont);
+ if (areq == NULL) {
+ error = CRYPTO_HOST_MEMORY;
+ goto done;
+ }
+
+ ASSERT(taskq != NULL);
+ /*
+ * We cannot tell from the taskq_dispatch() return
+ * value if we exceeded maxalloc. Hence the check
+ * here.
+ */
+ if (taskq->tq_nalloc >= crypto_taskq_maxalloc) {
+ error = CRYPTO_BUSY;
+ KCF_AREQ_REFRELE(areq);
+ goto done;
+ }
+
+ if (!(crq->cr_flag & CRYPTO_SKIP_REQID)) {
+ /*
+ * Set the request handle. This handle is used
+ * for any crypto_cancel_req(9f) calls from the
+ * consumer. We have to do this before dispatching
+ * the request.
+ */
+ crq->cr_reqid = kcf_reqid_insert(areq);
+ }
+
+ if (taskq_dispatch(taskq,
+ process_req_hwp, areq, TQ_NOSLEEP) ==
+ TASKQID_INVALID) {
+ error = CRYPTO_HOST_MEMORY;
+ if (!(crq->cr_flag & CRYPTO_SKIP_REQID))
+ kcf_reqid_delete(areq);
+ KCF_AREQ_REFRELE(areq);
+ } else {
+ error = CRYPTO_QUEUED;
+ }
+ break;
+
+ default:
+ error = CRYPTO_FAILED;
+ break;
+ }
+ }
+
+done:
+ return (error);
+}
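+
+/*
+ * Consumer-side sketch (illustrative; the exact argument lists of the
+ * KCF_WRAP_*_OPS_PARAMS macros live in ops_impl.h; the digest
+ * variant is assumed here to take the request, op type, session,
+ * mechanism, key, data and digest):
+ *
+ *     kcf_req_params_t params;
+ *
+ *     KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_ATOMIC, sid,
+ *         mech, key, data, digest);
+ *     error = kcf_submit_request(pd, NULL, crq, &params, B_FALSE);
+ *
+ * A NULL crq makes the call synchronous; a non-NULL crq queues the
+ * request and the consumer is notified via crq->cr_callback_func.
+ */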
+
+/*
+ * We're done with this framework context, so free it. Note that freeing
+ * framework context (kcf_context) frees the global context (crypto_ctx).
+ *
+ * The provider is responsible for freeing provider private context after a
+ * final or single operation and resetting the cc_provider_private field
+ * to NULL. It should do this before it notifies the framework of the
+ * completion. We still need to call KCF_PROV_FREE_CONTEXT to handle cases
+ * like crypto_cancel_ctx(9f).
+ */
+void
+kcf_free_context(kcf_context_t *kcf_ctx)
+{
+ kcf_provider_desc_t *pd = kcf_ctx->kc_prov_desc;
+ crypto_ctx_t *gctx = &kcf_ctx->kc_glbl_ctx;
+ kcf_context_t *kcf_secondctx = kcf_ctx->kc_secondctx;
+
+ /* Release the second context, if any */
+
+ if (kcf_secondctx != NULL)
+ KCF_CONTEXT_REFRELE(kcf_secondctx);
+
+ if (gctx->cc_provider_private != NULL) {
+ mutex_enter(&pd->pd_lock);
+ if (!KCF_IS_PROV_REMOVED(pd)) {
+ /*
+ * Increment the provider's internal refcnt so it
+ * doesn't unregister from the framework while
+ * we're calling the entry point.
+ */
+ KCF_PROV_IREFHOLD(pd);
+ mutex_exit(&pd->pd_lock);
+ (void) KCF_PROV_FREE_CONTEXT(pd, gctx);
+ KCF_PROV_IREFRELE(pd);
+ } else {
+ mutex_exit(&pd->pd_lock);
+ }
+ }
+
+ /* kcf_ctx->kc_prov_desc has a hold on pd */
+ KCF_PROV_REFRELE(kcf_ctx->kc_prov_desc);
+
+ /* check if this context is shared with a software provider */
+ if ((gctx->cc_flags & CRYPTO_INIT_OPSTATE) &&
+ kcf_ctx->kc_sw_prov_desc != NULL) {
+ KCF_PROV_REFRELE(kcf_ctx->kc_sw_prov_desc);
+ }
+
+ kmem_cache_free(kcf_context_cache, kcf_ctx);
+}
+
+/*
+ * Free the request after releasing all the holds.
+ */
+void
+kcf_free_req(kcf_areq_node_t *areq)
+{
+ KCF_PROV_REFRELE(areq->an_provider);
+ if (areq->an_context != NULL)
+ KCF_CONTEXT_REFRELE(areq->an_context);
+
+ if (areq->an_tried_plist != NULL)
+ kcf_free_triedlist(areq->an_tried_plist);
+ kmem_cache_free(kcf_areq_cache, areq);
+}
+
+/*
+ * Utility routine to remove a request from the chain of requests
+ * hanging off a context.
+ */
+void
+kcf_removereq_in_ctxchain(kcf_context_t *ictx, kcf_areq_node_t *areq)
+{
+ kcf_areq_node_t *cur, *prev;
+
+ /*
+ * Get context lock, search for areq in the chain and remove it.
+ */
+ ASSERT(ictx != NULL);
+ mutex_enter(&ictx->kc_in_use_lock);
+ prev = cur = ictx->kc_req_chain_first;
+
+ while (cur != NULL) {
+ if (cur == areq) {
+ if (prev == cur) {
+ if ((ictx->kc_req_chain_first =
+ cur->an_ctxchain_next) == NULL)
+ ictx->kc_req_chain_last = NULL;
+ } else {
+ if (cur == ictx->kc_req_chain_last)
+ ictx->kc_req_chain_last = prev;
+ prev->an_ctxchain_next = cur->an_ctxchain_next;
+ }
+
+ break;
+ }
+ prev = cur;
+ cur = cur->an_ctxchain_next;
+ }
+ mutex_exit(&ictx->kc_in_use_lock);
+}
+
+/*
+ * Remove the specified node from the global software queue.
+ *
+ * The caller must hold the queue lock and request lock (an_lock).
+ */
+void
+kcf_remove_node(kcf_areq_node_t *node)
+{
+ kcf_areq_node_t *nextp = node->an_next;
+ kcf_areq_node_t *prevp = node->an_prev;
+
+ if (nextp != NULL)
+ nextp->an_prev = prevp;
+ else
+ gswq->gs_last = prevp;
+
+ if (prevp != NULL)
+ prevp->an_next = nextp;
+ else
+ gswq->gs_first = nextp;
+
+ node->an_state = REQ_CANCELED;
+}
+
+/*
+ * Add the request node to the end of the global software queue.
+ *
+ * The caller should not hold the queue lock. Returns 0 if the
+ * request is successfully queued. Returns CRYPTO_BUSY if the limit
+ * on the number of jobs is exceeded.
+ */
+static int
+kcf_enqueue(kcf_areq_node_t *node)
+{
+ kcf_areq_node_t *tnode;
+
+ mutex_enter(&gswq->gs_lock);
+
+ if (gswq->gs_njobs >= gswq->gs_maxjobs) {
+ mutex_exit(&gswq->gs_lock);
+ return (CRYPTO_BUSY);
+ }
+
+ if (gswq->gs_last == NULL) {
+ gswq->gs_first = gswq->gs_last = node;
+ } else {
+ ASSERT(gswq->gs_last->an_next == NULL);
+ tnode = gswq->gs_last;
+ tnode->an_next = node;
+ gswq->gs_last = node;
+ node->an_prev = tnode;
+ }
+
+ gswq->gs_njobs++;
+
+ /* an_lock not needed here as we hold gs_lock */
+ node->an_state = REQ_WAITING;
+
+ mutex_exit(&gswq->gs_lock);
+
+ return (0);
+}
+
+/*
+ * kmem_cache_alloc constructor for sync request structure.
+ */
+/* ARGSUSED */
+static int
+kcf_sreq_cache_constructor(void *buf, void *cdrarg, int kmflags)
+{
+ kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)buf;
+
+ sreq->sn_type = CRYPTO_SYNCH;
+ cv_init(&sreq->sn_cv, NULL, CV_DEFAULT, NULL);
+ mutex_init(&sreq->sn_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+kcf_sreq_cache_destructor(void *buf, void *cdrarg)
+{
+ kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)buf;
+
+ mutex_destroy(&sreq->sn_lock);
+ cv_destroy(&sreq->sn_cv);
+}
+
+/*
+ * kmem_cache_alloc constructor for async request structure.
+ */
+/* ARGSUSED */
+static int
+kcf_areq_cache_constructor(void *buf, void *cdrarg, int kmflags)
+{
+ kcf_areq_node_t *areq = (kcf_areq_node_t *)buf;
+
+ areq->an_type = CRYPTO_ASYNCH;
+ areq->an_refcnt = 0;
+ mutex_init(&areq->an_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&areq->an_done, NULL, CV_DEFAULT, NULL);
+ cv_init(&areq->an_turn_cv, NULL, CV_DEFAULT, NULL);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+kcf_areq_cache_destructor(void *buf, void *cdrarg)
+{
+ kcf_areq_node_t *areq = (kcf_areq_node_t *)buf;
+
+ ASSERT(areq->an_refcnt == 0);
+ mutex_destroy(&areq->an_lock);
+ cv_destroy(&areq->an_done);
+ cv_destroy(&areq->an_turn_cv);
+}
+
+/*
+ * kmem_cache_alloc constructor for kcf_context structure.
+ */
+/* ARGSUSED */
+static int
+kcf_context_cache_constructor(void *buf, void *cdrarg, int kmflags)
+{
+ kcf_context_t *kctx = (kcf_context_t *)buf;
+
+ kctx->kc_refcnt = 0;
+ mutex_init(&kctx->kc_in_use_lock, NULL, MUTEX_DEFAULT, NULL);
+
+ return (0);
+}
+
+/* ARGSUSED */
+static void
+kcf_context_cache_destructor(void *buf, void *cdrarg)
+{
+ kcf_context_t *kctx = (kcf_context_t *)buf;
+
+ ASSERT(kctx->kc_refcnt == 0);
+ mutex_destroy(&kctx->kc_in_use_lock);
+}
+
+void
+kcf_sched_destroy(void)
+{
+ int i;
+
+ if (kcf_misc_kstat)
+ kstat_delete(kcf_misc_kstat);
+
+ if (kcfpool) {
+ mutex_destroy(&kcfpool->kp_thread_lock);
+ cv_destroy(&kcfpool->kp_nothr_cv);
+ mutex_destroy(&kcfpool->kp_user_lock);
+ cv_destroy(&kcfpool->kp_user_cv);
+
+ kmem_free(kcfpool, sizeof (kcf_pool_t));
+ }
+
+ for (i = 0; i < REQID_TABLES; i++) {
+ if (kcf_reqid_table[i]) {
+ mutex_destroy(&(kcf_reqid_table[i]->rt_lock));
+ kmem_free(kcf_reqid_table[i],
+ sizeof (kcf_reqid_table_t));
+ }
+ }
+
+ if (gswq) {
+ mutex_destroy(&gswq->gs_lock);
+ cv_destroy(&gswq->gs_cv);
+ kmem_free(gswq, sizeof (kcf_global_swq_t));
+ }
+
+ if (kcf_context_cache)
+ kmem_cache_destroy(kcf_context_cache);
+ if (kcf_areq_cache)
+ kmem_cache_destroy(kcf_areq_cache);
+ if (kcf_sreq_cache)
+ kmem_cache_destroy(kcf_sreq_cache);
+
+ mutex_destroy(&ntfy_list_lock);
+ cv_destroy(&ntfy_list_cv);
+}
+
+/*
+ * Creates and initializes all the structures needed by the framework.
+ */
+void
+kcf_sched_init(void)
+{
+ int i;
+ kcf_reqid_table_t *rt;
+
+ /*
+ * Create all the kmem caches needed by the framework. We set the
+ * align argument to 64 to get slabs aligned on a 64-byte boundary
+ * and objects (cache_chunksize) that are a multiple of 64 bytes.
+ * This helps to avoid false sharing, as 64 bytes is the size of a
+ * CPU cache line.
+ */
+ kcf_sreq_cache = kmem_cache_create("kcf_sreq_cache",
+ sizeof (struct kcf_sreq_node), 64, kcf_sreq_cache_constructor,
+ kcf_sreq_cache_destructor, NULL, NULL, NULL, 0);
+
+ kcf_areq_cache = kmem_cache_create("kcf_areq_cache",
+ sizeof (struct kcf_areq_node), 64, kcf_areq_cache_constructor,
+ kcf_areq_cache_destructor, NULL, NULL, NULL, 0);
+
+ kcf_context_cache = kmem_cache_create("kcf_context_cache",
+ sizeof (struct kcf_context), 64, kcf_context_cache_constructor,
+ kcf_context_cache_destructor, NULL, NULL, NULL, 0);
+
+ gswq = kmem_alloc(sizeof (kcf_global_swq_t), KM_SLEEP);
+
+ mutex_init(&gswq->gs_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&gswq->gs_cv, NULL, CV_DEFAULT, NULL);
+ gswq->gs_njobs = 0;
+ gswq->gs_maxjobs = kcf_maxthreads * crypto_taskq_maxalloc;
+ gswq->gs_first = gswq->gs_last = NULL;
+
+ /* Initialize the global reqid table */
+ for (i = 0; i < REQID_TABLES; i++) {
+ rt = kmem_zalloc(sizeof (kcf_reqid_table_t), KM_SLEEP);
+ kcf_reqid_table[i] = rt;
+ mutex_init(&rt->rt_lock, NULL, MUTEX_DEFAULT, NULL);
+ rt->rt_curid = i;
+ }
+
+ /* Allocate and initialize the thread pool */
+ kcfpool_alloc();
+
+ /* Initialize the event notification list variables */
+ mutex_init(&ntfy_list_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&ntfy_list_cv, NULL, CV_DEFAULT, NULL);
+
+ /* Create the kcf kstat */
+ kcf_misc_kstat = kstat_create("kcf", 0, "framework_stats", "crypto",
+ KSTAT_TYPE_NAMED, sizeof (kcf_stats_t) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL);
+
+ if (kcf_misc_kstat != NULL) {
+ kcf_misc_kstat->ks_data = &kcf_ksdata;
+ kcf_misc_kstat->ks_update = kcf_misc_kstat_update;
+ kstat_install(kcf_misc_kstat);
+ }
+}
+
+/*
+ * Signal the waiting sync client.
+ */
+void
+kcf_sop_done(kcf_sreq_node_t *sreq, int error)
+{
+ mutex_enter(&sreq->sn_lock);
+ sreq->sn_state = REQ_DONE;
+ sreq->sn_rv = error;
+ cv_signal(&sreq->sn_cv);
+ mutex_exit(&sreq->sn_lock);
+}
+
+/*
+ * Call back the async client with the operation status.
+ * We free the async request node and possibly the context.
+ * We also handle any chain of requests hanging off
+ * the context.
+ */
+void
+kcf_aop_done(kcf_areq_node_t *areq, int error)
+{
+ kcf_op_type_t optype;
+ boolean_t skip_notify = B_FALSE;
+ kcf_context_t *ictx;
+ kcf_areq_node_t *nextreq;
+
+ /*
+ * Handle recoverable errors. This has to be done first
+	 * before doing anything else in this routine so that
+ * we do not change the state of the request.
+ */
+ if (error != CRYPTO_SUCCESS && IS_RECOVERABLE(error)) {
+ /*
+		 * We try another provider, if one is available. Otherwise
+ * we continue with the failure notification to the
+ * client.
+ */
+ if (kcf_resubmit_request(areq) == CRYPTO_QUEUED)
+ return;
+ }
+
+ mutex_enter(&areq->an_lock);
+ areq->an_state = REQ_DONE;
+ mutex_exit(&areq->an_lock);
+
+	optype = areq->an_params.rp_optype;
+ if ((ictx = areq->an_context) != NULL) {
+ /*
+		 * After a request is removed from the request
+		 * queue, it still stays on a chain of requests
+		 * hanging off its context structure. It needs to be
+		 * removed from this chain at this point.
+ */
+ mutex_enter(&ictx->kc_in_use_lock);
+ nextreq = areq->an_ctxchain_next;
+ if (nextreq != NULL) {
+ mutex_enter(&nextreq->an_lock);
+ nextreq->an_is_my_turn = B_TRUE;
+ cv_signal(&nextreq->an_turn_cv);
+ mutex_exit(&nextreq->an_lock);
+ }
+
+ ictx->kc_req_chain_first = nextreq;
+ if (nextreq == NULL)
+ ictx->kc_req_chain_last = NULL;
+ mutex_exit(&ictx->kc_in_use_lock);
+
+ if (IS_SINGLE_OP(optype) || IS_FINAL_OP(optype)) {
+ ASSERT(nextreq == NULL);
+ KCF_CONTEXT_REFRELE(ictx);
+ } else if (error != CRYPTO_SUCCESS && IS_INIT_OP(optype)) {
+ /*
+ * NOTE - We do not release the context in case of update
+ * operations. We require the consumer to free it explicitly,
+ * in case it wants to abandon an update operation. This is done
+ * as there may be mechanisms in ECB mode that can continue
+ * even if an operation on a block fails.
+ */
+ KCF_CONTEXT_REFRELE(ictx);
+ }
+ }
+
+ /* Deal with the internal continuation to this request first */
+
+ if (areq->an_isdual) {
+ kcf_dual_req_t *next_arg;
+ next_arg = (kcf_dual_req_t *)areq->an_reqarg.cr_callback_arg;
+ next_arg->kr_areq = areq;
+ KCF_AREQ_REFHOLD(areq);
+ areq->an_isdual = B_FALSE;
+
+ NOTIFY_CLIENT(areq, error);
+ return;
+ }
+
+ /*
+ * If CRYPTO_NOTIFY_OPDONE flag is set, we should notify
+ * always. If this flag is clear, we skip the notification
+ * provided there are no errors. We check this flag for only
+ * init or update operations. It is ignored for single, final or
+ * atomic operations.
+ */
+ skip_notify = (IS_UPDATE_OP(optype) || IS_INIT_OP(optype)) &&
+ (!(areq->an_reqarg.cr_flag & CRYPTO_NOTIFY_OPDONE)) &&
+ (error == CRYPTO_SUCCESS);
+
+ if (!skip_notify) {
+ NOTIFY_CLIENT(areq, error);
+ }
+
+ if (!(areq->an_reqarg.cr_flag & CRYPTO_SKIP_REQID))
+ kcf_reqid_delete(areq);
+
+ KCF_AREQ_REFRELE(areq);
+}
+
+/*
+ * Allocate the thread pool and initialize all the fields.
+ */
+static void
+kcfpool_alloc(void)
+{
+ kcfpool = kmem_alloc(sizeof (kcf_pool_t), KM_SLEEP);
+
+ kcfpool->kp_threads = kcfpool->kp_idlethreads = 0;
+ kcfpool->kp_blockedthreads = 0;
+ kcfpool->kp_signal_create_thread = B_FALSE;
+ kcfpool->kp_nthrs = 0;
+ kcfpool->kp_user_waiting = B_FALSE;
+
+ mutex_init(&kcfpool->kp_thread_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&kcfpool->kp_nothr_cv, NULL, CV_DEFAULT, NULL);
+
+ mutex_init(&kcfpool->kp_user_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&kcfpool->kp_user_cv, NULL, CV_DEFAULT, NULL);
+
+ kcf_idlethr_timeout = KCF_DEFAULT_THRTIMEOUT;
+}
+
+/*
+ * Insert the async request in the hash table after assigning it
+ * an ID. Returns the ID.
+ *
+ * The ID is used by the caller to pass as an argument to a
+ * cancel_req() routine later.
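+ *
+ * Sketch of the intended lifetime (hypothetical, not shown in this
+ * file): the ID assigned here is the handle a client later passes
+ * to crypto_cancel_req() below.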
+ */
+static crypto_req_id_t
+kcf_reqid_insert(kcf_areq_node_t *areq)
+{
+ int indx;
+ crypto_req_id_t id;
+ kcf_areq_node_t *headp;
+ kcf_reqid_table_t *rt;
+
+ kpreempt_disable();
+ rt = kcf_reqid_table[CPU_SEQID & REQID_TABLE_MASK];
+ kpreempt_enable();
+
+ mutex_enter(&rt->rt_lock);
+
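+	/*
+	 * Derive the next ID. The low bits carry the table index
+	 * seeded into rt_curid at init time; they must survive this
+	 * arithmetic, since kcf_reqid_delete() and crypto_cancel_req()
+	 * recover the table with (id & REQID_TABLE_MASK).
+	 * REQID_COUNTER_HIGH keeps a valid ID from ever being zero.
+	 */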
+ rt->rt_curid = id =
+ (rt->rt_curid - REQID_COUNTER_LOW) | REQID_COUNTER_HIGH;
+ SET_REQID(areq, id);
+ indx = REQID_HASH(id);
+ headp = areq->an_idnext = rt->rt_idhash[indx];
+ areq->an_idprev = NULL;
+ if (headp != NULL)
+ headp->an_idprev = areq;
+
+ rt->rt_idhash[indx] = areq;
+ mutex_exit(&rt->rt_lock);
+
+ return (id);
+}
+
+/*
+ * Delete the async request from the hash table.
+ */
+static void
+kcf_reqid_delete(kcf_areq_node_t *areq)
+{
+ int indx;
+ kcf_areq_node_t *nextp, *prevp;
+ crypto_req_id_t id = GET_REQID(areq);
+ kcf_reqid_table_t *rt;
+
+ rt = kcf_reqid_table[id & REQID_TABLE_MASK];
+ indx = REQID_HASH(id);
+
+ mutex_enter(&rt->rt_lock);
+
+ nextp = areq->an_idnext;
+ prevp = areq->an_idprev;
+ if (nextp != NULL)
+ nextp->an_idprev = prevp;
+ if (prevp != NULL)
+ prevp->an_idnext = nextp;
+ else
+ rt->rt_idhash[indx] = nextp;
+
+ SET_REQID(areq, 0);
+ cv_broadcast(&areq->an_done);
+
+ mutex_exit(&rt->rt_lock);
+}
+
+/*
+ * Cancel a single asynchronous request.
+ *
+ * We guarantee that no problems will result from calling
+ * crypto_cancel_req() for a request that is either running or
+ * has already completed. We remove the request from any queues
+ * if possible. We wait for request completion if the
+ * request is dispatched to a provider.
+ *
+ * Calling context:
+ * Can be called from user context only.
+ *
+ * NOTE: We acquire the following locks in this routine (in order):
+ * - rt_lock (kcf_reqid_table_t)
+ * - gswq->gs_lock
+ * - areq->an_lock
+ * - ictx->kc_in_use_lock (from kcf_removereq_in_ctxchain())
+ *
+ * This locking order MUST be maintained everywhere else in the code.
+ */
+void
+crypto_cancel_req(crypto_req_id_t id)
+{
+ int indx;
+ kcf_areq_node_t *areq;
+ kcf_provider_desc_t *pd;
+ kcf_context_t *ictx;
+ kcf_reqid_table_t *rt;
+
+ rt = kcf_reqid_table[id & REQID_TABLE_MASK];
+ indx = REQID_HASH(id);
+
+ mutex_enter(&rt->rt_lock);
+ for (areq = rt->rt_idhash[indx]; areq; areq = areq->an_idnext) {
+ if (GET_REQID(areq) == id) {
+ /*
+ * We found the request. It is either still waiting
+ * in the framework queues or running at the provider.
+ */
+ pd = areq->an_provider;
+ ASSERT(pd != NULL);
+
+ switch (pd->pd_prov_type) {
+ case CRYPTO_SW_PROVIDER:
+ mutex_enter(&gswq->gs_lock);
+ mutex_enter(&areq->an_lock);
+
+ /* This request can be safely canceled. */
+ if (areq->an_state <= REQ_WAITING) {
+ /* Remove from gswq, global software queue. */
+ kcf_remove_node(areq);
+ if ((ictx = areq->an_context) != NULL)
+ kcf_removereq_in_ctxchain(ictx, areq);
+
+ mutex_exit(&areq->an_lock);
+ mutex_exit(&gswq->gs_lock);
+ mutex_exit(&rt->rt_lock);
+
+ /* Remove areq from hash table and free it. */
+ kcf_reqid_delete(areq);
+ KCF_AREQ_REFRELE(areq);
+ return;
+ }
+
+ mutex_exit(&areq->an_lock);
+ mutex_exit(&gswq->gs_lock);
+ break;
+
+ case CRYPTO_HW_PROVIDER:
+ /*
+ * There is no interface to remove an entry
+				 * once it is on the taskq. So, we do not do
+				 * anything for a hardware provider.
+ */
+ break;
+ default:
+ break;
+ }
+
+ /*
+ * The request is running. Wait for the request completion
+ * to notify us.
+ */
+ KCF_AREQ_REFHOLD(areq);
+ while (GET_REQID(areq) == id)
+ cv_wait(&areq->an_done, &rt->rt_lock);
+ KCF_AREQ_REFRELE(areq);
+ break;
+ }
+ }
+
+ mutex_exit(&rt->rt_lock);
+}
+
+/*
+ * Cancel all asynchronous requests associated with the
+ * passed in crypto context and free it.
+ *
+ * A client SHOULD NOT call this routine after calling a crypto_*_final
+ * routine. This routine is called only during intermediate operations.
+ * The client should not use the crypto context after this function returns
+ * since we destroy it.
+ *
+ * Calling context:
+ * Can be called from user context only.
+ */
+void
+crypto_cancel_ctx(crypto_context_t ctx)
+{
+ kcf_context_t *ictx;
+ kcf_areq_node_t *areq;
+
+ if (ctx == NULL)
+ return;
+
+ ictx = (kcf_context_t *)((crypto_ctx_t *)ctx)->cc_framework_private;
+
+ mutex_enter(&ictx->kc_in_use_lock);
+
+ /* Walk the chain and cancel each request */
+ while ((areq = ictx->kc_req_chain_first) != NULL) {
+ /*
+ * We have to drop the lock here as we may have
+ * to wait for request completion. We hold the
+ * request before dropping the lock though, so that it
+ * won't be freed underneath us.
+ */
+ KCF_AREQ_REFHOLD(areq);
+ mutex_exit(&ictx->kc_in_use_lock);
+
+ crypto_cancel_req(GET_REQID(areq));
+ KCF_AREQ_REFRELE(areq);
+
+ mutex_enter(&ictx->kc_in_use_lock);
+ }
+
+ mutex_exit(&ictx->kc_in_use_lock);
+ KCF_CONTEXT_REFRELE(ictx);
+}
+
+/*
+ * Update kstats.
+ */
+static int
+kcf_misc_kstat_update(kstat_t *ksp, int rw)
+{
+ uint_t tcnt;
+ kcf_stats_t *ks_data;
+
+ if (rw == KSTAT_WRITE)
+ return (EACCES);
+
+ ks_data = ksp->ks_data;
+
+ ks_data->ks_thrs_in_pool.value.ui32 = kcfpool->kp_threads;
+ /*
+ * The failover thread is counted in kp_idlethreads in
+ * some corner cases. This is done to avoid doing more checks
+ * when submitting a request. We account for those cases below.
+ */
+ if ((tcnt = kcfpool->kp_idlethreads) == (kcfpool->kp_threads + 1))
+ tcnt--;
+ ks_data->ks_idle_thrs.value.ui32 = tcnt;
+ ks_data->ks_minthrs.value.ui32 = kcf_minthreads;
+ ks_data->ks_maxthrs.value.ui32 = kcf_maxthreads;
+ ks_data->ks_swq_njobs.value.ui32 = gswq->gs_njobs;
+ ks_data->ks_swq_maxjobs.value.ui32 = gswq->gs_maxjobs;
+ ks_data->ks_taskq_threads.value.ui32 = crypto_taskq_threads;
+ ks_data->ks_taskq_minalloc.value.ui32 = crypto_taskq_minalloc;
+ ks_data->ks_taskq_maxalloc.value.ui32 = crypto_taskq_maxalloc;
+
+ return (0);
+}
+
+/*
+ * Allocate and initialize a kcf_dual_req, used for saving the arguments of
+ * a dual operation or an atomic operation that has to be internally
+ * simulated with multiple single steps.
+ * crq determines the memory allocation flags.
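+ * (KCF_KMFLAG() is assumed to map a NULL crq to KM_SLEEP and a
+ * non-NULL crq to KM_NOSLEEP, so only synchronous callers may block.)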
+ */
+
+kcf_dual_req_t *
+kcf_alloc_req(crypto_call_req_t *crq)
+{
+ kcf_dual_req_t *kcr;
+
+ kcr = kmem_alloc(sizeof (kcf_dual_req_t), KCF_KMFLAG(crq));
+
+ if (kcr == NULL)
+ return (NULL);
+
+	/* Copy the whole crypto_call_req struct, as it isn't persistent */
+ if (crq != NULL)
+ kcr->kr_callreq = *crq;
+ else
+ bzero(&(kcr->kr_callreq), sizeof (crypto_call_req_t));
+ kcr->kr_areq = NULL;
+ kcr->kr_saveoffset = 0;
+ kcr->kr_savelen = 0;
+
+ return (kcr);
+}
+
+/*
+ * Callback routine for the next part of a simulated dual operation.
+ * Schedules the next step.
+ *
+ * This routine can be called from interrupt context.
+ */
+void
+kcf_next_req(void *next_req_arg, int status)
+{
+ kcf_dual_req_t *next_req = (kcf_dual_req_t *)next_req_arg;
+ kcf_req_params_t *params = &(next_req->kr_params);
+ kcf_areq_node_t *areq = next_req->kr_areq;
+ int error = status;
+ kcf_provider_desc_t *pd = NULL;
+ crypto_dual_data_t *ct = NULL;
+
+	/* Stop the processing if an error occurred at this step */
+ if (error != CRYPTO_SUCCESS) {
+out:
+ areq->an_reqarg = next_req->kr_callreq;
+ KCF_AREQ_REFRELE(areq);
+ kmem_free(next_req, sizeof (kcf_dual_req_t));
+ areq->an_isdual = B_FALSE;
+ kcf_aop_done(areq, error);
+ return;
+ }
+
+ switch (params->rp_opgrp) {
+ case KCF_OG_MAC: {
+
+ /*
+ * The next req is submitted with the same reqid as the
+ * first part. The consumer only got back that reqid, and
+ * should still be able to cancel the operation during its
+ * second step.
+ */
+ kcf_mac_ops_params_t *mops = &(params->rp_u.mac_params);
+ crypto_ctx_template_t mac_tmpl;
+ kcf_mech_entry_t *me;
+
+ ct = (crypto_dual_data_t *)mops->mo_data;
+ mac_tmpl = (crypto_ctx_template_t)mops->mo_templ;
+
+ /* No expected recoverable failures, so no retry list */
+ pd = kcf_get_mech_provider(mops->mo_framework_mechtype,
+ &me, &error, NULL, CRYPTO_FG_MAC_ATOMIC,
+ (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), ct->dd_len2);
+
+ if (pd == NULL) {
+ error = CRYPTO_MECH_NOT_SUPPORTED;
+ goto out;
+ }
+ /* Validate the MAC context template here */
+ if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) &&
+ (mac_tmpl != NULL)) {
+ kcf_ctx_template_t *ctx_mac_tmpl;
+
+ ctx_mac_tmpl = (kcf_ctx_template_t *)mac_tmpl;
+
+ if (ctx_mac_tmpl->ct_generation != me->me_gen_swprov) {
+ KCF_PROV_REFRELE(pd);
+ error = CRYPTO_OLD_CTX_TEMPLATE;
+ goto out;
+ }
+ mops->mo_templ = ctx_mac_tmpl->ct_prov_tmpl;
+ }
+
+ break;
+ }
+ case KCF_OG_DECRYPT: {
+ kcf_decrypt_ops_params_t *dcrops =
+ &(params->rp_u.decrypt_params);
+
+ ct = (crypto_dual_data_t *)dcrops->dop_ciphertext;
+ /* No expected recoverable failures, so no retry list */
+ pd = kcf_get_mech_provider(dcrops->dop_framework_mechtype,
+ NULL, &error, NULL, CRYPTO_FG_DECRYPT_ATOMIC,
+ (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), ct->dd_len1);
+
+ if (pd == NULL) {
+ error = CRYPTO_MECH_NOT_SUPPORTED;
+ goto out;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ /* The second step uses len2 and offset2 of the dual_data */
+ next_req->kr_saveoffset = ct->dd_offset1;
+ next_req->kr_savelen = ct->dd_len1;
+ ct->dd_offset1 = ct->dd_offset2;
+ ct->dd_len1 = ct->dd_len2;
+
+ /* preserve if the caller is restricted */
+ if (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED) {
+ areq->an_reqarg.cr_flag = CRYPTO_RESTRICTED;
+ } else {
+ areq->an_reqarg.cr_flag = 0;
+ }
+
+ areq->an_reqarg.cr_callback_func = kcf_last_req;
+ areq->an_reqarg.cr_callback_arg = next_req;
+ areq->an_isdual = B_TRUE;
+
+ /*
+ * We would like to call kcf_submit_request() here. But,
+ * that is not possible as that routine allocates a new
+ * kcf_areq_node_t request structure, while we need to
+ * reuse the existing request structure.
+ */
+ switch (pd->pd_prov_type) {
+ case CRYPTO_SW_PROVIDER:
+ error = common_submit_request(pd, NULL, params,
+ KCF_RHNDL(KM_NOSLEEP));
+ break;
+
+ case CRYPTO_HW_PROVIDER: {
+ kcf_provider_desc_t *old_pd;
+ taskq_t *taskq = pd->pd_sched_info.ks_taskq;
+
+ /*
+ * Set the params for the second step in the
+ * dual-ops.
+ */
+ areq->an_params = *params;
+ old_pd = areq->an_provider;
+ KCF_PROV_REFRELE(old_pd);
+ KCF_PROV_REFHOLD(pd);
+ areq->an_provider = pd;
+
+ /*
+ * Note that we have to do a taskq_dispatch()
+ * here as we may be in interrupt context.
+ */
+ if (taskq_dispatch(taskq, process_req_hwp, areq,
+ TQ_NOSLEEP) == (taskqid_t)0) {
+ error = CRYPTO_HOST_MEMORY;
+ } else {
+ error = CRYPTO_QUEUED;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ /*
+ * We have to release the holds on the request and the provider
+ * in all cases.
+ */
+ KCF_AREQ_REFRELE(areq);
+ KCF_PROV_REFRELE(pd);
+
+ if (error != CRYPTO_QUEUED) {
+ /* restore, clean up, and invoke the client's callback */
+
+ ct->dd_offset1 = next_req->kr_saveoffset;
+ ct->dd_len1 = next_req->kr_savelen;
+ areq->an_reqarg = next_req->kr_callreq;
+ kmem_free(next_req, sizeof (kcf_dual_req_t));
+ areq->an_isdual = B_FALSE;
+ kcf_aop_done(areq, error);
+ }
+}
+
+/*
+ * Last part of an emulated dual operation.
+ * Clean up and restore the saved dual_data offset and length.
+ */
+void
+kcf_last_req(void *last_req_arg, int status)
+{
+ kcf_dual_req_t *last_req = (kcf_dual_req_t *)last_req_arg;
+
+ kcf_req_params_t *params = &(last_req->kr_params);
+ kcf_areq_node_t *areq = last_req->kr_areq;
+ crypto_dual_data_t *ct = NULL;
+
+ switch (params->rp_opgrp) {
+ case KCF_OG_MAC: {
+ kcf_mac_ops_params_t *mops = &(params->rp_u.mac_params);
+
+ ct = (crypto_dual_data_t *)mops->mo_data;
+ break;
+ }
+ case KCF_OG_DECRYPT: {
+ kcf_decrypt_ops_params_t *dcrops =
+ &(params->rp_u.decrypt_params);
+
+ ct = (crypto_dual_data_t *)dcrops->dop_ciphertext;
+ break;
+ }
+ default: {
+ panic("invalid kcf_op_group_t %d", (int)params->rp_opgrp);
+ return;
+ }
+ }
+ ct->dd_offset1 = last_req->kr_saveoffset;
+ ct->dd_len1 = last_req->kr_savelen;
+
+ /* The submitter used kcf_last_req as its callback */
+
+ if (areq == NULL) {
+ crypto_call_req_t *cr = &last_req->kr_callreq;
+
+ (*(cr->cr_callback_func))(cr->cr_callback_arg, status);
+ kmem_free(last_req, sizeof (kcf_dual_req_t));
+ return;
+ }
+ areq->an_reqarg = last_req->kr_callreq;
+ KCF_AREQ_REFRELE(areq);
+ kmem_free(last_req, sizeof (kcf_dual_req_t));
+ areq->an_isdual = B_FALSE;
+ kcf_aop_done(areq, status);
+}
diff --git a/zfs/module/icp/illumos-crypto.c b/zfs/module/icp/illumos-crypto.c
new file mode 100644
index 000000000000..aa63e431f94b
--- /dev/null
+++ b/zfs/module/icp/illumos-crypto.c
@@ -0,0 +1,156 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2016, Datto, Inc. All rights reserved.
+ */
+
+#ifdef _KERNEL
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#else
+#define __exit
+#define __init
+#endif
+
+#include <sys/crypto/common.h>
+#include <sys/crypto/api.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/sched_impl.h>
+#include <sys/modhash_impl.h>
+#include <sys/crypto/icp.h>
+
+/*
+ * Changes made to the original Illumos Crypto Layer for the ICP:
+ *
+ * Several changes were needed to allow the Illumos Crypto Layer
+ * to work in the Linux kernel. Almost all of the changes fall into
+ * one of the following categories:
+ *
+ * 1) Moving the syntax to C90: This was mostly a matter of
+ * changing func() definitions to func(void). In a few cases,
+ * initializations of structs with unions needed to have brackets
+ * added.
+ *
+ * 2) Changes to allow userspace compilation: The ICP is meant to be
+ * compiled and used in both userspace and kernel space (for ztest and
+ * libzfs), so the _KERNEL macros did not make sense anymore. For the
+ * same reason, many header includes were also changed to use
+ * sys/zfs_context.h instead.
+ *
+ * 3) Moving to a statically compiled architecture: At some point in
+ * the future it may make sense to have encryption algorithms that are
+ * loadable into the ICP at runtime via separate kernel modules.
+ * However, considering that this code will probably not see much use
+ * outside of zfs, and that zfs encryption only requires the aes and
+ * sha256 algorithms, it seemed like more trouble than it was worth to
+ * port over Illumos's kernel module structure to a Linux kernel module.
+ * In addition, the Illumos code related to keeping track of kernel
+ * modules is very much tied to the Illumos OS and proved difficult to
+ * port to Linux. Therefore, the structure of the ICP was simplified to
+ * work statically, and several pieces of code responsible for keeping
+ * track of Illumos kernel modules were removed and simplified. All module
+ * initialization and destruction is now called in this file during
+ * Linux kernel module loading and unloading.
+ *
+ * 4) Adding destructors: The Illumos Crypto Layer is built into
+ * the Illumos kernel and is not meant to be unloaded. Some destructors
+ * were added to allow the ICP to be unloaded without leaking
+ * structures.
+ *
+ * 5) Removing CRYPTO_DATA_MBLK related structures and code:
+ * crypto_data_t can have 3 formats, CRYPTO_DATA_RAW, CRYPTO_DATA_UIO,
+ * and CRYPTO_DATA_MBLK. ZFS only requires the first 2 formats, as the
+ * last one is related to streamed data. To simplify the port, code
+ * related to this format was removed.
+ *
+ * 6) Changes for architecture specific code: Some changes were needed
+ * to make architecture specific assembly compile. The biggest change
+ * here was to functions related to detecting CPU capabilities for amd64.
+ * The Illumos Crypto Layer called into the Illumos kernel's API to
+ * discover these. These calls have been converted to instead use the
+ * 'cpuid' instruction as per the Intel spec. In addition, references
+ * to the sun4u and sparc architectures have been removed so that
+ * these will use the generic implementation.
+ *
+ * 7) Removing sha384 and sha512 code: The sha code was actually very
+ * easy to port. However, the generic sha384 and sha512 code exceeds
+ * the stack size on arm and powerpc architectures. In an effort
+ * to remove warnings, this code was removed.
+ *
+ * 8) Change large allocations from kmem_alloc() to vmem_alloc(): In
+ * testing the ICP with the ZFS encryption code, a few allocations were
+ * found that could potentially be very large. These caused the SPL to
+ * throw warnings and so they were changed to use vmem_alloc().
+ *
+ * 9) Makefiles: Makefiles were added that would work with the existing
+ * ZFS Makefiles.
+ */
+
+void __exit
+icp_fini(void)
+{
+ skein_mod_fini();
+ sha2_mod_fini();
+ sha1_mod_fini();
+ edonr_mod_fini();
+ aes_mod_fini();
+ kcf_sched_destroy();
+ kcf_prov_tab_destroy();
+ kcf_destroy_mech_tabs();
+ mod_hash_fini();
+}
+
+/* roughly equivalent to kcf.c: _init() */
+int __init
+icp_init(void)
+{
+ /* initialize the mod hash module */
+ mod_hash_init();
+
+ /* initialize the mechanisms tables supported out-of-the-box */
+ kcf_init_mech_tabs();
+
+ /* initialize the providers tables */
+ kcf_prov_tab_init();
+
+ /*
+ * Initialize scheduling structures. Note that this does NOT
+ * start any threads since it might not be safe to do so.
+ */
+ kcf_sched_init();
+
+ /* initialize algorithms */
+ aes_mod_init();
+ edonr_mod_init();
+ sha1_mod_init();
+ sha2_mod_init();
+ skein_mod_init();
+
+ return (0);
+}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+module_exit(icp_fini);
+module_init(icp_init);
+MODULE_LICENSE(ZFS_META_LICENSE);
+#endif
diff --git a/zfs/module/icp/include/aes/aes_impl.h b/zfs/module/icp/include/aes/aes_impl.h
new file mode 100644
index 000000000000..ed15f74e797e
--- /dev/null
+++ b/zfs/module/icp/include/aes/aes_impl.h
@@ -0,0 +1,170 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _AES_IMPL_H
+#define _AES_IMPL_H
+
+/*
+ * Common definitions used by AES.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+
+/* Similar to sysmacros.h IS_P2ALIGNED, but checks two pointers: */
+#define IS_P2ALIGNED2(v, w, a) \
+ ((((uintptr_t)(v) | (uintptr_t)(w)) & ((uintptr_t)(a) - 1)) == 0)
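+/*
+ * For example, IS_P2ALIGNED2(src, dst, sizeof (uint32_t)) is nonzero
+ * only when both pointers are 4-byte aligned; OR-ing the two
+ * addresses lets a single mask test cover both.
+ */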
+
+#define AES_BLOCK_LEN 16 /* bytes */
+/* Round constant length, in number of 32-bit elements: */
+#define RC_LENGTH (5 * ((AES_BLOCK_LEN) / 4 - 2))
+
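+/*
+ * AES_COPY_BLOCK and AES_XOR_BLOCK below operate on one 16-byte AES
+ * block a byte at a time, so they place no alignment requirements on
+ * src or dst.
+ */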
+#define AES_COPY_BLOCK(src, dst) \
+ (dst)[0] = (src)[0]; \
+ (dst)[1] = (src)[1]; \
+ (dst)[2] = (src)[2]; \
+ (dst)[3] = (src)[3]; \
+ (dst)[4] = (src)[4]; \
+ (dst)[5] = (src)[5]; \
+ (dst)[6] = (src)[6]; \
+ (dst)[7] = (src)[7]; \
+ (dst)[8] = (src)[8]; \
+ (dst)[9] = (src)[9]; \
+ (dst)[10] = (src)[10]; \
+ (dst)[11] = (src)[11]; \
+ (dst)[12] = (src)[12]; \
+ (dst)[13] = (src)[13]; \
+ (dst)[14] = (src)[14]; \
+ (dst)[15] = (src)[15]
+
+#define AES_XOR_BLOCK(src, dst) \
+ (dst)[0] ^= (src)[0]; \
+ (dst)[1] ^= (src)[1]; \
+ (dst)[2] ^= (src)[2]; \
+ (dst)[3] ^= (src)[3]; \
+ (dst)[4] ^= (src)[4]; \
+ (dst)[5] ^= (src)[5]; \
+ (dst)[6] ^= (src)[6]; \
+ (dst)[7] ^= (src)[7]; \
+ (dst)[8] ^= (src)[8]; \
+ (dst)[9] ^= (src)[9]; \
+ (dst)[10] ^= (src)[10]; \
+ (dst)[11] ^= (src)[11]; \
+ (dst)[12] ^= (src)[12]; \
+ (dst)[13] ^= (src)[13]; \
+ (dst)[14] ^= (src)[14]; \
+ (dst)[15] ^= (src)[15]
+
+/* AES key size definitions */
+#define AES_MINBITS 128
+#define AES_MINBYTES ((AES_MINBITS) >> 3)
+#define AES_MAXBITS 256
+#define AES_MAXBYTES ((AES_MAXBITS) >> 3)
+
+#define AES_MIN_KEY_BYTES ((AES_MINBITS) >> 3)
+#define AES_MAX_KEY_BYTES ((AES_MAXBITS) >> 3)
+#define AES_192_KEY_BYTES 24
+#define AES_IV_LEN 16
+
+/* AES key schedule may be implemented with 32- or 64-bit elements: */
+#define AES_32BIT_KS 32
+#define AES_64BIT_KS 64
+
+#define MAX_AES_NR 14 /* Maximum number of rounds */
+#define MAX_AES_NB 4 /* Number of columns comprising a state */
+
+typedef union {
+#ifdef sun4u
+ uint64_t ks64[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
+#endif
+ uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
+} aes_ks_t;
+
+/* aes_key.flags value: */
+#define INTEL_AES_NI_CAPABLE 0x1 /* AES-NI instructions present */
+
+typedef struct aes_key aes_key_t;
+struct aes_key {
+ aes_ks_t encr_ks; /* encryption key schedule */
+ aes_ks_t decr_ks; /* decryption key schedule */
+#ifdef __amd64
+ long double align128; /* Align fields above for Intel AES-NI */
+ int flags; /* implementation-dependent flags */
+#endif /* __amd64 */
+ int nr; /* number of rounds (10, 12, or 14) */
+ int type; /* key schedule size (32 or 64 bits) */
+};
+
+/*
+ * Core AES functions.
+ * ks and keysched are pointers to aes_key_t.
+ * They are declared void* as they are intended to be opaque types.
+ * Use function aes_alloc_keysched() to allocate memory for ks and keysched.
+ */
+extern void *aes_alloc_keysched(size_t *size, int kmflag);
+extern void aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits,
+ void *keysched);
+extern int aes_encrypt_block(const void *ks, const uint8_t *pt, uint8_t *ct);
+extern int aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt);
+
+/*
+ * AES mode functions.
+ * The first 2 functions operate on 16-byte AES blocks.
+ */
+extern void aes_copy_block(uint8_t *in, uint8_t *out);
+extern void aes_xor_block(uint8_t *data, uint8_t *dst);
+
+/* Note: ctx is a pointer to aes_ctx_t defined in modes.h */
+extern int aes_encrypt_contiguous_blocks(void *ctx, char *data, size_t length,
+ crypto_data_t *out);
+extern int aes_decrypt_contiguous_blocks(void *ctx, char *data, size_t length,
+ crypto_data_t *out);
+
+/*
+ * The following definitions and declarations are only used by AES FIPS POST
+ */
+#ifdef _AES_IMPL
+
+typedef enum aes_mech_type {
+ AES_ECB_MECH_INFO_TYPE, /* SUN_CKM_AES_ECB */
+ AES_CBC_MECH_INFO_TYPE, /* SUN_CKM_AES_CBC */
+ AES_CBC_PAD_MECH_INFO_TYPE, /* SUN_CKM_AES_CBC_PAD */
+ AES_CTR_MECH_INFO_TYPE, /* SUN_CKM_AES_CTR */
+ AES_CCM_MECH_INFO_TYPE, /* SUN_CKM_AES_CCM */
+ AES_GCM_MECH_INFO_TYPE, /* SUN_CKM_AES_GCM */
+ AES_GMAC_MECH_INFO_TYPE /* SUN_CKM_AES_GMAC */
+} aes_mech_type_t;
+
+#endif /* _AES_IMPL */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _AES_IMPL_H */
diff --git a/zfs/module/icp/include/modes/modes.h b/zfs/module/icp/include/modes/modes.h
new file mode 100644
index 000000000000..7c1f10b16e76
--- /dev/null
+++ b/zfs/module/icp/include/modes/modes.h
@@ -0,0 +1,385 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _COMMON_CRYPTO_MODES_H
+#define _COMMON_CRYPTO_MODES_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+
+#define ECB_MODE 0x00000002
+#define CBC_MODE 0x00000004
+#define CTR_MODE 0x00000008
+#define CCM_MODE 0x00000010
+#define GCM_MODE 0x00000020
+#define GMAC_MODE 0x00000040
+
+/*
+ * cc_keysched: Pointer to key schedule.
+ *
+ * cc_keysched_len: Length of the key schedule.
+ *
+ * cc_remainder: This is for residual data, i.e. data that can't
+ * be processed because there are too few bytes.
+ * Must wait until more data arrives.
+ *
+ * cc_remainder_len: Number of bytes in cc_remainder.
+ *
+ * cc_iv: Scratch buffer that sometimes contains the IV.
+ *
+ * cc_lastp: Pointer to previous block of ciphertext.
+ *
+ * cc_copy_to: Pointer to where encrypted residual data needs
+ * to be copied.
+ *
+ * cc_flags: PROVIDER_OWNS_KEY_SCHEDULE
+ * When a context is freed, it is necessary
+ * to know whether the key schedule was allocated
+ * by the caller, or internally, e.g. an init routine.
+ * If allocated by the latter, then it needs to be freed.
+ *
+ * ECB_MODE, CBC_MODE, CTR_MODE, or CCM_MODE
+ */
+struct common_ctx {
+ void *cc_keysched;
+ size_t cc_keysched_len;
+ uint64_t cc_iv[2];
+ uint64_t cc_remainder[2];
+ size_t cc_remainder_len;
+ uint8_t *cc_lastp;
+ uint8_t *cc_copy_to;
+ uint32_t cc_flags;
+};
+
+typedef struct common_ctx common_ctx_t;
+
+typedef struct ecb_ctx {
+ struct common_ctx ecb_common;
+ uint64_t ecb_lastblock[2];
+} ecb_ctx_t;
+
+#define ecb_keysched ecb_common.cc_keysched
+#define ecb_keysched_len ecb_common.cc_keysched_len
+#define ecb_iv ecb_common.cc_iv
+#define ecb_remainder ecb_common.cc_remainder
+#define ecb_remainder_len ecb_common.cc_remainder_len
+#define ecb_lastp ecb_common.cc_lastp
+#define ecb_copy_to ecb_common.cc_copy_to
+#define ecb_flags ecb_common.cc_flags
+
+typedef struct cbc_ctx {
+ struct common_ctx cbc_common;
+ uint64_t cbc_lastblock[2];
+} cbc_ctx_t;
+
+#define cbc_keysched cbc_common.cc_keysched
+#define cbc_keysched_len cbc_common.cc_keysched_len
+#define cbc_iv cbc_common.cc_iv
+#define cbc_remainder cbc_common.cc_remainder
+#define cbc_remainder_len cbc_common.cc_remainder_len
+#define cbc_lastp cbc_common.cc_lastp
+#define cbc_copy_to cbc_common.cc_copy_to
+#define cbc_flags cbc_common.cc_flags
+
+/*
+ * ctr_lower_mask Bit-mask for lower 8 bytes of counter block.
+ * ctr_upper_mask Bit-mask for upper 8 bytes of counter block.
+ */
+typedef struct ctr_ctx {
+ struct common_ctx ctr_common;
+ uint64_t ctr_lower_mask;
+ uint64_t ctr_upper_mask;
+ uint32_t ctr_tmp[4];
+} ctr_ctx_t;
+
+/*
+ * ctr_cb Counter block.
+ */
+#define ctr_keysched ctr_common.cc_keysched
+#define ctr_keysched_len ctr_common.cc_keysched_len
+#define ctr_cb ctr_common.cc_iv
+#define ctr_remainder ctr_common.cc_remainder
+#define ctr_remainder_len ctr_common.cc_remainder_len
+#define ctr_lastp ctr_common.cc_lastp
+#define ctr_copy_to ctr_common.cc_copy_to
+#define ctr_flags ctr_common.cc_flags
+
+/*
+ *
+ * ccm_mac_len: Stores length of the MAC in CCM mode.
+ * ccm_mac_buf: Stores the intermediate value for MAC in CCM encrypt.
+ * In CCM decrypt, stores the input MAC value.
+ * ccm_data_len: Length of the plaintext for CCM mode encrypt, or
+ * length of the ciphertext for CCM mode decrypt.
+ * ccm_processed_data_len:
+ * Length of processed plaintext in CCM mode encrypt,
+ * or length of processed ciphertext for CCM mode decrypt.
+ * ccm_processed_mac_len:
+ * Length of MAC data accumulated in CCM mode decrypt.
+ *
+ * ccm_pt_buf: Only used in CCM mode decrypt. It stores the
+ * decrypted plaintext to be returned when
+ * MAC verification succeeds in decrypt_final.
+ * Memory for this should be allocated in the AES module.
+ *
+ */
+typedef struct ccm_ctx {
+ struct common_ctx ccm_common;
+ uint32_t ccm_tmp[4];
+ size_t ccm_mac_len;
+ uint64_t ccm_mac_buf[2];
+ size_t ccm_data_len;
+ size_t ccm_processed_data_len;
+ size_t ccm_processed_mac_len;
+ uint8_t *ccm_pt_buf;
+ uint64_t ccm_mac_input_buf[2];
+ uint64_t ccm_counter_mask;
+} ccm_ctx_t;
+
+#define ccm_keysched ccm_common.cc_keysched
+#define ccm_keysched_len ccm_common.cc_keysched_len
+#define ccm_cb ccm_common.cc_iv
+#define ccm_remainder ccm_common.cc_remainder
+#define ccm_remainder_len ccm_common.cc_remainder_len
+#define ccm_lastp ccm_common.cc_lastp
+#define ccm_copy_to ccm_common.cc_copy_to
+#define ccm_flags ccm_common.cc_flags
+
+/*
+ * gcm_tag_len: Length of authentication tag.
+ *
+ * gcm_ghash: Stores output from the GHASH function.
+ *
+ * gcm_processed_data_len:
+ * Length of processed plaintext (encrypt) or
+ * length of processed ciphertext (decrypt).
+ *
+ * gcm_pt_buf: Stores the decrypted plaintext returned by
+ * decrypt_final when the computed authentication
+ * tag matches the user supplied tag.
+ *
+ * gcm_pt_buf_len: Length of the plaintext buffer.
+ *
+ * gcm_H: Subkey.
+ *
+ * gcm_J0: Pre-counter block generated from the IV.
+ *
+ * gcm_len_a_len_c: 64-bit representations of the bit lengths of
+ * AAD and ciphertext.
+ *
+ * gcm_kmflag: Current value of kmflag. Used only for allocating
+ * the plaintext buffer during decryption.
+ */
+typedef struct gcm_ctx {
+ struct common_ctx gcm_common;
+ size_t gcm_tag_len;
+ size_t gcm_processed_data_len;
+ size_t gcm_pt_buf_len;
+ uint32_t gcm_tmp[4];
+ uint64_t gcm_ghash[2];
+ uint64_t gcm_H[2];
+ uint64_t gcm_J0[2];
+ uint64_t gcm_len_a_len_c[2];
+ uint8_t *gcm_pt_buf;
+ int gcm_kmflag;
+} gcm_ctx_t;
+
+#define gcm_keysched gcm_common.cc_keysched
+#define gcm_keysched_len gcm_common.cc_keysched_len
+#define gcm_cb gcm_common.cc_iv
+#define gcm_remainder gcm_common.cc_remainder
+#define gcm_remainder_len gcm_common.cc_remainder_len
+#define gcm_lastp gcm_common.cc_lastp
+#define gcm_copy_to gcm_common.cc_copy_to
+#define gcm_flags gcm_common.cc_flags
+
+#define AES_GMAC_IV_LEN 12
+#define AES_GMAC_TAG_BITS 128
+
+typedef struct aes_ctx {
+ union {
+ ecb_ctx_t acu_ecb;
+ cbc_ctx_t acu_cbc;
+ ctr_ctx_t acu_ctr;
+ ccm_ctx_t acu_ccm;
+ gcm_ctx_t acu_gcm;
+ } acu;
+} aes_ctx_t;
+
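+/*
+ * Every mode context in the union above begins with a struct
+ * common_ctx, so the shared fields below may be accessed through
+ * acu_ecb regardless of which mode is actually in use.
+ */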
+#define ac_flags acu.acu_ecb.ecb_common.cc_flags
+#define ac_remainder_len acu.acu_ecb.ecb_common.cc_remainder_len
+#define ac_keysched acu.acu_ecb.ecb_common.cc_keysched
+#define ac_keysched_len acu.acu_ecb.ecb_common.cc_keysched_len
+#define ac_iv acu.acu_ecb.ecb_common.cc_iv
+#define ac_lastp acu.acu_ecb.ecb_common.cc_lastp
+#define ac_pt_buf acu.acu_ccm.ccm_pt_buf
+#define ac_mac_len acu.acu_ccm.ccm_mac_len
+#define ac_data_len acu.acu_ccm.ccm_data_len
+#define ac_processed_mac_len acu.acu_ccm.ccm_processed_mac_len
+#define ac_processed_data_len acu.acu_ccm.ccm_processed_data_len
+#define ac_tag_len acu.acu_gcm.gcm_tag_len
+
+typedef struct blowfish_ctx {
+ union {
+ ecb_ctx_t bcu_ecb;
+ cbc_ctx_t bcu_cbc;
+ } bcu;
+} blowfish_ctx_t;
+
+#define bc_flags bcu.bcu_ecb.ecb_common.cc_flags
+#define bc_remainder_len bcu.bcu_ecb.ecb_common.cc_remainder_len
+#define bc_keysched bcu.bcu_ecb.ecb_common.cc_keysched
+#define bc_keysched_len bcu.bcu_ecb.ecb_common.cc_keysched_len
+#define bc_iv bcu.bcu_ecb.ecb_common.cc_iv
+#define bc_lastp bcu.bcu_ecb.ecb_common.cc_lastp
+
+typedef struct des_ctx {
+ union {
+ ecb_ctx_t dcu_ecb;
+ cbc_ctx_t dcu_cbc;
+ } dcu;
+} des_ctx_t;
+
+#define dc_flags dcu.dcu_ecb.ecb_common.cc_flags
+#define dc_remainder_len dcu.dcu_ecb.ecb_common.cc_remainder_len
+#define dc_keysched dcu.dcu_ecb.ecb_common.cc_keysched
+#define dc_keysched_len dcu.dcu_ecb.ecb_common.cc_keysched_len
+#define dc_iv dcu.dcu_ecb.ecb_common.cc_iv
+#define dc_lastp dcu.dcu_ecb.ecb_common.cc_lastp
+
+extern int ecb_cipher_contiguous_blocks(ecb_ctx_t *, char *, size_t,
+ crypto_data_t *, size_t, int (*cipher)(const void *, const uint8_t *,
+ uint8_t *));
+
+extern int cbc_encrypt_contiguous_blocks(cbc_ctx_t *, char *, size_t,
+ crypto_data_t *, size_t,
+ int (*encrypt)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *));
+
+extern int cbc_decrypt_contiguous_blocks(cbc_ctx_t *, char *, size_t,
+ crypto_data_t *, size_t,
+ int (*decrypt)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *));
+
+extern int ctr_mode_contiguous_blocks(ctr_ctx_t *, char *, size_t,
+ crypto_data_t *, size_t,
+ int (*cipher)(const void *, const uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *));
+
+extern int ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *, char *, size_t,
+ crypto_data_t *, size_t,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *));
+
+extern int ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *, char *, size_t,
+ crypto_data_t *, size_t,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *));
+
+extern int gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *, char *, size_t,
+ crypto_data_t *, size_t,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *));
+
+extern int gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *, char *, size_t,
+ crypto_data_t *, size_t,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *));
+
+int ccm_encrypt_final(ccm_ctx_t *, crypto_data_t *, size_t,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *));
+
+int gcm_encrypt_final(gcm_ctx_t *, crypto_data_t *, size_t,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *));
+
+extern int ccm_decrypt_final(ccm_ctx_t *, crypto_data_t *, size_t,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *));
+
+extern int gcm_decrypt_final(gcm_ctx_t *, crypto_data_t *, size_t,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *));
+
+extern int ctr_mode_final(ctr_ctx_t *, crypto_data_t *,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *));
+
+extern int cbc_init_ctx(cbc_ctx_t *, char *, size_t, size_t,
+ void (*copy_block)(uint8_t *, uint64_t *));
+
+extern int ctr_init_ctx(ctr_ctx_t *, ulong_t, uint8_t *,
+ void (*copy_block)(uint8_t *, uint8_t *));
+
+extern int ccm_init_ctx(ccm_ctx_t *, char *, int, boolean_t, size_t,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *));
+
+extern int gcm_init_ctx(gcm_ctx_t *, char *, size_t,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *));
+
+extern int gmac_init_ctx(gcm_ctx_t *, char *, size_t,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *));
+
+extern void calculate_ccm_mac(ccm_ctx_t *, uint8_t *,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *));
+
+extern void gcm_mul(uint64_t *, uint64_t *, uint64_t *);
+
+extern void crypto_init_ptrs(crypto_data_t *, void **, offset_t *);
+extern void crypto_get_ptrs(crypto_data_t *, void **, offset_t *,
+ uint8_t **, size_t *, uint8_t **, size_t);
+
+extern void *ecb_alloc_ctx(int);
+extern void *cbc_alloc_ctx(int);
+extern void *ctr_alloc_ctx(int);
+extern void *ccm_alloc_ctx(int);
+extern void *gcm_alloc_ctx(int);
+extern void *gmac_alloc_ctx(int);
+extern void crypto_free_mode_ctx(void *);
+extern void gcm_set_kmflag(gcm_ctx_t *, int);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _COMMON_CRYPTO_MODES_H */
diff --git a/zfs/module/icp/include/sha1/sha1.h b/zfs/module/icp/include/sha1/sha1.h
new file mode 100644
index 000000000000..251b64fcaeee
--- /dev/null
+++ b/zfs/module/icp/include/sha1/sha1.h
@@ -0,0 +1,61 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_SHA1_H
+#define _SYS_SHA1_H
+
+#include <sys/types.h> /* for uint_* */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * NOTE: n2rng (Niagara2 RNG driver) accesses the state field of
+ * SHA1_CTX directly. NEVER change this structure without verifying
+ * compatibility with n2rng. The important thing is that the state
+ * must be in a field declared as uint32_t state[5].
+ */
+/* SHA-1 context. */
+typedef struct {
+ uint32_t state[5]; /* state (ABCDE) */
+ uint32_t count[2]; /* number of bits, modulo 2^64 (msb first) */
+ union {
+ uint8_t buf8[64]; /* undigested input */
+ uint32_t buf32[16]; /* realigned input */
+ } buf_un;
+} SHA1_CTX;
+
+#define SHA1_DIGEST_LENGTH 20
+
+void SHA1Init(SHA1_CTX *);
+void SHA1Update(SHA1_CTX *, const void *, size_t);
+void SHA1Final(void *, SHA1_CTX *);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SHA1_H */
diff --git a/zfs/module/icp/include/sha1/sha1_consts.h b/zfs/module/icp/include/sha1/sha1_consts.h
new file mode 100644
index 000000000000..848d25ef050f
--- /dev/null
+++ b/zfs/module/icp/include/sha1/sha1_consts.h
@@ -0,0 +1,65 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 1998, by Sun Microsystems, Inc.
+ * All rights reserved.
+ */
+
+#ifndef _SYS_SHA1_CONSTS_H
+#define _SYS_SHA1_CONSTS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * as explained in sha1.c, loading 32-bit constants on a sparc is expensive
+ * since it involves both a `sethi' and an `or'. thus, we instead use `ld'
+ * to load the constants from an array called `sha1_consts'. however, on
+ * intel (and perhaps other processors), it is cheaper to load the constant
+ * directly. thus, the c code in SHA1Transform() uses the macro SHA1_CONST()
+ * which either expands to a constant or an array reference, depending on
+ * the architecture the code is being compiled for.
+ */
+
+#include <sys/types.h> /* uint32_t */
+
+extern const uint32_t sha1_consts[];
+
+#if defined(__sparc)
+#define SHA1_CONST(x) (sha1_consts[x])
+#else
+#define SHA1_CONST(x) (SHA1_CONST_ ## x)
+#endif
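+/*
+ * For example, SHA1_CONST(2) expands to sha1_consts[2] on sparc and
+ * to the literal constant SHA1_CONST_2 (0x8f1bbcdcU) elsewhere.
+ */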
+
+/* constants, as provided in FIPS 180-1 */
+
+#define SHA1_CONST_0 0x5a827999U
+#define SHA1_CONST_1 0x6ed9eba1U
+#define SHA1_CONST_2 0x8f1bbcdcU
+#define SHA1_CONST_3 0xca62c1d6U
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SHA1_CONSTS_H */
diff --git a/zfs/module/icp/include/sha1/sha1_impl.h b/zfs/module/icp/include/sha1/sha1_impl.h
new file mode 100644
index 000000000000..1c1f8728f9b5
--- /dev/null
+++ b/zfs/module/icp/include/sha1/sha1_impl.h
@@ -0,0 +1,73 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SHA1_IMPL_H
+#define _SHA1_IMPL_H
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SHA1_HASH_SIZE 20 /* SHA_1 digest length in bytes */
+#define SHA1_DIGEST_LENGTH 20 /* SHA1 digest length in bytes */
+#define SHA1_HMAC_BLOCK_SIZE 64 /* SHA1-HMAC block size */
+#define SHA1_HMAC_MIN_KEY_LEN 1 /* SHA1-HMAC min key length in bytes */
+#define SHA1_HMAC_MAX_KEY_LEN INT_MAX /* SHA1-HMAC max key length in bytes */
+#define SHA1_HMAC_INTS_PER_BLOCK (SHA1_HMAC_BLOCK_SIZE/sizeof (uint32_t))
+
+/*
+ * CSPI information (entry points, provider info, etc.)
+ */
+typedef enum sha1_mech_type {
+ SHA1_MECH_INFO_TYPE, /* SUN_CKM_SHA1 */
+ SHA1_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA1_HMAC */
+ SHA1_HMAC_GEN_MECH_INFO_TYPE /* SUN_CKM_SHA1_HMAC_GENERAL */
+} sha1_mech_type_t;
+
+/*
+ * Context for SHA1 mechanism.
+ */
+typedef struct sha1_ctx {
+ sha1_mech_type_t sc_mech_type; /* type of context */
+ SHA1_CTX sc_sha1_ctx; /* SHA1 context */
+} sha1_ctx_t;
+
+/*
+ * Context for SHA1-HMAC and SHA1-HMAC-GENERAL mechanisms.
+ */
+typedef struct sha1_hmac_ctx {
+ sha1_mech_type_t hc_mech_type; /* type of context */
+ uint32_t hc_digest_len; /* digest len in bytes */
+ SHA1_CTX hc_icontext; /* inner SHA1 context */
+ SHA1_CTX hc_ocontext; /* outer SHA1 context */
+} sha1_hmac_ctx_t;
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SHA1_IMPL_H */
diff --git a/zfs/module/icp/include/sha2/sha2_consts.h b/zfs/module/icp/include/sha2/sha2_consts.h
new file mode 100644
index 000000000000..3a6645508fe9
--- /dev/null
+++ b/zfs/module/icp/include/sha2/sha2_consts.h
@@ -0,0 +1,219 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_SHA2_CONSTS_H
+#define _SYS_SHA2_CONSTS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Loading 32-bit constants on a sparc is expensive since it involves both
+ * a `sethi' and an `or'. thus, we instead use `ld' to load the constants
+ * from an array called `sha2_consts'. however, on intel (and perhaps other
+ * processors), it is cheaper to load the constant directly. thus, the c
+ * code in SHA transform functions uses the macro SHA2_CONST() which either
+ * expands to a constant or an array reference, depending on
+ * the architecture the code is being compiled for.
+ *
+ * SHA512 constants are used for SHA384
+ */
+
+#include <sys/types.h> /* uint32_t */
+
+extern const uint32_t sha256_consts[];
+extern const uint64_t sha512_consts[];
+
+#if defined(__sparc)
+#define SHA256_CONST(x) (sha256_consts[x])
+#define SHA512_CONST(x) (sha512_consts[x])
+#else
+#define SHA256_CONST(x) (SHA256_CONST_ ## x)
+#define SHA512_CONST(x) (SHA512_CONST_ ## x)
+#endif
+
+/* constants, as provided in FIPS 180-2 */
+
+#define SHA256_CONST_0 0x428a2f98U
+#define SHA256_CONST_1 0x71374491U
+#define SHA256_CONST_2 0xb5c0fbcfU
+#define SHA256_CONST_3 0xe9b5dba5U
+#define SHA256_CONST_4 0x3956c25bU
+#define SHA256_CONST_5 0x59f111f1U
+#define SHA256_CONST_6 0x923f82a4U
+#define SHA256_CONST_7 0xab1c5ed5U
+
+#define SHA256_CONST_8 0xd807aa98U
+#define SHA256_CONST_9 0x12835b01U
+#define SHA256_CONST_10 0x243185beU
+#define SHA256_CONST_11 0x550c7dc3U
+#define SHA256_CONST_12 0x72be5d74U
+#define SHA256_CONST_13 0x80deb1feU
+#define SHA256_CONST_14 0x9bdc06a7U
+#define SHA256_CONST_15 0xc19bf174U
+
+#define SHA256_CONST_16 0xe49b69c1U
+#define SHA256_CONST_17 0xefbe4786U
+#define SHA256_CONST_18 0x0fc19dc6U
+#define SHA256_CONST_19 0x240ca1ccU
+#define SHA256_CONST_20 0x2de92c6fU
+#define SHA256_CONST_21 0x4a7484aaU
+#define SHA256_CONST_22 0x5cb0a9dcU
+#define SHA256_CONST_23 0x76f988daU
+
+#define SHA256_CONST_24 0x983e5152U
+#define SHA256_CONST_25 0xa831c66dU
+#define SHA256_CONST_26 0xb00327c8U
+#define SHA256_CONST_27 0xbf597fc7U
+#define SHA256_CONST_28 0xc6e00bf3U
+#define SHA256_CONST_29 0xd5a79147U
+#define SHA256_CONST_30 0x06ca6351U
+#define SHA256_CONST_31 0x14292967U
+
+#define SHA256_CONST_32 0x27b70a85U
+#define SHA256_CONST_33 0x2e1b2138U
+#define SHA256_CONST_34 0x4d2c6dfcU
+#define SHA256_CONST_35 0x53380d13U
+#define SHA256_CONST_36 0x650a7354U
+#define SHA256_CONST_37 0x766a0abbU
+#define SHA256_CONST_38 0x81c2c92eU
+#define SHA256_CONST_39 0x92722c85U
+
+#define SHA256_CONST_40 0xa2bfe8a1U
+#define SHA256_CONST_41 0xa81a664bU
+#define SHA256_CONST_42 0xc24b8b70U
+#define SHA256_CONST_43 0xc76c51a3U
+#define SHA256_CONST_44 0xd192e819U
+#define SHA256_CONST_45 0xd6990624U
+#define SHA256_CONST_46 0xf40e3585U
+#define SHA256_CONST_47 0x106aa070U
+
+#define SHA256_CONST_48 0x19a4c116U
+#define SHA256_CONST_49 0x1e376c08U
+#define SHA256_CONST_50 0x2748774cU
+#define SHA256_CONST_51 0x34b0bcb5U
+#define SHA256_CONST_52 0x391c0cb3U
+#define SHA256_CONST_53 0x4ed8aa4aU
+#define SHA256_CONST_54 0x5b9cca4fU
+#define SHA256_CONST_55 0x682e6ff3U
+
+#define SHA256_CONST_56 0x748f82eeU
+#define SHA256_CONST_57 0x78a5636fU
+#define SHA256_CONST_58 0x84c87814U
+#define SHA256_CONST_59 0x8cc70208U
+#define SHA256_CONST_60 0x90befffaU
+#define SHA256_CONST_61 0xa4506cebU
+#define SHA256_CONST_62 0xbef9a3f7U
+#define SHA256_CONST_63 0xc67178f2U
+
+#define SHA512_CONST_0 0x428a2f98d728ae22ULL
+#define SHA512_CONST_1 0x7137449123ef65cdULL
+#define SHA512_CONST_2 0xb5c0fbcfec4d3b2fULL
+#define SHA512_CONST_3 0xe9b5dba58189dbbcULL
+#define SHA512_CONST_4 0x3956c25bf348b538ULL
+#define SHA512_CONST_5 0x59f111f1b605d019ULL
+#define SHA512_CONST_6 0x923f82a4af194f9bULL
+#define SHA512_CONST_7 0xab1c5ed5da6d8118ULL
+#define SHA512_CONST_8 0xd807aa98a3030242ULL
+#define SHA512_CONST_9 0x12835b0145706fbeULL
+#define SHA512_CONST_10 0x243185be4ee4b28cULL
+#define SHA512_CONST_11 0x550c7dc3d5ffb4e2ULL
+#define SHA512_CONST_12 0x72be5d74f27b896fULL
+#define SHA512_CONST_13 0x80deb1fe3b1696b1ULL
+#define SHA512_CONST_14 0x9bdc06a725c71235ULL
+#define SHA512_CONST_15 0xc19bf174cf692694ULL
+#define SHA512_CONST_16 0xe49b69c19ef14ad2ULL
+#define SHA512_CONST_17 0xefbe4786384f25e3ULL
+#define SHA512_CONST_18 0x0fc19dc68b8cd5b5ULL
+#define SHA512_CONST_19 0x240ca1cc77ac9c65ULL
+#define SHA512_CONST_20 0x2de92c6f592b0275ULL
+#define SHA512_CONST_21 0x4a7484aa6ea6e483ULL
+#define SHA512_CONST_22 0x5cb0a9dcbd41fbd4ULL
+#define SHA512_CONST_23 0x76f988da831153b5ULL
+#define SHA512_CONST_24 0x983e5152ee66dfabULL
+#define SHA512_CONST_25 0xa831c66d2db43210ULL
+#define SHA512_CONST_26 0xb00327c898fb213fULL
+#define SHA512_CONST_27 0xbf597fc7beef0ee4ULL
+#define SHA512_CONST_28 0xc6e00bf33da88fc2ULL
+#define SHA512_CONST_29 0xd5a79147930aa725ULL
+#define SHA512_CONST_30 0x06ca6351e003826fULL
+#define SHA512_CONST_31 0x142929670a0e6e70ULL
+#define SHA512_CONST_32 0x27b70a8546d22ffcULL
+#define SHA512_CONST_33 0x2e1b21385c26c926ULL
+#define SHA512_CONST_34 0x4d2c6dfc5ac42aedULL
+#define SHA512_CONST_35 0x53380d139d95b3dfULL
+#define SHA512_CONST_36 0x650a73548baf63deULL
+#define SHA512_CONST_37 0x766a0abb3c77b2a8ULL
+#define SHA512_CONST_38 0x81c2c92e47edaee6ULL
+#define SHA512_CONST_39 0x92722c851482353bULL
+#define SHA512_CONST_40 0xa2bfe8a14cf10364ULL
+#define SHA512_CONST_41 0xa81a664bbc423001ULL
+#define SHA512_CONST_42 0xc24b8b70d0f89791ULL
+#define SHA512_CONST_43 0xc76c51a30654be30ULL
+#define SHA512_CONST_44 0xd192e819d6ef5218ULL
+#define SHA512_CONST_45 0xd69906245565a910ULL
+#define SHA512_CONST_46 0xf40e35855771202aULL
+#define SHA512_CONST_47 0x106aa07032bbd1b8ULL
+#define SHA512_CONST_48 0x19a4c116b8d2d0c8ULL
+#define SHA512_CONST_49 0x1e376c085141ab53ULL
+#define SHA512_CONST_50 0x2748774cdf8eeb99ULL
+#define SHA512_CONST_51 0x34b0bcb5e19b48a8ULL
+#define SHA512_CONST_52 0x391c0cb3c5c95a63ULL
+#define SHA512_CONST_53 0x4ed8aa4ae3418acbULL
+#define SHA512_CONST_54 0x5b9cca4f7763e373ULL
+#define SHA512_CONST_55 0x682e6ff3d6b2b8a3ULL
+#define SHA512_CONST_56 0x748f82ee5defb2fcULL
+#define SHA512_CONST_57 0x78a5636f43172f60ULL
+#define SHA512_CONST_58 0x84c87814a1f0ab72ULL
+#define SHA512_CONST_59 0x8cc702081a6439ecULL
+#define SHA512_CONST_60 0x90befffa23631e28ULL
+#define SHA512_CONST_61 0xa4506cebde82bde9ULL
+#define SHA512_CONST_62 0xbef9a3f7b2c67915ULL
+#define SHA512_CONST_63 0xc67178f2e372532bULL
+#define SHA512_CONST_64 0xca273eceea26619cULL
+#define SHA512_CONST_65 0xd186b8c721c0c207ULL
+#define SHA512_CONST_66 0xeada7dd6cde0eb1eULL
+#define SHA512_CONST_67 0xf57d4f7fee6ed178ULL
+#define SHA512_CONST_68 0x06f067aa72176fbaULL
+#define SHA512_CONST_69 0x0a637dc5a2c898a6ULL
+#define SHA512_CONST_70 0x113f9804bef90daeULL
+#define SHA512_CONST_71 0x1b710b35131c471bULL
+#define SHA512_CONST_72 0x28db77f523047d84ULL
+#define SHA512_CONST_73 0x32caab7b40c72493ULL
+#define SHA512_CONST_74 0x3c9ebe0a15c9bebcULL
+#define SHA512_CONST_75 0x431d67c49c100d4cULL
+#define SHA512_CONST_76 0x4cc5d4becb3e42b6ULL
+#define SHA512_CONST_77 0x597f299cfc657e2aULL
+#define SHA512_CONST_78 0x5fcb6fab3ad6faecULL
+#define SHA512_CONST_79 0x6c44198c4a475817ULL
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_SHA2_CONSTS_H */
diff --git a/zfs/module/icp/include/sha2/sha2_impl.h b/zfs/module/icp/include/sha2/sha2_impl.h
new file mode 100644
index 000000000000..b9768d344e95
--- /dev/null
+++ b/zfs/module/icp/include/sha2/sha2_impl.h
@@ -0,0 +1,64 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SHA2_IMPL_H
+#define _SHA2_IMPL_H
+
+#include <sys/sha2.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef enum {
+ SHA1_TYPE,
+ SHA256_TYPE,
+ SHA384_TYPE,
+ SHA512_TYPE
+} sha2_mech_t;
+
+/*
+ * Context for SHA2 mechanism.
+ */
+typedef struct sha2_ctx {
+ sha2_mech_type_t sc_mech_type; /* type of context */
+ SHA2_CTX sc_sha2_ctx; /* SHA2 context */
+} sha2_ctx_t;
+
+/*
+ * Context for SHA2 HMAC and HMAC GENERAL mechanisms.
+ */
+typedef struct sha2_hmac_ctx {
+ sha2_mech_type_t hc_mech_type; /* type of context */
+ uint32_t hc_digest_len; /* digest len in bytes */
+ SHA2_CTX hc_icontext; /* inner SHA2 context */
+ SHA2_CTX hc_ocontext; /* outer SHA2 context */
+} sha2_hmac_ctx_t;
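+
+/*
+ * For illustration, the inner/outer pair above follows the standard
+ * HMAC construction; a minimal sketch, assuming the usual 0x36/0x5c
+ * key pads (key_ipad/key_opad are hypothetical block-sized buffers):
+ *
+ *	SHA2Init(mech, &hctx->hc_icontext);
+ *	SHA2Update(&hctx->hc_icontext, key_ipad, block_size);
+ *	SHA2Init(mech, &hctx->hc_ocontext);
+ *	SHA2Update(&hctx->hc_ocontext, key_opad, block_size);
+ */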
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SHA2_IMPL_H */
diff --git a/zfs/module/icp/include/sys/asm_linkage.h b/zfs/module/icp/include/sys/asm_linkage.h
new file mode 100644
index 000000000000..49a494b46e0b
--- /dev/null
+++ b/zfs/module/icp/include/sys/asm_linkage.h
@@ -0,0 +1,46 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_ASM_LINKAGE_H
+#define _SYS_ASM_LINKAGE_H
+
+#if defined(__i386) || defined(__amd64)
+
+#include <sys/ia32/asm_linkage.h> /* XX64 x86/sys/asm_linkage.h */
+
+#endif
+
+#if defined(_KERNEL) && defined(HAVE_KERNEL_OBJTOOL)
+
+#include <asm/frame.h>
+
+#else /* userspace */
+#define FRAME_BEGIN
+#define FRAME_END
+#endif
+
+
+#endif /* _SYS_ASM_LINKAGE_H */
diff --git a/zfs/module/icp/include/sys/bitmap.h b/zfs/module/icp/include/sys/bitmap.h
new file mode 100644
index 000000000000..b1f6823e61d4
--- /dev/null
+++ b/zfs/module/icp/include/sys/bitmap.h
@@ -0,0 +1,183 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
+/* All Rights Reserved */
+
+
+#ifndef _SYS_BITMAP_H
+#define _SYS_BITMAP_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if defined(__GNUC__) && defined(_ASM_INLINES) && \
+ (defined(__i386) || defined(__amd64))
+#include <asm/bitmap.h>
+#endif
+
+/*
+ * Operations on bitmaps of arbitrary size
+ * A bitmap is a vector of 1 or more ulong_t's.
+ * The user of the package is responsible for range checks and keeping
+ * track of sizes.
+ */
+
+#ifdef _LP64
+#define BT_ULSHIFT 6 /* log base 2 of BT_NBIPUL, to extract word index */
+#define	BT_ULSHIFT32	5 /* log base 2 of BT_NBIPUL32, to extract word index */
+#else
+#define BT_ULSHIFT 5 /* log base 2 of BT_NBIPUL, to extract word index */
+#endif
+
+#define BT_NBIPUL (1 << BT_ULSHIFT) /* n bits per ulong_t */
+#define BT_ULMASK (BT_NBIPUL - 1) /* to extract bit index */
+
+#ifdef _LP64
+#define	BT_NBIPUL32	(1 << BT_ULSHIFT32)	/* n bits per 32-bit word */
+#define BT_ULMASK32 (BT_NBIPUL32 - 1) /* to extract bit index */
+#define BT_ULMAXMASK 0xffffffffffffffff /* used by bt_getlowbit */
+#else
+#define BT_ULMAXMASK 0xffffffff
+#endif
+
+/*
+ * bitmap is a ulong_t *, bitindex an index_t
+ *
+ * The macros BT_WIM and BT_BIW are internal; there is no need
+ * for users of this package to use them.
+ */
+
+/*
+ * word in map
+ */
+#define BT_WIM(bitmap, bitindex) \
+ ((bitmap)[(bitindex) >> BT_ULSHIFT])
+/*
+ * bit in word
+ */
+#define BT_BIW(bitindex) \
+ (1UL << ((bitindex) & BT_ULMASK))
+
+#ifdef _LP64
+#define BT_WIM32(bitmap, bitindex) \
+ ((bitmap)[(bitindex) >> BT_ULSHIFT32])
+
+#define BT_BIW32(bitindex) \
+ (1UL << ((bitindex) & BT_ULMASK32))
+#endif
+
+/*
+ * These are public macros
+ *
+ * BT_BITOUL == n bits to n ulong_t's
+ */
+#define BT_BITOUL(nbits) \
+ (((nbits) + BT_NBIPUL - 1l) / BT_NBIPUL)
+#define BT_SIZEOFMAP(nbits) \
+ (BT_BITOUL(nbits) * sizeof (ulong_t))
+#define BT_TEST(bitmap, bitindex) \
+ ((BT_WIM((bitmap), (bitindex)) & BT_BIW(bitindex)) ? 1 : 0)
+#define BT_SET(bitmap, bitindex) \
+ { BT_WIM((bitmap), (bitindex)) |= BT_BIW(bitindex); }
+#define BT_CLEAR(bitmap, bitindex) \
+ { BT_WIM((bitmap), (bitindex)) &= ~BT_BIW(bitindex); }
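+
+/*
+ * For illustration, a minimal usage sketch (hypothetical caller code;
+ * a 128-bit map occupies BT_BITOUL(128) words):
+ *
+ *	ulong_t map[BT_BITOUL(128)];
+ *	bzero(map, BT_SIZEOFMAP(128));
+ *	BT_SET(map, 42);
+ *	if (BT_TEST(map, 42))
+ *		BT_CLEAR(map, 42);
+ */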
+
+#ifdef _LP64
+#define BT_BITOUL32(nbits) \
+ (((nbits) + BT_NBIPUL32 - 1l) / BT_NBIPUL32)
+#define BT_SIZEOFMAP32(nbits) \
+ (BT_BITOUL32(nbits) * sizeof (uint_t))
+#define BT_TEST32(bitmap, bitindex) \
+ ((BT_WIM32((bitmap), (bitindex)) & BT_BIW32(bitindex)) ? 1 : 0)
+#define BT_SET32(bitmap, bitindex) \
+ { BT_WIM32((bitmap), (bitindex)) |= BT_BIW32(bitindex); }
+#define BT_CLEAR32(bitmap, bitindex) \
+ { BT_WIM32((bitmap), (bitindex)) &= ~BT_BIW32(bitindex); }
+#endif /* _LP64 */
+
+
+/*
+ * BIT_ONLYONESET is a private macro not designed for bitmaps of
+ * arbitrary size. u must be an unsigned integer/long. It returns
+ * true if one and only one bit is set in u.
+ */
+#define BIT_ONLYONESET(u) \
+ ((((u) == 0) ? 0 : ((u) & ((u) - 1)) == 0))
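+
+/*
+ * For example, BIT_ONLYONESET(0x40) is true, while BIT_ONLYONESET(0x41)
+ * and BIT_ONLYONESET(0) are false.
+ */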
+
+#ifndef _ASM
+
+/*
+ * return next available bit index from map with specified number of bits
+ */
+extern index_t bt_availbit(ulong_t *bitmap, size_t nbits);
+/*
+ * find the highest order bit that is on, and is within or below
+ * the word specified by wx
+ */
+extern int bt_gethighbit(ulong_t *mapp, int wx);
+extern int bt_range(ulong_t *bitmap, size_t *pos1, size_t *pos2,
+ size_t end_pos);
+extern int bt_getlowbit(ulong_t *bitmap, size_t start, size_t stop);
+extern void bt_copy(ulong_t *, ulong_t *, ulong_t);
+
+/*
+ * find the parity
+ */
+extern int odd_parity(ulong_t);
+
+/*
+ * Atomically set/clear bits
+ * Atomic exclusive operations will set "result" to "-1"
+ * if the bit is already set/cleared. "result" will be set
+ * to 0 otherwise.
+ */
+#define BT_ATOMIC_SET(bitmap, bitindex) \
+ { atomic_or_long(&(BT_WIM(bitmap, bitindex)), BT_BIW(bitindex)); }
+#define BT_ATOMIC_CLEAR(bitmap, bitindex) \
+ { atomic_and_long(&(BT_WIM(bitmap, bitindex)), ~BT_BIW(bitindex)); }
+
+#define BT_ATOMIC_SET_EXCL(bitmap, bitindex, result) \
+ { result = atomic_set_long_excl(&(BT_WIM(bitmap, bitindex)), \
+ (bitindex) % BT_NBIPUL); }
+#define BT_ATOMIC_CLEAR_EXCL(bitmap, bitindex, result) \
+ { result = atomic_clear_long_excl(&(BT_WIM(bitmap, bitindex)), \
+ (bitindex) % BT_NBIPUL); }
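+
+/*
+ * For illustration, a minimal exclusive-set sketch (hypothetical
+ * caller code); result is -1 when the bit was already set, 0 when
+ * this caller set it:
+ *
+ *	int result;
+ *	BT_ATOMIC_SET_EXCL(map, 42, result);
+ *	if (result == 0)
+ *		... this thread won the bit ...
+ */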
+
+/*
+ * Extracts bits between index h (high, inclusive) and l (low, inclusive) from
+ * u, which must be an unsigned integer.
+ */
+#define BITX(u, h, l) (((u) >> (l)) & ((1LU << ((h) - (l) + 1LU)) - 1LU))
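+
+/*
+ * For example, BITX(0xABCD, 7, 4) yields 0xC: 0xABCD >> 4 is 0xABC,
+ * and masking with ((1 << 4) - 1) keeps the low nibble.
+ */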
+
+#endif /* _ASM */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_BITMAP_H */
diff --git a/zfs/module/icp/include/sys/crypto/elfsign.h b/zfs/module/icp/include/sys/crypto/elfsign.h
new file mode 100644
index 000000000000..5432f0c8d607
--- /dev/null
+++ b/zfs/module/icp/include/sys/crypto/elfsign.h
@@ -0,0 +1,137 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_CRYPTO_ELFSIGN_H
+#define _SYS_CRYPTO_ELFSIGN_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Consolidation Private Interface for elfsign/libpkcs11/kcfd
+ */
+
+#include <sys/zfs_context.h>
+
+/*
+ * Project Private structures and types used for communication between kcfd
+ * and KCF over the door.
+ */
+
+typedef enum ELFsign_status_e {
+ ELFSIGN_UNKNOWN,
+ ELFSIGN_SUCCESS,
+ ELFSIGN_FAILED,
+ ELFSIGN_NOTSIGNED,
+ ELFSIGN_INVALID_CERTPATH,
+ ELFSIGN_INVALID_ELFOBJ,
+ ELFSIGN_RESTRICTED
+} ELFsign_status_t;
+
+#define KCF_KCFD_VERSION1 1
+#define SIG_MAX_LENGTH 1024
+
+#define ELF_SIGNATURE_SECTION ".SUNW_signature"
+
+typedef struct kcf_door_arg_s {
+ short da_version;
+ boolean_t da_iskernel;
+
+ union {
+ char filename[MAXPATHLEN]; /* For request */
+
+ struct kcf_door_result_s { /* For response */
+ ELFsign_status_t status;
+ uint32_t siglen;
+ uchar_t signature[1];
+ } result;
+ } da_u;
+} kcf_door_arg_t;
+
+typedef uint32_t filesig_vers_t;
+
+/*
+ * File Signature Structure
+ * Applicable to ELF and other file formats
+ */
+struct filesignatures {
+ uint32_t filesig_cnt; /* count of signatures */
+ uint32_t filesig_pad; /* unused */
+ union {
+ char filesig_data[1];
+ struct filesig { /* one of these for each signature */
+ uint32_t filesig_size;
+ filesig_vers_t filesig_version;
+ union {
+ struct filesig_version1 {
+ uint32_t filesig_v1_dnsize;
+ uint32_t filesig_v1_sigsize;
+ uint32_t filesig_v1_oidsize;
+ char filesig_v1_data[1];
+ } filesig_v1;
+ struct filesig_version3 {
+ uint64_t filesig_v3_time;
+ uint32_t filesig_v3_dnsize;
+ uint32_t filesig_v3_sigsize;
+ uint32_t filesig_v3_oidsize;
+ char filesig_v3_data[1];
+ } filesig_v3;
+ } _u2;
+ } filesig_sig;
+ uint64_t filesig_align;
+ } _u1;
+};
+#define filesig_sig _u1.filesig_sig
+
+#define filesig_v1_dnsize _u2.filesig_v1.filesig_v1_dnsize
+#define filesig_v1_sigsize _u2.filesig_v1.filesig_v1_sigsize
+#define filesig_v1_oidsize _u2.filesig_v1.filesig_v1_oidsize
+#define filesig_v1_data _u2.filesig_v1.filesig_v1_data
+
+#define filesig_v3_time _u2.filesig_v3.filesig_v3_time
+#define filesig_v3_dnsize _u2.filesig_v3.filesig_v3_dnsize
+#define filesig_v3_sigsize _u2.filesig_v3.filesig_v3_sigsize
+#define filesig_v3_oidsize _u2.filesig_v3.filesig_v3_oidsize
+#define filesig_v3_data _u2.filesig_v3.filesig_v3_data
+
+#define filesig_ALIGN(s) (((s) + sizeof (uint64_t) - 1) & \
+ (-sizeof (uint64_t)))
+#define filesig_next(ptr) (struct filesig *)((void *)((char *)(ptr) + \
+ filesig_ALIGN((ptr)->filesig_size)))
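+
+/*
+ * For illustration, a traversal sketch (hypothetical caller code;
+ * fsp points to a struct filesignatures):
+ *
+ *	struct filesig *sig = &fsp->filesig_sig;
+ *	for (uint32_t i = 0; i < fsp->filesig_cnt; i++) {
+ *		... dispatch on sig->filesig_version ...
+ *		sig = filesig_next(sig);
+ *	}
+ */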
+
+#define FILESIG_UNKNOWN 0 /* unrecognized version */
+#define FILESIG_VERSION1 1 /* version1, all but sig section */
+#define FILESIG_VERSION2 2 /* version1 format, SHF_ALLOC only */
+#define FILESIG_VERSION3 3 /* version3, all but sig section */
+#define FILESIG_VERSION4 4 /* version3 format, SHF_ALLOC only */
+
+#define _PATH_KCFD_DOOR "/etc/svc/volatile/kcfd_door"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_CRYPTO_ELFSIGN_H */
diff --git a/zfs/module/icp/include/sys/crypto/impl.h b/zfs/module/icp/include/sys/crypto/impl.h
new file mode 100644
index 000000000000..258cb5fedcd0
--- /dev/null
+++ b/zfs/module/icp/include/sys/crypto/impl.h
@@ -0,0 +1,1363 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_CRYPTO_IMPL_H
+#define _SYS_CRYPTO_IMPL_H
+
+/*
+ * Kernel Cryptographic Framework private implementation definitions.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/api.h>
+#include <sys/crypto/spi.h>
+#include <sys/crypto/ioctl.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define KCF_MODULE "kcf"
+
+/*
+ * Prefix convention: structures internal to the kernel cryptographic
+ * framework start with 'kcf_'. Exposed structures start with 'crypto_'.
+ */
+
+/* Provider stats. Not protected. */
+typedef struct kcf_prov_stats {
+ kstat_named_t ps_ops_total;
+ kstat_named_t ps_ops_passed;
+ kstat_named_t ps_ops_failed;
+ kstat_named_t ps_ops_busy_rval;
+} kcf_prov_stats_t;
+
+/* Various kcf stats. Not protected. */
+typedef struct kcf_stats {
+ kstat_named_t ks_thrs_in_pool;
+ kstat_named_t ks_idle_thrs;
+ kstat_named_t ks_minthrs;
+ kstat_named_t ks_maxthrs;
+ kstat_named_t ks_swq_njobs;
+ kstat_named_t ks_swq_maxjobs;
+ kstat_named_t ks_taskq_threads;
+ kstat_named_t ks_taskq_minalloc;
+ kstat_named_t ks_taskq_maxalloc;
+} kcf_stats_t;
+
+/*
+ * Keeps all the information the scheduler needs about
+ * this provider.
+ */
+typedef struct kcf_sched_info {
+ /* The number of operations dispatched. */
+ uint64_t ks_ndispatches;
+
+ /* The number of operations that failed. */
+ uint64_t ks_nfails;
+
+ /* The number of operations that returned CRYPTO_BUSY. */
+ uint64_t ks_nbusy_rval;
+
+ /* taskq used to dispatch crypto requests */
+ taskq_t *ks_taskq;
+} kcf_sched_info_t;
+
+/*
+ * pd_irefcnt approximates the number of inflight requests to the
+ * provider. Though we increment this counter during registration for
+ * other purposes, that base value is mostly the same across all
+ * providers. So, it is a good measure of the load on a provider when
+ * it is not in a busy state. Once a provider notifies that it is busy,
+ * requests back up in the taskq, so we use tq_nalloc in that case,
+ * which gives the number of task entries in the task queue. Note that
+ * we do not
+ * acquire any locks here as it is not critical to get the exact number
+ * and the lock contention may be too costly for this code path.
+ */
+#define KCF_PROV_LOAD(pd) ((pd)->pd_state != KCF_PROV_BUSY ? \
+ (pd)->pd_irefcnt : (pd)->pd_sched_info.ks_taskq->tq_nalloc)
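+
+/*
+ * For illustration, a dispatcher could prefer the less loaded of two
+ * usable providers with a sketch like (hypothetical caller code):
+ *
+ *	pd = (KCF_PROV_LOAD(pd1) <= KCF_PROV_LOAD(pd2)) ? pd1 : pd2;
+ */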
+
+#define KCF_PROV_INCRSTATS(pd, error) { \
+ (pd)->pd_sched_info.ks_ndispatches++; \
+ if (error == CRYPTO_BUSY) \
+ (pd)->pd_sched_info.ks_nbusy_rval++; \
+ else if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED) \
+ (pd)->pd_sched_info.ks_nfails++; \
+}
+
+
+/*
+ * The following two macros should be
+ * #define KCF_OPS_CLASSSIZE (KCF_LAST_OPSCLASS - KCF_FIRST_OPSCLASS + 2)
+ * #define KCF_MAXMECHTAB KCF_MAXCIPHER
+ *
+ * However, doing that would involve reorganizing the header file a bit.
+ * When impl.h is broken up (bug# 4703218), this will be done. For now,
+ * we hardcode these values.
+ */
+#define KCF_OPS_CLASSSIZE 8
+#define KCF_MAXMECHTAB 32
+
+/*
+ * Valid values for the state of a provider. The order of
+ * the elements is important.
+ *
+ * Routines which get a provider or the list of providers
+ * should pick only those that are either in KCF_PROV_READY state
+ * or in KCF_PROV_BUSY state.
+ */
+typedef enum {
+ KCF_PROV_ALLOCATED = 1,
+ KCF_PROV_UNVERIFIED,
+ KCF_PROV_VERIFICATION_FAILED,
+ /*
+	 * state < KCF_PROV_READY means the provider cannot
+ * be used at all.
+ */
+ KCF_PROV_READY,
+ KCF_PROV_BUSY,
+ /*
+	 * state > KCF_PROV_BUSY means the provider cannot
+ * be used for new requests.
+ */
+ KCF_PROV_FAILED,
+ /*
+ * Threads setting the following two states should do so only
+ * if the current state < KCF_PROV_DISABLED.
+ */
+ KCF_PROV_DISABLED,
+ KCF_PROV_REMOVED,
+ KCF_PROV_FREED
+} kcf_prov_state_t;
+
+#define KCF_IS_PROV_UNVERIFIED(pd) ((pd)->pd_state == KCF_PROV_UNVERIFIED)
+#define KCF_IS_PROV_USABLE(pd) ((pd)->pd_state == KCF_PROV_READY || \
+ (pd)->pd_state == KCF_PROV_BUSY)
+#define KCF_IS_PROV_REMOVED(pd) ((pd)->pd_state >= KCF_PROV_REMOVED)
+
+/* Internal flags valid for pd_flags field */
+#define KCF_PROV_RESTRICTED 0x40000000
+#define KCF_LPROV_MEMBER 0x80000000 /* is member of a logical provider */
+
+/*
+ * A provider descriptor structure. There is one such structure per
+ * provider. It is allocated and initialized at registration time and
+ * freed when the provider unregisters.
+ *
+ * pd_prov_type: Provider type, hardware or software
+ * pd_sid: Session ID of the provider used by kernel clients.
+ * This is valid only for session-oriented providers.
+ * pd_refcnt: Reference counter to this provider descriptor
+ * pd_irefcnt: References held by the framework internal structs
+ * pd_lock: lock protects pd_state and pd_provider_list
+ * pd_state: State value of the provider
+ * pd_provider_list: Used to cross-reference logical providers and their
+ * members. Not used for software providers.
+ * pd_resume_cv: cv to wait for state to change from KCF_PROV_BUSY
+ * pd_prov_handle: Provider handle specified by provider
+ * pd_ops_vector: The ops vector specified by Provider
+ * pd_mech_indx: Lookup table which maps a core framework mechanism
+ * number to an index in pd_mechanisms array
+ * pd_mechanisms: Array of mechanisms supported by the provider, specified
+ * by the provider during registration
+ * pd_sched_info: Scheduling information associated with the provider
+ * pd_mech_list_count: The number of entries in pi_mechanisms, specified
+ * by the provider during registration
+ * pd_name: Device name or module name
+ * pd_instance: Device instance
+ * pd_module_id: Module ID returned by modload
+ * pd_mctlp: Pointer to modctl structure for this provider
+ * pd_remove_cv: cv to wait on while the provider queue drains
+ * pd_description: Provider description string
+ * pd_flags:	bitwise OR of pi_flags from crypto_provider_info_t
+ *		and other internal flags defined above.
+ * pd_hash_limit: Maximum data size that hash mechanisms of this provider
+ *		can support.
+ * pd_kcf_prov_handle: KCF-private handle assigned by KCF
+ * pd_prov_id: Identification # assigned by KCF to provider
+ * pd_kstat: kstat associated with the provider
+ * pd_ks_data: kstat data
+ */
+typedef struct kcf_provider_desc {
+ crypto_provider_type_t pd_prov_type;
+ crypto_session_id_t pd_sid;
+ uint_t pd_refcnt;
+ uint_t pd_irefcnt;
+ kmutex_t pd_lock;
+ kcf_prov_state_t pd_state;
+ struct kcf_provider_list *pd_provider_list;
+ kcondvar_t pd_resume_cv;
+ crypto_provider_handle_t pd_prov_handle;
+ crypto_ops_t *pd_ops_vector;
+ ushort_t pd_mech_indx[KCF_OPS_CLASSSIZE]\
+ [KCF_MAXMECHTAB];
+ crypto_mech_info_t *pd_mechanisms;
+ kcf_sched_info_t pd_sched_info;
+ uint_t pd_mech_list_count;
+ // char *pd_name;
+ // uint_t pd_instance;
+ // int pd_module_id;
+ // struct modctl *pd_mctlp;
+ kcondvar_t pd_remove_cv;
+ char *pd_description;
+ uint_t pd_flags;
+ uint_t pd_hash_limit;
+ crypto_kcf_provider_handle_t pd_kcf_prov_handle;
+ crypto_provider_id_t pd_prov_id;
+ kstat_t *pd_kstat;
+ kcf_prov_stats_t pd_ks_data;
+} kcf_provider_desc_t;
+
+/* useful for making a list of providers */
+typedef struct kcf_provider_list {
+ struct kcf_provider_list *pl_next;
+ struct kcf_provider_desc *pl_provider;
+} kcf_provider_list_t;
+
+/* atomic operations on Linux implicitly form a memory barrier */
+#define membar_exit()
+
+/*
+ * If a component has a reference to a kcf_provider_desc_t,
+ * it REFHOLD()s. A new provider descriptor which is referenced only
+ * by the providers table has a reference counter of one.
+ */
+#define KCF_PROV_REFHOLD(desc) { \
+ atomic_add_32(&(desc)->pd_refcnt, 1); \
+ ASSERT((desc)->pd_refcnt != 0); \
+}
+
+#define KCF_PROV_IREFHOLD(desc) { \
+ atomic_add_32(&(desc)->pd_irefcnt, 1); \
+ ASSERT((desc)->pd_irefcnt != 0); \
+}
+
+#define KCF_PROV_IREFRELE(desc) { \
+ ASSERT((desc)->pd_irefcnt != 0); \
+ membar_exit(); \
+ if (atomic_add_32_nv(&(desc)->pd_irefcnt, -1) == 0) { \
+ cv_broadcast(&(desc)->pd_remove_cv); \
+ } \
+}
+
+#define KCF_PROV_REFHELD(desc) ((desc)->pd_refcnt >= 1)
+
+#define KCF_PROV_REFRELE(desc) { \
+ ASSERT((desc)->pd_refcnt != 0); \
+ membar_exit(); \
+ if (atomic_add_32_nv(&(desc)->pd_refcnt, -1) == 0) { \
+ kcf_provider_zero_refcnt((desc)); \
+ } \
+}
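+
+/*
+ * For illustration, the hold/use/release discipline is (hypothetical
+ * caller code):
+ *
+ *	KCF_PROV_REFHOLD(pd);
+ *	if (KCF_IS_PROV_USABLE(pd))
+ *		... dispatch a request to pd ...
+ *	KCF_PROV_REFRELE(pd);
+ */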
+
+
+/* list of crypto_mech_info_t valid as the second mech in a dual operation */
+
+typedef struct crypto_mech_info_list {
+ struct crypto_mech_info_list *ml_next;
+ crypto_mech_type_t ml_kcf_mechid; /* KCF's id */
+ crypto_mech_info_t ml_mech_info;
+} crypto_mech_info_list_t;
+
+/*
+ * An element in a mechanism provider descriptors chain.
+ * The kcf_prov_mech_desc_t is duplicated in every chain the provider belongs
+ * to. This trades a little extra memory for less mutex spinning when
+ * accessing the common provider field.
+ */
+
+typedef struct kcf_prov_mech_desc {
+ struct kcf_mech_entry *pm_me; /* Back to the head */
+ struct kcf_prov_mech_desc *pm_next; /* Next in the chain */
+ crypto_mech_info_t pm_mech_info; /* Provider mech info */
+ crypto_mech_info_list_t *pm_mi_list; /* list for duals */
+ kcf_provider_desc_t *pm_prov_desc; /* Common desc. */
+} kcf_prov_mech_desc_t;
+
+/* and the notation shortcuts ... */
+#define pm_provider_type pm_prov_desc.pd_provider_type
+#define pm_provider_handle pm_prov_desc.pd_provider_handle
+#define pm_ops_vector pm_prov_desc.pd_ops_vector
+
+/*
+ * A mechanism entry in an xxx_mech_tab[]. me_pad was deemed
+ * to be unnecessary and removed.
+ */
+typedef struct kcf_mech_entry {
+ crypto_mech_name_t me_name; /* mechanism name */
+ crypto_mech_type_t me_mechid; /* Internal id for mechanism */
+ kmutex_t me_mutex; /* access protection */
+ kcf_prov_mech_desc_t *me_hw_prov_chain; /* list of HW providers */
+ kcf_prov_mech_desc_t *me_sw_prov; /* SW provider */
+ /*
+ * Number of HW providers in the chain. There is only one
+ * SW provider. So, we need only a count of HW providers.
+ */
+ int me_num_hwprov;
+ /*
+ * When a SW provider is present, this is the generation number that
+ * ensures no objects from old SW providers are used in the new one
+ */
+ uint32_t me_gen_swprov;
+ /*
+ * threshold for using hardware providers for this mech
+ */
+ size_t me_threshold;
+} kcf_mech_entry_t;
+
+/*
+ * A policy descriptor structure. It is allocated and initialized
+ * when administrative ioctls load disabled mechanisms.
+ *
+ * pd_prov_type: Provider type, hardware or software
+ * pd_name: Device name or module name.
+ * pd_instance: Device instance.
+ * pd_refcnt: Reference counter for this policy descriptor
+ * pd_mutex: Protects array and count of disabled mechanisms.
+ * pd_disabled_count: Count of disabled mechanisms.
+ * pd_disabled_mechs: Array of disabled mechanisms.
+ */
+typedef struct kcf_policy_desc {
+ crypto_provider_type_t pd_prov_type;
+ char *pd_name;
+ uint_t pd_instance;
+ uint_t pd_refcnt;
+ kmutex_t pd_mutex;
+ uint_t pd_disabled_count;
+ crypto_mech_name_t *pd_disabled_mechs;
+} kcf_policy_desc_t;
+
+/*
+ * If a component has a reference to a kcf_policy_desc_t,
+ * it REFHOLD()s. A new policy descriptor which is referenced only
+ * by the policy table has a reference count of one.
+ */
+#define KCF_POLICY_REFHOLD(desc) { \
+ atomic_add_32(&(desc)->pd_refcnt, 1); \
+ ASSERT((desc)->pd_refcnt != 0); \
+}
+
+/*
+ * Releases a reference to a policy descriptor. When the last
+ * reference is released, the descriptor is freed.
+ */
+#define KCF_POLICY_REFRELE(desc) { \
+ ASSERT((desc)->pd_refcnt != 0); \
+ membar_exit(); \
+ if (atomic_add_32_nv(&(desc)->pd_refcnt, -1) == 0) \
+ kcf_policy_free_desc(desc); \
+}
+
+/*
+ * This entry stores the name of a software module and its
+ * mechanisms. The mechanisms are 'hints' that are used to
+ * trigger loading of the module.
+ */
+typedef struct kcf_soft_conf_entry {
+ struct kcf_soft_conf_entry *ce_next;
+ char *ce_name;
+ crypto_mech_name_t *ce_mechs;
+ uint_t ce_count;
+} kcf_soft_conf_entry_t;
+
+extern kmutex_t soft_config_mutex;
+extern kcf_soft_conf_entry_t *soft_config_list;
+
+/*
+ * Global tables. The sizes are from the predefined PKCS#11 v2.20 mechanisms,
+ * with a margin of a few extra empty entries.
+ */
+
+#define KCF_MAXDIGEST 16 /* Digests */
+#define KCF_MAXCIPHER 64 /* Ciphers */
+#define KCF_MAXMAC 40 /* Message authentication codes */
+#define KCF_MAXSIGN 24 /* Sign/Verify */
+#define KCF_MAXKEYOPS 116 /* Key generation and derivation */
+#define KCF_MAXMISC 16 /* Others ... */
+
+#define	KCF_MAXMECHS	(KCF_MAXDIGEST + KCF_MAXCIPHER + KCF_MAXMAC + \
+			KCF_MAXSIGN + KCF_MAXKEYOPS + \
+			KCF_MAXMISC)
+
+extern kcf_mech_entry_t kcf_digest_mechs_tab[];
+extern kcf_mech_entry_t kcf_cipher_mechs_tab[];
+extern kcf_mech_entry_t kcf_mac_mechs_tab[];
+extern kcf_mech_entry_t kcf_sign_mechs_tab[];
+extern kcf_mech_entry_t kcf_keyops_mechs_tab[];
+extern kcf_mech_entry_t kcf_misc_mechs_tab[];
+
+extern kmutex_t kcf_mech_tabs_lock;
+
+typedef enum {
+ KCF_DIGEST_CLASS = 1,
+ KCF_CIPHER_CLASS,
+ KCF_MAC_CLASS,
+ KCF_SIGN_CLASS,
+ KCF_KEYOPS_CLASS,
+ KCF_MISC_CLASS
+} kcf_ops_class_t;
+
+#define KCF_FIRST_OPSCLASS KCF_DIGEST_CLASS
+#define KCF_LAST_OPSCLASS KCF_MISC_CLASS
+
+/* The table of all the kcf_xxx_mech_tab[]s, indexed by kcf_ops_class */
+
+typedef struct kcf_mech_entry_tab {
+ int met_size; /* Size of the met_tab[] */
+ kcf_mech_entry_t *met_tab; /* the table */
+} kcf_mech_entry_tab_t;
+
+extern kcf_mech_entry_tab_t kcf_mech_tabs_tab[];
+
+#define KCF_MECHID(class, index) \
+ (((crypto_mech_type_t)(class) << 32) | (crypto_mech_type_t)(index))
+
+#define KCF_MECH2CLASS(mech_type) ((kcf_ops_class_t)((mech_type) >> 32))
+
+#define KCF_MECH2INDEX(mech_type) ((int)(mech_type))
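+
+/*
+ * For example, KCF_MECHID(KCF_CIPHER_CLASS, 5) packs the class into
+ * the upper 32 bits and the index into the lower 32;
+ * KCF_MECH2CLASS() and KCF_MECH2INDEX() recover KCF_CIPHER_CLASS
+ * and 5 respectively.
+ */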
+
+#define KCF_TO_PROV_MECH_INDX(pd, mech_type) \
+ ((pd)->pd_mech_indx[KCF_MECH2CLASS(mech_type)] \
+ [KCF_MECH2INDEX(mech_type)])
+
+#define KCF_TO_PROV_MECHINFO(pd, mech_type) \
+ ((pd)->pd_mechanisms[KCF_TO_PROV_MECH_INDX(pd, mech_type)])
+
+#define KCF_TO_PROV_MECHNUM(pd, mech_type) \
+ (KCF_TO_PROV_MECHINFO(pd, mech_type).cm_mech_number)
+
+#define KCF_CAN_SHARE_OPSTATE(pd, mech_type) \
+ ((KCF_TO_PROV_MECHINFO(pd, mech_type).cm_mech_flags) & \
+ CRYPTO_CAN_SHARE_OPSTATE)
+
+/* ps_refcnt is protected by cm_lock in the crypto_minor structure */
+typedef struct crypto_provider_session {
+ struct crypto_provider_session *ps_next;
+ crypto_session_id_t ps_session;
+ kcf_provider_desc_t *ps_provider;
+ kcf_provider_desc_t *ps_real_provider;
+ uint_t ps_refcnt;
+} crypto_provider_session_t;
+
+typedef struct crypto_session_data {
+ kmutex_t sd_lock;
+ kcondvar_t sd_cv;
+ uint32_t sd_flags;
+ int sd_pre_approved_amount;
+ crypto_ctx_t *sd_digest_ctx;
+ crypto_ctx_t *sd_encr_ctx;
+ crypto_ctx_t *sd_decr_ctx;
+ crypto_ctx_t *sd_sign_ctx;
+ crypto_ctx_t *sd_verify_ctx;
+ crypto_ctx_t *sd_sign_recover_ctx;
+ crypto_ctx_t *sd_verify_recover_ctx;
+ kcf_provider_desc_t *sd_provider;
+ void *sd_find_init_cookie;
+ crypto_provider_session_t *sd_provider_session;
+} crypto_session_data_t;
+
+#define CRYPTO_SESSION_IN_USE 0x00000001
+#define CRYPTO_SESSION_IS_BUSY 0x00000002
+#define CRYPTO_SESSION_IS_CLOSED 0x00000004
+
+#define KCF_MAX_PIN_LEN 1024
+
+/*
+ * Per-minor info.
+ *
+ * cm_lock protects everything in this structure except for cm_refcnt.
+ */
+typedef struct crypto_minor {
+ uint_t cm_refcnt;
+ kmutex_t cm_lock;
+ kcondvar_t cm_cv;
+ crypto_session_data_t **cm_session_table;
+ uint_t cm_session_table_count;
+ kcf_provider_desc_t **cm_provider_array;
+ uint_t cm_provider_count;
+ crypto_provider_session_t *cm_provider_session;
+} crypto_minor_t;
+
+/*
+ * Return codes for internal functions
+ */
+#define KCF_SUCCESS 0x0 /* Successful call */
+#define KCF_INVALID_MECH_NUMBER 0x1 /* invalid mechanism number */
+#define KCF_INVALID_MECH_NAME 0x2 /* invalid mechanism name */
+#define KCF_INVALID_MECH_CLASS 0x3 /* invalid mechanism class */
+#define KCF_MECH_TAB_FULL 0x4 /* Need more room in the mech tabs. */
+#define KCF_INVALID_INDX ((ushort_t)-1)
+
+/*
+ * KCF internal mechanism and function group for tracking RNG providers.
+ */
+#define SUN_RANDOM "random"
+#define CRYPTO_FG_RANDOM 0x80000000 /* generate_random() */
+
+/*
+ * Wrappers for ops vectors. In the wrapper definitions below, the pd
+ * argument always corresponds to a pointer to a provider descriptor
+ * of type kcf_provider_desc_t.
+ */
+
+#define KCF_PROV_CONTROL_OPS(pd) ((pd)->pd_ops_vector->co_control_ops)
+#define KCF_PROV_CTX_OPS(pd) ((pd)->pd_ops_vector->co_ctx_ops)
+#define KCF_PROV_DIGEST_OPS(pd) ((pd)->pd_ops_vector->co_digest_ops)
+#define KCF_PROV_CIPHER_OPS(pd) ((pd)->pd_ops_vector->co_cipher_ops)
+#define KCF_PROV_MAC_OPS(pd) ((pd)->pd_ops_vector->co_mac_ops)
+#define KCF_PROV_SIGN_OPS(pd) ((pd)->pd_ops_vector->co_sign_ops)
+#define KCF_PROV_VERIFY_OPS(pd) ((pd)->pd_ops_vector->co_verify_ops)
+#define KCF_PROV_DUAL_OPS(pd) ((pd)->pd_ops_vector->co_dual_ops)
+#define KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) \
+ ((pd)->pd_ops_vector->co_dual_cipher_mac_ops)
+#define KCF_PROV_RANDOM_OPS(pd) ((pd)->pd_ops_vector->co_random_ops)
+#define KCF_PROV_SESSION_OPS(pd) ((pd)->pd_ops_vector->co_session_ops)
+#define KCF_PROV_OBJECT_OPS(pd) ((pd)->pd_ops_vector->co_object_ops)
+#define KCF_PROV_KEY_OPS(pd) ((pd)->pd_ops_vector->co_key_ops)
+#define KCF_PROV_PROVIDER_OPS(pd) ((pd)->pd_ops_vector->co_provider_ops)
+#define KCF_PROV_MECH_OPS(pd) ((pd)->pd_ops_vector->co_mech_ops)
+#define KCF_PROV_NOSTORE_KEY_OPS(pd) \
+ ((pd)->pd_ops_vector->co_nostore_key_ops)
+
+/*
+ * Wrappers for crypto_control_ops(9S) entry points.
+ */
+
+#define KCF_PROV_STATUS(pd, status) ( \
+ (KCF_PROV_CONTROL_OPS(pd) && \
+ KCF_PROV_CONTROL_OPS(pd)->provider_status) ? \
+ KCF_PROV_CONTROL_OPS(pd)->provider_status( \
+ (pd)->pd_prov_handle, status) : \
+ CRYPTO_NOT_SUPPORTED)
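+
+/*
+ * For illustration, the wrappers read like ordinary calls; a sketch
+ * (hypothetical caller code):
+ *
+ *	uint_t status;
+ *	int rv = KCF_PROV_STATUS(pd, &status);
+ *	if (rv == CRYPTO_NOT_SUPPORTED)
+ *		... the provider supplies no control ops ...
+ */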
+
+/*
+ * Wrappers for crypto_ctx_ops(9S) entry points.
+ */
+
+#define KCF_PROV_CREATE_CTX_TEMPLATE(pd, mech, key, template, size, req) ( \
+ (KCF_PROV_CTX_OPS(pd) && KCF_PROV_CTX_OPS(pd)->create_ctx_template) ? \
+ KCF_PROV_CTX_OPS(pd)->create_ctx_template( \
+ (pd)->pd_prov_handle, mech, key, template, size, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_FREE_CONTEXT(pd, ctx) ( \
+ (KCF_PROV_CTX_OPS(pd) && KCF_PROV_CTX_OPS(pd)->free_context) ? \
+ KCF_PROV_CTX_OPS(pd)->free_context(ctx) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_COPYIN_MECH(pd, umech, kmech, errorp, mode) ( \
+ (KCF_PROV_MECH_OPS(pd) && KCF_PROV_MECH_OPS(pd)->copyin_mechanism) ? \
+ KCF_PROV_MECH_OPS(pd)->copyin_mechanism( \
+ (pd)->pd_prov_handle, umech, kmech, errorp, mode) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_COPYOUT_MECH(pd, kmech, umech, errorp, mode) ( \
+ (KCF_PROV_MECH_OPS(pd) && KCF_PROV_MECH_OPS(pd)->copyout_mechanism) ? \
+ KCF_PROV_MECH_OPS(pd)->copyout_mechanism( \
+ (pd)->pd_prov_handle, kmech, umech, errorp, mode) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_FREE_MECH(pd, prov_mech) ( \
+ (KCF_PROV_MECH_OPS(pd) && KCF_PROV_MECH_OPS(pd)->free_mechanism) ? \
+ KCF_PROV_MECH_OPS(pd)->free_mechanism( \
+ (pd)->pd_prov_handle, prov_mech) : CRYPTO_NOT_SUPPORTED)
+
+/*
+ * Wrappers for crypto_digest_ops(9S) entry points.
+ */
+
+#define KCF_PROV_DIGEST_INIT(pd, ctx, mech, req) ( \
+ (KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest_init) ? \
+ KCF_PROV_DIGEST_OPS(pd)->digest_init(ctx, mech, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+/*
+ * The _ (underscore) in _digest is needed to avoid replacing the
+ * function digest().
+ */
+#define KCF_PROV_DIGEST(pd, ctx, data, _digest, req) ( \
+ (KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest) ? \
+ KCF_PROV_DIGEST_OPS(pd)->digest(ctx, data, _digest, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_DIGEST_UPDATE(pd, ctx, data, req) ( \
+ (KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest_update) ? \
+ KCF_PROV_DIGEST_OPS(pd)->digest_update(ctx, data, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_DIGEST_KEY(pd, ctx, key, req) ( \
+ (KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest_key) ? \
+ KCF_PROV_DIGEST_OPS(pd)->digest_key(ctx, key, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_DIGEST_FINAL(pd, ctx, digest, req) ( \
+ (KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest_final) ? \
+ KCF_PROV_DIGEST_OPS(pd)->digest_final(ctx, digest, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_DIGEST_ATOMIC(pd, session, mech, data, digest, req) ( \
+ (KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest_atomic) ? \
+ KCF_PROV_DIGEST_OPS(pd)->digest_atomic( \
+ (pd)->pd_prov_handle, session, mech, data, digest, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+/*
+ * Wrappers for crypto_cipher_ops(9S) entry points.
+ */
+
+#define KCF_PROV_ENCRYPT_INIT(pd, ctx, mech, key, template, req) ( \
+ (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->encrypt_init) ? \
+ KCF_PROV_CIPHER_OPS(pd)->encrypt_init(ctx, mech, key, template, \
+ req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_ENCRYPT(pd, ctx, plaintext, ciphertext, req) ( \
+ (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->encrypt) ? \
+ KCF_PROV_CIPHER_OPS(pd)->encrypt(ctx, plaintext, ciphertext, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_ENCRYPT_UPDATE(pd, ctx, plaintext, ciphertext, req) ( \
+ (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->encrypt_update) ? \
+ KCF_PROV_CIPHER_OPS(pd)->encrypt_update(ctx, plaintext, \
+ ciphertext, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_ENCRYPT_FINAL(pd, ctx, ciphertext, req) ( \
+ (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->encrypt_final) ? \
+ KCF_PROV_CIPHER_OPS(pd)->encrypt_final(ctx, ciphertext, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_ENCRYPT_ATOMIC(pd, session, mech, key, plaintext, ciphertext, \
+ template, req) ( \
+ (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->encrypt_atomic) ? \
+ KCF_PROV_CIPHER_OPS(pd)->encrypt_atomic( \
+ (pd)->pd_prov_handle, session, mech, key, plaintext, ciphertext, \
+ template, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_DECRYPT_INIT(pd, ctx, mech, key, template, req) ( \
+ (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->decrypt_init) ? \
+ KCF_PROV_CIPHER_OPS(pd)->decrypt_init(ctx, mech, key, template, \
+ req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_DECRYPT(pd, ctx, ciphertext, plaintext, req) ( \
+ (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->decrypt) ? \
+ KCF_PROV_CIPHER_OPS(pd)->decrypt(ctx, ciphertext, plaintext, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_DECRYPT_UPDATE(pd, ctx, ciphertext, plaintext, req) ( \
+ (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->decrypt_update) ? \
+ KCF_PROV_CIPHER_OPS(pd)->decrypt_update(ctx, ciphertext, \
+ plaintext, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_DECRYPT_FINAL(pd, ctx, plaintext, req) ( \
+ (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->decrypt_final) ? \
+ KCF_PROV_CIPHER_OPS(pd)->decrypt_final(ctx, plaintext, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_DECRYPT_ATOMIC(pd, session, mech, key, ciphertext, plaintext, \
+ template, req) ( \
+ (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->decrypt_atomic) ? \
+ KCF_PROV_CIPHER_OPS(pd)->decrypt_atomic( \
+ (pd)->pd_prov_handle, session, mech, key, ciphertext, plaintext, \
+ template, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+/*
+ * Wrappers for crypto_mac_ops(9S) entry points.
+ */
+
+#define KCF_PROV_MAC_INIT(pd, ctx, mech, key, template, req) ( \
+ (KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac_init) ? \
+ KCF_PROV_MAC_OPS(pd)->mac_init(ctx, mech, key, template, req) \
+ : CRYPTO_NOT_SUPPORTED)
+
+/*
+ * The _ (underscore) in _mac is needed to avoid replacing the
+ * function mac().
+ */
+#define KCF_PROV_MAC(pd, ctx, data, _mac, req) ( \
+ (KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac) ? \
+ KCF_PROV_MAC_OPS(pd)->mac(ctx, data, _mac, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_MAC_UPDATE(pd, ctx, data, req) ( \
+ (KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac_update) ? \
+ KCF_PROV_MAC_OPS(pd)->mac_update(ctx, data, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_MAC_FINAL(pd, ctx, mac, req) ( \
+ (KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac_final) ? \
+ KCF_PROV_MAC_OPS(pd)->mac_final(ctx, mac, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_MAC_ATOMIC(pd, session, mech, key, data, mac, template, \
+ req) ( \
+ (KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac_atomic) ? \
+ KCF_PROV_MAC_OPS(pd)->mac_atomic( \
+ (pd)->pd_prov_handle, session, mech, key, data, mac, template, \
+ req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_MAC_VERIFY_ATOMIC(pd, session, mech, key, data, mac, \
+ template, req) ( \
+ (KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac_verify_atomic) ? \
+ KCF_PROV_MAC_OPS(pd)->mac_verify_atomic( \
+ (pd)->pd_prov_handle, session, mech, key, data, mac, template, \
+ req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+/*
+ * Wrappers for crypto_sign_ops(9S) entry points.
+ */
+
+#define KCF_PROV_SIGN_INIT(pd, ctx, mech, key, template, req) ( \
+ (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_init) ? \
+ KCF_PROV_SIGN_OPS(pd)->sign_init( \
+ ctx, mech, key, template, req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_SIGN(pd, ctx, data, sig, req) ( \
+ (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign) ? \
+ KCF_PROV_SIGN_OPS(pd)->sign(ctx, data, sig, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_SIGN_UPDATE(pd, ctx, data, req) ( \
+ (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_update) ? \
+ KCF_PROV_SIGN_OPS(pd)->sign_update(ctx, data, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_SIGN_FINAL(pd, ctx, sig, req) ( \
+ (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_final) ? \
+ KCF_PROV_SIGN_OPS(pd)->sign_final(ctx, sig, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_SIGN_ATOMIC(pd, session, mech, key, data, template, \
+ sig, req) ( \
+ (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_atomic) ? \
+ KCF_PROV_SIGN_OPS(pd)->sign_atomic( \
+ (pd)->pd_prov_handle, session, mech, key, data, sig, template, \
+ req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_SIGN_RECOVER_INIT(pd, ctx, mech, key, template, \
+ req) ( \
+ (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_recover_init) ? \
+ KCF_PROV_SIGN_OPS(pd)->sign_recover_init(ctx, mech, key, template, \
+ req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_SIGN_RECOVER(pd, ctx, data, sig, req) ( \
+ (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_recover) ? \
+ KCF_PROV_SIGN_OPS(pd)->sign_recover(ctx, data, sig, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_SIGN_RECOVER_ATOMIC(pd, session, mech, key, data, template, \
+ sig, req) ( \
+ (KCF_PROV_SIGN_OPS(pd) && \
+ KCF_PROV_SIGN_OPS(pd)->sign_recover_atomic) ? \
+ KCF_PROV_SIGN_OPS(pd)->sign_recover_atomic( \
+ (pd)->pd_prov_handle, session, mech, key, data, sig, template, \
+ req) : CRYPTO_NOT_SUPPORTED)
+
+/*
+ * Wrappers for crypto_verify_ops(9S) entry points.
+ */
+
+#define KCF_PROV_VERIFY_INIT(pd, ctx, mech, key, template, req) ( \
+ (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->verify_init) ? \
+ KCF_PROV_VERIFY_OPS(pd)->verify_init(ctx, mech, key, template, \
+ req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_VERIFY(pd, ctx, data, sig, req) ( \
+ (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->do_verify) ? \
+ KCF_PROV_VERIFY_OPS(pd)->do_verify(ctx, data, sig, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_VERIFY_UPDATE(pd, ctx, data, req) ( \
+ (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->verify_update) ? \
+ KCF_PROV_VERIFY_OPS(pd)->verify_update(ctx, data, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_VERIFY_FINAL(pd, ctx, sig, req) ( \
+ (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->verify_final) ? \
+ KCF_PROV_VERIFY_OPS(pd)->verify_final(ctx, sig, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_VERIFY_ATOMIC(pd, session, mech, key, data, template, sig, \
+ req) ( \
+ (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->verify_atomic) ? \
+ KCF_PROV_VERIFY_OPS(pd)->verify_atomic( \
+ (pd)->pd_prov_handle, session, mech, key, data, sig, template, \
+ req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_VERIFY_RECOVER_INIT(pd, ctx, mech, key, template, \
+ req) ( \
+ (KCF_PROV_VERIFY_OPS(pd) && \
+ KCF_PROV_VERIFY_OPS(pd)->verify_recover_init) ? \
+ KCF_PROV_VERIFY_OPS(pd)->verify_recover_init(ctx, mech, key, \
+ template, req) : CRYPTO_NOT_SUPPORTED)
+
+/* verify_recover() CSPI routine has a different argument order than verify() */
+#define KCF_PROV_VERIFY_RECOVER(pd, ctx, sig, data, req) ( \
+ (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->verify_recover) ? \
+ KCF_PROV_VERIFY_OPS(pd)->verify_recover(ctx, sig, data, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+/*
+ * verify_recover_atomic() CSPI routine has a different argument order
+ * than verify_atomic().
+ */
+#define KCF_PROV_VERIFY_RECOVER_ATOMIC(pd, session, mech, key, sig, \
+ template, data, req) ( \
+ (KCF_PROV_VERIFY_OPS(pd) && \
+ KCF_PROV_VERIFY_OPS(pd)->verify_recover_atomic) ? \
+ KCF_PROV_VERIFY_OPS(pd)->verify_recover_atomic( \
+ (pd)->pd_prov_handle, session, mech, key, sig, data, template, \
+ req) : CRYPTO_NOT_SUPPORTED)
+
+/*
+ * Wrappers for crypto_dual_ops(9S) entry points.
+ */
+
+#define KCF_PROV_DIGEST_ENCRYPT_UPDATE(digest_ctx, encrypt_ctx, plaintext, \
+ ciphertext, req) ( \
+ (KCF_PROV_DUAL_OPS(pd) && \
+ KCF_PROV_DUAL_OPS(pd)->digest_encrypt_update) ? \
+ KCF_PROV_DUAL_OPS(pd)->digest_encrypt_update( \
+ digest_ctx, encrypt_ctx, plaintext, ciphertext, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_DECRYPT_DIGEST_UPDATE(decrypt_ctx, digest_ctx, ciphertext, \
+ plaintext, req) ( \
+ (KCF_PROV_DUAL_OPS(pd) && \
+ KCF_PROV_DUAL_OPS(pd)->decrypt_digest_update) ? \
+ KCF_PROV_DUAL_OPS(pd)->decrypt_digest_update( \
+ decrypt_ctx, digest_ctx, ciphertext, plaintext, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_SIGN_ENCRYPT_UPDATE(sign_ctx, encrypt_ctx, plaintext, \
+ ciphertext, req) ( \
+ (KCF_PROV_DUAL_OPS(pd) && \
+ KCF_PROV_DUAL_OPS(pd)->sign_encrypt_update) ? \
+ KCF_PROV_DUAL_OPS(pd)->sign_encrypt_update( \
+ sign_ctx, encrypt_ctx, plaintext, ciphertext, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_DECRYPT_VERIFY_UPDATE(decrypt_ctx, verify_ctx, ciphertext, \
+ plaintext, req) ( \
+ (KCF_PROV_DUAL_OPS(pd) && \
+ KCF_PROV_DUAL_OPS(pd)->decrypt_verify_update) ? \
+ KCF_PROV_DUAL_OPS(pd)->decrypt_verify_update( \
+ decrypt_ctx, verify_ctx, ciphertext, plaintext, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+/*
+ * Wrappers for crypto_dual_cipher_mac_ops(9S) entry points.
+ */
+
+#define KCF_PROV_ENCRYPT_MAC_INIT(pd, ctx, encr_mech, encr_key, mac_mech, \
+ mac_key, encr_ctx_template, mac_ctx_template, req) ( \
+ (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_init) ? \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_init( \
+ ctx, encr_mech, encr_key, mac_mech, mac_key, encr_ctx_template, \
+ mac_ctx_template, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_ENCRYPT_MAC(pd, ctx, plaintext, ciphertext, mac, req) ( \
+ (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac) ? \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac( \
+ ctx, plaintext, ciphertext, mac, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_ENCRYPT_MAC_UPDATE(pd, ctx, plaintext, ciphertext, req) ( \
+ (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_update) ? \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_update( \
+ ctx, plaintext, ciphertext, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_ENCRYPT_MAC_FINAL(pd, ctx, ciphertext, mac, req) ( \
+ (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_final) ? \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_final( \
+ ctx, ciphertext, mac, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_ENCRYPT_MAC_ATOMIC(pd, session, encr_mech, encr_key, \
+ mac_mech, mac_key, plaintext, ciphertext, mac, \
+ encr_ctx_template, mac_ctx_template, req) ( \
+ (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_atomic) ? \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_atomic( \
+ (pd)->pd_prov_handle, session, encr_mech, encr_key, \
+ mac_mech, mac_key, plaintext, ciphertext, mac, \
+ encr_ctx_template, mac_ctx_template, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_MAC_DECRYPT_INIT(pd, ctx, mac_mech, mac_key, decr_mech, \
+ decr_key, mac_ctx_template, decr_ctx_template, req) ( \
+ (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_init) ? \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_init( \
+ ctx, mac_mech, mac_key, decr_mech, decr_key, mac_ctx_template, \
+ decr_ctx_template, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_MAC_DECRYPT(pd, ctx, ciphertext, mac, plaintext, req) ( \
+ (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt) ? \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt( \
+ ctx, ciphertext, mac, plaintext, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_MAC_DECRYPT_UPDATE(pd, ctx, ciphertext, plaintext, req) ( \
+ (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_update) ? \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_update( \
+ ctx, ciphertext, plaintext, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_MAC_DECRYPT_FINAL(pd, ctx, mac, plaintext, req) ( \
+ (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_final) ? \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_final( \
+ ctx, mac, plaintext, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_MAC_DECRYPT_ATOMIC(pd, session, mac_mech, mac_key, \
+ decr_mech, decr_key, ciphertext, mac, plaintext, \
+ mac_ctx_template, decr_ctx_template, req) ( \
+ (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_atomic) ? \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_atomic( \
+ (pd)->pd_prov_handle, session, mac_mech, mac_key, \
+ decr_mech, decr_key, ciphertext, mac, plaintext, \
+ mac_ctx_template, decr_ctx_template, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_MAC_VERIFY_DECRYPT_ATOMIC(pd, session, mac_mech, mac_key, \
+ decr_mech, decr_key, ciphertext, mac, plaintext, \
+ mac_ctx_template, decr_ctx_template, req) ( \
+ (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_verify_decrypt_atomic \
+ != NULL) ? \
+ KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_verify_decrypt_atomic( \
+ (pd)->pd_prov_handle, session, mac_mech, mac_key, \
+ decr_mech, decr_key, ciphertext, mac, plaintext, \
+ mac_ctx_template, decr_ctx_template, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+/*
+ * Wrappers for crypto_random_number_ops(9S) entry points.
+ */
+
+#define KCF_PROV_SEED_RANDOM(pd, session, buf, len, est, flags, req) ( \
+ (KCF_PROV_RANDOM_OPS(pd) && KCF_PROV_RANDOM_OPS(pd)->seed_random) ? \
+ KCF_PROV_RANDOM_OPS(pd)->seed_random((pd)->pd_prov_handle, \
+ session, buf, len, est, flags, req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_GENERATE_RANDOM(pd, session, buf, len, req) ( \
+ (KCF_PROV_RANDOM_OPS(pd) && \
+ KCF_PROV_RANDOM_OPS(pd)->generate_random) ? \
+ KCF_PROV_RANDOM_OPS(pd)->generate_random((pd)->pd_prov_handle, \
+ session, buf, len, req) : CRYPTO_NOT_SUPPORTED)
+
+/*
+ * Wrappers for crypto_session_ops(9S) entry points.
+ *
+ * ops_pd is the provider descriptor that supplies the ops_vector.
+ * pd is the descriptor that supplies the provider handle.
+ * Only session open/close needs two handles.
+ */
+
+#define KCF_PROV_SESSION_OPEN(ops_pd, session, req, pd) ( \
+ (KCF_PROV_SESSION_OPS(ops_pd) && \
+ KCF_PROV_SESSION_OPS(ops_pd)->session_open) ? \
+ KCF_PROV_SESSION_OPS(ops_pd)->session_open((pd)->pd_prov_handle, \
+ session, req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_SESSION_CLOSE(ops_pd, session, req, pd) ( \
+ (KCF_PROV_SESSION_OPS(ops_pd) && \
+ KCF_PROV_SESSION_OPS(ops_pd)->session_close) ? \
+ KCF_PROV_SESSION_OPS(ops_pd)->session_close((pd)->pd_prov_handle, \
+ session, req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_SESSION_LOGIN(pd, session, user_type, pin, len, req) ( \
+ (KCF_PROV_SESSION_OPS(pd) && \
+ KCF_PROV_SESSION_OPS(pd)->session_login) ? \
+ KCF_PROV_SESSION_OPS(pd)->session_login((pd)->pd_prov_handle, \
+ session, user_type, pin, len, req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_SESSION_LOGOUT(pd, session, req) ( \
+ (KCF_PROV_SESSION_OPS(pd) && \
+ KCF_PROV_SESSION_OPS(pd)->session_logout) ? \
+ KCF_PROV_SESSION_OPS(pd)->session_logout((pd)->pd_prov_handle, \
+ session, req) : CRYPTO_NOT_SUPPORTED)
+
+/*
+ * Wrappers for crypto_object_ops(9S) entry points.
+ */
+
+#define KCF_PROV_OBJECT_CREATE(pd, session, template, count, object, req) ( \
+ (KCF_PROV_OBJECT_OPS(pd) && KCF_PROV_OBJECT_OPS(pd)->object_create) ? \
+ KCF_PROV_OBJECT_OPS(pd)->object_create((pd)->pd_prov_handle, \
+ session, template, count, object, req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_OBJECT_COPY(pd, session, object, template, count, \
+ new_object, req) ( \
+ (KCF_PROV_OBJECT_OPS(pd) && KCF_PROV_OBJECT_OPS(pd)->object_copy) ? \
+ KCF_PROV_OBJECT_OPS(pd)->object_copy((pd)->pd_prov_handle, \
+ session, object, template, count, new_object, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_OBJECT_DESTROY(pd, session, object, req) ( \
+ (KCF_PROV_OBJECT_OPS(pd) && KCF_PROV_OBJECT_OPS(pd)->object_destroy) ? \
+ KCF_PROV_OBJECT_OPS(pd)->object_destroy((pd)->pd_prov_handle, \
+ session, object, req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_OBJECT_GET_SIZE(pd, session, object, size, req) ( \
+ (KCF_PROV_OBJECT_OPS(pd) && \
+ KCF_PROV_OBJECT_OPS(pd)->object_get_size) ? \
+ KCF_PROV_OBJECT_OPS(pd)->object_get_size((pd)->pd_prov_handle, \
+ session, object, size, req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_OBJECT_GET_ATTRIBUTE_VALUE(pd, session, object, template, \
+ count, req) ( \
+ (KCF_PROV_OBJECT_OPS(pd) && \
+ KCF_PROV_OBJECT_OPS(pd)->object_get_attribute_value) ? \
+ KCF_PROV_OBJECT_OPS(pd)->object_get_attribute_value( \
+ (pd)->pd_prov_handle, session, object, template, count, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_OBJECT_SET_ATTRIBUTE_VALUE(pd, session, object, template, \
+ count, req) ( \
+ (KCF_PROV_OBJECT_OPS(pd) && \
+ KCF_PROV_OBJECT_OPS(pd)->object_set_attribute_value) ? \
+ KCF_PROV_OBJECT_OPS(pd)->object_set_attribute_value( \
+ (pd)->pd_prov_handle, session, object, template, count, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_OBJECT_FIND_INIT(pd, session, template, count, ppriv, \
+ req) ( \
+ (KCF_PROV_OBJECT_OPS(pd) && \
+ KCF_PROV_OBJECT_OPS(pd)->object_find_init) ? \
+ KCF_PROV_OBJECT_OPS(pd)->object_find_init((pd)->pd_prov_handle, \
+ session, template, count, ppriv, req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_OBJECT_FIND(pd, ppriv, objects, max_objects, object_count, \
+ req) ( \
+ (KCF_PROV_OBJECT_OPS(pd) && KCF_PROV_OBJECT_OPS(pd)->object_find) ? \
+ KCF_PROV_OBJECT_OPS(pd)->object_find( \
+ (pd)->pd_prov_handle, ppriv, objects, max_objects, object_count, \
+ req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_OBJECT_FIND_FINAL(pd, ppriv, req) ( \
+ (KCF_PROV_OBJECT_OPS(pd) && \
+ KCF_PROV_OBJECT_OPS(pd)->object_find_final) ? \
+ KCF_PROV_OBJECT_OPS(pd)->object_find_final( \
+ (pd)->pd_prov_handle, ppriv, req) : CRYPTO_NOT_SUPPORTED)
+
+/*
+ * Wrappers for crypto_key_ops(9S) entry points.
+ */
+
+#define KCF_PROV_KEY_GENERATE(pd, session, mech, template, count, object, \
+ req) ( \
+ (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_generate) ? \
+ KCF_PROV_KEY_OPS(pd)->key_generate((pd)->pd_prov_handle, \
+ session, mech, template, count, object, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_KEY_GENERATE_PAIR(pd, session, mech, pub_template, \
+ pub_count, priv_template, priv_count, pub_key, priv_key, req) ( \
+ (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_generate_pair) ? \
+ KCF_PROV_KEY_OPS(pd)->key_generate_pair((pd)->pd_prov_handle, \
+ session, mech, pub_template, pub_count, priv_template, \
+ priv_count, pub_key, priv_key, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_KEY_WRAP(pd, session, mech, wrapping_key, key, wrapped_key, \
+ wrapped_key_len, req) ( \
+ (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_wrap) ? \
+ KCF_PROV_KEY_OPS(pd)->key_wrap((pd)->pd_prov_handle, \
+ session, mech, wrapping_key, key, wrapped_key, wrapped_key_len, \
+ req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_KEY_UNWRAP(pd, session, mech, unwrapping_key, wrapped_key, \
+ wrapped_key_len, template, count, key, req) ( \
+ (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_unwrap) ? \
+ KCF_PROV_KEY_OPS(pd)->key_unwrap((pd)->pd_prov_handle, \
+ session, mech, unwrapping_key, wrapped_key, wrapped_key_len, \
+ template, count, key, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_KEY_DERIVE(pd, session, mech, base_key, template, count, \
+ key, req) ( \
+ (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_derive) ? \
+ KCF_PROV_KEY_OPS(pd)->key_derive((pd)->pd_prov_handle, \
+ session, mech, base_key, template, count, key, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_KEY_CHECK(pd, mech, key) ( \
+ (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_check) ? \
+ KCF_PROV_KEY_OPS(pd)->key_check((pd)->pd_prov_handle, mech, key) : \
+ CRYPTO_NOT_SUPPORTED)
+
+/*
+ * Wrappers for crypto_provider_management_ops(9S) entry points.
+ *
+ * ops_pd is the provider descriptor that supplies the ops_vector.
+ * pd is the descriptor that supplies the provider handle.
+ * Only ext_info needs two handles.
+ */
+
+#define KCF_PROV_EXT_INFO(ops_pd, provext_info, req, pd) ( \
+ (KCF_PROV_PROVIDER_OPS(ops_pd) && \
+ KCF_PROV_PROVIDER_OPS(ops_pd)->ext_info) ? \
+ KCF_PROV_PROVIDER_OPS(ops_pd)->ext_info((pd)->pd_prov_handle, \
+ provext_info, req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_INIT_TOKEN(pd, pin, pin_len, label, req) ( \
+ (KCF_PROV_PROVIDER_OPS(pd) && KCF_PROV_PROVIDER_OPS(pd)->init_token) ? \
+ KCF_PROV_PROVIDER_OPS(pd)->init_token((pd)->pd_prov_handle, \
+ pin, pin_len, label, req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_INIT_PIN(pd, session, pin, pin_len, req) ( \
+ (KCF_PROV_PROVIDER_OPS(pd) && KCF_PROV_PROVIDER_OPS(pd)->init_pin) ? \
+ KCF_PROV_PROVIDER_OPS(pd)->init_pin((pd)->pd_prov_handle, \
+ session, pin, pin_len, req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_SET_PIN(pd, session, old_pin, old_len, new_pin, new_len, \
+ req) ( \
+ (KCF_PROV_PROVIDER_OPS(pd) && KCF_PROV_PROVIDER_OPS(pd)->set_pin) ? \
+ KCF_PROV_PROVIDER_OPS(pd)->set_pin((pd)->pd_prov_handle, \
+ session, old_pin, old_len, new_pin, new_len, req) : \
+ CRYPTO_NOT_SUPPORTED)
+
+/*
+ * Wrappers for crypto_nostore_key_ops(9S) entry points.
+ */
+
+#define KCF_PROV_NOSTORE_KEY_GENERATE(pd, session, mech, template, count, \
+ out_template, out_count, req) ( \
+ (KCF_PROV_NOSTORE_KEY_OPS(pd) && \
+ KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_generate) ? \
+ KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_generate( \
+ (pd)->pd_prov_handle, session, mech, template, count, \
+ out_template, out_count, req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_NOSTORE_KEY_GENERATE_PAIR(pd, session, mech, pub_template, \
+ pub_count, priv_template, priv_count, out_pub_template, \
+ out_pub_count, out_priv_template, out_priv_count, req) ( \
+ (KCF_PROV_NOSTORE_KEY_OPS(pd) && \
+ KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_generate_pair) ? \
+ KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_generate_pair( \
+ (pd)->pd_prov_handle, session, mech, pub_template, pub_count, \
+ priv_template, priv_count, out_pub_template, out_pub_count, \
+ out_priv_template, out_priv_count, req) : CRYPTO_NOT_SUPPORTED)
+
+#define KCF_PROV_NOSTORE_KEY_DERIVE(pd, session, mech, base_key, template, \
+ count, out_template, out_count, req) ( \
+ (KCF_PROV_NOSTORE_KEY_OPS(pd) && \
+ KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_derive) ? \
+ KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_derive( \
+ (pd)->pd_prov_handle, session, mech, base_key, template, count, \
+ out_template, out_count, req) : CRYPTO_NOT_SUPPORTED)
+
+/*
+ * The following routines are exported by the kcf module (/kernel/misc/kcf)
+ * to the crypto and cryptoadmin modules.
+ */
+
+/*
+ * Single-part digest/mac/cipher entry points that operate on an
+ * already-initialized crypto_context_t.
+ */
+extern int crypto_digest_single(crypto_context_t, crypto_data_t *,
+ crypto_data_t *, crypto_call_req_t *);
+
+extern int crypto_mac_single(crypto_context_t, crypto_data_t *,
+ crypto_data_t *, crypto_call_req_t *);
+
+extern int crypto_encrypt_single(crypto_context_t, crypto_data_t *,
+ crypto_data_t *, crypto_call_req_t *);
+
+extern int crypto_decrypt_single(crypto_context_t, crypto_data_t *,
+ crypto_data_t *, crypto_call_req_t *);
+
+
+/* Other private digest/mac/cipher entry points not exported through k-API */
+extern int crypto_digest_key_prov(crypto_context_t, crypto_key_t *,
+ crypto_call_req_t *);
+
+/* Private sign entry points exported by KCF */
+extern int crypto_sign_single(crypto_context_t, crypto_data_t *,
+ crypto_data_t *, crypto_call_req_t *);
+
+extern int crypto_sign_recover_single(crypto_context_t, crypto_data_t *,
+ crypto_data_t *, crypto_call_req_t *);
+
+/* Private verify entry points exported by KCF */
+extern int crypto_verify_single(crypto_context_t, crypto_data_t *,
+ crypto_data_t *, crypto_call_req_t *);
+
+extern int crypto_verify_recover_single(crypto_context_t, crypto_data_t *,
+ crypto_data_t *, crypto_call_req_t *);
+
+/* Private dual operations entry points exported by KCF */
+extern int crypto_digest_encrypt_update(crypto_context_t, crypto_context_t,
+ crypto_data_t *, crypto_data_t *, crypto_call_req_t *);
+extern int crypto_decrypt_digest_update(crypto_context_t, crypto_context_t,
+ crypto_data_t *, crypto_data_t *, crypto_call_req_t *);
+extern int crypto_sign_encrypt_update(crypto_context_t, crypto_context_t,
+ crypto_data_t *, crypto_data_t *, crypto_call_req_t *);
+extern int crypto_decrypt_verify_update(crypto_context_t, crypto_context_t,
+ crypto_data_t *, crypto_data_t *, crypto_call_req_t *);
+
+/* Random Number Generation */
+int crypto_seed_random(crypto_provider_handle_t provider, uchar_t *buf,
+ size_t len, crypto_call_req_t *req);
+int crypto_generate_random(crypto_provider_handle_t provider, uchar_t *buf,
+ size_t len, crypto_call_req_t *req);
+
+/* Provider Management */
+int crypto_get_provider_info(crypto_provider_id_t id,
+ crypto_provider_info_t **info, crypto_call_req_t *req);
+int crypto_get_provider_mechanisms(crypto_minor_t *, crypto_provider_id_t id,
+ uint_t *count, crypto_mech_name_t **list);
+int crypto_init_token(crypto_provider_handle_t provider, char *pin,
+ size_t pin_len, char *label, crypto_call_req_t *);
+int crypto_init_pin(crypto_provider_handle_t provider, char *pin,
+ size_t pin_len, crypto_call_req_t *req);
+int crypto_set_pin(crypto_provider_handle_t provider, char *old_pin,
+ size_t old_len, char *new_pin, size_t new_len, crypto_call_req_t *req);
+void crypto_free_provider_list(crypto_provider_entry_t *list, uint_t count);
+void crypto_free_provider_info(crypto_provider_info_t *info);
+
+/* Administrative */
+int crypto_get_dev_list(uint_t *count, crypto_dev_list_entry_t **list);
+int crypto_get_soft_list(uint_t *count, char **list, size_t *len);
+int crypto_get_dev_info(char *name, uint_t instance, uint_t *count,
+ crypto_mech_name_t **list);
+int crypto_get_soft_info(caddr_t name, uint_t *count,
+ crypto_mech_name_t **list);
+int crypto_load_dev_disabled(char *name, uint_t instance, uint_t count,
+ crypto_mech_name_t *list);
+int crypto_load_soft_disabled(caddr_t name, uint_t count,
+ crypto_mech_name_t *list);
+int crypto_unload_soft_module(caddr_t path);
+int crypto_load_soft_config(caddr_t name, uint_t count,
+ crypto_mech_name_t *list);
+int crypto_load_door(uint_t did);
+void crypto_free_mech_list(crypto_mech_name_t *list, uint_t count);
+void crypto_free_dev_list(crypto_dev_list_entry_t *list, uint_t count);
+
+/* Miscellaneous */
+int crypto_get_mechanism_number(caddr_t name, crypto_mech_type_t *number);
+int crypto_get_function_list(crypto_provider_id_t id,
+ crypto_function_list_t **list, int kmflag);
+void crypto_free_function_list(crypto_function_list_t *list);
+int crypto_build_permitted_mech_names(kcf_provider_desc_t *,
+ crypto_mech_name_t **, uint_t *, int);
+extern void kcf_destroy_mech_tabs(void);
+extern void kcf_init_mech_tabs(void);
+extern int kcf_add_mech_provider(short, kcf_provider_desc_t *,
+ kcf_prov_mech_desc_t **);
+extern void kcf_remove_mech_provider(char *, kcf_provider_desc_t *);
+extern int kcf_get_mech_entry(crypto_mech_type_t, kcf_mech_entry_t **);
+extern kcf_provider_desc_t *kcf_alloc_provider_desc(crypto_provider_info_t *);
+extern void kcf_provider_zero_refcnt(kcf_provider_desc_t *);
+extern void kcf_free_provider_desc(kcf_provider_desc_t *);
+extern void kcf_soft_config_init(void);
+extern int get_sw_provider_for_mech(crypto_mech_name_t, char **);
+extern crypto_mech_type_t crypto_mech2id_common(char *, boolean_t);
+extern void undo_register_provider(kcf_provider_desc_t *, boolean_t);
+extern void redo_register_provider(kcf_provider_desc_t *);
+extern void kcf_rnd_init(void);
+extern boolean_t kcf_rngprov_check(void);
+extern int kcf_rnd_get_pseudo_bytes(uint8_t *, size_t);
+extern int kcf_rnd_get_bytes(uint8_t *, size_t, boolean_t, boolean_t);
+extern int random_add_pseudo_entropy(uint8_t *, size_t, uint_t);
+extern void kcf_rnd_schedule_timeout(boolean_t);
+extern int crypto_uio_data(crypto_data_t *, uchar_t *, int, cmd_type_t,
+ void *, void (*update)(void));
+extern int crypto_mblk_data(crypto_data_t *, uchar_t *, int, cmd_type_t,
+ void *, void (*update)(void));
+extern int crypto_put_output_data(uchar_t *, crypto_data_t *, int);
+extern int crypto_get_input_data(crypto_data_t *, uchar_t **, uchar_t *);
+extern int crypto_copy_key_to_ctx(crypto_key_t *, crypto_key_t **, size_t *,
+ int kmflag);
+extern int crypto_digest_data(crypto_data_t *, void *, uchar_t *,
+ void (*update)(void), void (*final)(void), uchar_t);
+extern int crypto_update_iov(void *, crypto_data_t *, crypto_data_t *,
+ int (*cipher)(void *, caddr_t, size_t, crypto_data_t *),
+ void (*copy_block)(uint8_t *, uint64_t *));
+extern int crypto_update_uio(void *, crypto_data_t *, crypto_data_t *,
+ int (*cipher)(void *, caddr_t, size_t, crypto_data_t *),
+ void (*copy_block)(uint8_t *, uint64_t *));
+extern int crypto_update_mp(void *, crypto_data_t *, crypto_data_t *,
+ int (*cipher)(void *, caddr_t, size_t, crypto_data_t *),
+ void (*copy_block)(uint8_t *, uint64_t *));
+extern int crypto_get_key_attr(crypto_key_t *, crypto_attr_type_t, uchar_t **,
+ ssize_t *);
+
+/* Access to the provider's table */
+extern void kcf_prov_tab_destroy(void);
+extern void kcf_prov_tab_init(void);
+extern int kcf_prov_tab_add_provider(kcf_provider_desc_t *);
+extern int kcf_prov_tab_rem_provider(crypto_provider_id_t);
+extern kcf_provider_desc_t *kcf_prov_tab_lookup_by_name(char *);
+extern kcf_provider_desc_t *kcf_prov_tab_lookup_by_dev(char *, uint_t);
+extern int kcf_get_hw_prov_tab(uint_t *, kcf_provider_desc_t ***, int,
+ char *, uint_t, boolean_t);
+extern int kcf_get_slot_list(uint_t *, kcf_provider_desc_t ***, boolean_t);
+extern void kcf_free_provider_tab(uint_t, kcf_provider_desc_t **);
+extern kcf_provider_desc_t *kcf_prov_tab_lookup(crypto_provider_id_t);
+extern int kcf_get_sw_prov(crypto_mech_type_t, kcf_provider_desc_t **,
+ kcf_mech_entry_t **, boolean_t);
+
+/* Access to the policy table */
+extern boolean_t is_mech_disabled(kcf_provider_desc_t *, crypto_mech_name_t);
+extern boolean_t is_mech_disabled_byname(crypto_provider_type_t, char *,
+ uint_t, crypto_mech_name_t);
+extern void kcf_policy_tab_init(void);
+extern void kcf_policy_free_desc(kcf_policy_desc_t *);
+extern void kcf_policy_remove_by_name(char *, uint_t *, crypto_mech_name_t **);
+extern void kcf_policy_remove_by_dev(char *, uint_t, uint_t *,
+ crypto_mech_name_t **);
+extern kcf_policy_desc_t *kcf_policy_lookup_by_name(char *);
+extern kcf_policy_desc_t *kcf_policy_lookup_by_dev(char *, uint_t);
+extern int kcf_policy_load_soft_disabled(char *, uint_t, crypto_mech_name_t *,
+ uint_t *, crypto_mech_name_t **);
+extern int kcf_policy_load_dev_disabled(char *, uint_t, uint_t,
+ crypto_mech_name_t *, uint_t *, crypto_mech_name_t **);
+extern boolean_t in_soft_config_list(char *);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_CRYPTO_IMPL_H */
diff --git a/zfs/module/icp/include/sys/crypto/ioctl.h b/zfs/module/icp/include/sys/crypto/ioctl.h
new file mode 100644
index 000000000000..dd59ca7f2be5
--- /dev/null
+++ b/zfs/module/icp/include/sys/crypto/ioctl.h
@@ -0,0 +1,1483 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_CRYPTO_IOCTL_H
+#define _SYS_CRYPTO_IOCTL_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/api.h>
+#include <sys/crypto/spi.h>
+#include <sys/crypto/common.h>
+
+#define CRYPTO_MAX_ATTRIBUTE_COUNT 128
+
+#define CRYPTO_IOFLAGS_RW_SESSION 0x00000001
+
+#define CRYPTO(x) (('y' << 8) | (x))
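+
+/*
+ * 'y' is 0x79, so, for example, CRYPTO(20) below expands to
+ * (0x79 << 8) | 20 == 0x7914.
+ */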
+
+#define MAX_NUM_THRESHOLD 7
+
+/* the PKCS#11 mechanisms */
+#define CKM_RC4 0x00000111
+#define CKM_DES3_ECB 0x00000132
+#define CKM_DES3_CBC 0x00000133
+#define CKM_MD5 0x00000210
+#define CKM_SHA_1 0x00000220
+#define CKM_AES_ECB 0x00001081
+#define CKM_AES_CBC 0x00001082
+
+/*
+ * General Purpose Ioctls
+ */
+
+typedef struct fl_mechs_threshold {
+ int mech_type;
+ uint32_t mech_threshold;
+} fl_mechs_threshold_t;
+
+typedef struct crypto_function_list {
+ boolean_t fl_digest_init;
+ boolean_t fl_digest;
+ boolean_t fl_digest_update;
+ boolean_t fl_digest_key;
+ boolean_t fl_digest_final;
+
+ boolean_t fl_encrypt_init;
+ boolean_t fl_encrypt;
+ boolean_t fl_encrypt_update;
+ boolean_t fl_encrypt_final;
+
+ boolean_t fl_decrypt_init;
+ boolean_t fl_decrypt;
+ boolean_t fl_decrypt_update;
+ boolean_t fl_decrypt_final;
+
+ boolean_t fl_mac_init;
+ boolean_t fl_mac;
+ boolean_t fl_mac_update;
+ boolean_t fl_mac_final;
+
+ boolean_t fl_sign_init;
+ boolean_t fl_sign;
+ boolean_t fl_sign_update;
+ boolean_t fl_sign_final;
+ boolean_t fl_sign_recover_init;
+ boolean_t fl_sign_recover;
+
+ boolean_t fl_verify_init;
+ boolean_t fl_verify;
+ boolean_t fl_verify_update;
+ boolean_t fl_verify_final;
+ boolean_t fl_verify_recover_init;
+ boolean_t fl_verify_recover;
+
+ boolean_t fl_digest_encrypt_update;
+ boolean_t fl_decrypt_digest_update;
+ boolean_t fl_sign_encrypt_update;
+ boolean_t fl_decrypt_verify_update;
+
+ boolean_t fl_seed_random;
+ boolean_t fl_generate_random;
+
+ boolean_t fl_session_open;
+ boolean_t fl_session_close;
+ boolean_t fl_session_login;
+ boolean_t fl_session_logout;
+
+ boolean_t fl_object_create;
+ boolean_t fl_object_copy;
+ boolean_t fl_object_destroy;
+ boolean_t fl_object_get_size;
+ boolean_t fl_object_get_attribute_value;
+ boolean_t fl_object_set_attribute_value;
+ boolean_t fl_object_find_init;
+ boolean_t fl_object_find;
+ boolean_t fl_object_find_final;
+
+ boolean_t fl_key_generate;
+ boolean_t fl_key_generate_pair;
+ boolean_t fl_key_wrap;
+ boolean_t fl_key_unwrap;
+ boolean_t fl_key_derive;
+
+ boolean_t fl_init_token;
+ boolean_t fl_init_pin;
+ boolean_t fl_set_pin;
+
+ boolean_t prov_is_limited;
+ uint32_t prov_hash_threshold;
+ uint32_t prov_hash_limit;
+
+ int total_threshold_count;
+ fl_mechs_threshold_t fl_threshold[MAX_NUM_THRESHOLD];
+} crypto_function_list_t;
+
+typedef struct crypto_get_function_list {
+ uint_t fl_return_value;
+ crypto_provider_id_t fl_provider_id;
+ crypto_function_list_t fl_list;
+} crypto_get_function_list_t;
+
+typedef struct crypto_get_mechanism_number {
+ uint_t pn_return_value;
+ caddr_t pn_mechanism_string;
+ size_t pn_mechanism_len;
+ crypto_mech_type_t pn_internal_number;
+} crypto_get_mechanism_number_t;
+
+#ifdef _KERNEL
+#ifdef _SYSCALL32
+
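+/*
+ * The *32 structures below mirror their native counterparts using
+ * fixed-width ILP32 types (uint32_t, size32_t, caddr32_t) so a 64-bit
+ * kernel can decode ioctl arguments issued by 32-bit callers. Where
+ * native long long alignment (8) differs from the 32-bit ABI's (4),
+ * pack(4) strips the padding a 64-bit compiler would otherwise add.
+ */
+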
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack(4)
+#endif
+
+typedef struct crypto_get_mechanism_number32 {
+ uint32_t pn_return_value;
+ caddr32_t pn_mechanism_string;
+ size32_t pn_mechanism_len;
+ crypto_mech_type_t pn_internal_number;
+} crypto_get_mechanism_number32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack()
+#endif
+
+#endif /* _SYSCALL32 */
+#endif /* _KERNEL */
+
+#define CRYPTO_GET_FUNCTION_LIST CRYPTO(20)
+#define CRYPTO_GET_MECHANISM_NUMBER CRYPTO(21)
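+
+/*
+ * Usage sketch (fd and the mechanism name are illustrative):
+ *
+ *	crypto_get_mechanism_number_t gm = { 0 };
+ *	gm.pn_mechanism_string = (caddr_t)"CKM_AES_CBC";
+ *	gm.pn_mechanism_len = sizeof ("CKM_AES_CBC");
+ *	if (ioctl(fd, CRYPTO_GET_MECHANISM_NUMBER, &gm) == 0 &&
+ *	    gm.pn_return_value == CRYPTO_SUCCESS)
+ *		use(gm.pn_internal_number);
+ */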
+
+/*
+ * Session Ioctls
+ */
+
+typedef uint32_t crypto_flags_t;
+
+typedef struct crypto_open_session {
+ uint_t os_return_value;
+ crypto_session_id_t os_session;
+ crypto_flags_t os_flags;
+ crypto_provider_id_t os_provider_id;
+} crypto_open_session_t;
+
+typedef struct crypto_close_session {
+ uint_t cs_return_value;
+ crypto_session_id_t cs_session;
+} crypto_close_session_t;
+
+typedef struct crypto_close_all_sessions {
+ uint_t as_return_value;
+ crypto_provider_id_t as_provider_id;
+} crypto_close_all_sessions_t;
+
+#define CRYPTO_OPEN_SESSION CRYPTO(30)
+#define CRYPTO_CLOSE_SESSION CRYPTO(31)
+#define CRYPTO_CLOSE_ALL_SESSIONS CRYPTO(32)
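+
+/*
+ * Session flow (sketch): CRYPTO_OPEN_SESSION returns a session id in
+ * os_session for the provider named by os_provider_id; that id is then
+ * carried in every per-session ioctl below until CRYPTO_CLOSE_SESSION
+ * (or CRYPTO_CLOSE_ALL_SESSIONS, for a whole provider) releases it.
+ */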
+
+/*
+ * Login Ioctls
+ */
+typedef struct crypto_login {
+ uint_t co_return_value;
+ crypto_session_id_t co_session;
+ uint_t co_user_type;
+ uint_t co_pin_len;
+ caddr_t co_pin;
+} crypto_login_t;
+
+typedef struct crypto_logout {
+ uint_t cl_return_value;
+ crypto_session_id_t cl_session;
+} crypto_logout_t;
+
+#ifdef _KERNEL
+#ifdef _SYSCALL32
+
+typedef struct crypto_login32 {
+ uint32_t co_return_value;
+ crypto_session_id_t co_session;
+ uint32_t co_user_type;
+ uint32_t co_pin_len;
+ caddr32_t co_pin;
+} crypto_login32_t;
+
+typedef struct crypto_logout32 {
+ uint32_t cl_return_value;
+ crypto_session_id_t cl_session;
+} crypto_logout32_t;
+
+#endif /* _SYSCALL32 */
+#endif /* _KERNEL */
+
+#define CRYPTO_LOGIN CRYPTO(40)
+#define CRYPTO_LOGOUT CRYPTO(41)
+
+/* flag for encrypt and decrypt operations */
+#define CRYPTO_INPLACE_OPERATION 0x00000001
+
+/*
+ * Cryptographic Ioctls
+ */
+typedef struct crypto_encrypt {
+ uint_t ce_return_value;
+ crypto_session_id_t ce_session;
+ size_t ce_datalen;
+ caddr_t ce_databuf;
+ size_t ce_encrlen;
+ caddr_t ce_encrbuf;
+ uint_t ce_flags;
+} crypto_encrypt_t;
+
+typedef struct crypto_encrypt_init {
+ uint_t ei_return_value;
+ crypto_session_id_t ei_session;
+ crypto_mechanism_t ei_mech;
+ crypto_key_t ei_key;
+} crypto_encrypt_init_t;
+
+typedef struct crypto_encrypt_update {
+ uint_t eu_return_value;
+ crypto_session_id_t eu_session;
+ size_t eu_datalen;
+ caddr_t eu_databuf;
+ size_t eu_encrlen;
+ caddr_t eu_encrbuf;
+} crypto_encrypt_update_t;
+
+typedef struct crypto_encrypt_final {
+ uint_t ef_return_value;
+ crypto_session_id_t ef_session;
+ size_t ef_encrlen;
+ caddr_t ef_encrbuf;
+} crypto_encrypt_final_t;
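+
+/*
+ * Multi-part flow (sketch): a caller issues CRYPTO_ENCRYPT_INIT once
+ * with a mechanism and key, any number of CRYPTO_ENCRYPT_UPDATE calls
+ * to stream data through the context, and CRYPTO_ENCRYPT_FINAL to
+ * collect the trailing ciphertext; CRYPTO_ENCRYPT is the single-part
+ * equivalent. The decrypt, digest, MAC, sign and verify families
+ * below follow the same init/update/final shape.
+ */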
+
+typedef struct crypto_decrypt {
+ uint_t cd_return_value;
+ crypto_session_id_t cd_session;
+ size_t cd_encrlen;
+ caddr_t cd_encrbuf;
+ size_t cd_datalen;
+ caddr_t cd_databuf;
+ uint_t cd_flags;
+} crypto_decrypt_t;
+
+typedef struct crypto_decrypt_init {
+ uint_t di_return_value;
+ crypto_session_id_t di_session;
+ crypto_mechanism_t di_mech;
+ crypto_key_t di_key;
+} crypto_decrypt_init_t;
+
+typedef struct crypto_decrypt_update {
+ uint_t du_return_value;
+ crypto_session_id_t du_session;
+ size_t du_encrlen;
+ caddr_t du_encrbuf;
+ size_t du_datalen;
+ caddr_t du_databuf;
+} crypto_decrypt_update_t;
+
+typedef struct crypto_decrypt_final {
+ uint_t df_return_value;
+ crypto_session_id_t df_session;
+ size_t df_datalen;
+ caddr_t df_databuf;
+} crypto_decrypt_final_t;
+
+typedef struct crypto_digest {
+ uint_t cd_return_value;
+ crypto_session_id_t cd_session;
+ size_t cd_datalen;
+ caddr_t cd_databuf;
+ size_t cd_digestlen;
+ caddr_t cd_digestbuf;
+} crypto_digest_t;
+
+typedef struct crypto_digest_init {
+ uint_t di_return_value;
+ crypto_session_id_t di_session;
+ crypto_mechanism_t di_mech;
+} crypto_digest_init_t;
+
+typedef struct crypto_digest_update {
+ uint_t du_return_value;
+ crypto_session_id_t du_session;
+ size_t du_datalen;
+ caddr_t du_databuf;
+} crypto_digest_update_t;
+
+typedef struct crypto_digest_key {
+ uint_t dk_return_value;
+ crypto_session_id_t dk_session;
+ crypto_key_t dk_key;
+} crypto_digest_key_t;
+
+typedef struct crypto_digest_final {
+ uint_t df_return_value;
+ crypto_session_id_t df_session;
+ size_t df_digestlen;
+ caddr_t df_digestbuf;
+} crypto_digest_final_t;
+
+typedef struct crypto_mac {
+ uint_t cm_return_value;
+ crypto_session_id_t cm_session;
+ size_t cm_datalen;
+ caddr_t cm_databuf;
+ size_t cm_maclen;
+ caddr_t cm_macbuf;
+} crypto_mac_t;
+
+typedef struct crypto_mac_init {
+ uint_t mi_return_value;
+ crypto_session_id_t mi_session;
+ crypto_mechanism_t mi_mech;
+ crypto_key_t mi_key;
+} crypto_mac_init_t;
+
+typedef struct crypto_mac_update {
+ uint_t mu_return_value;
+ crypto_session_id_t mu_session;
+ size_t mu_datalen;
+ caddr_t mu_databuf;
+} crypto_mac_update_t;
+
+typedef struct crypto_mac_final {
+ uint_t mf_return_value;
+ crypto_session_id_t mf_session;
+ size_t mf_maclen;
+ caddr_t mf_macbuf;
+} crypto_mac_final_t;
+
+typedef struct crypto_sign {
+ uint_t cs_return_value;
+ crypto_session_id_t cs_session;
+ size_t cs_datalen;
+ caddr_t cs_databuf;
+ size_t cs_signlen;
+ caddr_t cs_signbuf;
+} crypto_sign_t;
+
+typedef struct crypto_sign_init {
+ uint_t si_return_value;
+ crypto_session_id_t si_session;
+ crypto_mechanism_t si_mech;
+ crypto_key_t si_key;
+} crypto_sign_init_t;
+
+typedef struct crypto_sign_update {
+ uint_t su_return_value;
+ crypto_session_id_t su_session;
+ size_t su_datalen;
+ caddr_t su_databuf;
+} crypto_sign_update_t;
+
+typedef struct crypto_sign_final {
+ uint_t sf_return_value;
+ crypto_session_id_t sf_session;
+ size_t sf_signlen;
+ caddr_t sf_signbuf;
+} crypto_sign_final_t;
+
+typedef struct crypto_sign_recover_init {
+ uint_t ri_return_value;
+ crypto_session_id_t ri_session;
+ crypto_mechanism_t ri_mech;
+ crypto_key_t ri_key;
+} crypto_sign_recover_init_t;
+
+typedef struct crypto_sign_recover {
+ uint_t sr_return_value;
+ crypto_session_id_t sr_session;
+ size_t sr_datalen;
+ caddr_t sr_databuf;
+ size_t sr_signlen;
+ caddr_t sr_signbuf;
+} crypto_sign_recover_t;
+
+typedef struct crypto_verify {
+ uint_t cv_return_value;
+ crypto_session_id_t cv_session;
+ size_t cv_datalen;
+ caddr_t cv_databuf;
+ size_t cv_signlen;
+ caddr_t cv_signbuf;
+} crypto_verify_t;
+
+typedef struct crypto_verify_init {
+ uint_t vi_return_value;
+ crypto_session_id_t vi_session;
+ crypto_mechanism_t vi_mech;
+ crypto_key_t vi_key;
+} crypto_verify_init_t;
+
+typedef struct crypto_verify_update {
+ uint_t vu_return_value;
+ crypto_session_id_t vu_session;
+ size_t vu_datalen;
+ caddr_t vu_databuf;
+} crypto_verify_update_t;
+
+typedef struct crypto_verify_final {
+ uint_t vf_return_value;
+ crypto_session_id_t vf_session;
+ size_t vf_signlen;
+ caddr_t vf_signbuf;
+} crypto_verify_final_t;
+
+typedef struct crypto_verify_recover_init {
+ uint_t ri_return_value;
+ crypto_session_id_t ri_session;
+ crypto_mechanism_t ri_mech;
+ crypto_key_t ri_key;
+} crypto_verify_recover_init_t;
+
+typedef struct crypto_verify_recover {
+ uint_t vr_return_value;
+ crypto_session_id_t vr_session;
+ size_t vr_signlen;
+ caddr_t vr_signbuf;
+ size_t vr_datalen;
+ caddr_t vr_databuf;
+} crypto_verify_recover_t;
+
+typedef struct crypto_digest_encrypt_update {
+ uint_t eu_return_value;
+ crypto_session_id_t eu_session;
+ size_t eu_datalen;
+ caddr_t eu_databuf;
+ size_t eu_encrlen;
+ caddr_t eu_encrbuf;
+} crypto_digest_encrypt_update_t;
+
+typedef struct crypto_decrypt_digest_update {
+ uint_t du_return_value;
+ crypto_session_id_t du_session;
+ size_t du_encrlen;
+ caddr_t du_encrbuf;
+ size_t du_datalen;
+ caddr_t du_databuf;
+} crypto_decrypt_digest_update_t;
+
+typedef struct crypto_sign_encrypt_update {
+ uint_t eu_return_value;
+ crypto_session_id_t eu_session;
+ size_t eu_datalen;
+ caddr_t eu_databuf;
+ size_t eu_encrlen;
+ caddr_t eu_encrbuf;
+} crypto_sign_encrypt_update_t;
+
+typedef struct crypto_decrypt_verify_update {
+ uint_t vu_return_value;
+ crypto_session_id_t vu_session;
+ size_t vu_encrlen;
+ caddr_t vu_encrbuf;
+ size_t vu_datalen;
+ caddr_t vu_databuf;
+} crypto_decrypt_verify_update_t;
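+
+/*
+ * These four structures serve the dual operations, which combine two
+ * single operations over one pass of the data (cf. the private
+ * crypto_digest_encrypt_update() family declared in impl.h); note how
+ * each carries a single input and a single output buffer.
+ */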
+
+#ifdef _KERNEL
+#ifdef _SYSCALL32
+
+typedef struct crypto_encrypt32 {
+ uint32_t ce_return_value;
+ crypto_session_id_t ce_session;
+ size32_t ce_datalen;
+ caddr32_t ce_databuf;
+ size32_t ce_encrlen;
+ caddr32_t ce_encrbuf;
+ uint32_t ce_flags;
+} crypto_encrypt32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack(4)
+#endif
+
+typedef struct crypto_encrypt_init32 {
+ uint32_t ei_return_value;
+ crypto_session_id_t ei_session;
+ crypto_mechanism32_t ei_mech;
+ crypto_key32_t ei_key;
+} crypto_encrypt_init32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack()
+#endif
+
+typedef struct crypto_encrypt_update32 {
+ uint32_t eu_return_value;
+ crypto_session_id_t eu_session;
+ size32_t eu_datalen;
+ caddr32_t eu_databuf;
+ size32_t eu_encrlen;
+ caddr32_t eu_encrbuf;
+} crypto_encrypt_update32_t;
+
+typedef struct crypto_encrypt_final32 {
+ uint32_t ef_return_value;
+ crypto_session_id_t ef_session;
+ size32_t ef_encrlen;
+ caddr32_t ef_encrbuf;
+} crypto_encrypt_final32_t;
+
+typedef struct crypto_decrypt32 {
+ uint32_t cd_return_value;
+ crypto_session_id_t cd_session;
+ size32_t cd_encrlen;
+ caddr32_t cd_encrbuf;
+ size32_t cd_datalen;
+ caddr32_t cd_databuf;
+ uint32_t cd_flags;
+} crypto_decrypt32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack(4)
+#endif
+
+typedef struct crypto_decrypt_init32 {
+ uint32_t di_return_value;
+ crypto_session_id_t di_session;
+ crypto_mechanism32_t di_mech;
+ crypto_key32_t di_key;
+} crypto_decrypt_init32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack()
+#endif
+
+typedef struct crypto_decrypt_update32 {
+ uint32_t du_return_value;
+ crypto_session_id_t du_session;
+ size32_t du_encrlen;
+ caddr32_t du_encrbuf;
+ size32_t du_datalen;
+ caddr32_t du_databuf;
+} crypto_decrypt_update32_t;
+
+typedef struct crypto_decrypt_final32 {
+ uint32_t df_return_value;
+ crypto_session_id_t df_session;
+ size32_t df_datalen;
+ caddr32_t df_databuf;
+} crypto_decrypt_final32_t;
+
+typedef struct crypto_digest32 {
+ uint32_t cd_return_value;
+ crypto_session_id_t cd_session;
+ size32_t cd_datalen;
+ caddr32_t cd_databuf;
+ size32_t cd_digestlen;
+ caddr32_t cd_digestbuf;
+} crypto_digest32_t;
+
+typedef struct crypto_digest_init32 {
+ uint32_t di_return_value;
+ crypto_session_id_t di_session;
+ crypto_mechanism32_t di_mech;
+} crypto_digest_init32_t;
+
+typedef struct crypto_digest_update32 {
+ uint32_t du_return_value;
+ crypto_session_id_t du_session;
+ size32_t du_datalen;
+ caddr32_t du_databuf;
+} crypto_digest_update32_t;
+
+typedef struct crypto_digest_key32 {
+ uint32_t dk_return_value;
+ crypto_session_id_t dk_session;
+ crypto_key32_t dk_key;
+} crypto_digest_key32_t;
+
+typedef struct crypto_digest_final32 {
+ uint32_t df_return_value;
+ crypto_session_id_t df_session;
+ size32_t df_digestlen;
+ caddr32_t df_digestbuf;
+} crypto_digest_final32_t;
+
+typedef struct crypto_mac32 {
+ uint32_t cm_return_value;
+ crypto_session_id_t cm_session;
+ size32_t cm_datalen;
+ caddr32_t cm_databuf;
+ size32_t cm_maclen;
+ caddr32_t cm_macbuf;
+} crypto_mac32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack(4)
+#endif
+
+typedef struct crypto_mac_init32 {
+ uint32_t mi_return_value;
+ crypto_session_id_t mi_session;
+ crypto_mechanism32_t mi_mech;
+ crypto_key32_t mi_key;
+} crypto_mac_init32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack()
+#endif
+
+typedef struct crypto_mac_update32 {
+ uint32_t mu_return_value;
+ crypto_session_id_t mu_session;
+ size32_t mu_datalen;
+ caddr32_t mu_databuf;
+} crypto_mac_update32_t;
+
+typedef struct crypto_mac_final32 {
+ uint32_t mf_return_value;
+ crypto_session_id_t mf_session;
+ size32_t mf_maclen;
+ caddr32_t mf_macbuf;
+} crypto_mac_final32_t;
+
+typedef struct crypto_sign32 {
+ uint32_t cs_return_value;
+ crypto_session_id_t cs_session;
+ size32_t cs_datalen;
+ caddr32_t cs_databuf;
+ size32_t cs_signlen;
+ caddr32_t cs_signbuf;
+} crypto_sign32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack(4)
+#endif
+
+typedef struct crypto_sign_init32 {
+ uint32_t si_return_value;
+ crypto_session_id_t si_session;
+ crypto_mechanism32_t si_mech;
+ crypto_key32_t si_key;
+} crypto_sign_init32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack()
+#endif
+
+typedef struct crypto_sign_update32 {
+ uint32_t su_return_value;
+ crypto_session_id_t su_session;
+ size32_t su_datalen;
+ caddr32_t su_databuf;
+} crypto_sign_update32_t;
+
+typedef struct crypto_sign_final32 {
+ uint32_t sf_return_value;
+ crypto_session_id_t sf_session;
+ size32_t sf_signlen;
+ caddr32_t sf_signbuf;
+} crypto_sign_final32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack(4)
+#endif
+
+typedef struct crypto_sign_recover_init32 {
+ uint32_t ri_return_value;
+ crypto_session_id_t ri_session;
+ crypto_mechanism32_t ri_mech;
+ crypto_key32_t ri_key;
+} crypto_sign_recover_init32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack()
+#endif
+
+typedef struct crypto_sign_recover32 {
+ uint32_t sr_return_value;
+ crypto_session_id_t sr_session;
+ size32_t sr_datalen;
+ caddr32_t sr_databuf;
+ size32_t sr_signlen;
+ caddr32_t sr_signbuf;
+} crypto_sign_recover32_t;
+
+typedef struct crypto_verify32 {
+ uint32_t cv_return_value;
+ crypto_session_id_t cv_session;
+ size32_t cv_datalen;
+ caddr32_t cv_databuf;
+ size32_t cv_signlen;
+ caddr32_t cv_signbuf;
+} crypto_verify32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack(4)
+#endif
+
+typedef struct crypto_verify_init32 {
+ uint32_t vi_return_value;
+ crypto_session_id_t vi_session;
+ crypto_mechanism32_t vi_mech;
+ crypto_key32_t vi_key;
+} crypto_verify_init32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack()
+#endif
+
+typedef struct crypto_verify_update32 {
+ uint32_t vu_return_value;
+ crypto_session_id_t vu_session;
+ size32_t vu_datalen;
+ caddr32_t vu_databuf;
+} crypto_verify_update32_t;
+
+typedef struct crypto_verify_final32 {
+ uint32_t vf_return_value;
+ crypto_session_id_t vf_session;
+ size32_t vf_signlen;
+ caddr32_t vf_signbuf;
+} crypto_verify_final32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack(4)
+#endif
+
+typedef struct crypto_verify_recover_init32 {
+ uint32_t ri_return_value;
+ crypto_session_id_t ri_session;
+ crypto_mechanism32_t ri_mech;
+ crypto_key32_t ri_key;
+} crypto_verify_recover_init32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack()
+#endif
+
+typedef struct crypto_verify_recover32 {
+ uint32_t vr_return_value;
+ crypto_session_id_t vr_session;
+ size32_t vr_signlen;
+ caddr32_t vr_signbuf;
+ size32_t vr_datalen;
+ caddr32_t vr_databuf;
+} crypto_verify_recover32_t;
+
+typedef struct crypto_digest_encrypt_update32 {
+ uint32_t eu_return_value;
+ crypto_session_id_t eu_session;
+ size32_t eu_datalen;
+ caddr32_t eu_databuf;
+ size32_t eu_encrlen;
+ caddr32_t eu_encrbuf;
+} crypto_digest_encrypt_update32_t;
+
+typedef struct crypto_decrypt_digest_update32 {
+ uint32_t du_return_value;
+ crypto_session_id_t du_session;
+ size32_t du_encrlen;
+ caddr32_t du_encrbuf;
+ size32_t du_datalen;
+ caddr32_t du_databuf;
+} crypto_decrypt_digest_update32_t;
+
+typedef struct crypto_sign_encrypt_update32 {
+ uint32_t eu_return_value;
+ crypto_session_id_t eu_session;
+ size32_t eu_datalen;
+ caddr32_t eu_databuf;
+ size32_t eu_encrlen;
+ caddr32_t eu_encrbuf;
+} crypto_sign_encrypt_update32_t;
+
+typedef struct crypto_decrypt_verify_update32 {
+ uint32_t vu_return_value;
+ crypto_session_id_t vu_session;
+ size32_t vu_encrlen;
+ caddr32_t vu_encrbuf;
+ size32_t vu_datalen;
+ caddr32_t vu_databuf;
+} crypto_decrypt_verify_update32_t;
+
+#endif /* _SYSCALL32 */
+#endif /* _KERNEL */
+
+#define CRYPTO_ENCRYPT CRYPTO(50)
+#define CRYPTO_ENCRYPT_INIT CRYPTO(51)
+#define CRYPTO_ENCRYPT_UPDATE CRYPTO(52)
+#define CRYPTO_ENCRYPT_FINAL CRYPTO(53)
+#define CRYPTO_DECRYPT CRYPTO(54)
+#define CRYPTO_DECRYPT_INIT CRYPTO(55)
+#define CRYPTO_DECRYPT_UPDATE CRYPTO(56)
+#define CRYPTO_DECRYPT_FINAL CRYPTO(57)
+
+#define CRYPTO_DIGEST CRYPTO(58)
+#define CRYPTO_DIGEST_INIT CRYPTO(59)
+#define CRYPTO_DIGEST_UPDATE CRYPTO(60)
+#define CRYPTO_DIGEST_KEY CRYPTO(61)
+#define CRYPTO_DIGEST_FINAL CRYPTO(62)
+#define CRYPTO_MAC CRYPTO(63)
+#define CRYPTO_MAC_INIT CRYPTO(64)
+#define CRYPTO_MAC_UPDATE CRYPTO(65)
+#define CRYPTO_MAC_FINAL CRYPTO(66)
+
+#define CRYPTO_SIGN CRYPTO(67)
+#define CRYPTO_SIGN_INIT CRYPTO(68)
+#define CRYPTO_SIGN_UPDATE CRYPTO(69)
+#define CRYPTO_SIGN_FINAL CRYPTO(70)
+#define CRYPTO_SIGN_RECOVER_INIT CRYPTO(71)
+#define CRYPTO_SIGN_RECOVER CRYPTO(72)
+#define CRYPTO_VERIFY CRYPTO(73)
+#define CRYPTO_VERIFY_INIT CRYPTO(74)
+#define CRYPTO_VERIFY_UPDATE CRYPTO(75)
+#define CRYPTO_VERIFY_FINAL CRYPTO(76)
+#define CRYPTO_VERIFY_RECOVER_INIT CRYPTO(77)
+#define CRYPTO_VERIFY_RECOVER CRYPTO(78)
+
+#define CRYPTO_DIGEST_ENCRYPT_UPDATE CRYPTO(79)
+#define CRYPTO_DECRYPT_DIGEST_UPDATE CRYPTO(80)
+#define CRYPTO_SIGN_ENCRYPT_UPDATE CRYPTO(81)
+#define CRYPTO_DECRYPT_VERIFY_UPDATE CRYPTO(82)
+
+/*
+ * Random Number Ioctls
+ */
+typedef struct crypto_seed_random {
+ uint_t sr_return_value;
+ crypto_session_id_t sr_session;
+ size_t sr_seedlen;
+ caddr_t sr_seedbuf;
+} crypto_seed_random_t;
+
+typedef struct crypto_generate_random {
+ uint_t gr_return_value;
+ crypto_session_id_t gr_session;
+ caddr_t gr_buf;
+ size_t gr_buflen;
+} crypto_generate_random_t;
+
+#ifdef _KERNEL
+#ifdef _SYSCALL32
+
+typedef struct crypto_seed_random32 {
+ uint32_t sr_return_value;
+ crypto_session_id_t sr_session;
+ size32_t sr_seedlen;
+ caddr32_t sr_seedbuf;
+} crypto_seed_random32_t;
+
+typedef struct crypto_generate_random32 {
+ uint32_t gr_return_value;
+ crypto_session_id_t gr_session;
+ caddr32_t gr_buf;
+ size32_t gr_buflen;
+} crypto_generate_random32_t;
+
+#endif /* _SYSCALL32 */
+#endif /* _KERNEL */
+
+#define CRYPTO_SEED_RANDOM CRYPTO(90)
+#define CRYPTO_GENERATE_RANDOM CRYPTO(91)
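+
+/*
+ * Sketch (fd, session and buf are hypothetical):
+ *
+ *	crypto_generate_random_t gr = { 0 };
+ *	gr.gr_session = session;
+ *	gr.gr_buf = (caddr_t)buf;
+ *	gr.gr_buflen = sizeof (buf);
+ *	(void) ioctl(fd, CRYPTO_GENERATE_RANDOM, &gr);
+ */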
+
+/*
+ * Object Management Ioctls
+ */
+typedef struct crypto_object_create {
+ uint_t oc_return_value;
+ crypto_session_id_t oc_session;
+ crypto_object_id_t oc_handle;
+ uint_t oc_count;
+ caddr_t oc_attributes;
+} crypto_object_create_t;
+
+typedef struct crypto_object_copy {
+ uint_t oc_return_value;
+ crypto_session_id_t oc_session;
+ crypto_object_id_t oc_handle;
+ crypto_object_id_t oc_new_handle;
+ uint_t oc_count;
+ caddr_t oc_new_attributes;
+} crypto_object_copy_t;
+
+typedef struct crypto_object_destroy {
+ uint_t od_return_value;
+ crypto_session_id_t od_session;
+ crypto_object_id_t od_handle;
+} crypto_object_destroy_t;
+
+typedef struct crypto_object_get_attribute_value {
+ uint_t og_return_value;
+ crypto_session_id_t og_session;
+ crypto_object_id_t og_handle;
+ uint_t og_count;
+ caddr_t og_attributes;
+} crypto_object_get_attribute_value_t;
+
+typedef struct crypto_object_get_size {
+ uint_t gs_return_value;
+ crypto_session_id_t gs_session;
+ crypto_object_id_t gs_handle;
+ size_t gs_size;
+} crypto_object_get_size_t;
+
+typedef struct crypto_object_set_attribute_value {
+ uint_t sa_return_value;
+ crypto_session_id_t sa_session;
+ crypto_object_id_t sa_handle;
+ uint_t sa_count;
+ caddr_t sa_attributes;
+} crypto_object_set_attribute_value_t;
+
+typedef struct crypto_object_find_init {
+ uint_t fi_return_value;
+ crypto_session_id_t fi_session;
+ uint_t fi_count;
+ caddr_t fi_attributes;
+} crypto_object_find_init_t;
+
+typedef struct crypto_object_find_update {
+ uint_t fu_return_value;
+ crypto_session_id_t fu_session;
+ uint_t fu_max_count;
+ uint_t fu_count;
+ caddr_t fu_handles;
+} crypto_object_find_update_t;
+
+typedef struct crypto_object_find_final {
+ uint_t ff_return_value;
+ crypto_session_id_t ff_session;
+} crypto_object_find_final_t;
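+
+/*
+ * Object search flow (sketch): CRYPTO_OBJECT_FIND_INIT supplies a
+ * template of fi_count attributes to match; repeated
+ * CRYPTO_OBJECT_FIND_UPDATE calls return up to fu_max_count handles at
+ * a time (fu_count reports how many were actually written to
+ * fu_handles); CRYPTO_OBJECT_FIND_FINAL ends the search.
+ */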
+
+#ifdef _KERNEL
+#ifdef _SYSCALL32
+
+typedef struct crypto_object_create32 {
+ uint32_t oc_return_value;
+ crypto_session_id_t oc_session;
+ crypto_object_id_t oc_handle;
+ uint32_t oc_count;
+ caddr32_t oc_attributes;
+} crypto_object_create32_t;
+
+typedef struct crypto_object_copy32 {
+ uint32_t oc_return_value;
+ crypto_session_id_t oc_session;
+ crypto_object_id_t oc_handle;
+ crypto_object_id_t oc_new_handle;
+ uint32_t oc_count;
+ caddr32_t oc_new_attributes;
+} crypto_object_copy32_t;
+
+typedef struct crypto_object_destroy32 {
+ uint32_t od_return_value;
+ crypto_session_id_t od_session;
+ crypto_object_id_t od_handle;
+} crypto_object_destroy32_t;
+
+typedef struct crypto_object_get_attribute_value32 {
+ uint32_t og_return_value;
+ crypto_session_id_t og_session;
+ crypto_object_id_t og_handle;
+ uint32_t og_count;
+ caddr32_t og_attributes;
+} crypto_object_get_attribute_value32_t;
+
+typedef struct crypto_object_get_size32 {
+ uint32_t gs_return_value;
+ crypto_session_id_t gs_session;
+ crypto_object_id_t gs_handle;
+ size32_t gs_size;
+} crypto_object_get_size32_t;
+
+typedef struct crypto_object_set_attribute_value32 {
+ uint32_t sa_return_value;
+ crypto_session_id_t sa_session;
+ crypto_object_id_t sa_handle;
+ uint32_t sa_count;
+ caddr32_t sa_attributes;
+} crypto_object_set_attribute_value32_t;
+
+typedef struct crypto_object_find_init32 {
+ uint32_t fi_return_value;
+ crypto_session_id_t fi_session;
+ uint32_t fi_count;
+ caddr32_t fi_attributes;
+} crypto_object_find_init32_t;
+
+typedef struct crypto_object_find_update32 {
+ uint32_t fu_return_value;
+ crypto_session_id_t fu_session;
+ uint32_t fu_max_count;
+ uint32_t fu_count;
+ caddr32_t fu_handles;
+} crypto_object_find_update32_t;
+
+typedef struct crypto_object_find_final32 {
+ uint32_t ff_return_value;
+ crypto_session_id_t ff_session;
+} crypto_object_find_final32_t;
+
+#endif /* _SYSCALL32 */
+#endif /* _KERNEL */
+
+#define CRYPTO_OBJECT_CREATE CRYPTO(100)
+#define CRYPTO_OBJECT_COPY CRYPTO(101)
+#define CRYPTO_OBJECT_DESTROY CRYPTO(102)
+#define CRYPTO_OBJECT_GET_ATTRIBUTE_VALUE CRYPTO(103)
+#define CRYPTO_OBJECT_GET_SIZE CRYPTO(104)
+#define CRYPTO_OBJECT_SET_ATTRIBUTE_VALUE CRYPTO(105)
+#define CRYPTO_OBJECT_FIND_INIT CRYPTO(106)
+#define CRYPTO_OBJECT_FIND_UPDATE CRYPTO(107)
+#define CRYPTO_OBJECT_FIND_FINAL CRYPTO(108)
+
+/*
+ * Key Generation Ioctls
+ */
+typedef struct crypto_object_generate_key {
+ uint_t gk_return_value;
+ crypto_session_id_t gk_session;
+ crypto_object_id_t gk_handle;
+ crypto_mechanism_t gk_mechanism;
+ uint_t gk_count;
+ caddr_t gk_attributes;
+} crypto_object_generate_key_t;
+
+typedef struct crypto_object_generate_key_pair {
+ uint_t kp_return_value;
+ crypto_session_id_t kp_session;
+ crypto_object_id_t kp_public_handle;
+ crypto_object_id_t kp_private_handle;
+ uint_t kp_public_count;
+ uint_t kp_private_count;
+ caddr_t kp_public_attributes;
+ caddr_t kp_private_attributes;
+ crypto_mechanism_t kp_mechanism;
+} crypto_object_generate_key_pair_t;
+
+typedef struct crypto_object_wrap_key {
+ uint_t wk_return_value;
+ crypto_session_id_t wk_session;
+ crypto_mechanism_t wk_mechanism;
+ crypto_key_t wk_wrapping_key;
+ crypto_object_id_t wk_object_handle;
+ size_t wk_wrapped_key_len;
+ caddr_t wk_wrapped_key;
+} crypto_object_wrap_key_t;
+
+typedef struct crypto_object_unwrap_key {
+ uint_t uk_return_value;
+ crypto_session_id_t uk_session;
+ crypto_mechanism_t uk_mechanism;
+ crypto_key_t uk_unwrapping_key;
+ crypto_object_id_t uk_object_handle;
+ size_t uk_wrapped_key_len;
+ caddr_t uk_wrapped_key;
+ uint_t uk_count;
+ caddr_t uk_attributes;
+} crypto_object_unwrap_key_t;
+
+typedef struct crypto_derive_key {
+ uint_t dk_return_value;
+ crypto_session_id_t dk_session;
+ crypto_mechanism_t dk_mechanism;
+ crypto_key_t dk_base_key;
+ crypto_object_id_t dk_object_handle;
+ uint_t dk_count;
+ caddr_t dk_attributes;
+} crypto_derive_key_t;
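+
+/*
+ * In each of these the attribute arrays describe the key object(s)
+ * involved: crypto_object_wrap_key_t names an existing object via
+ * wk_object_handle and returns the wrapped bytes through
+ * wk_wrapped_key, while crypto_object_unwrap_key_t does the reverse
+ * and yields a new uk_object_handle.
+ */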
+
+#ifdef _KERNEL
+#ifdef _SYSCALL32
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack(4)
+#endif
+
+typedef struct crypto_object_generate_key32 {
+ uint32_t gk_return_value;
+ crypto_session_id_t gk_session;
+ crypto_object_id_t gk_handle;
+ crypto_mechanism32_t gk_mechanism;
+ uint32_t gk_count;
+ caddr32_t gk_attributes;
+} crypto_object_generate_key32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack()
+#endif
+
+typedef struct crypto_object_generate_key_pair32 {
+ uint32_t kp_return_value;
+ crypto_session_id_t kp_session;
+ crypto_object_id_t kp_public_handle;
+ crypto_object_id_t kp_private_handle;
+ uint32_t kp_public_count;
+ uint32_t kp_private_count;
+ caddr32_t kp_public_attributes;
+ caddr32_t kp_private_attributes;
+ crypto_mechanism32_t kp_mechanism;
+} crypto_object_generate_key_pair32_t;
+
+typedef struct crypto_object_wrap_key32 {
+ uint32_t wk_return_value;
+ crypto_session_id_t wk_session;
+ crypto_mechanism32_t wk_mechanism;
+ crypto_key32_t wk_wrapping_key;
+ crypto_object_id_t wk_object_handle;
+ size32_t wk_wrapped_key_len;
+ caddr32_t wk_wrapped_key;
+} crypto_object_wrap_key32_t;
+
+typedef struct crypto_object_unwrap_key32 {
+ uint32_t uk_return_value;
+ crypto_session_id_t uk_session;
+ crypto_mechanism32_t uk_mechanism;
+ crypto_key32_t uk_unwrapping_key;
+ crypto_object_id_t uk_object_handle;
+ size32_t uk_wrapped_key_len;
+ caddr32_t uk_wrapped_key;
+ uint32_t uk_count;
+ caddr32_t uk_attributes;
+} crypto_object_unwrap_key32_t;
+
+typedef struct crypto_derive_key32 {
+ uint32_t dk_return_value;
+ crypto_session_id_t dk_session;
+ crypto_mechanism32_t dk_mechanism;
+ crypto_key32_t dk_base_key;
+ crypto_object_id_t dk_object_handle;
+ uint32_t dk_count;
+ caddr32_t dk_attributes;
+} crypto_derive_key32_t;
+
+#endif /* _SYSCALL32 */
+#endif /* _KERNEL */
+
+#define CRYPTO_GENERATE_KEY CRYPTO(110)
+#define CRYPTO_GENERATE_KEY_PAIR CRYPTO(111)
+#define CRYPTO_WRAP_KEY CRYPTO(112)
+#define CRYPTO_UNWRAP_KEY CRYPTO(113)
+#define CRYPTO_DERIVE_KEY CRYPTO(114)
+
+/*
+ * Provider Management Ioctls
+ */
+
+typedef struct crypto_get_provider_list {
+ uint_t pl_return_value;
+ uint_t pl_count;
+ crypto_provider_entry_t pl_list[1];
+} crypto_get_provider_list_t;
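+
+/*
+ * The trailing pl_list[1] is the pre-C99 flexible-array idiom: the
+ * caller allocates the structure with room for pl_count entries.
+ * The same idiom recurs in pm_list, ml_list, mi_list and the admin
+ * ioctl structures.
+ */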
+
+typedef struct crypto_provider_data {
+ uchar_t pd_prov_desc[CRYPTO_PROVIDER_DESCR_MAX_LEN];
+ uchar_t pd_label[CRYPTO_EXT_SIZE_LABEL];
+ uchar_t pd_manufacturerID[CRYPTO_EXT_SIZE_MANUF];
+ uchar_t pd_model[CRYPTO_EXT_SIZE_MODEL];
+ uchar_t pd_serial_number[CRYPTO_EXT_SIZE_SERIAL];
+ ulong_t pd_flags;
+ ulong_t pd_max_session_count;
+ ulong_t pd_session_count;
+ ulong_t pd_max_rw_session_count;
+ ulong_t pd_rw_session_count;
+ ulong_t pd_max_pin_len;
+ ulong_t pd_min_pin_len;
+ ulong_t pd_total_public_memory;
+ ulong_t pd_free_public_memory;
+ ulong_t pd_total_private_memory;
+ ulong_t pd_free_private_memory;
+ crypto_version_t pd_hardware_version;
+ crypto_version_t pd_firmware_version;
+ uchar_t pd_time[CRYPTO_EXT_SIZE_TIME];
+} crypto_provider_data_t;
+
+typedef struct crypto_get_provider_info {
+ uint_t gi_return_value;
+ crypto_provider_id_t gi_provider_id;
+ crypto_provider_data_t gi_provider_data;
+} crypto_get_provider_info_t;
+
+typedef struct crypto_get_provider_mechanisms {
+ uint_t pm_return_value;
+ crypto_provider_id_t pm_provider_id;
+ uint_t pm_count;
+ crypto_mech_name_t pm_list[1];
+} crypto_get_provider_mechanisms_t;
+
+typedef struct crypto_get_provider_mechanism_info {
+ uint_t mi_return_value;
+ crypto_provider_id_t mi_provider_id;
+ crypto_mech_name_t mi_mechanism_name;
+ uint32_t mi_min_key_size;
+ uint32_t mi_max_key_size;
+ uint32_t mi_flags;
+} crypto_get_provider_mechanism_info_t;
+
+typedef struct crypto_init_token {
+ uint_t it_return_value;
+ crypto_provider_id_t it_provider_id;
+ caddr_t it_pin;
+ size_t it_pin_len;
+ caddr_t it_label;
+} crypto_init_token_t;
+
+typedef struct crypto_init_pin {
+ uint_t ip_return_value;
+ crypto_session_id_t ip_session;
+ caddr_t ip_pin;
+ size_t ip_pin_len;
+} crypto_init_pin_t;
+
+typedef struct crypto_set_pin {
+ uint_t sp_return_value;
+ crypto_session_id_t sp_session;
+ caddr_t sp_old_pin;
+ size_t sp_old_len;
+ caddr_t sp_new_pin;
+ size_t sp_new_len;
+} crypto_set_pin_t;
+
+#ifdef _KERNEL
+#ifdef _SYSCALL32
+
+typedef struct crypto_get_provider_list32 {
+ uint32_t pl_return_value;
+ uint32_t pl_count;
+ crypto_provider_entry_t pl_list[1];
+} crypto_get_provider_list32_t;
+
+typedef struct crypto_version32 {
+ uchar_t cv_major;
+ uchar_t cv_minor;
+} crypto_version32_t;
+
+typedef struct crypto_provider_data32 {
+ uchar_t pd_prov_desc[CRYPTO_PROVIDER_DESCR_MAX_LEN];
+ uchar_t pd_label[CRYPTO_EXT_SIZE_LABEL];
+ uchar_t pd_manufacturerID[CRYPTO_EXT_SIZE_MANUF];
+ uchar_t pd_model[CRYPTO_EXT_SIZE_MODEL];
+ uchar_t pd_serial_number[CRYPTO_EXT_SIZE_SERIAL];
+ uint32_t pd_flags;
+ uint32_t pd_max_session_count;
+ uint32_t pd_session_count;
+ uint32_t pd_max_rw_session_count;
+ uint32_t pd_rw_session_count;
+ uint32_t pd_max_pin_len;
+ uint32_t pd_min_pin_len;
+ uint32_t pd_total_public_memory;
+ uint32_t pd_free_public_memory;
+ uint32_t pd_total_private_memory;
+ uint32_t pd_free_private_memory;
+ crypto_version32_t pd_hardware_version;
+ crypto_version32_t pd_firmware_version;
+ uchar_t pd_time[CRYPTO_EXT_SIZE_TIME];
+} crypto_provider_data32_t;
+
+typedef struct crypto_get_provider_info32 {
+ uint32_t gi_return_value;
+ crypto_provider_id_t gi_provider_id;
+ crypto_provider_data32_t gi_provider_data;
+} crypto_get_provider_info32_t;
+
+typedef struct crypto_get_provider_mechanisms32 {
+ uint32_t pm_return_value;
+ crypto_provider_id_t pm_provider_id;
+ uint32_t pm_count;
+ crypto_mech_name_t pm_list[1];
+} crypto_get_provider_mechanisms32_t;
+
+typedef struct crypto_init_token32 {
+ uint32_t it_return_value;
+ crypto_provider_id_t it_provider_id;
+ caddr32_t it_pin;
+ size32_t it_pin_len;
+ caddr32_t it_label;
+} crypto_init_token32_t;
+
+typedef struct crypto_init_pin32 {
+ uint32_t ip_return_value;
+ crypto_session_id_t ip_session;
+ caddr32_t ip_pin;
+ size32_t ip_pin_len;
+} crypto_init_pin32_t;
+
+typedef struct crypto_set_pin32 {
+ uint32_t sp_return_value;
+ crypto_session_id_t sp_session;
+ caddr32_t sp_old_pin;
+ size32_t sp_old_len;
+ caddr32_t sp_new_pin;
+ size32_t sp_new_len;
+} crypto_set_pin32_t;
+
+#endif /* _SYSCALL32 */
+#endif /* _KERNEL */
+
+#define CRYPTO_GET_PROVIDER_LIST CRYPTO(120)
+#define CRYPTO_GET_PROVIDER_INFO CRYPTO(121)
+#define CRYPTO_GET_PROVIDER_MECHANISMS CRYPTO(122)
+#define CRYPTO_GET_PROVIDER_MECHANISM_INFO CRYPTO(123)
+#define CRYPTO_INIT_TOKEN CRYPTO(124)
+#define CRYPTO_INIT_PIN CRYPTO(125)
+#define CRYPTO_SET_PIN CRYPTO(126)
+
+/*
+ * No (Key) Store Key Generation Ioctls
+ */
+typedef struct crypto_nostore_generate_key {
+ uint_t ngk_return_value;
+ crypto_session_id_t ngk_session;
+ crypto_mechanism_t ngk_mechanism;
+ uint_t ngk_in_count;
+ uint_t ngk_out_count;
+ caddr_t ngk_in_attributes;
+ caddr_t ngk_out_attributes;
+} crypto_nostore_generate_key_t;
+
+typedef struct crypto_nostore_generate_key_pair {
+ uint_t nkp_return_value;
+ crypto_session_id_t nkp_session;
+ uint_t nkp_in_public_count;
+ uint_t nkp_in_private_count;
+ uint_t nkp_out_public_count;
+ uint_t nkp_out_private_count;
+ caddr_t nkp_in_public_attributes;
+ caddr_t nkp_in_private_attributes;
+ caddr_t nkp_out_public_attributes;
+ caddr_t nkp_out_private_attributes;
+ crypto_mechanism_t nkp_mechanism;
+} crypto_nostore_generate_key_pair_t;
+
+typedef struct crypto_nostore_derive_key {
+ uint_t ndk_return_value;
+ crypto_session_id_t ndk_session;
+ crypto_mechanism_t ndk_mechanism;
+ crypto_key_t ndk_base_key;
+ uint_t ndk_in_count;
+ uint_t ndk_out_count;
+ caddr_t ndk_in_attributes;
+ caddr_t ndk_out_attributes;
+} crypto_nostore_derive_key_t;
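+
+/*
+ * Unlike the key-generation ioctls above, the nostore variants hand
+ * the generated key material back in the *_out_attributes templates
+ * instead of creating a token object, hence the paired in/out counts
+ * and attribute pointers.
+ */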
+
+#ifdef _KERNEL
+#ifdef _SYSCALL32
+
+typedef struct crypto_nostore_generate_key32 {
+ uint32_t ngk_return_value;
+ crypto_session_id_t ngk_session;
+ crypto_mechanism32_t ngk_mechanism;
+ uint32_t ngk_in_count;
+ uint32_t ngk_out_count;
+ caddr32_t ngk_in_attributes;
+ caddr32_t ngk_out_attributes;
+} crypto_nostore_generate_key32_t;
+
+typedef struct crypto_nostore_generate_key_pair32 {
+ uint32_t nkp_return_value;
+ crypto_session_id_t nkp_session;
+ uint32_t nkp_in_public_count;
+ uint32_t nkp_in_private_count;
+ uint32_t nkp_out_public_count;
+ uint32_t nkp_out_private_count;
+ caddr32_t nkp_in_public_attributes;
+ caddr32_t nkp_in_private_attributes;
+ caddr32_t nkp_out_public_attributes;
+ caddr32_t nkp_out_private_attributes;
+ crypto_mechanism32_t nkp_mechanism;
+} crypto_nostore_generate_key_pair32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack(4)
+#endif
+
+typedef struct crypto_nostore_derive_key32 {
+ uint32_t ndk_return_value;
+ crypto_session_id_t ndk_session;
+ crypto_mechanism32_t ndk_mechanism;
+ crypto_key32_t ndk_base_key;
+ uint32_t ndk_in_count;
+ uint32_t ndk_out_count;
+ caddr32_t ndk_in_attributes;
+ caddr32_t ndk_out_attributes;
+} crypto_nostore_derive_key32_t;
+
+#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
+#pragma pack()
+#endif
+
+#endif /* _SYSCALL32 */
+#endif /* _KERNEL */
+
+#define CRYPTO_NOSTORE_GENERATE_KEY CRYPTO(127)
+#define CRYPTO_NOSTORE_GENERATE_KEY_PAIR CRYPTO(128)
+#define CRYPTO_NOSTORE_DERIVE_KEY CRYPTO(129)
+
+/*
+ * Mechanism Ioctls
+ */
+
+typedef struct crypto_get_mechanism_list {
+ uint_t ml_return_value;
+ uint_t ml_count;
+ crypto_mech_name_t ml_list[1];
+} crypto_get_mechanism_list_t;
+
+typedef struct crypto_get_all_mechanism_info {
+ uint_t mi_return_value;
+ crypto_mech_name_t mi_mechanism_name;
+ uint_t mi_count;
+ crypto_mechanism_info_t mi_list[1];
+} crypto_get_all_mechanism_info_t;
+
+#ifdef _KERNEL
+#ifdef _SYSCALL32
+
+typedef struct crypto_get_mechanism_list32 {
+ uint32_t ml_return_value;
+ uint32_t ml_count;
+ crypto_mech_name_t ml_list[1];
+} crypto_get_mechanism_list32_t;
+
+typedef struct crypto_get_all_mechanism_info32 {
+ uint32_t mi_return_value;
+ crypto_mech_name_t mi_mechanism_name;
+ uint32_t mi_count;
+ crypto_mechanism_info32_t mi_list[1];
+} crypto_get_all_mechanism_info32_t;
+
+#endif /* _SYSCALL32 */
+#endif /* _KERNEL */
+
+#define CRYPTO_GET_MECHANISM_LIST CRYPTO(140)
+#define CRYPTO_GET_ALL_MECHANISM_INFO CRYPTO(141)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_CRYPTO_IOCTL_H */
diff --git a/zfs/module/icp/include/sys/crypto/ioctladmin.h b/zfs/module/icp/include/sys/crypto/ioctladmin.h
new file mode 100644
index 000000000000..24babd7755cc
--- /dev/null
+++ b/zfs/module/icp/include/sys/crypto/ioctladmin.h
@@ -0,0 +1,136 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_CRYPTO_IOCTLADMIN_H
+#define _SYS_CRYPTO_IOCTLADMIN_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+
+#define ADMIN_IOCTL_DEVICE "/dev/cryptoadm"
+
+#define CRYPTOADMIN(x) (('y' << 8) | (x))
+
+/*
+ * Administrative IOCTLs
+ */
+
+typedef struct crypto_get_dev_list {
+ uint_t dl_return_value;
+ uint_t dl_dev_count;
+ crypto_dev_list_entry_t dl_devs[1];
+} crypto_get_dev_list_t;
+
+typedef struct crypto_get_soft_list {
+ uint_t sl_return_value;
+ uint_t sl_soft_count;
+ size_t sl_soft_len;
+ caddr_t sl_soft_names;
+} crypto_get_soft_list_t;
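+
+/*
+ * sl_soft_names evidently carries the software provider names as a
+ * packed sequence of NUL-terminated strings: sl_soft_count entries,
+ * sl_soft_len bytes in total.
+ */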
+
+typedef struct crypto_get_dev_info {
+ uint_t di_return_value;
+ char di_dev_name[MAXNAMELEN];
+ uint_t di_dev_instance;
+ uint_t di_count;
+ crypto_mech_name_t di_list[1];
+} crypto_get_dev_info_t;
+
+typedef struct crypto_get_soft_info {
+ uint_t si_return_value;
+ char si_name[MAXNAMELEN];
+ uint_t si_count;
+ crypto_mech_name_t si_list[1];
+} crypto_get_soft_info_t;
+
+typedef struct crypto_load_dev_disabled {
+ uint_t dd_return_value;
+ char dd_dev_name[MAXNAMELEN];
+ uint_t dd_dev_instance;
+ uint_t dd_count;
+ crypto_mech_name_t dd_list[1];
+} crypto_load_dev_disabled_t;
+
+typedef struct crypto_load_soft_disabled {
+ uint_t sd_return_value;
+ char sd_name[MAXNAMELEN];
+ uint_t sd_count;
+ crypto_mech_name_t sd_list[1];
+} crypto_load_soft_disabled_t;
+
+typedef struct crypto_unload_soft_module {
+ uint_t sm_return_value;
+ char sm_name[MAXNAMELEN];
+} crypto_unload_soft_module_t;
+
+typedef struct crypto_load_soft_config {
+ uint_t sc_return_value;
+ char sc_name[MAXNAMELEN];
+ uint_t sc_count;
+ crypto_mech_name_t sc_list[1];
+} crypto_load_soft_config_t;
+
+typedef struct crypto_load_door {
+ uint_t ld_return_value;
+ uint_t ld_did;
+} crypto_load_door_t;
+
+#ifdef _KERNEL
+#ifdef _SYSCALL32
+
+typedef struct crypto_get_soft_list32 {
+ uint32_t sl_return_value;
+ uint32_t sl_soft_count;
+ size32_t sl_soft_len;
+ caddr32_t sl_soft_names;
+} crypto_get_soft_list32_t;
+
+#endif /* _SYSCALL32 */
+#endif /* _KERNEL */
+
+#define CRYPTO_GET_VERSION CRYPTOADMIN(1)
+#define CRYPTO_GET_DEV_LIST CRYPTOADMIN(2)
+#define CRYPTO_GET_SOFT_LIST CRYPTOADMIN(3)
+#define CRYPTO_GET_DEV_INFO CRYPTOADMIN(4)
+#define CRYPTO_GET_SOFT_INFO CRYPTOADMIN(5)
+#define CRYPTO_LOAD_DEV_DISABLED CRYPTOADMIN(8)
+#define CRYPTO_LOAD_SOFT_DISABLED CRYPTOADMIN(9)
+#define CRYPTO_UNLOAD_SOFT_MODULE CRYPTOADMIN(10)
+#define CRYPTO_LOAD_SOFT_CONFIG CRYPTOADMIN(11)
+#define CRYPTO_POOL_CREATE CRYPTOADMIN(12)
+#define CRYPTO_POOL_WAIT CRYPTOADMIN(13)
+#define CRYPTO_POOL_RUN CRYPTOADMIN(14)
+#define CRYPTO_LOAD_DOOR CRYPTOADMIN(15)
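+
+/*
+ * Usage sketch (fd and buffer handling are illustrative):
+ *
+ *	int fd = open(ADMIN_IOCTL_DEVICE, O_RDWR);
+ *	...
+ *	(void) ioctl(fd, CRYPTO_GET_DEV_LIST, dl);
+ *
+ * where dl points at a crypto_get_dev_list_t sized for the expected
+ * dl_dev_count entries.
+ */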
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_CRYPTO_IOCTLADMIN_H */
diff --git a/zfs/module/icp/include/sys/crypto/ops_impl.h b/zfs/module/icp/include/sys/crypto/ops_impl.h
new file mode 100644
index 000000000000..230d74b063fc
--- /dev/null
+++ b/zfs/module/icp/include/sys/crypto/ops_impl.h
@@ -0,0 +1,630 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_CRYPTO_OPS_IMPL_H
+#define _SYS_CRYPTO_OPS_IMPL_H
+
+/*
+ * Scheduler internal structures.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/api.h>
+#include <sys/crypto/spi.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/common.h>
+
+/*
+ * The parameters needed for each function group are batched
+ * in one structure. This is much simpler than having a
+ * separate structure for each function.
+ *
+ * In some cases, a field is generically named to keep the
+ * structure small. The comments indicate these cases.
+ */
+typedef struct kcf_digest_ops_params {
+ crypto_session_id_t do_sid;
+ crypto_mech_type_t do_framework_mechtype;
+ crypto_mechanism_t do_mech;
+ crypto_data_t *do_data;
+ crypto_data_t *do_digest;
+ crypto_key_t *do_digest_key; /* Argument for digest_key() */
+} kcf_digest_ops_params_t;
+
+typedef struct kcf_mac_ops_params {
+ crypto_session_id_t mo_sid;
+ crypto_mech_type_t mo_framework_mechtype;
+ crypto_mechanism_t mo_mech;
+ crypto_key_t *mo_key;
+ crypto_data_t *mo_data;
+ crypto_data_t *mo_mac;
+ crypto_spi_ctx_template_t mo_templ;
+} kcf_mac_ops_params_t;
+
+typedef struct kcf_encrypt_ops_params {
+ crypto_session_id_t eo_sid;
+ crypto_mech_type_t eo_framework_mechtype;
+ crypto_mechanism_t eo_mech;
+ crypto_key_t *eo_key;
+ crypto_data_t *eo_plaintext;
+ crypto_data_t *eo_ciphertext;
+ crypto_spi_ctx_template_t eo_templ;
+} kcf_encrypt_ops_params_t;
+
+typedef struct kcf_decrypt_ops_params {
+ crypto_session_id_t dop_sid;
+ crypto_mech_type_t dop_framework_mechtype;
+ crypto_mechanism_t dop_mech;
+ crypto_key_t *dop_key;
+ crypto_data_t *dop_ciphertext;
+ crypto_data_t *dop_plaintext;
+ crypto_spi_ctx_template_t dop_templ;
+} kcf_decrypt_ops_params_t;
+
+typedef struct kcf_sign_ops_params {
+ crypto_session_id_t so_sid;
+ crypto_mech_type_t so_framework_mechtype;
+ crypto_mechanism_t so_mech;
+ crypto_key_t *so_key;
+ crypto_data_t *so_data;
+ crypto_data_t *so_signature;
+ crypto_spi_ctx_template_t so_templ;
+} kcf_sign_ops_params_t;
+
+typedef struct kcf_verify_ops_params {
+ crypto_session_id_t vo_sid;
+ crypto_mech_type_t vo_framework_mechtype;
+ crypto_mechanism_t vo_mech;
+ crypto_key_t *vo_key;
+ crypto_data_t *vo_data;
+ crypto_data_t *vo_signature;
+ crypto_spi_ctx_template_t vo_templ;
+} kcf_verify_ops_params_t;
+
+typedef struct kcf_encrypt_mac_ops_params {
+ crypto_session_id_t em_sid;
+ crypto_mech_type_t em_framework_encr_mechtype;
+ crypto_mechanism_t em_encr_mech;
+ crypto_key_t *em_encr_key;
+ crypto_mech_type_t em_framework_mac_mechtype;
+ crypto_mechanism_t em_mac_mech;
+ crypto_key_t *em_mac_key;
+ crypto_data_t *em_plaintext;
+ crypto_dual_data_t *em_ciphertext;
+ crypto_data_t *em_mac;
+ crypto_spi_ctx_template_t em_encr_templ;
+ crypto_spi_ctx_template_t em_mac_templ;
+} kcf_encrypt_mac_ops_params_t;
+
+typedef struct kcf_mac_decrypt_ops_params {
+ crypto_session_id_t md_sid;
+ crypto_mech_type_t md_framework_mac_mechtype;
+ crypto_mechanism_t md_mac_mech;
+ crypto_key_t *md_mac_key;
+ crypto_mech_type_t md_framework_decr_mechtype;
+ crypto_mechanism_t md_decr_mech;
+ crypto_key_t *md_decr_key;
+ crypto_dual_data_t *md_ciphertext;
+ crypto_data_t *md_mac;
+ crypto_data_t *md_plaintext;
+ crypto_spi_ctx_template_t md_mac_templ;
+ crypto_spi_ctx_template_t md_decr_templ;
+} kcf_mac_decrypt_ops_params_t;
+
+typedef struct kcf_random_number_ops_params {
+ crypto_session_id_t rn_sid;
+ uchar_t *rn_buf;
+ size_t rn_buflen;
+ uint_t rn_entropy_est;
+ uint32_t rn_flags;
+} kcf_random_number_ops_params_t;
+
+/*
+ * so_pd is useful when the provider descriptor (pd) supplying the
+ * provider handle is different from the pd supplying the ops vector.
+ * This is the case for session open/close where so_pd can be the pd
+ * of a logical provider. The pd supplying the ops vector is passed
+ * as an argument to kcf_submit_request().
+ */
+typedef struct kcf_session_ops_params {
+ crypto_session_id_t *so_sid_ptr;
+ crypto_session_id_t so_sid;
+ crypto_user_type_t so_user_type;
+ char *so_pin;
+ size_t so_pin_len;
+ kcf_provider_desc_t *so_pd;
+} kcf_session_ops_params_t;
+
+typedef struct kcf_object_ops_params {
+ crypto_session_id_t oo_sid;
+ crypto_object_id_t oo_object_id;
+ crypto_object_attribute_t *oo_template;
+ uint_t oo_attribute_count;
+ crypto_object_id_t *oo_object_id_ptr;
+ size_t *oo_object_size;
+ void **oo_find_init_pp_ptr;
+ void *oo_find_pp;
+ uint_t oo_max_object_count;
+ uint_t *oo_object_count_ptr;
+} kcf_object_ops_params_t;
+
+/*
+ * ko_key is used to encode the wrapping key in key_wrap() and the
+ * unwrapping key in key_unwrap(). ko_key_template and
+ * ko_key_attribute_count are used to encode the public template and
+ * its attribute count in key_generate_pair().
+ * kops->ko_key_object_id_ptr is used to encode the public key
+ * in key_generate_pair().
+ */
+typedef struct kcf_key_ops_params {
+ crypto_session_id_t ko_sid;
+ crypto_mech_type_t ko_framework_mechtype;
+ crypto_mechanism_t ko_mech;
+ crypto_object_attribute_t *ko_key_template;
+ uint_t ko_key_attribute_count;
+ crypto_object_id_t *ko_key_object_id_ptr;
+ crypto_object_attribute_t *ko_private_key_template;
+ uint_t ko_private_key_attribute_count;
+ crypto_object_id_t *ko_private_key_object_id_ptr;
+ crypto_key_t *ko_key;
+ uchar_t *ko_wrapped_key;
+ size_t *ko_wrapped_key_len_ptr;
+ crypto_object_attribute_t *ko_out_template1;
+ crypto_object_attribute_t *ko_out_template2;
+ uint_t ko_out_attribute_count1;
+ uint_t ko_out_attribute_count2;
+} kcf_key_ops_params_t;
+
+/*
+ * po_pin and po_pin_len are used to encode new_pin and new_pin_len
+ * when wrapping set_pin() function parameters.
+ *
+ * po_pd is useful when the provider descriptor (pd) supplying the
+ * provider handle is different from the pd supplying the ops vector.
+ * This is true for the ext_info provider entry point where po_pd
+ * can be the pd of a logical provider. The pd supplying the ops vector
+ * is passed as an argument to kcf_submit_request().
+ */
+typedef struct kcf_provmgmt_ops_params {
+ crypto_session_id_t po_sid;
+ char *po_pin;
+ size_t po_pin_len;
+ char *po_old_pin;
+ size_t po_old_pin_len;
+ char *po_label;
+ crypto_provider_ext_info_t *po_ext_info;
+ kcf_provider_desc_t *po_pd;
+} kcf_provmgmt_ops_params_t;
+
+/*
+ * The operation type within a function group.
+ */
+typedef enum kcf_op_type {
+ /* common ops for all mechanisms */
+ KCF_OP_INIT = 1,
+ KCF_OP_SINGLE, /* pkcs11 sense. So, INIT is already done */
+ KCF_OP_UPDATE,
+ KCF_OP_FINAL,
+ KCF_OP_ATOMIC,
+
+ /* digest_key op */
+ KCF_OP_DIGEST_KEY,
+
+ /* mac specific op */
+ KCF_OP_MAC_VERIFY_ATOMIC,
+
+ /* mac/cipher specific op */
+ KCF_OP_MAC_VERIFY_DECRYPT_ATOMIC,
+
+ /* sign_recover ops */
+ KCF_OP_SIGN_RECOVER_INIT,
+ KCF_OP_SIGN_RECOVER,
+ KCF_OP_SIGN_RECOVER_ATOMIC,
+
+ /* verify_recover ops */
+ KCF_OP_VERIFY_RECOVER_INIT,
+ KCF_OP_VERIFY_RECOVER,
+ KCF_OP_VERIFY_RECOVER_ATOMIC,
+
+ /* random number ops */
+ KCF_OP_RANDOM_SEED,
+ KCF_OP_RANDOM_GENERATE,
+
+ /* session management ops */
+ KCF_OP_SESSION_OPEN,
+ KCF_OP_SESSION_CLOSE,
+ KCF_OP_SESSION_LOGIN,
+ KCF_OP_SESSION_LOGOUT,
+
+ /* object management ops */
+ KCF_OP_OBJECT_CREATE,
+ KCF_OP_OBJECT_COPY,
+ KCF_OP_OBJECT_DESTROY,
+ KCF_OP_OBJECT_GET_SIZE,
+ KCF_OP_OBJECT_GET_ATTRIBUTE_VALUE,
+ KCF_OP_OBJECT_SET_ATTRIBUTE_VALUE,
+ KCF_OP_OBJECT_FIND_INIT,
+ KCF_OP_OBJECT_FIND,
+ KCF_OP_OBJECT_FIND_FINAL,
+
+ /* key management ops */
+ KCF_OP_KEY_GENERATE,
+ KCF_OP_KEY_GENERATE_PAIR,
+ KCF_OP_KEY_WRAP,
+ KCF_OP_KEY_UNWRAP,
+ KCF_OP_KEY_DERIVE,
+ KCF_OP_KEY_CHECK,
+
+ /* provider management ops */
+ KCF_OP_MGMT_EXTINFO,
+ KCF_OP_MGMT_INITTOKEN,
+ KCF_OP_MGMT_INITPIN,
+ KCF_OP_MGMT_SETPIN
+} kcf_op_type_t;
+
+/*
+ * The operation groups that need wrapping of parameters. This is somewhat
+ * similar to the function group type in spi.h except that this also includes
+ * all the functions that don't have a mechanism.
+ *
+ * The wrapper macros should never take these enum values as an argument.
+ * Rather, they are assigned in the macro itself since they are known
+ * from the macro name.
+ */
+typedef enum kcf_op_group {
+ KCF_OG_DIGEST = 1,
+ KCF_OG_MAC,
+ KCF_OG_ENCRYPT,
+ KCF_OG_DECRYPT,
+ KCF_OG_SIGN,
+ KCF_OG_VERIFY,
+ KCF_OG_ENCRYPT_MAC,
+ KCF_OG_MAC_DECRYPT,
+ KCF_OG_RANDOM,
+ KCF_OG_SESSION,
+ KCF_OG_OBJECT,
+ KCF_OG_KEY,
+ KCF_OG_PROVMGMT,
+ KCF_OG_NOSTORE_KEY
+} kcf_op_group_t;
+
+/*
+ * The kcf_op_type_t enum values used here should be only for those
+ * operations for which there is a k-api routine in sys/crypto/api.h.
+ */
+#define IS_INIT_OP(ftype) ((ftype) == KCF_OP_INIT)
+#define IS_SINGLE_OP(ftype) ((ftype) == KCF_OP_SINGLE)
+#define IS_UPDATE_OP(ftype) ((ftype) == KCF_OP_UPDATE)
+#define IS_FINAL_OP(ftype) ((ftype) == KCF_OP_FINAL)
+#define IS_ATOMIC_OP(ftype) ( \
+ (ftype) == KCF_OP_ATOMIC || (ftype) == KCF_OP_MAC_VERIFY_ATOMIC || \
+ (ftype) == KCF_OP_MAC_VERIFY_DECRYPT_ATOMIC || \
+ (ftype) == KCF_OP_SIGN_RECOVER_ATOMIC || \
+ (ftype) == KCF_OP_VERIFY_RECOVER_ATOMIC)
+
+/*
+ * Keep the parameters associated with a request around.
+ * We need to pass them to the SPI.
+ */
+typedef struct kcf_req_params {
+ kcf_op_group_t rp_opgrp;
+ kcf_op_type_t rp_optype;
+
+ union {
+ kcf_digest_ops_params_t digest_params;
+ kcf_mac_ops_params_t mac_params;
+ kcf_encrypt_ops_params_t encrypt_params;
+ kcf_decrypt_ops_params_t decrypt_params;
+ kcf_sign_ops_params_t sign_params;
+ kcf_verify_ops_params_t verify_params;
+ kcf_encrypt_mac_ops_params_t encrypt_mac_params;
+ kcf_mac_decrypt_ops_params_t mac_decrypt_params;
+ kcf_random_number_ops_params_t random_number_params;
+ kcf_session_ops_params_t session_params;
+ kcf_object_ops_params_t object_params;
+ kcf_key_ops_params_t key_params;
+ kcf_provmgmt_ops_params_t provmgmt_params;
+ } rp_u;
+} kcf_req_params_t;
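+
+/*
+ * Editorial illustration, not part of the upstream header: a hedged
+ * sketch of the consumer side. A scheduler dispatch routine picks the
+ * rp_u member that matches rp_opgrp before calling into the SPI (the
+ * function name is invented; the error codes are the usual ones from
+ * common.h):
+ *
+ *	static int
+ *	example_dispatch(kcf_req_params_t *params)
+ *	{
+ *		switch (params->rp_opgrp) {
+ *		case KCF_OG_DIGEST: {
+ *			kcf_digest_ops_params_t *dops =
+ *			    &params->rp_u.digest_params;
+ *			// hand dops->do_data, dops->do_digest, ... to
+ *			// the provider's digest entry point
+ *			return (CRYPTO_SUCCESS);
+ *		}
+ *		default:
+ *			return (CRYPTO_ARGUMENTS_BAD);
+ *		}
+ *	}
+ */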
+
+
+/*
+ * The ioctl/k-api code should bundle the parameters into a kcf_req_params_t
+ * structure before calling a scheduler routine. The following macros are
+ * available for that purpose.
+ *
+ * For the most part, the macro arguments closely correspond to the
+ * function parameters. In some cases, we use generic names. The comments
+ * for the structure should indicate these cases.
+ */
+#define KCF_WRAP_DIGEST_OPS_PARAMS(req, ftype, _sid, _mech, _key, \
+ _data, _digest) { \
+ kcf_digest_ops_params_t *dops = &(req)->rp_u.digest_params; \
+ crypto_mechanism_t *mechp = _mech; \
+ \
+ (req)->rp_opgrp = KCF_OG_DIGEST; \
+ (req)->rp_optype = ftype; \
+ dops->do_sid = _sid; \
+ if (mechp != NULL) { \
+ dops->do_mech = *mechp; \
+ dops->do_framework_mechtype = mechp->cm_type; \
+ } \
+ dops->do_digest_key = _key; \
+ dops->do_data = _data; \
+ dops->do_digest = _digest; \
+}
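+
+/*
+ * Editorial illustration, not part of the upstream header: a k-api
+ * entry point bundles its arguments with the macro above and hands
+ * them to kcf_submit_request() (declared in sched_impl.h). The locals
+ * pd, crq and sid are assumed to come from the caller:
+ *
+ *	kcf_req_params_t params;
+ *
+ *	KCF_WRAP_DIGEST_OPS_PARAMS(&params, KCF_OP_ATOMIC, sid, mech,
+ *	    NULL, data, digest);
+ *	return (kcf_submit_request(pd, NULL, crq, &params, B_FALSE));
+ */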
+
+#define KCF_WRAP_MAC_OPS_PARAMS(req, ftype, _sid, _mech, _key, \
+ _data, _mac, _templ) { \
+ kcf_mac_ops_params_t *mops = &(req)->rp_u.mac_params; \
+ crypto_mechanism_t *mechp = _mech; \
+ \
+ (req)->rp_opgrp = KCF_OG_MAC; \
+ (req)->rp_optype = ftype; \
+ mops->mo_sid = _sid; \
+ if (mechp != NULL) { \
+ mops->mo_mech = *mechp; \
+ mops->mo_framework_mechtype = mechp->cm_type; \
+ } \
+ mops->mo_key = _key; \
+ mops->mo_data = _data; \
+ mops->mo_mac = _mac; \
+ mops->mo_templ = _templ; \
+}
+
+#define KCF_WRAP_ENCRYPT_OPS_PARAMS(req, ftype, _sid, _mech, _key, \
+ _plaintext, _ciphertext, _templ) { \
+ kcf_encrypt_ops_params_t *cops = &(req)->rp_u.encrypt_params; \
+ crypto_mechanism_t *mechp = _mech; \
+ \
+ (req)->rp_opgrp = KCF_OG_ENCRYPT; \
+ (req)->rp_optype = ftype; \
+ cops->eo_sid = _sid; \
+ if (mechp != NULL) { \
+ cops->eo_mech = *mechp; \
+ cops->eo_framework_mechtype = mechp->cm_type; \
+ } \
+ cops->eo_key = _key; \
+ cops->eo_plaintext = _plaintext; \
+ cops->eo_ciphertext = _ciphertext; \
+ cops->eo_templ = _templ; \
+}
+
+#define KCF_WRAP_DECRYPT_OPS_PARAMS(req, ftype, _sid, _mech, _key, \
+ _ciphertext, _plaintext, _templ) { \
+ kcf_decrypt_ops_params_t *cops = &(req)->rp_u.decrypt_params; \
+ crypto_mechanism_t *mechp = _mech; \
+ \
+ (req)->rp_opgrp = KCF_OG_DECRYPT; \
+ (req)->rp_optype = ftype; \
+ cops->dop_sid = _sid; \
+ if (mechp != NULL) { \
+ cops->dop_mech = *mechp; \
+ cops->dop_framework_mechtype = mechp->cm_type; \
+ } \
+ cops->dop_key = _key; \
+ cops->dop_ciphertext = _ciphertext; \
+ cops->dop_plaintext = _plaintext; \
+ cops->dop_templ = _templ; \
+}
+
+#define KCF_WRAP_SIGN_OPS_PARAMS(req, ftype, _sid, _mech, _key, \
+ _data, _signature, _templ) { \
+ kcf_sign_ops_params_t *sops = &(req)->rp_u.sign_params; \
+ crypto_mechanism_t *mechp = _mech; \
+ \
+ (req)->rp_opgrp = KCF_OG_SIGN; \
+ (req)->rp_optype = ftype; \
+ sops->so_sid = _sid; \
+ if (mechp != NULL) { \
+ sops->so_mech = *mechp; \
+ sops->so_framework_mechtype = mechp->cm_type; \
+ } \
+ sops->so_key = _key; \
+ sops->so_data = _data; \
+ sops->so_signature = _signature; \
+ sops->so_templ = _templ; \
+}
+
+#define KCF_WRAP_VERIFY_OPS_PARAMS(req, ftype, _sid, _mech, _key, \
+ _data, _signature, _templ) { \
+ kcf_verify_ops_params_t *vops = &(req)->rp_u.verify_params; \
+ crypto_mechanism_t *mechp = _mech; \
+ \
+ (req)->rp_opgrp = KCF_OG_VERIFY; \
+ (req)->rp_optype = ftype; \
+ vops->vo_sid = _sid; \
+ if (mechp != NULL) { \
+ vops->vo_mech = *mechp; \
+ vops->vo_framework_mechtype = mechp->cm_type; \
+ } \
+ vops->vo_key = _key; \
+ vops->vo_data = _data; \
+ vops->vo_signature = _signature; \
+ vops->vo_templ = _templ; \
+}
+
+#define KCF_WRAP_ENCRYPT_MAC_OPS_PARAMS(req, ftype, _sid, _encr_key, \
+ _mac_key, _plaintext, _ciphertext, _mac, _encr_templ, _mac_templ) { \
+ kcf_encrypt_mac_ops_params_t *cmops = &(req)->rp_u.encrypt_mac_params; \
+ \
+ (req)->rp_opgrp = KCF_OG_ENCRYPT_MAC; \
+ (req)->rp_optype = ftype; \
+ cmops->em_sid = _sid; \
+ cmops->em_encr_key = _encr_key; \
+ cmops->em_mac_key = _mac_key; \
+ cmops->em_plaintext = _plaintext; \
+ cmops->em_ciphertext = _ciphertext; \
+ cmops->em_mac = _mac; \
+ cmops->em_encr_templ = _encr_templ; \
+ cmops->em_mac_templ = _mac_templ; \
+}
+
+#define KCF_WRAP_MAC_DECRYPT_OPS_PARAMS(req, ftype, _sid, _mac_key, \
+ _decr_key, _ciphertext, _mac, _plaintext, _mac_templ, _decr_templ) { \
+ kcf_mac_decrypt_ops_params_t *cmops = &(req)->rp_u.mac_decrypt_params; \
+ \
+ (req)->rp_opgrp = KCF_OG_MAC_DECRYPT; \
+ (req)->rp_optype = ftype; \
+ cmops->md_sid = _sid; \
+ cmops->md_mac_key = _mac_key; \
+ cmops->md_decr_key = _decr_key; \
+ cmops->md_ciphertext = _ciphertext; \
+ cmops->md_mac = _mac; \
+ cmops->md_plaintext = _plaintext; \
+ cmops->md_mac_templ = _mac_templ; \
+ cmops->md_decr_templ = _decr_templ; \
+}
+
+#define KCF_WRAP_RANDOM_OPS_PARAMS(req, ftype, _sid, _buf, _buflen, \
+ _est, _flags) { \
+ kcf_random_number_ops_params_t *rops = \
+ &(req)->rp_u.random_number_params; \
+ \
+ (req)->rp_opgrp = KCF_OG_RANDOM; \
+ (req)->rp_optype = ftype; \
+ rops->rn_sid = _sid; \
+ rops->rn_buf = _buf; \
+ rops->rn_buflen = _buflen; \
+ rops->rn_entropy_est = _est; \
+ rops->rn_flags = _flags; \
+}
+
+#define KCF_WRAP_SESSION_OPS_PARAMS(req, ftype, _sid_ptr, _sid, \
+ _user_type, _pin, _pin_len, _pd) { \
+ kcf_session_ops_params_t *sops = &(req)->rp_u.session_params; \
+ \
+ (req)->rp_opgrp = KCF_OG_SESSION; \
+ (req)->rp_optype = ftype; \
+ sops->so_sid_ptr = _sid_ptr; \
+ sops->so_sid = _sid; \
+ sops->so_user_type = _user_type; \
+ sops->so_pin = _pin; \
+ sops->so_pin_len = _pin_len; \
+ sops->so_pd = _pd; \
+}
+
+#define KCF_WRAP_OBJECT_OPS_PARAMS(req, ftype, _sid, _object_id, \
+ _template, _attribute_count, _object_id_ptr, _object_size, \
+ _find_init_pp_ptr, _find_pp, _max_object_count, _object_count_ptr) { \
+ kcf_object_ops_params_t *jops = &(req)->rp_u.object_params; \
+ \
+ (req)->rp_opgrp = KCF_OG_OBJECT; \
+ (req)->rp_optype = ftype; \
+ jops->oo_sid = _sid; \
+ jops->oo_object_id = _object_id; \
+ jops->oo_template = _template; \
+ jops->oo_attribute_count = _attribute_count; \
+ jops->oo_object_id_ptr = _object_id_ptr; \
+ jops->oo_object_size = _object_size; \
+ jops->oo_find_init_pp_ptr = _find_init_pp_ptr; \
+ jops->oo_find_pp = _find_pp; \
+ jops->oo_max_object_count = _max_object_count; \
+ jops->oo_object_count_ptr = _object_count_ptr; \
+}
+
+#define KCF_WRAP_KEY_OPS_PARAMS(req, ftype, _sid, _mech, _key_template, \
+ _key_attribute_count, _key_object_id_ptr, _private_key_template, \
+ _private_key_attribute_count, _private_key_object_id_ptr, \
+ _key, _wrapped_key, _wrapped_key_len_ptr) { \
+ kcf_key_ops_params_t *kops = &(req)->rp_u.key_params; \
+ crypto_mechanism_t *mechp = _mech; \
+ \
+ (req)->rp_opgrp = KCF_OG_KEY; \
+ (req)->rp_optype = ftype; \
+ kops->ko_sid = _sid; \
+ if (mechp != NULL) { \
+ kops->ko_mech = *mechp; \
+ kops->ko_framework_mechtype = mechp->cm_type; \
+ } \
+ kops->ko_key_template = _key_template; \
+ kops->ko_key_attribute_count = _key_attribute_count; \
+ kops->ko_key_object_id_ptr = _key_object_id_ptr; \
+ kops->ko_private_key_template = _private_key_template; \
+ kops->ko_private_key_attribute_count = _private_key_attribute_count; \
+ kops->ko_private_key_object_id_ptr = _private_key_object_id_ptr; \
+ kops->ko_key = _key; \
+ kops->ko_wrapped_key = _wrapped_key; \
+ kops->ko_wrapped_key_len_ptr = _wrapped_key_len_ptr; \
+}
+
+#define KCF_WRAP_PROVMGMT_OPS_PARAMS(req, ftype, _sid, _old_pin, \
+ _old_pin_len, _pin, _pin_len, _label, _ext_info, _pd) { \
+ kcf_provmgmt_ops_params_t *pops = &(req)->rp_u.provmgmt_params; \
+ \
+ (req)->rp_opgrp = KCF_OG_PROVMGMT; \
+ (req)->rp_optype = ftype; \
+ pops->po_sid = _sid; \
+ pops->po_pin = _pin; \
+ pops->po_pin_len = _pin_len; \
+ pops->po_old_pin = _old_pin; \
+ pops->po_old_pin_len = _old_pin_len; \
+ pops->po_label = _label; \
+ pops->po_ext_info = _ext_info; \
+ pops->po_pd = _pd; \
+}
+
+#define KCF_WRAP_NOSTORE_KEY_OPS_PARAMS(req, ftype, _sid, _mech, \
+ _key_template, _key_attribute_count, _private_key_template, \
+ _private_key_attribute_count, _key, _out_template1, \
+ _out_attribute_count1, _out_template2, _out_attribute_count2) { \
+ kcf_key_ops_params_t *kops = &(req)->rp_u.key_params; \
+ crypto_mechanism_t *mechp = _mech; \
+ \
+ (req)->rp_opgrp = KCF_OG_NOSTORE_KEY; \
+ (req)->rp_optype = ftype; \
+ kops->ko_sid = _sid; \
+ if (mechp != NULL) { \
+ kops->ko_mech = *mechp; \
+ kops->ko_framework_mechtype = mechp->cm_type; \
+ } \
+ kops->ko_key_template = _key_template; \
+ kops->ko_key_attribute_count = _key_attribute_count; \
+ kops->ko_key_object_id_ptr = NULL; \
+ kops->ko_private_key_template = _private_key_template; \
+ kops->ko_private_key_attribute_count = _private_key_attribute_count; \
+ kops->ko_private_key_object_id_ptr = NULL; \
+ kops->ko_key = _key; \
+ kops->ko_wrapped_key = NULL; \
+	kops->ko_wrapped_key_len_ptr = NULL;			\
+ kops->ko_out_template1 = _out_template1; \
+ kops->ko_out_template2 = _out_template2; \
+ kops->ko_out_attribute_count1 = _out_attribute_count1; \
+ kops->ko_out_attribute_count2 = _out_attribute_count2; \
+}
+
+#define KCF_SET_PROVIDER_MECHNUM(fmtype, pd, mechp) \
+ (mechp)->cm_type = \
+ KCF_TO_PROV_MECHNUM(pd, fmtype);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_CRYPTO_OPS_IMPL_H */
diff --git a/zfs/module/icp/include/sys/crypto/sched_impl.h b/zfs/module/icp/include/sys/crypto/sched_impl.h
new file mode 100644
index 000000000000..32ffa774957b
--- /dev/null
+++ b/zfs/module/icp/include/sys/crypto/sched_impl.h
@@ -0,0 +1,531 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_CRYPTO_SCHED_IMPL_H
+#define _SYS_CRYPTO_SCHED_IMPL_H
+
+/*
+ * Scheduler internal structures.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/api.h>
+#include <sys/crypto/spi.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/ops_impl.h>
+
+typedef void (kcf_func_t)(void *, int);
+
+typedef enum kcf_req_status {
+ REQ_ALLOCATED = 1,
+ REQ_WAITING, /* At the framework level */
+ REQ_INPROGRESS, /* At the provider level */
+ REQ_DONE,
+ REQ_CANCELED
+} kcf_req_status_t;
+
+typedef enum kcf_call_type {
+ CRYPTO_SYNCH = 1,
+ CRYPTO_ASYNCH
+} kcf_call_type_t;
+
+#define	CHECK_RESTRICT(crq) ((crq) != NULL &&	\
+	((crq)->cr_flag & CRYPTO_RESTRICTED))
+
+#define	CHECK_RESTRICT_FALSE	B_FALSE
+
+#define	CHECK_FASTPATH(crq, pd) (((crq) == NULL ||	\
+	!((crq)->cr_flag & CRYPTO_ALWAYS_QUEUE)) &&	\
+	(pd)->pd_prov_type == CRYPTO_SW_PROVIDER)
+
+#define KCF_KMFLAG(crq) (((crq) == NULL) ? KM_SLEEP : KM_NOSLEEP)
+
+/*
+ * The framework keeps an internal handle to use in the adaptive
+ * asynchronous case. This is the case when a client has the
+ * CRYPTO_ALWAYS_QUEUE bit clear and a software provider is used for
+ * the request. The request is completed in the context of the calling
+ * thread and kernel memory must be allocated with KM_NOSLEEP.
+ *
+ * The framework passes a pointer to the handle in the crypto_req_handle_t
+ * argument when it calls the SPI of the software provider. The macros
+ * KCF_RHNDL() and KCF_SWFP_RHNDL() are used to do this.
+ *
+ * When a provider asks the framework for the kmflag value via
+ * crypto_kmflag(9S), we use the REQHNDL2_KMFLAG() macro.
+ */
+extern ulong_t kcf_swprov_hndl;
+#define KCF_RHNDL(kmflag) (((kmflag) == KM_SLEEP) ? NULL : &kcf_swprov_hndl)
+#define KCF_SWFP_RHNDL(crq) (((crq) == NULL) ? NULL : &kcf_swprov_hndl)
+#define REQHNDL2_KMFLAG(rhndl) \
+ ((rhndl == &kcf_swprov_hndl) ? KM_NOSLEEP : KM_SLEEP)
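+
+/*
+ * Editorial illustration of the round trip described above (sketch;
+ * the surrounding code is assumed): the framework derives the request
+ * handle from the call request, and the provider later recovers the
+ * kmflag from that handle.
+ *
+ *	crypto_req_handle_t rhndl = KCF_SWFP_RHNDL(crq);
+ *	...
+ *	int kmflag = REQHNDL2_KMFLAG(rhndl);
+ *	// kmflag is KM_NOSLEEP iff rhndl is &kcf_swprov_hndl,
+ *	// i.e. iff crq was non-NULL
+ */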
+
+/* Internal call_req flags. They start after the public ones in api.h */
+
+#define CRYPTO_SETDUAL 0x00001000 /* Set the 'cont' boolean before */
+ /* submitting the request */
+#define KCF_ISDUALREQ(crq) \
+ (((crq) == NULL) ? B_FALSE : (crq->cr_flag & CRYPTO_SETDUAL))
+
+typedef struct kcf_prov_tried {
+ kcf_provider_desc_t *pt_pd;
+ struct kcf_prov_tried *pt_next;
+} kcf_prov_tried_t;
+
+#define IS_FG_SUPPORTED(mdesc, fg) \
+ (((mdesc)->pm_mech_info.cm_func_group_mask & (fg)) != 0)
+
+#define IS_PROVIDER_TRIED(pd, tlist) \
+ (tlist != NULL && is_in_triedlist(pd, tlist))
+
+#define IS_RECOVERABLE(error) \
+ (error == CRYPTO_BUFFER_TOO_BIG || \
+ error == CRYPTO_BUSY || \
+ error == CRYPTO_DEVICE_ERROR || \
+ error == CRYPTO_DEVICE_MEMORY || \
+ error == CRYPTO_KEY_SIZE_RANGE || \
+ error == CRYPTO_NO_PERMISSION)
+
+#define KCF_ATOMIC_INCR(x) atomic_add_32(&(x), 1)
+#define KCF_ATOMIC_DECR(x) atomic_add_32(&(x), -1)
+
+/*
+ * Node structure for synchronous requests.
+ */
+typedef struct kcf_sreq_node {
+ /* Should always be the first field in this structure */
+ kcf_call_type_t sn_type;
+ /*
+	 * sn_cv and sn_lock are used to wait for the
+ * operation to complete. sn_lock also protects
+ * the sn_state field.
+ */
+ kcondvar_t sn_cv;
+ kmutex_t sn_lock;
+ kcf_req_status_t sn_state;
+
+ /*
+ * Return value from the operation. This will be
+ * one of the CRYPTO_* errors defined in common.h.
+ */
+ int sn_rv;
+
+ /*
+ * parameters to call the SPI with. This can be
+ * a pointer as we know the caller context/stack stays.
+ */
+ struct kcf_req_params *sn_params;
+
+ /* Internal context for this request */
+ struct kcf_context *sn_context;
+
+ /* Provider handling this request */
+ kcf_provider_desc_t *sn_provider;
+} kcf_sreq_node_t;
+
+/*
+ * Node structure for asynchronous requests. A node can be on
+ * a chain of requests hanging off the internal context
+ * structure and can be in the global software provider queue.
+ */
+typedef struct kcf_areq_node {
+ /* Should always be the first field in this structure */
+ kcf_call_type_t an_type;
+
+ /* an_lock protects the field an_state */
+ kmutex_t an_lock;
+ kcf_req_status_t an_state;
+ crypto_call_req_t an_reqarg;
+
+ /*
+ * parameters to call the SPI with. We need to
+ * save the params since the caller stack can go away.
+ */
+ struct kcf_req_params an_params;
+
+ /*
+ * The next two fields should be NULL for operations that
+ * don't need a context.
+ */
+ /* Internal context for this request */
+ struct kcf_context *an_context;
+
+ /* next in chain of requests for context */
+ struct kcf_areq_node *an_ctxchain_next;
+
+ kcondvar_t an_turn_cv;
+ boolean_t an_is_my_turn;
+ boolean_t an_isdual; /* for internal reuse */
+
+ /*
+ * Next and previous nodes in the global software
+ * queue. These fields are NULL for a hardware
+ * provider since we use a taskq there.
+ */
+ struct kcf_areq_node *an_next;
+ struct kcf_areq_node *an_prev;
+
+ /* Provider handling this request */
+ kcf_provider_desc_t *an_provider;
+ kcf_prov_tried_t *an_tried_plist;
+
+ struct kcf_areq_node *an_idnext; /* Next in ID hash */
+ struct kcf_areq_node *an_idprev; /* Prev in ID hash */
+ kcondvar_t an_done; /* Signal request completion */
+ uint_t an_refcnt;
+} kcf_areq_node_t;
+
+#define KCF_AREQ_REFHOLD(areq) { \
+ atomic_add_32(&(areq)->an_refcnt, 1); \
+ ASSERT((areq)->an_refcnt != 0); \
+}
+
+#define KCF_AREQ_REFRELE(areq) { \
+ ASSERT((areq)->an_refcnt != 0); \
+ membar_exit(); \
+ if (atomic_add_32_nv(&(areq)->an_refcnt, -1) == 0) \
+ kcf_free_req(areq); \
+}
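+
+/*
+ * Editorial illustration: the usual hold/release bracket around any
+ * use of an areq outside its owner (sketch; surrounding code assumed).
+ *
+ *	KCF_AREQ_REFHOLD(areq);
+ *	// ... safely use areq->an_params, areq->an_provider, ...
+ *	KCF_AREQ_REFRELE(areq);	// kcf_free_req() on last reference
+ */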
+
+#define GET_REQ_TYPE(arg) *((kcf_call_type_t *)(arg))
+
+#define NOTIFY_CLIENT(areq, err) (*(areq)->an_reqarg.cr_callback_func)(\
+ (areq)->an_reqarg.cr_callback_arg, err);
+
+/* For internally generated call requests for dual operations */
+typedef struct kcf_call_req {
+ crypto_call_req_t kr_callreq; /* external client call req */
+ kcf_req_params_t kr_params; /* Params saved for next call */
+ kcf_areq_node_t *kr_areq; /* Use this areq */
+ off_t kr_saveoffset;
+ size_t kr_savelen;
+} kcf_dual_req_t;
+
+/*
+ * The following are somewhat similar to the macros in callo.h, which
+ * implement callout tables.
+ *
+ * The lower four bits of the ID encode the table to index into. The
+ * REQID_COUNTER_HIGH bit is used to avoid any wrap-around check when
+ * generating an ID. We assume that no request takes longer than
+ * 2^(8 * sizeof (long) - 5) other requests submitted after it. This
+ * ensures there won't be any ID collision.
+ */
+#define REQID_COUNTER_HIGH (1UL << (8 * sizeof (long) - 1))
+#define REQID_COUNTER_SHIFT 4
+#define REQID_COUNTER_LOW (1 << REQID_COUNTER_SHIFT)
+#define REQID_TABLES 16
+#define REQID_TABLE_MASK (REQID_TABLES - 1)
+
+#define REQID_BUCKETS 512
+#define REQID_BUCKET_MASK (REQID_BUCKETS - 1)
+#define REQID_HASH(id) (((id) >> REQID_COUNTER_SHIFT) & REQID_BUCKET_MASK)
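+
+/*
+ * Editorial illustration of the ID layout the macros above assume
+ * (sketch only): the low four bits select the reqid table and the
+ * counter bits select a bucket within it.
+ *
+ *	ulong_t table  = id & REQID_TABLE_MASK;	// which kcf_reqid_table_t
+ *	ulong_t bucket = REQID_HASH(id);	// index into rt_idhash[]
+ */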
+
+#define GET_REQID(areq) (areq)->an_reqarg.cr_reqid
+#define SET_REQID(areq, val) GET_REQID(areq) = val
+
+/*
+ * Hash table for async requests.
+ */
+typedef struct kcf_reqid_table {
+ kmutex_t rt_lock;
+ crypto_req_id_t rt_curid;
+ kcf_areq_node_t *rt_idhash[REQID_BUCKETS];
+} kcf_reqid_table_t;
+
+/*
+ * Global software provider queue structure. Requests to be
+ * handled by a SW provider and have the ALWAYS_QUEUE flag set
+ * get queued here.
+ */
+typedef struct kcf_global_swq {
+ /*
+ * gs_cv and gs_lock are used to wait for new requests.
+ * gs_lock protects the changes to the queue.
+ */
+ kcondvar_t gs_cv;
+ kmutex_t gs_lock;
+ uint_t gs_njobs;
+ uint_t gs_maxjobs;
+ kcf_areq_node_t *gs_first;
+ kcf_areq_node_t *gs_last;
+} kcf_global_swq_t;
+
+
+/*
+ * Internal representation of a canonical context. We embed the crypto_ctx_t
+ * structure so that only one memory allocation is needed. The SPI
+ * ((crypto_ctx_t *)ctx)->cc_framework_private maps to this structure.
+ */
+typedef struct kcf_context {
+ crypto_ctx_t kc_glbl_ctx;
+ uint_t kc_refcnt;
+ kmutex_t kc_in_use_lock;
+ /*
+ * kc_req_chain_first and kc_req_chain_last are used to chain
+ * multiple async requests using the same context. They should be
+ * NULL for sync requests.
+ */
+ kcf_areq_node_t *kc_req_chain_first;
+ kcf_areq_node_t *kc_req_chain_last;
+ kcf_provider_desc_t *kc_prov_desc; /* Prov. descriptor */
+ kcf_provider_desc_t *kc_sw_prov_desc; /* Prov. descriptor */
+ kcf_mech_entry_t *kc_mech;
+ struct kcf_context *kc_secondctx; /* for dual contexts */
+} kcf_context_t;
+
+/*
+ * Bump up the reference count on the framework private context. A
+ * global context or a request that references this structure should
+ * do a hold.
+ */
+#define KCF_CONTEXT_REFHOLD(ictx) { \
+ atomic_add_32(&(ictx)->kc_refcnt, 1); \
+ ASSERT((ictx)->kc_refcnt != 0); \
+}
+
+/*
+ * Decrement the reference count on the framework private context.
+ * When the last reference is released, the framework private
+ * context structure is freed along with the global context.
+ */
+#define KCF_CONTEXT_REFRELE(ictx) { \
+ ASSERT((ictx)->kc_refcnt != 0); \
+ membar_exit(); \
+ if (atomic_add_32_nv(&(ictx)->kc_refcnt, -1) == 0) \
+ kcf_free_context(ictx); \
+}
+
+/*
+ * Check if we can release the context now. In case of CRYPTO_QUEUED
+ * we do not release it as we can do it only after the provider notified
+ * us. In case of CRYPTO_BUSY, the client can retry the request using
+ * the context, so we do not release the context.
+ *
+ * This macro should be called only from the final routine in
+ * an init/update/final sequence. We do not release the context in case
+ * of update operations. We require the consumer to free it
+ * explicitly, in case it wants to abandon the operation. This is done
+ * as there may be mechanisms in ECB mode that can continue even if
+ * an operation on a block fails.
+ */
+#define KCF_CONTEXT_COND_RELEASE(rv, kcf_ctx) { \
+ if (KCF_CONTEXT_DONE(rv)) \
+ KCF_CONTEXT_REFRELE(kcf_ctx); \
+}
+
+/*
+ * This macro determines whether we're done with a context.
+ */
+#define KCF_CONTEXT_DONE(rv) \
+ ((rv) != CRYPTO_QUEUED && (rv) != CRYPTO_BUSY && \
+ (rv) != CRYPTO_BUFFER_TOO_SMALL)
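+
+/*
+ * Editorial illustration, not upstream code: a final-step routine
+ * releases the framework context only when the return value says the
+ * operation is really finished (rv, pd, ctx, params, rhndl and
+ * kcf_ctx are assumed locals).
+ *
+ *	rv = common_submit_request(pd, ctx, &params, rhndl);
+ *	KCF_CONTEXT_COND_RELEASE(rv, kcf_ctx);
+ *	// no release for CRYPTO_QUEUED, CRYPTO_BUSY or
+ *	// CRYPTO_BUFFER_TOO_SMALL, per KCF_CONTEXT_DONE()
+ */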
+
+/*
+ * A crypto_ctx_template_t is internally a pointer to this struct
+ */
+typedef struct kcf_ctx_template {
+ crypto_kcf_provider_handle_t ct_prov_handle; /* provider handle */
+ uint_t ct_generation; /* generation # */
+ size_t ct_size; /* for freeing */
+ crypto_spi_ctx_template_t ct_prov_tmpl; /* context template */
+ /* from the SW prov */
+} kcf_ctx_template_t;
+
+/*
+ * Structure for pool of threads working on global software queue.
+ */
+typedef struct kcf_pool {
+ uint32_t kp_threads; /* Number of threads in pool */
+ uint32_t kp_idlethreads; /* Idle threads in pool */
+ uint32_t kp_blockedthreads; /* Blocked threads in pool */
+
+ /*
+ * cv & lock to monitor the condition when no threads
+ * are around. In this case the failover thread kicks in.
+ */
+ kcondvar_t kp_nothr_cv;
+ kmutex_t kp_thread_lock;
+
+ /* Userspace thread creator variables. */
+ boolean_t kp_signal_create_thread; /* Create requested flag */
+ int kp_nthrs; /* # of threads to create */
+ boolean_t kp_user_waiting; /* Thread waiting for work */
+
+ /*
+ * cv & lock for the condition where more threads need to be
+	 * created. kp_user_lock also protects the three fields above.
+ */
+ kcondvar_t kp_user_cv; /* Creator cond. variable */
+ kmutex_t kp_user_lock; /* Creator lock */
+} kcf_pool_t;
+
+
+/*
+ * State of a crypto bufcall element.
+ */
+typedef enum cbuf_state {
+ CBUF_FREE = 1,
+ CBUF_WAITING,
+ CBUF_RUNNING
+} cbuf_state_t;
+
+/*
+ * Structure of a crypto bufcall element.
+ */
+typedef struct kcf_cbuf_elem {
+ /*
+ * lock and cv to wait for CBUF_RUNNING to be done
+ * kc_lock also protects kc_state.
+ */
+ kmutex_t kc_lock;
+ kcondvar_t kc_cv;
+ cbuf_state_t kc_state;
+
+ struct kcf_cbuf_elem *kc_next;
+ struct kcf_cbuf_elem *kc_prev;
+
+ void (*kc_func)(void *arg);
+ void *kc_arg;
+} kcf_cbuf_elem_t;
+
+/*
+ * State of a notify element.
+ */
+typedef enum ntfy_elem_state {
+ NTFY_WAITING = 1,
+ NTFY_RUNNING
+} ntfy_elem_state_t;
+
+/*
+ * Structure of a notify list element.
+ */
+typedef struct kcf_ntfy_elem {
+ /*
+ * lock and cv to wait for NTFY_RUNNING to be done.
+ * kn_lock also protects kn_state.
+ */
+ kmutex_t kn_lock;
+ kcondvar_t kn_cv;
+ ntfy_elem_state_t kn_state;
+
+ struct kcf_ntfy_elem *kn_next;
+ struct kcf_ntfy_elem *kn_prev;
+
+ crypto_notify_callback_t kn_func;
+ uint32_t kn_event_mask;
+} kcf_ntfy_elem_t;
+
+
+/*
+ * The following values are based on the assumption that it would
+ * take around eight CPUs to load a hardware provider (this is true for
+ * at least one product) and that a kernel client may come from different
+ * low-priority interrupt levels. We will have CYRPTO_TASKQ_MIN
+ * cached taskq entries. The CRYPTO_TASKQ_MAX number is based on
+ * a throughput of 1 GB/s using 512-byte buffers. These are just
+ * reasonable estimates and might need to change in the future.
+ */
+#define CRYPTO_TASKQ_THREADS 8
+#define CYRPTO_TASKQ_MIN 64
+#define	CRYPTO_TASKQ_MAX	(2 * 1024 * 1024)
+
+extern int crypto_taskq_threads;
+extern int crypto_taskq_minalloc;
+extern int crypto_taskq_maxalloc;
+extern kcf_global_swq_t *gswq;
+extern int kcf_maxthreads;
+extern int kcf_minthreads;
+
+/*
+ * All pending crypto bufcalls are put on a list. cbuf_list_lock
+ * protects changes to this list.
+ */
+extern kmutex_t cbuf_list_lock;
+extern kcondvar_t cbuf_list_cv;
+
+/*
+ * All event subscribers are put on a list. kcf_notify_list_lock
+ * protects changes to this list.
+ */
+extern kmutex_t ntfy_list_lock;
+extern kcondvar_t ntfy_list_cv;
+
+boolean_t kcf_get_next_logical_provider_member(kcf_provider_desc_t *,
+ kcf_provider_desc_t *, kcf_provider_desc_t **);
+extern int kcf_get_hardware_provider(crypto_mech_type_t, crypto_mech_type_t,
+ boolean_t, kcf_provider_desc_t *, kcf_provider_desc_t **,
+ crypto_func_group_t);
+extern int kcf_get_hardware_provider_nomech(offset_t, offset_t,
+ boolean_t, kcf_provider_desc_t *, kcf_provider_desc_t **);
+extern void kcf_free_triedlist(kcf_prov_tried_t *);
+extern kcf_prov_tried_t *kcf_insert_triedlist(kcf_prov_tried_t **,
+ kcf_provider_desc_t *, int);
+extern kcf_provider_desc_t *kcf_get_mech_provider(crypto_mech_type_t,
+ kcf_mech_entry_t **, int *, kcf_prov_tried_t *, crypto_func_group_t,
+ boolean_t, size_t);
+extern kcf_provider_desc_t *kcf_get_dual_provider(crypto_mechanism_t *,
+ crypto_mechanism_t *, kcf_mech_entry_t **, crypto_mech_type_t *,
+ crypto_mech_type_t *, int *, kcf_prov_tried_t *,
+ crypto_func_group_t, crypto_func_group_t, boolean_t, size_t);
+extern crypto_ctx_t *kcf_new_ctx(crypto_call_req_t *, kcf_provider_desc_t *,
+ crypto_session_id_t);
+extern int kcf_submit_request(kcf_provider_desc_t *, crypto_ctx_t *,
+ crypto_call_req_t *, kcf_req_params_t *, boolean_t);
+extern void kcf_sched_destroy(void);
+extern void kcf_sched_init(void);
+extern void kcf_sched_start(void);
+extern void kcf_sop_done(kcf_sreq_node_t *, int);
+extern void kcf_aop_done(kcf_areq_node_t *, int);
+extern int common_submit_request(kcf_provider_desc_t *,
+ crypto_ctx_t *, kcf_req_params_t *, crypto_req_handle_t);
+extern void kcf_free_context(kcf_context_t *);
+
+extern int kcf_svc_wait(int *);
+extern int kcf_svc_do_run(void);
+extern int kcf_need_signature_verification(kcf_provider_desc_t *);
+extern void kcf_verify_signature(void *);
+extern struct modctl *kcf_get_modctl(crypto_provider_info_t *);
+extern void verify_unverified_providers(void);
+extern void kcf_free_req(kcf_areq_node_t *areq);
+extern void crypto_bufcall_service(void);
+
+extern void kcf_walk_ntfylist(uint32_t, void *);
+extern void kcf_do_notify(kcf_provider_desc_t *, boolean_t);
+
+extern kcf_dual_req_t *kcf_alloc_req(crypto_call_req_t *);
+extern void kcf_next_req(void *, int);
+extern void kcf_last_req(void *, int);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_CRYPTO_SCHED_IMPL_H */
diff --git a/zfs/module/icp/include/sys/crypto/spi.h b/zfs/module/icp/include/sys/crypto/spi.h
new file mode 100644
index 000000000000..0aae9181adc7
--- /dev/null
+++ b/zfs/module/icp/include/sys/crypto/spi.h
@@ -0,0 +1,726 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_CRYPTO_SPI_H
+#define _SYS_CRYPTO_SPI_H
+
+/*
+ * CSPI: Cryptographic Service Provider Interface.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef CONSTIFY_PLUGIN
+#define __no_const __attribute__((no_const))
+#else
+#define __no_const
+#endif /* CONSTIFY_PLUGIN */
+
+#define CRYPTO_SPI_VERSION_1 1
+#define CRYPTO_SPI_VERSION_2 2
+#define CRYPTO_SPI_VERSION_3 3
+
+/*
+ * Provider-private handle. This handle is specified by a provider
+ * when it registers by means of the pi_provider_handle field of
+ * the crypto_provider_info structure, and passed to the provider
+ * when its entry points are invoked.
+ */
+typedef void *crypto_provider_handle_t;
+
+/*
+ * Context templates can be used by software providers to pre-process
+ * keying material, such as key schedules. They are allocated by
+ * a software provider's create_ctx_template(9E) entry point, and passed
+ * as an argument to initialization and atomic provider entry points.
+ */
+typedef void *crypto_spi_ctx_template_t;
+
+/*
+ * Request handles are used by the kernel to identify an asynchronous
+ * request being processed by a provider. A handle is passed by the kernel
+ * to a hardware provider when submitting a request, and must be
+ * specified by the provider when calling crypto_op_notification(9F).
+ */
+typedef void *crypto_req_handle_t;
+
+/* Values for cc_flags field */
+#define CRYPTO_INIT_OPSTATE 0x00000001 /* allocate and init cc_opstate */
+#define CRYPTO_USE_OPSTATE 0x00000002 /* .. start using it as context */
+
+/*
+ * The context structure is passed from the kernel to a provider.
+ * It contains the information needed to process a multi-part or
+ * single part operation. The context structure is not used
+ * by atomic operations.
+ *
+ * Parameters needed to perform a cryptographic operation, such
+ * as keys, mechanisms, input and output buffers, are passed
+ * as separate arguments to Provider routines.
+ */
+typedef struct crypto_ctx {
+ crypto_provider_handle_t cc_provider;
+ crypto_session_id_t cc_session;
+ void *cc_provider_private; /* owned by provider */
+ void *cc_framework_private; /* owned by framework */
+ uint32_t cc_flags; /* flags */
+ void *cc_opstate; /* state */
+} crypto_ctx_t;
+
+/*
+ * Extended provider information.
+ */
+
+/*
+ * Valid values for the ei_flags field of the extended info structure.
+ * They match the RSA Security, Inc. PKCS#11 tokenInfo flags.
+ */
+#define CRYPTO_EXTF_RNG 0x00000001
+#define CRYPTO_EXTF_WRITE_PROTECTED 0x00000002
+#define CRYPTO_EXTF_LOGIN_REQUIRED 0x00000004
+#define CRYPTO_EXTF_USER_PIN_INITIALIZED 0x00000008
+#define CRYPTO_EXTF_CLOCK_ON_TOKEN 0x00000040
+#define CRYPTO_EXTF_PROTECTED_AUTHENTICATION_PATH 0x00000100
+#define CRYPTO_EXTF_DUAL_CRYPTO_OPERATIONS 0x00000200
+#define CRYPTO_EXTF_TOKEN_INITIALIZED 0x00000400
+#define CRYPTO_EXTF_USER_PIN_COUNT_LOW 0x00010000
+#define CRYPTO_EXTF_USER_PIN_FINAL_TRY 0x00020000
+#define CRYPTO_EXTF_USER_PIN_LOCKED 0x00040000
+#define CRYPTO_EXTF_USER_PIN_TO_BE_CHANGED 0x00080000
+#define CRYPTO_EXTF_SO_PIN_COUNT_LOW 0x00100000
+#define CRYPTO_EXTF_SO_PIN_FINAL_TRY 0x00200000
+#define CRYPTO_EXTF_SO_PIN_LOCKED 0x00400000
+#define CRYPTO_EXTF_SO_PIN_TO_BE_CHANGED 0x00800000
+
+/*
+ * The crypto_control_ops structure contains pointers to control
+ * operations for cryptographic providers. It is passed through
+ * the crypto_ops(9S) structure when providers register with the
+ * kernel using crypto_register_provider(9F).
+ */
+typedef struct crypto_control_ops {
+ void (*provider_status)(crypto_provider_handle_t, uint_t *);
+} __no_const crypto_control_ops_t;
+
+/*
+ * The crypto_ctx_ops structure contains pointers to context and
+ * context-template management operations for cryptographic providers. It is
+ * passed through the crypto_ops(9S) structure when providers register
+ * with the kernel using crypto_register_provider(9F).
+ */
+typedef struct crypto_ctx_ops {
+ int (*create_ctx_template)(crypto_provider_handle_t,
+ crypto_mechanism_t *, crypto_key_t *,
+ crypto_spi_ctx_template_t *, size_t *, crypto_req_handle_t);
+ int (*free_context)(crypto_ctx_t *);
+} __no_const crypto_ctx_ops_t;
+
+/*
+ * The crypto_digest_ops structure contains pointers to digest
+ * operations for cryptographic providers. It is passed through
+ * the crypto_ops(9S) structure when providers register with the
+ * kernel using crypto_register_provider(9F).
+ */
+typedef struct crypto_digest_ops {
+ int (*digest_init)(crypto_ctx_t *, crypto_mechanism_t *,
+ crypto_req_handle_t);
+ int (*digest)(crypto_ctx_t *, crypto_data_t *, crypto_data_t *,
+ crypto_req_handle_t);
+ int (*digest_update)(crypto_ctx_t *, crypto_data_t *,
+ crypto_req_handle_t);
+ int (*digest_key)(crypto_ctx_t *, crypto_key_t *, crypto_req_handle_t);
+ int (*digest_final)(crypto_ctx_t *, crypto_data_t *,
+ crypto_req_handle_t);
+ int (*digest_atomic)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_data_t *,
+ crypto_data_t *, crypto_req_handle_t);
+} __no_const crypto_digest_ops_t;
+
+/*
+ * The crypto_cipher_ops structure contains pointers to encryption
+ * and decryption operations for cryptographic providers. It is
+ * passed through the crypto_ops(9S) structure when providers register
+ * with the kernel using crypto_register_provider(9F).
+ */
+typedef struct crypto_cipher_ops {
+ int (*encrypt_init)(crypto_ctx_t *,
+ crypto_mechanism_t *, crypto_key_t *,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+ int (*encrypt)(crypto_ctx_t *,
+ crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
+ int (*encrypt_update)(crypto_ctx_t *,
+ crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
+ int (*encrypt_final)(crypto_ctx_t *,
+ crypto_data_t *, crypto_req_handle_t);
+ int (*encrypt_atomic)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
+ crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t);
+
+ int (*decrypt_init)(crypto_ctx_t *,
+ crypto_mechanism_t *, crypto_key_t *,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+ int (*decrypt)(crypto_ctx_t *,
+ crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
+ int (*decrypt_update)(crypto_ctx_t *,
+ crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
+ int (*decrypt_final)(crypto_ctx_t *,
+ crypto_data_t *, crypto_req_handle_t);
+ int (*decrypt_atomic)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
+ crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t);
+} __no_const crypto_cipher_ops_t;
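+
+/*
+ * Editorial illustration, not part of the upstream header: a provider's
+ * encrypt_init(9E) entry point typically allocates per-operation state
+ * and stashes it in cc_provider_private. The example_ names and state
+ * type are invented; crypto_kmflag(9S) supplies the allocation flag
+ * for the request.
+ *
+ *	static int
+ *	example_encrypt_init(crypto_ctx_t *ctx, crypto_mechanism_t *mech,
+ *	    crypto_key_t *key, crypto_spi_ctx_template_t tmpl,
+ *	    crypto_req_handle_t req)
+ *	{
+ *		example_state_t *st;
+ *
+ *		st = kmem_alloc(sizeof (*st), crypto_kmflag(req));
+ *		if (st == NULL)
+ *			return (CRYPTO_HOST_MEMORY);
+ *		ctx->cc_provider_private = st;	// owned by provider
+ *		return (CRYPTO_SUCCESS);
+ *	}
+ */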
+
+/*
+ * The crypto_mac_ops structure contains pointers to MAC
+ * operations for cryptographic providers. It is passed through
+ * the crypto_ops(9S) structure when providers register with the
+ * kernel using crypto_register_provider(9F).
+ */
+typedef struct crypto_mac_ops {
+ int (*mac_init)(crypto_ctx_t *,
+ crypto_mechanism_t *, crypto_key_t *,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+ int (*mac)(crypto_ctx_t *,
+ crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
+ int (*mac_update)(crypto_ctx_t *,
+ crypto_data_t *, crypto_req_handle_t);
+ int (*mac_final)(crypto_ctx_t *,
+ crypto_data_t *, crypto_req_handle_t);
+ int (*mac_atomic)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
+ crypto_data_t *, crypto_spi_ctx_template_t,
+ crypto_req_handle_t);
+ int (*mac_verify_atomic)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
+ crypto_data_t *, crypto_spi_ctx_template_t,
+ crypto_req_handle_t);
+} __no_const crypto_mac_ops_t;
+
+/*
+ * The crypto_sign_ops structure contains pointers to signing
+ * operations for cryptographic providers. It is passed through
+ * the crypto_ops(9S) structure when providers register with the
+ * kernel using crypto_register_provider(9F).
+ */
+typedef struct crypto_sign_ops {
+ int (*sign_init)(crypto_ctx_t *,
+ crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t,
+ crypto_req_handle_t);
+ int (*sign)(crypto_ctx_t *,
+ crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
+ int (*sign_update)(crypto_ctx_t *,
+ crypto_data_t *, crypto_req_handle_t);
+ int (*sign_final)(crypto_ctx_t *,
+ crypto_data_t *, crypto_req_handle_t);
+ int (*sign_atomic)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
+ crypto_data_t *, crypto_spi_ctx_template_t,
+ crypto_req_handle_t);
+ int (*sign_recover_init)(crypto_ctx_t *, crypto_mechanism_t *,
+ crypto_key_t *, crypto_spi_ctx_template_t,
+ crypto_req_handle_t);
+ int (*sign_recover)(crypto_ctx_t *,
+ crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
+ int (*sign_recover_atomic)(crypto_provider_handle_t,
+ crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *,
+ crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t,
+ crypto_req_handle_t);
+} __no_const crypto_sign_ops_t;
+
+/*
+ * The crypto_verify_ops structure contains pointers to verify
+ * operations for cryptographic providers. It is passed through
+ * the crypto_ops(9S) structure when providers register with the
+ * kernel using crypto_register_provider(9F).
+ */
+typedef struct crypto_verify_ops {
+ int (*verify_init)(crypto_ctx_t *,
+ crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t,
+ crypto_req_handle_t);
+ int (*do_verify)(crypto_ctx_t *,
+ crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
+ int (*verify_update)(crypto_ctx_t *,
+ crypto_data_t *, crypto_req_handle_t);
+ int (*verify_final)(crypto_ctx_t *,
+ crypto_data_t *, crypto_req_handle_t);
+ int (*verify_atomic)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
+ crypto_data_t *, crypto_spi_ctx_template_t,
+ crypto_req_handle_t);
+ int (*verify_recover_init)(crypto_ctx_t *, crypto_mechanism_t *,
+ crypto_key_t *, crypto_spi_ctx_template_t,
+ crypto_req_handle_t);
+ int (*verify_recover)(crypto_ctx_t *,
+ crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
+ int (*verify_recover_atomic)(crypto_provider_handle_t,
+ crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *,
+ crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t,
+ crypto_req_handle_t);
+} __no_const crypto_verify_ops_t;
+
+/*
+ * The crypto_dual_ops structure contains pointers to dual
+ * cipher and sign/verify operations for cryptographic providers.
+ * It is passed through the crypto_ops(9S) structure when
+ * providers register with the kernel using
+ * crypto_register_provider(9F).
+ */
+typedef struct crypto_dual_ops {
+ int (*digest_encrypt_update)(
+ crypto_ctx_t *, crypto_ctx_t *, crypto_data_t *,
+ crypto_data_t *, crypto_req_handle_t);
+ int (*decrypt_digest_update)(
+ crypto_ctx_t *, crypto_ctx_t *, crypto_data_t *,
+ crypto_data_t *, crypto_req_handle_t);
+ int (*sign_encrypt_update)(
+ crypto_ctx_t *, crypto_ctx_t *, crypto_data_t *,
+ crypto_data_t *, crypto_req_handle_t);
+ int (*decrypt_verify_update)(
+ crypto_ctx_t *, crypto_ctx_t *, crypto_data_t *,
+ crypto_data_t *, crypto_req_handle_t);
+} __no_const crypto_dual_ops_t;
+
+/*
+ * The crypto_dual_cipher_mac_ops structure contains pointers to dual
+ * cipher and MAC operations for cryptographic providers.
+ * It is passed through the crypto_ops(9S) structure when
+ * providers register with the kernel using
+ * crypto_register_provider(9F).
+ */
+typedef struct crypto_dual_cipher_mac_ops {
+ int (*encrypt_mac_init)(crypto_ctx_t *,
+ crypto_mechanism_t *, crypto_key_t *, crypto_mechanism_t *,
+ crypto_key_t *, crypto_spi_ctx_template_t,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+ int (*encrypt_mac)(crypto_ctx_t *,
+ crypto_data_t *, crypto_dual_data_t *, crypto_data_t *,
+ crypto_req_handle_t);
+ int (*encrypt_mac_update)(crypto_ctx_t *,
+ crypto_data_t *, crypto_dual_data_t *, crypto_req_handle_t);
+ int (*encrypt_mac_final)(crypto_ctx_t *,
+ crypto_dual_data_t *, crypto_data_t *, crypto_req_handle_t);
+ int (*encrypt_mac_atomic)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_mechanism_t *,
+ crypto_key_t *, crypto_data_t *, crypto_dual_data_t *,
+ crypto_data_t *, crypto_spi_ctx_template_t,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+
+ int (*mac_decrypt_init)(crypto_ctx_t *,
+ crypto_mechanism_t *, crypto_key_t *, crypto_mechanism_t *,
+ crypto_key_t *, crypto_spi_ctx_template_t,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+ int (*mac_decrypt)(crypto_ctx_t *,
+ crypto_dual_data_t *, crypto_data_t *, crypto_data_t *,
+ crypto_req_handle_t);
+ int (*mac_decrypt_update)(crypto_ctx_t *,
+ crypto_dual_data_t *, crypto_data_t *, crypto_req_handle_t);
+ int (*mac_decrypt_final)(crypto_ctx_t *,
+ crypto_data_t *, crypto_data_t *, crypto_req_handle_t);
+ int (*mac_decrypt_atomic)(crypto_provider_handle_t,
+ crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *,
+ crypto_mechanism_t *, crypto_key_t *, crypto_dual_data_t *,
+ crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+ int (*mac_verify_decrypt_atomic)(crypto_provider_handle_t,
+ crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *,
+ crypto_mechanism_t *, crypto_key_t *, crypto_dual_data_t *,
+ crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+} __no_const crypto_dual_cipher_mac_ops_t;
+
+/*
+ * The crypto_random_number_ops structure contains pointers to random
+ * number operations for cryptographic providers. It is passed through
+ * the crypto_ops(9S) structure when providers register with the
+ * kernel using crypto_register_provider(9F).
+ */
+typedef struct crypto_random_number_ops {
+ int (*seed_random)(crypto_provider_handle_t, crypto_session_id_t,
+ uchar_t *, size_t, uint_t, uint32_t, crypto_req_handle_t);
+ int (*generate_random)(crypto_provider_handle_t, crypto_session_id_t,
+ uchar_t *, size_t, crypto_req_handle_t);
+} __no_const crypto_random_number_ops_t;
+
+/*
+ * Flag values for seed_random.
+ */
+#define CRYPTO_SEED_NOW 0x00000001
+
+/*
+ * The crypto_session_ops structure contains pointers to session
+ * operations for cryptographic providers. It is passed through
+ * the crypto_ops(9S) structure when providers register with the
+ * kernel using crypto_register_provider(9F).
+ */
+typedef struct crypto_session_ops {
+ int (*session_open)(crypto_provider_handle_t, crypto_session_id_t *,
+ crypto_req_handle_t);
+ int (*session_close)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_req_handle_t);
+ int (*session_login)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_user_type_t, char *, size_t, crypto_req_handle_t);
+ int (*session_logout)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_req_handle_t);
+} __no_const crypto_session_ops_t;
+
+/*
+ * The crypto_object_ops structure contains pointers to object
+ * operations for cryptographic providers. It is passed through
+ * the crypto_ops(9S) structure when providers register with the
+ * kernel using crypto_register_provider(9F).
+ */
+typedef struct crypto_object_ops {
+ int (*object_create)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_object_attribute_t *, uint_t, crypto_object_id_t *,
+ crypto_req_handle_t);
+ int (*object_copy)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_object_id_t, crypto_object_attribute_t *, uint_t,
+ crypto_object_id_t *, crypto_req_handle_t);
+ int (*object_destroy)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_object_id_t, crypto_req_handle_t);
+ int (*object_get_size)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_object_id_t, size_t *, crypto_req_handle_t);
+ int (*object_get_attribute_value)(crypto_provider_handle_t,
+ crypto_session_id_t, crypto_object_id_t,
+ crypto_object_attribute_t *, uint_t, crypto_req_handle_t);
+ int (*object_set_attribute_value)(crypto_provider_handle_t,
+ crypto_session_id_t, crypto_object_id_t,
+ crypto_object_attribute_t *, uint_t, crypto_req_handle_t);
+ int (*object_find_init)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_object_attribute_t *, uint_t, void **,
+ crypto_req_handle_t);
+ int (*object_find)(crypto_provider_handle_t, void *,
+ crypto_object_id_t *, uint_t, uint_t *, crypto_req_handle_t);
+ int (*object_find_final)(crypto_provider_handle_t, void *,
+ crypto_req_handle_t);
+} __no_const crypto_object_ops_t;
+
+/*
+ * The crypto_key_ops structure contains pointers to key
+ * operations for cryptographic providers. It is passed through
+ * the crypto_ops(9S) structure when providers register with the
+ * kernel using crypto_register_provider(9F).
+ */
+typedef struct crypto_key_ops {
+ int (*key_generate)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_object_attribute_t *, uint_t,
+ crypto_object_id_t *, crypto_req_handle_t);
+ int (*key_generate_pair)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_object_attribute_t *, uint_t,
+ crypto_object_attribute_t *, uint_t, crypto_object_id_t *,
+ crypto_object_id_t *, crypto_req_handle_t);
+ int (*key_wrap)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_object_id_t *,
+ uchar_t *, size_t *, crypto_req_handle_t);
+ int (*key_unwrap)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, uchar_t *, size_t *,
+ crypto_object_attribute_t *, uint_t,
+ crypto_object_id_t *, crypto_req_handle_t);
+ int (*key_derive)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_object_attribute_t *,
+ uint_t, crypto_object_id_t *, crypto_req_handle_t);
+ int (*key_check)(crypto_provider_handle_t, crypto_mechanism_t *,
+ crypto_key_t *);
+} __no_const crypto_key_ops_t;
+
+/*
+ * The crypto_provider_management_ops structure contains pointers
+ * to management operations for cryptographic providers. It is passed
+ * through the crypto_ops(9S) structure when providers register with the
+ * kernel using crypto_register_provider(9F).
+ */
+typedef struct crypto_provider_management_ops {
+ int (*ext_info)(crypto_provider_handle_t,
+ crypto_provider_ext_info_t *, crypto_req_handle_t);
+ int (*init_token)(crypto_provider_handle_t, char *, size_t,
+ char *, crypto_req_handle_t);
+ int (*init_pin)(crypto_provider_handle_t, crypto_session_id_t,
+ char *, size_t, crypto_req_handle_t);
+ int (*set_pin)(crypto_provider_handle_t, crypto_session_id_t,
+ char *, size_t, char *, size_t, crypto_req_handle_t);
+} __no_const crypto_provider_management_ops_t;
+
+typedef struct crypto_mech_ops {
+ int (*copyin_mechanism)(crypto_provider_handle_t,
+ crypto_mechanism_t *, crypto_mechanism_t *, int *, int);
+ int (*copyout_mechanism)(crypto_provider_handle_t,
+ crypto_mechanism_t *, crypto_mechanism_t *, int *, int);
+ int (*free_mechanism)(crypto_provider_handle_t, crypto_mechanism_t *);
+} __no_const crypto_mech_ops_t;
+
+typedef struct crypto_nostore_key_ops {
+ int (*nostore_key_generate)(crypto_provider_handle_t,
+ crypto_session_id_t, crypto_mechanism_t *,
+ crypto_object_attribute_t *, uint_t, crypto_object_attribute_t *,
+ uint_t, crypto_req_handle_t);
+ int (*nostore_key_generate_pair)(crypto_provider_handle_t,
+ crypto_session_id_t, crypto_mechanism_t *,
+ crypto_object_attribute_t *, uint_t, crypto_object_attribute_t *,
+ uint_t, crypto_object_attribute_t *, uint_t,
+ crypto_object_attribute_t *, uint_t, crypto_req_handle_t);
+ int (*nostore_key_derive)(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_object_attribute_t *,
+ uint_t, crypto_object_attribute_t *, uint_t, crypto_req_handle_t);
+} __no_const crypto_nostore_key_ops_t;
+
+/*
+ * The crypto_ops(9S) structure contains the structures containing
+ * the pointers to functions implemented by cryptographic providers.
+ * It is specified as part of the crypto_provider_info(9S)
+ * supplied by a provider when it registers with the kernel
+ * by calling crypto_register_provider(9F).
+ */
+typedef struct crypto_ops_v1 {
+ crypto_control_ops_t *co_control_ops;
+ crypto_digest_ops_t *co_digest_ops;
+ crypto_cipher_ops_t *co_cipher_ops;
+ crypto_mac_ops_t *co_mac_ops;
+ crypto_sign_ops_t *co_sign_ops;
+ crypto_verify_ops_t *co_verify_ops;
+ crypto_dual_ops_t *co_dual_ops;
+ crypto_dual_cipher_mac_ops_t *co_dual_cipher_mac_ops;
+ crypto_random_number_ops_t *co_random_ops;
+ crypto_session_ops_t *co_session_ops;
+ crypto_object_ops_t *co_object_ops;
+ crypto_key_ops_t *co_key_ops;
+ crypto_provider_management_ops_t *co_provider_ops;
+ crypto_ctx_ops_t *co_ctx_ops;
+} crypto_ops_v1_t;
+
+typedef struct crypto_ops_v2 {
+ crypto_ops_v1_t v1_ops;
+ crypto_mech_ops_t *co_mech_ops;
+} crypto_ops_v2_t;
+
+typedef struct crypto_ops_v3 {
+ crypto_ops_v2_t v2_ops;
+ crypto_nostore_key_ops_t *co_nostore_key_ops;
+} crypto_ops_v3_t;
+
+typedef struct crypto_ops {
+ union {
+ crypto_ops_v3_t cou_v3;
+ crypto_ops_v2_t cou_v2;
+ crypto_ops_v1_t cou_v1;
+ } cou;
+} crypto_ops_t;
+
+#define co_control_ops cou.cou_v1.co_control_ops
+#define co_digest_ops cou.cou_v1.co_digest_ops
+#define co_cipher_ops cou.cou_v1.co_cipher_ops
+#define co_mac_ops cou.cou_v1.co_mac_ops
+#define co_sign_ops cou.cou_v1.co_sign_ops
+#define co_verify_ops cou.cou_v1.co_verify_ops
+#define co_dual_ops cou.cou_v1.co_dual_ops
+#define co_dual_cipher_mac_ops cou.cou_v1.co_dual_cipher_mac_ops
+#define co_random_ops cou.cou_v1.co_random_ops
+#define co_session_ops cou.cou_v1.co_session_ops
+#define co_object_ops cou.cou_v1.co_object_ops
+#define co_key_ops cou.cou_v1.co_key_ops
+#define co_provider_ops cou.cou_v1.co_provider_ops
+#define co_ctx_ops cou.cou_v1.co_ctx_ops
+#define co_mech_ops cou.cou_v2.co_mech_ops
+#define co_nostore_key_ops cou.cou_v3.co_nostore_key_ops
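+
+/*
+ * Editorial illustration of the aliases above (sketch; the example_
+ * vectors are invented): a provider can populate any version's slots
+ * through the flat co_* names, which resolve into the nested union.
+ *
+ *	static crypto_ops_t example_ops;
+ *
+ *	example_ops.co_digest_ops = &example_digest_ops;	// v1 slot
+ *	example_ops.co_mech_ops = NULL;				// v2 slot
+ *	example_ops.co_nostore_key_ops = NULL;			// v3 slot
+ */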
+
+/*
+ * The mechanism info structure crypto_mech_info_t contains a function group
+ * bit mask cm_func_group_mask. This field, of type crypto_func_group_t,
+ * specifies the provider entry points that can be used with a particular
+ * mechanism. The function group mask is a combination of the following values.
+ */
+
+typedef uint32_t crypto_func_group_t;
+
+
+#define CRYPTO_FG_ENCRYPT 0x00000001 /* encrypt_init() */
+#define CRYPTO_FG_DECRYPT 0x00000002 /* decrypt_init() */
+#define CRYPTO_FG_DIGEST 0x00000004 /* digest_init() */
+#define CRYPTO_FG_SIGN 0x00000008 /* sign_init() */
+#define CRYPTO_FG_SIGN_RECOVER 0x00000010 /* sign_recover_init() */
+#define CRYPTO_FG_VERIFY 0x00000020 /* verify_init() */
+#define CRYPTO_FG_VERIFY_RECOVER 0x00000040 /* verify_recover_init() */
+#define CRYPTO_FG_GENERATE 0x00000080 /* key_generate() */
+#define CRYPTO_FG_GENERATE_KEY_PAIR 0x00000100 /* key_generate_pair() */
+#define CRYPTO_FG_WRAP 0x00000200 /* key_wrap() */
+#define CRYPTO_FG_UNWRAP 0x00000400 /* key_unwrap() */
+#define CRYPTO_FG_DERIVE 0x00000800 /* key_derive() */
+#define CRYPTO_FG_MAC 0x00001000 /* mac_init() */
+#define CRYPTO_FG_ENCRYPT_MAC 0x00002000 /* encrypt_mac_init() */
+#define CRYPTO_FG_MAC_DECRYPT 0x00004000 /* decrypt_mac_init() */
+#define CRYPTO_FG_ENCRYPT_ATOMIC 0x00008000 /* encrypt_atomic() */
+#define CRYPTO_FG_DECRYPT_ATOMIC 0x00010000 /* decrypt_atomic() */
+#define CRYPTO_FG_MAC_ATOMIC 0x00020000 /* mac_atomic() */
+#define CRYPTO_FG_DIGEST_ATOMIC 0x00040000 /* digest_atomic() */
+#define CRYPTO_FG_SIGN_ATOMIC 0x00080000 /* sign_atomic() */
+#define CRYPTO_FG_SIGN_RECOVER_ATOMIC 0x00100000 /* sign_recover_atomic() */
+#define CRYPTO_FG_VERIFY_ATOMIC 0x00200000 /* verify_atomic() */
+#define CRYPTO_FG_VERIFY_RECOVER_ATOMIC 0x00400000 /* verify_recover_atomic() */
+#define CRYPTO_FG_ENCRYPT_MAC_ATOMIC 0x00800000 /* encrypt_mac_atomic() */
+#define CRYPTO_FG_MAC_DECRYPT_ATOMIC 0x01000000 /* mac_decrypt_atomic() */
+#define CRYPTO_FG_RESERVED 0x80000000
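+
+/*
+ * Usage sketch (illustrative, not part of the original interface):
+ * given a crypto_mech_info_t *mi, a consumer checks whether a provider
+ * implements an entry point for a mechanism with a bitwise AND, e.g.
+ *
+ *	if (mi->cm_func_group_mask & CRYPTO_FG_ENCRYPT_ATOMIC)
+ *		the provider supports encrypt_atomic() for this mechanism
+ */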
+
+/*
+ * Maximum length of the pi_provider_description field of the
+ * crypto_provider_info structure.
+ */
+#define CRYPTO_PROVIDER_DESCR_MAX_LEN 64
+
+
+/* Bit mask for all the simple operations */
+#define CRYPTO_FG_SIMPLEOP_MASK (CRYPTO_FG_ENCRYPT | CRYPTO_FG_DECRYPT | \
+ CRYPTO_FG_DIGEST | CRYPTO_FG_SIGN | CRYPTO_FG_VERIFY | CRYPTO_FG_MAC | \
+ CRYPTO_FG_ENCRYPT_ATOMIC | CRYPTO_FG_DECRYPT_ATOMIC | \
+ CRYPTO_FG_MAC_ATOMIC | CRYPTO_FG_DIGEST_ATOMIC | CRYPTO_FG_SIGN_ATOMIC | \
+ CRYPTO_FG_VERIFY_ATOMIC)
+
+/* Bit mask for all the dual operations */
+#define CRYPTO_FG_MAC_CIPHER_MASK (CRYPTO_FG_ENCRYPT_MAC | \
+ CRYPTO_FG_MAC_DECRYPT | CRYPTO_FG_ENCRYPT_MAC_ATOMIC | \
+ CRYPTO_FG_MAC_DECRYPT_ATOMIC)
+
+/* Add other combos to CRYPTO_FG_DUAL_MASK */
+#define CRYPTO_FG_DUAL_MASK CRYPTO_FG_MAC_CIPHER_MASK
+
+/*
+ * The crypto_mech_info structure specifies one of the mechanisms
+ * supported by a cryptographic provider. The pi_mechanisms field of
+ * the crypto_provider_info structure contains a pointer to an array
+ * of crypto_mech_info's.
+ */
+typedef struct crypto_mech_info {
+ crypto_mech_name_t cm_mech_name;
+ crypto_mech_type_t cm_mech_number;
+ crypto_func_group_t cm_func_group_mask;
+ ssize_t cm_min_key_length;
+ ssize_t cm_max_key_length;
+ uint32_t cm_mech_flags;
+} crypto_mech_info_t;
+
+/* Alias the old name to the new name for compatibility. */
+#define cm_keysize_unit cm_mech_flags
+
+/*
+ * The following is used by a provider that sets
+ * CRYPTO_HASH_NO_UPDATE. Such a provider uses this field to specify
+ * the maximum input data size it can digest.
+ */
+#define cm_max_input_length cm_max_key_length
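+
+/*
+ * Illustrative sketch (the values below are hypothetical): a digest
+ * provider that sets CRYPTO_HASH_NO_UPDATE fills in the alias above,
+ * e.g.
+ *
+ *	mi.cm_func_group_mask = CRYPTO_FG_DIGEST_ATOMIC;
+ *	mi.cm_max_input_length = 64 * 1024;
+ */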
+
+/*
+ * crypto_kcf_provider_handle_t is a handle allocated by the kernel.
+ * It is returned after the provider registers with
+ * crypto_register_provider(), and must be specified by the provider
+ * when calling crypto_unregister_provider() and
+ * crypto_provider_notification().
+ */
+typedef uint_t crypto_kcf_provider_handle_t;
+
+/*
+ * Provider information. Passed as argument to crypto_register_provider(9F).
+ * Describes the provider and its capabilities. Multiple providers can
+ * register for the same device instance. In this case, the same
+ * pi_provider_dev must be specified with a different pi_provider_handle.
+ */
+typedef struct crypto_provider_info_v1 {
+ uint_t pi_interface_version;
+ char *pi_provider_description;
+ crypto_provider_type_t pi_provider_type;
+ crypto_provider_handle_t pi_provider_handle;
+ crypto_ops_t *pi_ops_vector;
+ uint_t pi_mech_list_count;
+ crypto_mech_info_t *pi_mechanisms;
+ uint_t pi_logical_provider_count;
+ crypto_kcf_provider_handle_t *pi_logical_providers;
+} crypto_provider_info_v1_t;
+
+typedef struct crypto_provider_info_v2 {
+ crypto_provider_info_v1_t v1_info;
+ uint_t pi_flags;
+} crypto_provider_info_v2_t;
+
+typedef struct crypto_provider_info {
+ union {
+ crypto_provider_info_v2_t piu_v2;
+ crypto_provider_info_v1_t piu_v1;
+ } piu;
+} crypto_provider_info_t;
+
+#define pi_interface_version piu.piu_v1.pi_interface_version
+#define pi_provider_description piu.piu_v1.pi_provider_description
+#define pi_provider_type piu.piu_v1.pi_provider_type
+#define pi_provider_handle piu.piu_v1.pi_provider_handle
+#define pi_ops_vector piu.piu_v1.pi_ops_vector
+#define pi_mech_list_count piu.piu_v1.pi_mech_list_count
+#define pi_mechanisms piu.piu_v1.pi_mechanisms
+#define pi_logical_provider_count piu.piu_v1.pi_logical_provider_count
+#define pi_logical_providers piu.piu_v1.pi_logical_providers
+#define pi_flags piu.piu_v2.pi_flags
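+
+/*
+ * Note: pi_flags lives in the v2 layout of the union above, so it is
+ * only meaningful when pi_interface_version selects interface version 2
+ * or later; a version-1 provider (such as the AES provider added later
+ * in this patch) never sets it.
+ */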
+
+/* hidden providers can only be accessed via a logical provider */
+#define CRYPTO_HIDE_PROVIDER 0x00000001
+/*
+ * provider cannot do multi-part digests (updates) and has a limit
+ * on the maximum input data size that it can digest.
+ */
+#define CRYPTO_HASH_NO_UPDATE 0x00000002
+
+/* provider can handle the request without returning CRYPTO_QUEUED */
+#define CRYPTO_SYNCHRONOUS 0x00000004
+
+#define CRYPTO_PIFLAGS_RESERVED2 0x40000000
+#define CRYPTO_PIFLAGS_RESERVED1 0x80000000
+
+/*
+ * Provider status passed by a provider to crypto_provider_notification(9F)
+ * and returned by the provider_status(9E) entry point.
+ */
+#define CRYPTO_PROVIDER_READY 0
+#define CRYPTO_PROVIDER_BUSY 1
+#define CRYPTO_PROVIDER_FAILED 2
+
+/*
+ * Functions exported by Solaris to cryptographic providers. Providers
+ * call these functions to register and unregister, notify the kernel
+ * of state changes, and notify the kernel when an asynchronous request
+ * has completed.
+ */
+extern int crypto_register_provider(crypto_provider_info_t *,
+ crypto_kcf_provider_handle_t *);
+extern int crypto_unregister_provider(crypto_kcf_provider_handle_t);
+extern void crypto_provider_notification(crypto_kcf_provider_handle_t, uint_t);
+extern void crypto_op_notification(crypto_req_handle_t, int);
+extern int crypto_kmflag(crypto_req_handle_t);
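+
+/*
+ * Usage sketch (illustrative): a provider typically registers at module
+ * load time and unregisters at unload:
+ *
+ *	static crypto_kcf_provider_handle_t handle = 0;
+ *
+ *	if (crypto_register_provider(&prov_info, &handle) != CRYPTO_SUCCESS)
+ *		return (EACCES);
+ *	...
+ *	if (crypto_unregister_provider(handle) != CRYPTO_SUCCESS)
+ *		return (EBUSY);
+ */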
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_CRYPTO_SPI_H */
diff --git a/zfs/module/icp/include/sys/ia32/asm_linkage.h b/zfs/module/icp/include/sys/ia32/asm_linkage.h
new file mode 100644
index 000000000000..f2dae7093b94
--- /dev/null
+++ b/zfs/module/icp/include/sys/ia32/asm_linkage.h
@@ -0,0 +1,307 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _IA32_SYS_ASM_LINKAGE_H
+#define _IA32_SYS_ASM_LINKAGE_H
+
+#include <sys/stack.h>
+#include <sys/trap.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _ASM /* The remainder of this file is only for assembly files */
+
+/*
+ * make annoying differences in assembler syntax go away
+ */
+
+/*
+ * D16 and A16 are used to insert instructions prefixes; the
+ * macros help the assembler code be slightly more portable.
+ */
+#if !defined(__GNUC_AS__)
+/*
+ * /usr/ccs/bin/as prefixes are parsed as separate instructions
+ */
+#define D16 data16;
+#define A16 addr16;
+
+/*
+ * (There are some weird constructs in constant expressions)
+ */
+#define _CONST(const) [const]
+#define _BITNOT(const) -1!_CONST(const)
+#define _MUL(a, b) _CONST(a \* b)
+
+#else
+/*
+ * Why not use the 'data16' and 'addr16' prefixes .. well, the
+ * assembler doesn't quite believe in real mode, and thus argues with
+ * us about what we're trying to do.
+ */
+#define D16 .byte 0x66;
+#define A16 .byte 0x67;
+
+#define _CONST(const) (const)
+#define _BITNOT(const) ~_CONST(const)
+#define _MUL(a, b) _CONST(a * b)
+
+#endif
+
+/*
+ * C pointers are different sizes between i386 and amd64.
+ * These constants can be used to compute offsets into pointer arrays.
+ */
+#if defined(__amd64)
+#define CLONGSHIFT 3
+#define CLONGSIZE 8
+#define CLONGMASK 7
+#elif defined(__i386)
+#define CLONGSHIFT 2
+#define CLONGSIZE 4
+#define CLONGMASK 3
+#endif
+
+/*
+ * Since we know we're either ILP32 or LP64 ..
+ */
+#define CPTRSHIFT CLONGSHIFT
+#define CPTRSIZE CLONGSIZE
+#define CPTRMASK CLONGMASK
+
+#if CPTRSIZE != (1 << CPTRSHIFT) || CLONGSIZE != (1 << CLONGSHIFT)
+#error "inconsistent shift constants"
+#endif
+
+#if CPTRMASK != (CPTRSIZE - 1) || CLONGMASK != (CLONGSIZE - 1)
+#error "inconsistent mask constants"
+#endif
+
+#define ASM_ENTRY_ALIGN 16
+
+/*
+ * SSE register alignment and save areas
+ */
+
+#define XMM_SIZE 16
+#define XMM_ALIGN 16
+
+#if defined(__amd64)
+
+#define SAVE_XMM_PROLOG(sreg, nreg) \
+ subq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp; \
+ movq %rsp, sreg
+
+#define RSTOR_XMM_EPILOG(sreg, nreg) \
+ addq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp
+
+#elif defined(__i386)
+
+#define SAVE_XMM_PROLOG(sreg, nreg) \
+ subl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp; \
+ movl %esp, sreg; \
+ addl $XMM_ALIGN, sreg; \
+ andl $_BITNOT(XMM_ALIGN-1), sreg
+
+#define RSTOR_XMM_EPILOG(sreg, nreg) \
+ addl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp;
+
+#endif /* __i386 */
+
+/*
+ * Profiling causes MCOUNT and RTMCOUNT to be defined according to the
+ * profiling type in use.
+ */
+#ifdef GPROF
+
+#define MCOUNT(x) \
+ pushl %ebp; \
+ movl %esp, %ebp; \
+ call _mcount; \
+ popl %ebp
+
+#endif /* GPROF */
+
+#ifdef PROF
+
+#define MCOUNT(x) \
+/* CSTYLED */ \
+ .lcomm .L_/**/x/**/1, 4, 4; \
+ pushl %ebp; \
+ movl %esp, %ebp; \
+/* CSTYLED */ \
+ movl $.L_/**/x/**/1, %edx; \
+ call _mcount; \
+ popl %ebp
+
+#endif /* PROF */
+
+/*
+ * if we are not profiling, MCOUNT should be defined to nothing
+ */
+#if !defined(PROF) && !defined(GPROF)
+#define MCOUNT(x)
+#endif /* !defined(PROF) && !defined(GPROF) */
+
+#define RTMCOUNT(x) MCOUNT(x)
+
+/*
+ * Macro to define weak symbol aliases. These are similar to the ANSI-C
+ * #pragma weak _name = name
+ * except that a compiler can determine the type, while the assembler
+ * must be told. Hence, the second parameter must be the type of the
+ * symbol (e.g. function).
+ */
+#define ANSI_PRAGMA_WEAK(sym, stype) \
+/* CSTYLED */ \
+ .weak _/**/sym; \
+/* CSTYLED */ \
+ .type _/**/sym, @stype; \
+/* CSTYLED */ \
+_/**/sym = sym
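+
+/*
+ * Usage sketch (illustrative): ANSI_PRAGMA_WEAK(getcontext, function)
+ * emits ".weak _getcontext", types it as a function, and aliases
+ * _getcontext to getcontext.
+ */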
+
+/*
+ * Like ANSI_PRAGMA_WEAK(), but for unrelated names, as in:
+ * #pragma weak sym1 = sym2
+ */
+#define ANSI_PRAGMA_WEAK2(sym1, sym2, stype) \
+ .weak sym1; \
+ .type sym1, @stype; \
+sym1 = sym2
+
+/*
+ * ENTRY provides the standard procedure entry code and an easy way to
+ * insert the calls to mcount for profiling. ENTRY_NP is identical, but
+ * never calls mcount.
+ */
+#define ENTRY(x) \
+ .text; \
+ .align ASM_ENTRY_ALIGN; \
+ .globl x; \
+ .type x, @function; \
+x: MCOUNT(x)
+
+#define ENTRY_NP(x) \
+ .text; \
+ .align ASM_ENTRY_ALIGN; \
+ .globl x; \
+ .type x, @function; \
+x:
+
+#define RTENTRY(x) \
+ .text; \
+ .align ASM_ENTRY_ALIGN; \
+ .globl x; \
+ .type x, @function; \
+x: RTMCOUNT(x)
+
+/*
+ * ENTRY2 is identical to ENTRY but provides two labels for the entry point.
+ */
+#define ENTRY2(x, y) \
+ .text; \
+ .align ASM_ENTRY_ALIGN; \
+ .globl x, y; \
+ .type x, @function; \
+ .type y, @function; \
+/* CSTYLED */ \
+x: ; \
+y: MCOUNT(x)
+
+#define ENTRY_NP2(x, y) \
+ .text; \
+ .align ASM_ENTRY_ALIGN; \
+ .globl x, y; \
+ .type x, @function; \
+ .type y, @function; \
+/* CSTYLED */ \
+x: ; \
+y:
+
+
+/*
+ * ALTENTRY provides for additional entry points.
+ */
+#define ALTENTRY(x) \
+ .globl x; \
+ .type x, @function; \
+x:
+
+/*
+ * DGDEF and DGDEF2 provide global data declarations.
+ *
+ * DGDEF provides a word aligned word of storage.
+ *
+ * DGDEF2 allocates "sz" bytes of storage with **NO** alignment. This
+ * implies this macro is best used for byte arrays.
+ *
+ * DGDEF3 allocates "sz" bytes of storage with "algn" alignment.
+ */
+#define DGDEF2(name, sz) \
+ .data; \
+ .globl name; \
+ .type name, @object; \
+ .size name, sz; \
+name:
+
+#define DGDEF3(name, sz, algn) \
+ .data; \
+ .align algn; \
+ .globl name; \
+ .type name, @object; \
+ .size name, sz; \
+name:
+
+#define DGDEF(name) DGDEF3(name, 4, 4)
+
+/*
+ * SET_SIZE trails a function and sets its size in the ELF symbol table.
+ */
+#define SET_SIZE(x) \
+ .size x, [.-x]
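+
+/*
+ * Usage sketch (illustrative; my_routine is a hypothetical symbol):
+ * ENTRY and SET_SIZE are normally paired around each routine in an
+ * assembly source file, e.g.
+ *
+ *	ENTRY(my_routine)
+ *		...
+ *		ret
+ *	SET_SIZE(my_routine)
+ */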
+
+/*
+ * NWORD provides the native machine-word data directive.
+ */
+#if defined(__amd64)
+
+/*CSTYLED*/
+#define NWORD quad
+
+#elif defined(__i386)
+
+#define NWORD long
+
+#endif /* __i386 */
+
+#endif /* _ASM */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _IA32_SYS_ASM_LINKAGE_H */
diff --git a/zfs/module/icp/include/sys/ia32/stack.h b/zfs/module/icp/include/sys/ia32/stack.h
new file mode 100644
index 000000000000..c4deb7bcaf5a
--- /dev/null
+++ b/zfs/module/icp/include/sys/ia32/stack.h
@@ -0,0 +1,160 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _IA32_SYS_STACK_H
+#define _IA32_SYS_STACK_H
+
+#if !defined(_ASM)
+
+#include <sys/types.h>
+
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * In the x86 world, a stack frame looks like this:
+ *
+ * |--------------------------|
+ * 4n+8(%ebp) ->| argument word n |
+ * | ... | (Previous frame)
+ * 8(%ebp) ->| argument word 0 |
+ * |--------------------------|--------------------
+ * 4(%ebp) ->| return address |
+ * |--------------------------|
+ * 0(%ebp) ->| previous %ebp (optional) |
+ * |--------------------------|
+ * -4(%ebp) ->| unspecified | (Current frame)
+ * | ... |
+ * 0(%esp) ->| variable size |
+ * |--------------------------|
+ */
+
+/*
+ * Stack alignment macros.
+ */
+
+#define STACK_ALIGN32 4
+#define STACK_ENTRY_ALIGN32 4
+#define STACK_BIAS32 0
+#define SA32(x) (((x)+(STACK_ALIGN32-1)) & ~(STACK_ALIGN32-1))
+#define STACK_RESERVE32 0
+#define MINFRAME32 0
+
+#if defined(__amd64)
+
+/*
+ * In the amd64 world, a stack frame looks like this:
+ *
+ * |--------------------------|
+ * 8n+16(%rbp)->| argument word n |
+ * | ... | (Previous frame)
+ * 16(%rbp) ->| argument word 0 |
+ * |--------------------------|--------------------
+ * 8(%rbp) ->| return address |
+ * |--------------------------|
+ * 0(%rbp) ->| previous %rbp |
+ * |--------------------------|
+ * -8(%rbp) ->| unspecified | (Current frame)
+ * | ... |
+ * 0(%rsp) ->| variable size |
+ * |--------------------------|
+ * -128(%rsp) ->| reserved for function |
+ * |--------------------------|
+ *
+ * The end of the input argument area must be aligned on a 16-byte
+ * boundary; i.e. (%rsp - 8) % 16 == 0 at function entry.
+ *
+ * The 128-byte area beyond %rsp is considered to be reserved for
+ * functions and is NOT modified by signal handlers. It can be used
+ * to store temporary data that is not needed across function calls.
+ */
+
+/*
+ * Stack alignment macros.
+ */
+
+#define STACK_ALIGN64 16
+#define STACK_ENTRY_ALIGN64 8
+#define STACK_BIAS64 0
+#define SA64(x) (((x)+(STACK_ALIGN64-1)) & ~(STACK_ALIGN64-1))
+#define STACK_RESERVE64 128
+#define MINFRAME64 0
+
+#define STACK_ALIGN STACK_ALIGN64
+#define STACK_ENTRY_ALIGN STACK_ENTRY_ALIGN64
+#define STACK_BIAS STACK_BIAS64
+#define SA(x) SA64(x)
+#define STACK_RESERVE STACK_RESERVE64
+#define MINFRAME MINFRAME64
+
+#elif defined(__i386)
+
+#define STACK_ALIGN STACK_ALIGN32
+#define STACK_ENTRY_ALIGN STACK_ENTRY_ALIGN32
+#define STACK_BIAS STACK_BIAS32
+#define SA(x) SA32(x)
+#define STACK_RESERVE STACK_RESERVE32
+#define MINFRAME MINFRAME32
+
+#endif /* __i386 */
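+
+/*
+ * Example (illustrative): SA() rounds a byte count up to the stack
+ * alignment, so SA(24) is 32 on amd64 (16-byte alignment) and SA32(6)
+ * is 8 on i386 (4-byte alignment).
+ */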
+
+#if defined(_KERNEL) && !defined(_ASM)
+
+#if defined(DEBUG)
+#if STACK_ALIGN == 4
+#define ASSERT_STACK_ALIGNED() \
+ { \
+ uint32_t __tmp; \
+ ASSERT((((uintptr_t)&__tmp) & (STACK_ALIGN - 1)) == 0); \
+ }
+#elif (STACK_ALIGN == 16) && (_LONG_DOUBLE_ALIGNMENT == 16)
+#define ASSERT_STACK_ALIGNED() \
+ { \
+ long double __tmp; \
+ ASSERT((((uintptr_t)&__tmp) & (STACK_ALIGN - 1)) == 0); \
+ }
+#endif
+#else /* DEBUG */
+#define ASSERT_STACK_ALIGNED()
+#endif /* DEBUG */
+
+struct regs;
+
+void traceregs(struct regs *);
+void traceback(caddr_t);
+
+#endif /* defined(_KERNEL) && !defined(_ASM) */
+
+#define STACK_GROWTH_DOWN /* stacks grow from high to low addresses */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _IA32_SYS_STACK_H */
diff --git a/zfs/module/icp/include/sys/ia32/trap.h b/zfs/module/icp/include/sys/ia32/trap.h
new file mode 100644
index 000000000000..55b94969b80b
--- /dev/null
+++ b/zfs/module/icp/include/sys/ia32/trap.h
@@ -0,0 +1,107 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
+/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
+/* All Rights Reserved */
+
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _IA32_SYS_TRAP_H
+#define _IA32_SYS_TRAP_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Trap type values
+ */
+
+#define T_ZERODIV 0x0 /* #de divide by 0 error */
+#define T_SGLSTP 0x1 /* #db single step */
+#define T_NMIFLT 0x2 /* NMI */
+#define T_BPTFLT 0x3 /* #bp breakpoint fault, INT3 insn */
+#define T_OVFLW 0x4 /* #of INTO overflow fault */
+#define T_BOUNDFLT 0x5 /* #br BOUND insn fault */
+#define T_ILLINST 0x6 /* #ud invalid opcode fault */
+#define T_NOEXTFLT 0x7 /* #nm device not available: x87 */
+#define T_DBLFLT 0x8 /* #df double fault */
+#define T_EXTOVRFLT 0x9 /* [not generated: 386 only] */
+#define T_TSSFLT 0xa /* #ts invalid TSS fault */
+#define T_SEGFLT 0xb /* #np segment not present fault */
+#define T_STKFLT 0xc /* #ss stack fault */
+#define T_GPFLT 0xd /* #gp general protection fault */
+#define T_PGFLT 0xe /* #pf page fault */
+#define T_EXTERRFLT 0x10 /* #mf x87 FPU error fault */
+#define T_ALIGNMENT 0x11 /* #ac alignment check error */
+#define T_MCE 0x12 /* #mc machine check exception */
+#define T_SIMDFPE 0x13 /* #xm SSE/SSE2 exception */
+#define T_DBGENTR 0x14 /* debugger entry */
+#define T_ENDPERR 0x21 /* emulated extension error flt */
+#define T_ENOEXTFLT 0x20 /* emulated ext not present */
+#define T_FASTTRAP 0xd2 /* fast system call */
+#define T_SYSCALLINT 0x91 /* general system call */
+#define T_DTRACE_RET 0x7f /* DTrace pid return */
+#define T_INT80 0x80 /* int80 handler for linux emulation */
+#define T_SOFTINT 0x50fd /* pseudo softint trap type */
+
+/*
+ * Pseudo traps.
+ */
+#define T_INTERRUPT 0x100
+#define T_FAULT 0x200
+#define T_AST 0x400
+#define T_SYSCALL 0x180
+
+
+/*
+ * Values of error code on stack in case of page fault
+ */
+
+#define PF_ERR_MASK 0x01 /* Mask for error bit */
+#define PF_ERR_PAGE 0x00 /* page not present */
+#define PF_ERR_PROT 0x01 /* protection error */
+#define PF_ERR_WRITE 0x02 /* fault caused by write (else read) */
+#define PF_ERR_USER 0x04 /* processor was in user mode */
+ /* (else supervisor) */
+#define PF_ERR_EXEC 0x10 /* attempt to execute a No eXec page (AMD) */
+
+/*
+ * Definitions for fast system call subfunctions
+ */
+#define T_FNULL 0 /* Null trap for testing */
+#define T_FGETFP 1 /* Get emulated FP context */
+#define T_FSETFP 2 /* Set emulated FP context */
+#define T_GETHRTIME 3 /* Get high resolution time */
+#define T_GETHRVTIME 4 /* Get high resolution virtual time */
+#define T_GETHRESTIME 5 /* Get high resolution time */
+#define T_GETLGRP 6 /* Get home lgrpid */
+
+#define T_LASTFAST 6 /* Last valid subfunction */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _IA32_SYS_TRAP_H */
diff --git a/zfs/module/icp/include/sys/modctl.h b/zfs/module/icp/include/sys/modctl.h
new file mode 100644
index 000000000000..a0b94ef39db8
--- /dev/null
+++ b/zfs/module/icp/include/sys/modctl.h
@@ -0,0 +1,477 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_MODCTL_H
+#define _SYS_MODCTL_H
+
+/*
+ * loadable module support.
+ */
+
+#include <sys/zfs_context.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct modlmisc;
+struct modlinkage;
+
+/*
+ * The following structure defines the operations used by modctl
+ * to load and unload modules. Each supported loadable module type
+ * requires a set of mod_ops.
+ */
+struct mod_ops {
+ int (*modm_install)(struct modlmisc *, struct modlinkage *);
+ int (*modm_remove)(struct modlmisc *, struct modlinkage *);
+ int (*modm_info)(void *, struct modlinkage *, int *);
+};
+
+/*
+ * The set of mod_ops structures for each loadable module type.
+ * Defined in modctl.c.
+ */
+extern struct mod_ops mod_brandops;
+#if defined(__i386) || defined(__amd64)
+extern struct mod_ops mod_cpuops;
+#endif
+extern struct mod_ops mod_cryptoops;
+extern struct mod_ops mod_driverops;
+extern struct mod_ops mod_execops;
+extern struct mod_ops mod_fsops;
+extern struct mod_ops mod_miscops;
+extern struct mod_ops mod_schedops;
+extern struct mod_ops mod_strmodops;
+extern struct mod_ops mod_syscallops;
+extern struct mod_ops mod_sockmodops;
+#ifdef _SYSCALL32_IMPL
+extern struct mod_ops mod_syscallops32;
+#endif
+extern struct mod_ops mod_dacfops;
+extern struct mod_ops mod_ippops;
+extern struct mod_ops mod_pcbeops;
+extern struct mod_ops mod_devfsops;
+extern struct mod_ops mod_kiconvops;
+
+/*
+ * Definitions for the module specific linkage structures.
+ * The first two fields are the same in all of the structures.
+ * The linkinfo is for informational purposes only and is returned by
+ * modctl with the MODINFO cmd.
+ */
+
+/* For cryptographic providers */
+struct modlcrypto {
+ struct mod_ops *crypto_modops;
+ char *crypto_linkinfo;
+};
+
+/* For misc */
+struct modlmisc {
+ struct mod_ops *misc_modops;
+ char *misc_linkinfo;
+};
+
+/*
+ * Revision number of loadable modules support. This is the value
+ * that must be used in the modlinkage structure.
+ */
+#define MODREV_1 1
+
+/*
+ * The modlinkage structure is the structure that the module writer
+ * provides to the routines to install, remove, and stat a module.
+ * The ml_linkage element is an array of pointers to linkage structures.
+ * For most modules there is only one linkage structure. We allocate
+ * enough space for 3 linkage structures, which happens to be the most
+ * we have in any Sun-supplied module. For those modules with more
+ * than 3 linkage structures (which is very unlikely), a modlinkage
+ * structure must be kmem_alloc'd in the module wrapper to be big enough
+ * for all of the linkage structures.
+ */
+struct modlinkage {
+ int ml_rev; /* rev of loadable modules system */
+#ifdef _LP64
+ void *ml_linkage[7]; /* more space in 64-bit OS */
+#else
+ void *ml_linkage[4]; /* NULL terminated list of */
+ /* linkage structures */
+#endif
+};
+
+/*
+ * Commands supported by the modctl system call.
+ */
+#define MODLOAD 0
+#define MODUNLOAD 1
+#define MODINFO 2
+#define MODRESERVED 3
+#define MODSETMINIROOT 4
+#define MODADDMAJBIND 5
+#define MODGETPATH 6
+#define MODREADSYSBIND 7
+#define MODGETMAJBIND 8
+#define MODGETNAME 9
+#define MODSIZEOF_DEVID 10
+#define MODGETDEVID 11
+#define MODSIZEOF_MINORNAME 12
+#define MODGETMINORNAME 13
+#define MODGETPATHLEN 14
+#define MODEVENTS 15
+#define MODGETFBNAME 16
+#define MODREREADDACF 17
+#define MODLOADDRVCONF 18
+#define MODUNLOADDRVCONF 19
+#define MODREMMAJBIND 20
+#define MODDEVT2INSTANCE 21
+#define MODGETDEVFSPATH_LEN 22
+#define MODGETDEVFSPATH 23
+#define MODDEVID2PATHS 24
+#define MODSETDEVPOLICY 26
+#define MODGETDEVPOLICY 27
+#define MODALLOCPRIV 28
+#define MODGETDEVPOLICYBYNAME 29
+#define MODLOADMINORPERM 31
+#define MODADDMINORPERM 32
+#define MODREMMINORPERM 33
+#define MODREMDRVCLEANUP 34
+#define MODDEVEXISTS 35
+#define MODDEVREADDIR 36
+#define MODDEVNAME 37
+#define MODGETDEVFSPATH_MI_LEN 38
+#define MODGETDEVFSPATH_MI 39
+#define MODRETIRE 40
+#define MODUNRETIRE 41
+#define MODISRETIRED 42
+#define MODDEVEMPTYDIR 43
+#define MODREMDRVALIAS 44
+
+/*
+ * sub cmds for MODEVENTS
+ */
+#define MODEVENTS_FLUSH 0
+#define MODEVENTS_FLUSH_DUMP 1
+#define MODEVENTS_SET_DOOR_UPCALL_FILENAME 2
+#define MODEVENTS_GETDATA 3
+#define MODEVENTS_FREEDATA 4
+#define MODEVENTS_POST_EVENT 5
+#define MODEVENTS_REGISTER_EVENT 6
+
+/*
+ * devname subcmds for MODDEVNAME
+ */
+#define MODDEVNAME_LOOKUPDOOR 0
+#define MODDEVNAME_DEVFSADMNODE 1
+#define MODDEVNAME_NSMAPS 2
+#define MODDEVNAME_PROFILE 3
+#define MODDEVNAME_RECONFIG 4
+#define MODDEVNAME_SYSAVAIL 5
+
+
+/*
+ * Data structure passed to modconfig command in kernel to build devfs tree
+ */
+
+struct aliases {
+ struct aliases *a_next;
+ char *a_name;
+ int a_len;
+};
+
+#define MAXMODCONFNAME 256
+
+struct modconfig {
+ char drvname[MAXMODCONFNAME];
+ char drvclass[MAXMODCONFNAME];
+ int major;
+ int flags;
+ int num_aliases;
+ struct aliases *ap;
+};
+
+#if defined(_SYSCALL32)
+
+struct aliases32 {
+ caddr32_t a_next;
+ caddr32_t a_name;
+ int32_t a_len;
+};
+
+struct modconfig32 {
+ char drvname[MAXMODCONFNAME];
+ char drvclass[MAXMODCONFNAME];
+ int32_t major;
+ int32_t flags;
+ int32_t num_aliases;
+ caddr32_t ap;
+};
+
+#endif /* _SYSCALL32 */
+
+/* flags for modconfig */
+#define MOD_UNBIND_OVERRIDE 0x01 /* fail unbind if in use */
+
+/*
+ * Max module path length
+ */
+#define MOD_MAXPATH 256
+
+/*
+ * Default search path for modules, in ADDITION to the directory
+ * containing the kernel components we booted from.
+ *
+ * Most often, this will be "/platform/{platform}/kernel /kernel /usr/kernel",
+ * but we don't wire it down here.
+ */
+#define MOD_DEFPATH "/kernel /usr/kernel"
+
+/*
+ * Default file name extension for autoloading modules.
+ */
+#define MOD_DEFEXT ""
+
+/*
+ * Parameters for modinfo
+ */
+#define MODMAXNAMELEN 32 /* max module name length */
+#define MODMAXLINKINFOLEN 32 /* max link info length */
+
+/*
+ * Module specific information.
+ */
+struct modspecific_info {
+ char msi_linkinfo[MODMAXLINKINFOLEN]; /* name in linkage struct */
+ int msi_p0; /* module specific information */
+};
+
+/*
+ * Structure returned by modctl with MODINFO command.
+ */
+#define MODMAXLINK 10 /* max linkages modinfo can handle */
+
+struct modinfo {
+ int mi_info; /* Flags for info wanted */
+ int mi_state; /* Flags for module state */
+ int mi_id; /* id of this loaded module */
+ int mi_nextid; /* id of next module or -1 */
+ caddr_t mi_base; /* virtual addr of text */
+ size_t mi_size; /* size of module in bytes */
+ int mi_rev; /* loadable modules rev */
+ int mi_loadcnt; /* # of times loaded */
+ char mi_name[MODMAXNAMELEN]; /* name of module */
+ struct modspecific_info mi_msinfo[MODMAXLINK];
+ /* mod specific info */
+};
+
+
+#if defined(_SYSCALL32)
+
+#define MODMAXNAMELEN32 32 /* max module name length */
+#define MODMAXLINKINFOLEN32 32 /* max link info length */
+#define MODMAXLINK32 10 /* max linkages modinfo can handle */
+
+struct modspecific_info32 {
+ char msi_linkinfo[MODMAXLINKINFOLEN32]; /* name in linkage struct */
+ int32_t msi_p0; /* module specific information */
+};
+
+struct modinfo32 {
+ int32_t mi_info; /* Flags for info wanted */
+ int32_t mi_state; /* Flags for module state */
+ int32_t mi_id; /* id of this loaded module */
+ int32_t mi_nextid; /* id of next module or -1 */
+ caddr32_t mi_base; /* virtual addr of text */
+ uint32_t mi_size; /* size of module in bytes */
+ int32_t mi_rev; /* loadable modules rev */
+ int32_t mi_loadcnt; /* # of times loaded */
+ char mi_name[MODMAXNAMELEN32]; /* name of module */
+ struct modspecific_info32 mi_msinfo[MODMAXLINK32];
+ /* mod specific info */
+};
+
+#endif /* _SYSCALL32 */
+
+/* Values for mi_info flags */
+#define MI_INFO_ONE 1
+#define MI_INFO_ALL 2
+#define MI_INFO_CNT 4
+#define MI_INFO_LINKAGE 8 /* used internally to extract modlinkage */
+/*
+ * MI_INFO_NOBASE indicates caller does not need mi_base. Failure to use this
+ * flag may lead 32-bit apps to receive an EOVERFLOW error from modctl(MODINFO)
+ * when used with a 64-bit kernel.
+ */
+#define MI_INFO_NOBASE 16
+
+/* Values for mi_state */
+#define MI_LOADED 1
+#define MI_INSTALLED 2
+
+/*
+ * Macros to vector to the appropriate module specific routine.
+ */
+#define MODL_INSTALL(MODL, MODLP) \
+ (*(MODL)->misc_modops->modm_install)(MODL, MODLP)
+#define MODL_REMOVE(MODL, MODLP) \
+ (*(MODL)->misc_modops->modm_remove)(MODL, MODLP)
+#define MODL_INFO(MODL, MODLP, P0) \
+ (*(MODL)->misc_modops->modm_info)(MODL, MODLP, P0)
+
+/*
+ * Definitions for stubs
+ */
+struct mod_stub_info {
+ uintptr_t mods_func_adr;
+ struct mod_modinfo *mods_modinfo;
+ uintptr_t mods_stub_adr;
+ int (*mods_errfcn)(void);
+ int mods_flag; /* flags defined below */
+};
+
+/*
+ * Definitions for mods_flag.
+ */
+#define MODS_WEAK 0x01 /* weak stub (not loaded if called) */
+#define MODS_NOUNLOAD 0x02 /* module not unloadable (no _fini()) */
+#define MODS_INSTALLED 0x10 /* module installed */
+
+struct mod_modinfo {
+ char *modm_module_name;
+ struct modctl *mp;
+ struct mod_stub_info modm_stubs[1];
+};
+
+struct modctl_list {
+ struct modctl_list *modl_next;
+ struct modctl *modl_modp;
+};
+
+/*
+ * Structure to manage a loadable module.
+ * Note: the module (mod_mp) structure's "text" and "text_size" information
+ * is replicated in the modctl structure so that mod_containing_pc()
+ * doesn't have to grab any locks (modctls are persistent; modules are not.)
+ */
+typedef struct modctl {
+ struct modctl *mod_next; /* &modules based list */
+ struct modctl *mod_prev;
+ int mod_id;
+ void *mod_mp;
+ kthread_t *mod_inprogress_thread;
+ struct mod_modinfo *mod_modinfo;
+ struct modlinkage *mod_linkage;
+ char *mod_filename;
+ char *mod_modname;
+
+ char mod_busy; /* inprogress_thread has locked */
+ char mod_want; /* someone waiting for unlock */
+ char mod_prim; /* primary module */
+
+ int mod_ref; /* ref count - from dependent or stub */
+
+ char mod_loaded; /* module in memory */
+ char mod_installed; /* post _init pre _fini */
+ char mod_loadflags;
+ char mod_delay_unload; /* deferred unload */
+
+ struct modctl_list *mod_requisites; /* mods this one depends on. */
+ void *__unused; /* NOTE: reuse (same size) is OK, */
+ /* deletion causes mdb.vs.core issues */
+ int mod_loadcnt; /* number of times mod was loaded */
+ int mod_nenabled; /* # of enabled DTrace probes in mod */
+ char *mod_text;
+ size_t mod_text_size;
+
+ int mod_gencount; /* # times loaded/unloaded */
+ struct modctl *mod_requisite_loading; /* mod circular dependency */
+} modctl_t;
+
+/*
+ * mod_loadflags
+ */
+
+#define MOD_NOAUTOUNLOAD 0x1 /* Auto mod-unloader skips this mod */
+#define MOD_NONOTIFY 0x2 /* No krtld notifications on (un)load */
+#define MOD_NOUNLOAD 0x4 /* Assume EBUSY for all _fini's */
+
+#define MOD_BIND_HASHSIZE 64
+#define MOD_BIND_HASHMASK (MOD_BIND_HASHSIZE-1)
+
+typedef int modid_t;
+
+/*
+ * global function and data declarations
+ */
+extern kmutex_t mod_lock;
+
+extern char *systemfile;
+extern char **syscallnames;
+extern int moddebug;
+
+/*
+ * This is the head of a doubly linked list. Only the next and prev
+ * pointers are used.
+ */
+extern modctl_t modules;
+
+/*
+ * Only the following are part of the DDI/DKI
+ */
+extern int mod_install(struct modlinkage *);
+extern int mod_remove(struct modlinkage *);
+extern int mod_info(struct modlinkage *, struct modinfo *);
+
+/*
+ * bit definitions for moddebug.
+ */
+#define MODDEBUG_LOADMSG 0x80000000 /* print "[un]loading..." msg */
+#define MODDEBUG_ERRMSG 0x40000000 /* print detailed error msgs */
+#define MODDEBUG_LOADMSG2 0x20000000 /* print 2nd level msgs */
+#define MODDEBUG_RETIRE 0x10000000 /* print retire msgs */
+#define MODDEBUG_BINDING 0x00040000 /* driver/alias binding */
+#define MODDEBUG_FINI_EBUSY 0x00020000 /* pretend fini returns EBUSY */
+#define MODDEBUG_NOAUL_IPP 0x00010000 /* no Autounloading ipp mods */
+#define MODDEBUG_NOAUL_DACF 0x00008000 /* no Autounloading dacf mods */
+#define MODDEBUG_KEEPTEXT 0x00004000 /* keep text after unloading */
+#define MODDEBUG_NOAUL_DRV 0x00001000 /* no Autounloading Drivers */
+#define MODDEBUG_NOAUL_EXEC 0x00000800 /* no Autounloading Execs */
+#define MODDEBUG_NOAUL_FS 0x00000400 /* no Autounloading File sys */
+#define MODDEBUG_NOAUL_MISC 0x00000200 /* no Autounloading misc */
+#define MODDEBUG_NOAUL_SCHED 0x00000100 /* no Autounloading scheds */
+#define MODDEBUG_NOAUL_STR 0x00000080 /* no Autounloading streams */
+#define MODDEBUG_NOAUL_SYS 0x00000040 /* no Autounloading syscalls */
+#define MODDEBUG_NOCTF 0x00000020 /* do not load CTF debug data */
+#define MODDEBUG_NOAUTOUNLOAD 0x00000010 /* no autounloading at all */
+#define MODDEBUG_DDI_MOD 0x00000008 /* ddi_mod{open,sym,close} */
+#define MODDEBUG_MP_MATCH 0x00000004 /* dev_minorperm */
+#define MODDEBUG_MINORPERM 0x00000002 /* minor perm modctls */
+#define MODDEBUG_USERDEBUG 0x00000001 /* bpt after init_module() */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_MODCTL_H */
diff --git a/zfs/module/icp/include/sys/modhash.h b/zfs/module/icp/include/sys/modhash.h
new file mode 100644
index 000000000000..06b52ff02604
--- /dev/null
+++ b/zfs/module/icp/include/sys/modhash.h
@@ -0,0 +1,147 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_MODHASH_H
+#define _SYS_MODHASH_H
+
+/*
+ * Generic hash implementation for the kernel.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/zfs_context.h>
+
+/*
+ * Opaque data types for storing keys and values
+ */
+typedef void *mod_hash_val_t;
+typedef void *mod_hash_key_t;
+
+/*
+ * Opaque data type for reservation
+ */
+typedef void *mod_hash_hndl_t;
+
+/*
+ * Opaque type for hash itself.
+ */
+struct mod_hash;
+typedef struct mod_hash mod_hash_t;
+
+/*
+ * String hash table
+ */
+mod_hash_t *mod_hash_create_strhash_nodtr(char *, size_t,
+ void (*)(mod_hash_val_t));
+mod_hash_t *mod_hash_create_strhash(char *, size_t, void (*)(mod_hash_val_t));
+void mod_hash_destroy_strhash(mod_hash_t *);
+int mod_hash_strkey_cmp(mod_hash_key_t, mod_hash_key_t);
+void mod_hash_strkey_dtor(mod_hash_key_t);
+void mod_hash_strval_dtor(mod_hash_val_t);
+uint_t mod_hash_bystr(void *, mod_hash_key_t);
+
+/*
+ * Pointer hash table
+ */
+mod_hash_t *mod_hash_create_ptrhash(char *, size_t, void (*)(mod_hash_val_t),
+ size_t);
+void mod_hash_destroy_ptrhash(mod_hash_t *);
+int mod_hash_ptrkey_cmp(mod_hash_key_t, mod_hash_key_t);
+uint_t mod_hash_byptr(void *, mod_hash_key_t);
+
+/*
+ * ID hash table
+ */
+mod_hash_t *mod_hash_create_idhash(char *, size_t, void (*)(mod_hash_val_t));
+void mod_hash_destroy_idhash(mod_hash_t *);
+int mod_hash_idkey_cmp(mod_hash_key_t, mod_hash_key_t);
+uint_t mod_hash_byid(void *, mod_hash_key_t);
+uint_t mod_hash_iddata_gen(size_t);
+
+/*
+ * Hash management functions
+ */
+mod_hash_t *mod_hash_create_extended(char *, size_t, void (*)(mod_hash_key_t),
+ void (*)(mod_hash_val_t), uint_t (*)(void *, mod_hash_key_t), void *,
+ int (*)(mod_hash_key_t, mod_hash_key_t), int);
+
+void mod_hash_destroy_hash(mod_hash_t *);
+void mod_hash_clear(mod_hash_t *);
+
+/*
+ * Null key and value destructors
+ */
+void mod_hash_null_keydtor(mod_hash_key_t);
+void mod_hash_null_valdtor(mod_hash_val_t);
+
+/*
+ * Basic hash operations
+ */
+
+/*
+ * Error codes for insert, remove, find, destroy.
+ */
+#define MH_ERR_NOMEM -1
+#define MH_ERR_DUPLICATE -2
+#define MH_ERR_NOTFOUND -3
+
+/*
+ * Return codes for hash walkers
+ */
+#define MH_WALK_CONTINUE 0
+#define MH_WALK_TERMINATE 1
+
+/*
+ * Basic hash operations
+ */
+int mod_hash_insert(mod_hash_t *, mod_hash_key_t, mod_hash_val_t);
+int mod_hash_replace(mod_hash_t *, mod_hash_key_t, mod_hash_val_t);
+int mod_hash_remove(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *);
+int mod_hash_destroy(mod_hash_t *, mod_hash_key_t);
+int mod_hash_find(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *);
+int mod_hash_find_cb(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *,
+ void (*)(mod_hash_key_t, mod_hash_val_t));
+int mod_hash_find_cb_rval(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *,
+ int (*)(mod_hash_key_t, mod_hash_val_t), int *);
+void mod_hash_walk(mod_hash_t *,
+ uint_t (*)(mod_hash_key_t, mod_hash_val_t *, void *), void *);
+
+/*
+ * Reserving hash operations
+ */
+int mod_hash_reserve(mod_hash_t *, mod_hash_hndl_t *);
+int mod_hash_reserve_nosleep(mod_hash_t *, mod_hash_hndl_t *);
+void mod_hash_cancel(mod_hash_t *, mod_hash_hndl_t *);
+int mod_hash_insert_reserve(mod_hash_t *, mod_hash_key_t, mod_hash_val_t,
+ mod_hash_hndl_t);
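+
+/*
+ * Usage sketch (illustrative, not part of the original interface):
+ *
+ *	mod_hash_t *h;
+ *	mod_hash_val_t val;
+ *
+ *	h = mod_hash_create_ptrhash("example hash", 64,
+ *	    mod_hash_null_valdtor, sizeof (void *));
+ *	(void) mod_hash_insert(h, (mod_hash_key_t)key, (mod_hash_val_t)obj);
+ *	if (mod_hash_find(h, (mod_hash_key_t)key, &val) == 0)
+ *		... val now holds obj ...
+ *	mod_hash_destroy_ptrhash(h);
+ *
+ * where "key" and "obj" are caller-supplied pointers.
+ */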
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_MODHASH_H */
diff --git a/zfs/module/icp/include/sys/modhash_impl.h b/zfs/module/icp/include/sys/modhash_impl.h
new file mode 100644
index 000000000000..3130773aa196
--- /dev/null
+++ b/zfs/module/icp/include/sys/modhash_impl.h
@@ -0,0 +1,108 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_MODHASH_IMPL_H
+#define _SYS_MODHASH_IMPL_H
+
+/*
+ * Internal details for the kernel's generic hash implementation.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <sys/zfs_context.h>
+#include <sys/modhash.h>
+
+struct mod_hash_entry {
+ mod_hash_key_t mhe_key; /* stored hash key */
+ mod_hash_val_t mhe_val; /* stored hash value */
+ struct mod_hash_entry *mhe_next; /* next item in chain */
+};
+
+struct mod_hash_stat {
+ ulong_t mhs_hit; /* tried a 'find' and it succeeded */
+ ulong_t mhs_miss; /* tried a 'find' but it failed */
+ ulong_t mhs_coll; /* inserts that failed because of duplicate keys */
+ ulong_t mhs_nelems; /* total number of stored key/value pairs */
+ ulong_t mhs_nomem; /* number of times kmem_alloc failed */
+};
+
+struct mod_hash {
+ krwlock_t mh_contents; /* lock protecting contents */
+ char *mh_name; /* hash name */
+ int mh_sleep; /* kmem_alloc flag */
+ size_t mh_nchains; /* # of elements in mh_entries */
+
+ /* key and val destructor */
+ void (*mh_kdtor)(mod_hash_key_t);
+ void (*mh_vdtor)(mod_hash_val_t);
+
+ /* key comparator */
+ int (*mh_keycmp)(mod_hash_key_t, mod_hash_key_t);
+
+ /* hash algorithm, and algorithm-private data */
+ uint_t (*mh_hashalg)(void *, mod_hash_key_t);
+ void *mh_hashalg_data;
+
+ struct mod_hash *mh_next; /* next hash in list */
+
+ struct mod_hash_stat mh_stat;
+
+ struct mod_hash_entry *mh_entries[1];
+};
+
+/*
+ * MH_SIZE()
+ * Compute the size of a mod_hash_t, in bytes, given the number of
+ * elements it contains.
+ */
+#define MH_SIZE(n) \
+ (sizeof (mod_hash_t) + ((n) - 1) * (sizeof (struct mod_hash_entry *)))
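+
+/*
+ * Example (illustrative): a hash with 64 chains occupies MH_SIZE(64)
+ * bytes, i.e. sizeof (mod_hash_t) plus 63 extra mh_entries slots; the
+ * "- 1" accounts for the slot already declared inside the struct.
+ */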
+
+/*
+ * Module initialization and teardown; each is called once.
+ */
+void mod_hash_fini(void);
+void mod_hash_init(void);
+
+/*
+ * Internal routines. Use directly with care.
+ */
+uint_t i_mod_hash(mod_hash_t *, mod_hash_key_t);
+int i_mod_hash_insert_nosync(mod_hash_t *, mod_hash_key_t, mod_hash_val_t,
+ mod_hash_hndl_t);
+int i_mod_hash_remove_nosync(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *);
+int i_mod_hash_find_nosync(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *);
+void i_mod_hash_walk_nosync(mod_hash_t *, uint_t (*)(mod_hash_key_t,
+ mod_hash_val_t *, void *), void *);
+void i_mod_hash_clear_nosync(mod_hash_t *hash);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _SYS_MODHASH_IMPL_H */
diff --git a/zfs/module/icp/include/sys/stack.h b/zfs/module/icp/include/sys/stack.h
new file mode 100644
index 000000000000..64fecf409b5c
--- /dev/null
+++ b/zfs/module/icp/include/sys/stack.h
@@ -0,0 +1,36 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_STACK_H
+#define _SYS_STACK_H
+
+#if defined(__i386) || defined(__amd64)
+
+#include <sys/ia32/stack.h> /* XX64 x86/sys/stack.h */
+
+#endif
+
+#endif /* _SYS_STACK_H */
diff --git a/zfs/module/icp/include/sys/trap.h b/zfs/module/icp/include/sys/trap.h
new file mode 100644
index 000000000000..7f9fd375805f
--- /dev/null
+++ b/zfs/module/icp/include/sys/trap.h
@@ -0,0 +1,36 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#ifndef _SYS_TRAP_H
+#define _SYS_TRAP_H
+
+#if defined(__i386) || defined(__amd64)
+
+#include <sys/ia32/trap.h> /* XX64 x86/sys/trap.h */
+
+#endif
+
+#endif /* _SYS_TRAP_H */
diff --git a/zfs/module/icp/io/aes.c b/zfs/module/icp/io/aes.c
new file mode 100644
index 000000000000..12d57ed79eee
--- /dev/null
+++ b/zfs/module/icp/io/aes.c
@@ -0,0 +1,1439 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+/*
+ * AES provider for the Kernel Cryptographic Framework (KCF)
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/spi.h>
+#include <sys/crypto/icp.h>
+#include <modes/modes.h>
+#include <sys/modctl.h>
+#define _AES_IMPL
+#include <aes/aes_impl.h>
+
+#define CRYPTO_PROVIDER_NAME "aes"
+
+extern struct mod_ops mod_cryptoops;
+
+/*
+ * Module linkage information for the kernel.
+ */
+static struct modlcrypto modlcrypto = {
+ &mod_cryptoops,
+ "AES Kernel SW Provider"
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, { (void *)&modlcrypto, NULL }
+};
+
+/*
+ * Mechanism info structure passed to KCF during registration.
+ */
+static crypto_mech_info_t aes_mech_info_tab[] = {
+ /* AES_ECB */
+ {SUN_CKM_AES_ECB, AES_ECB_MECH_INFO_TYPE,
+ CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC |
+ CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC,
+ AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ /* AES_CBC */
+ {SUN_CKM_AES_CBC, AES_CBC_MECH_INFO_TYPE,
+ CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC |
+ CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC,
+ AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ /* AES_CTR */
+ {SUN_CKM_AES_CTR, AES_CTR_MECH_INFO_TYPE,
+ CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC |
+ CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC,
+ AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ /* AES_CCM */
+ {SUN_CKM_AES_CCM, AES_CCM_MECH_INFO_TYPE,
+ CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC |
+ CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC,
+ AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ /* AES_GCM */
+ {SUN_CKM_AES_GCM, AES_GCM_MECH_INFO_TYPE,
+ CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC |
+ CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC,
+ AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ /* AES_GMAC */
+ {SUN_CKM_AES_GMAC, AES_GMAC_MECH_INFO_TYPE,
+ CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC |
+ CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC |
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC |
+ CRYPTO_FG_SIGN | CRYPTO_FG_SIGN_ATOMIC |
+ CRYPTO_FG_VERIFY | CRYPTO_FG_VERIFY_ATOMIC,
+ AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}
+};
+
+/* operations are in-place if the output buffer is NULL */
+#define AES_ARG_INPLACE(input, output) \
+ if ((output) == NULL) \
+ (output) = (input);
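+
+/*
+ * For example (illustrative), calling aes_encrypt(ctx, pt, NULL, req)
+ * makes the macro alias the output to "pt", so the ciphertext
+ * overwrites the plaintext buffer in place.
+ */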
+
+static void aes_provider_status(crypto_provider_handle_t, uint_t *);
+
+static crypto_control_ops_t aes_control_ops = {
+ aes_provider_status
+};
+
+static int aes_encrypt_init(crypto_ctx_t *, crypto_mechanism_t *,
+ crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t);
+static int aes_decrypt_init(crypto_ctx_t *, crypto_mechanism_t *,
+ crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t);
+static int aes_common_init(crypto_ctx_t *, crypto_mechanism_t *,
+ crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t, boolean_t);
+static int aes_common_init_ctx(aes_ctx_t *, crypto_spi_ctx_template_t *,
+ crypto_mechanism_t *, crypto_key_t *, int, boolean_t);
+static int aes_encrypt_final(crypto_ctx_t *, crypto_data_t *,
+ crypto_req_handle_t);
+static int aes_decrypt_final(crypto_ctx_t *, crypto_data_t *,
+ crypto_req_handle_t);
+
+static int aes_encrypt(crypto_ctx_t *, crypto_data_t *, crypto_data_t *,
+ crypto_req_handle_t);
+static int aes_encrypt_update(crypto_ctx_t *, crypto_data_t *,
+ crypto_data_t *, crypto_req_handle_t);
+static int aes_encrypt_atomic(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
+ crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t);
+
+static int aes_decrypt(crypto_ctx_t *, crypto_data_t *, crypto_data_t *,
+ crypto_req_handle_t);
+static int aes_decrypt_update(crypto_ctx_t *, crypto_data_t *,
+ crypto_data_t *, crypto_req_handle_t);
+static int aes_decrypt_atomic(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *,
+ crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t);
+
+static crypto_cipher_ops_t aes_cipher_ops = {
+ aes_encrypt_init,
+ aes_encrypt,
+ aes_encrypt_update,
+ aes_encrypt_final,
+ aes_encrypt_atomic,
+ aes_decrypt_init,
+ aes_decrypt,
+ aes_decrypt_update,
+ aes_decrypt_final,
+ aes_decrypt_atomic
+};
+
+static int aes_mac_atomic(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+static int aes_mac_verify_atomic(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+
+static crypto_mac_ops_t aes_mac_ops = {
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ aes_mac_atomic,
+ aes_mac_verify_atomic
+};
+
+static int aes_create_ctx_template(crypto_provider_handle_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *,
+ size_t *, crypto_req_handle_t);
+static int aes_free_context(crypto_ctx_t *);
+
+static crypto_ctx_ops_t aes_ctx_ops = {
+ aes_create_ctx_template,
+ aes_free_context
+};
+
+static crypto_ops_t aes_crypto_ops = {{{{{
+ &aes_control_ops,
+ NULL,
+ &aes_cipher_ops,
+ &aes_mac_ops,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ &aes_ctx_ops
+}}}}};
+
+static crypto_provider_info_t aes_prov_info = {{{{
+ CRYPTO_SPI_VERSION_1,
+ "AES Software Provider",
+ CRYPTO_SW_PROVIDER,
+ NULL,
+ &aes_crypto_ops,
+ sizeof (aes_mech_info_tab)/sizeof (crypto_mech_info_t),
+ aes_mech_info_tab
+}}}};
+
+static crypto_kcf_provider_handle_t aes_prov_handle = 0;
+static crypto_data_t null_crypto_data = { CRYPTO_DATA_RAW };
+
+int
+aes_mod_init(void)
+{
+ int ret;
+
+ if ((ret = mod_install(&modlinkage)) != 0)
+ return (ret);
+
+ /* Register with KCF. If the registration fails, remove the module. */
+ if (crypto_register_provider(&aes_prov_info, &aes_prov_handle)) {
+ (void) mod_remove(&modlinkage);
+ return (EACCES);
+ }
+
+ return (0);
+}
+
+int
+aes_mod_fini(void)
+{
+ /* Unregister from KCF if module is registered */
+ if (aes_prov_handle != 0) {
+ if (crypto_unregister_provider(aes_prov_handle))
+ return (EBUSY);
+
+ aes_prov_handle = 0;
+ }
+
+ return (mod_remove(&modlinkage));
+}
+
+static int
+aes_check_mech_param(crypto_mechanism_t *mechanism, aes_ctx_t **ctx, int kmflag)
+{
+ void *p = NULL;
+ boolean_t param_required = B_TRUE;
+ size_t param_len;
+ void *(*alloc_fun)(int);
+ int rv = CRYPTO_SUCCESS;
+
+ switch (mechanism->cm_type) {
+ case AES_ECB_MECH_INFO_TYPE:
+ param_required = B_FALSE;
+ alloc_fun = ecb_alloc_ctx;
+ break;
+ case AES_CBC_MECH_INFO_TYPE:
+ param_len = AES_BLOCK_LEN;
+ alloc_fun = cbc_alloc_ctx;
+ break;
+ case AES_CTR_MECH_INFO_TYPE:
+ param_len = sizeof (CK_AES_CTR_PARAMS);
+ alloc_fun = ctr_alloc_ctx;
+ break;
+ case AES_CCM_MECH_INFO_TYPE:
+ param_len = sizeof (CK_AES_CCM_PARAMS);
+ alloc_fun = ccm_alloc_ctx;
+ break;
+ case AES_GCM_MECH_INFO_TYPE:
+ param_len = sizeof (CK_AES_GCM_PARAMS);
+ alloc_fun = gcm_alloc_ctx;
+ break;
+ case AES_GMAC_MECH_INFO_TYPE:
+ param_len = sizeof (CK_AES_GMAC_PARAMS);
+ alloc_fun = gmac_alloc_ctx;
+ break;
+ default:
+ rv = CRYPTO_MECHANISM_INVALID;
+ return (rv);
+ }
+ if (param_required && mechanism->cm_param != NULL &&
+ mechanism->cm_param_len != param_len) {
+ rv = CRYPTO_MECHANISM_PARAM_INVALID;
+ }
+ if (ctx != NULL) {
+ p = (alloc_fun)(kmflag);
+ *ctx = p;
+ }
+ return (rv);
+}
+
+/*
+ * Initialize key schedules for AES
+ */
+static int
+init_keysched(crypto_key_t *key, void *newbie)
+{
+ /*
+ * Only keys by value are supported by this module.
+ */
+ switch (key->ck_format) {
+ case CRYPTO_KEY_RAW:
+ if (key->ck_length < AES_MINBITS ||
+ key->ck_length > AES_MAXBITS) {
+ return (CRYPTO_KEY_SIZE_RANGE);
+ }
+
+ /* key length must be a multiple of 64 bits: 128, 192, or 256 */
+ if ((key->ck_length & 63) != 0)
+ return (CRYPTO_KEY_SIZE_RANGE);
+ break;
+ default:
+ return (CRYPTO_KEY_TYPE_INCONSISTENT);
+ }
+
+ aes_init_keysched(key->ck_data, key->ck_length, newbie);
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * KCF software provider control entry points.
+ */
+/* ARGSUSED */
+static void
+aes_provider_status(crypto_provider_handle_t provider, uint_t *status)
+{
+ *status = CRYPTO_PROVIDER_READY;
+}
+
+static int
+aes_encrypt_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
+ crypto_key_t *key, crypto_spi_ctx_template_t template,
+ crypto_req_handle_t req)
+{
+ return (aes_common_init(ctx, mechanism, key, template, req, B_TRUE));
+}
+
+static int
+aes_decrypt_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
+ crypto_key_t *key, crypto_spi_ctx_template_t template,
+ crypto_req_handle_t req)
+{
+ return (aes_common_init(ctx, mechanism, key, template, req, B_FALSE));
+}
+
+
+
+/*
+ * KCF software provider encrypt entry points.
+ */
+static int
+aes_common_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
+ crypto_key_t *key, crypto_spi_ctx_template_t template,
+ crypto_req_handle_t req, boolean_t is_encrypt_init)
+{
+ aes_ctx_t *aes_ctx;
+ int rv;
+ int kmflag;
+
+ /*
+ * Only keys by value are supported by this module.
+ */
+ if (key->ck_format != CRYPTO_KEY_RAW) {
+ return (CRYPTO_KEY_TYPE_INCONSISTENT);
+ }
+
+ kmflag = crypto_kmflag(req);
+ if ((rv = aes_check_mech_param(mechanism, &aes_ctx, kmflag))
+ != CRYPTO_SUCCESS)
+ return (rv);
+
+ rv = aes_common_init_ctx(aes_ctx, template, mechanism, key, kmflag,
+ is_encrypt_init);
+ if (rv != CRYPTO_SUCCESS) {
+ crypto_free_mode_ctx(aes_ctx);
+ return (rv);
+ }
+
+ ctx->cc_provider_private = aes_ctx;
+
+ return (CRYPTO_SUCCESS);
+}
+
+static void
+aes_copy_block64(uint8_t *in, uint64_t *out)
+{
+ if (IS_P2ALIGNED(in, sizeof (uint64_t))) {
+ /* LINTED: pointer alignment */
+ out[0] = *(uint64_t *)&in[0];
+ /* LINTED: pointer alignment */
+ out[1] = *(uint64_t *)&in[8];
+ } else {
+ uint8_t *iv8 = (uint8_t *)&out[0];
+
+ AES_COPY_BLOCK(in, iv8);
+ }
+}
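
aes_copy_block64() takes the two-load fast path only when the source is 64-bit aligned. A standalone sketch of the same idea, with the portable spelling of IS_P2ALIGNED() and memcpy() standing in for AES_COPY_BLOCK(); as in the kernel source (which carries LINTED exceptions for it), the direct uint64_t loads assume such type-punning is acceptable on the target:

    #include <stdint.h>
    #include <string.h>

    static void
    copy_block64(const uint8_t *in, uint64_t out[2])
    {
        if (((uintptr_t)in & (sizeof (uint64_t) - 1)) == 0) {
            /* aligned: two 64-bit loads (type-punning, as LINTED above) */
            out[0] = *(const uint64_t *)(const void *)&in[0];
            out[1] = *(const uint64_t *)(const void *)&in[8];
        } else {
            /* unaligned: byte-wise copy stands in for AES_COPY_BLOCK() */
            memcpy(out, in, 16);
        }
    }
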
+
+
+static int
+aes_encrypt(crypto_ctx_t *ctx, crypto_data_t *plaintext,
+ crypto_data_t *ciphertext, crypto_req_handle_t req)
+{
+ int ret = CRYPTO_FAILED;
+
+ aes_ctx_t *aes_ctx;
+ size_t saved_length, saved_offset, length_needed;
+
+ ASSERT(ctx->cc_provider_private != NULL);
+ aes_ctx = ctx->cc_provider_private;
+
+ /*
+ * For block ciphers, the plaintext must be a multiple of the AES
+ * block size. This test is valid only for ciphers whose block size
+ * is a power of 2.
+ */
+ if (((aes_ctx->ac_flags & (CTR_MODE|CCM_MODE|GCM_MODE|GMAC_MODE))
+ == 0) && (plaintext->cd_length & (AES_BLOCK_LEN - 1)) != 0)
+ return (CRYPTO_DATA_LEN_RANGE);
+
+ AES_ARG_INPLACE(plaintext, ciphertext);
+
+ /*
+ * For the cases below, just return the length needed to store the
+ * output and do not destroy the context.
+ */
+ switch (aes_ctx->ac_flags & (CCM_MODE|GCM_MODE|GMAC_MODE)) {
+ case CCM_MODE:
+ length_needed = plaintext->cd_length + aes_ctx->ac_mac_len;
+ break;
+ case GCM_MODE:
+ length_needed = plaintext->cd_length + aes_ctx->ac_tag_len;
+ break;
+ case GMAC_MODE:
+ if (plaintext->cd_length != 0)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ length_needed = aes_ctx->ac_tag_len;
+ break;
+ default:
+ length_needed = plaintext->cd_length;
+ }
+
+ if (ciphertext->cd_length < length_needed) {
+ ciphertext->cd_length = length_needed;
+ return (CRYPTO_BUFFER_TOO_SMALL);
+ }
+
+ saved_length = ciphertext->cd_length;
+ saved_offset = ciphertext->cd_offset;
+
+ /*
+ * Do an update on the specified input data.
+ */
+ ret = aes_encrypt_update(ctx, plaintext, ciphertext, req);
+ if (ret != CRYPTO_SUCCESS) {
+ return (ret);
+ }
+
+ /*
+ * For CCM mode, aes_ccm_encrypt_final() will take care of any
+ * left-over unprocessed data, and compute the MAC
+ */
+ if (aes_ctx->ac_flags & CCM_MODE) {
+ /*
+ * ccm_encrypt_final() will compute the MAC and append it to the
+ * existing ciphertext, so the left-over length value must be
+ * adjusted accordingly.
+ */
+
+ /* the order of the following two lines MUST NOT be reversed */
+ ciphertext->cd_offset = ciphertext->cd_length;
+ ciphertext->cd_length = saved_length - ciphertext->cd_length;
+ ret = ccm_encrypt_final((ccm_ctx_t *)aes_ctx, ciphertext,
+ AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block);
+ if (ret != CRYPTO_SUCCESS) {
+ return (ret);
+ }
+
+ if (plaintext != ciphertext) {
+ ciphertext->cd_length =
+ ciphertext->cd_offset - saved_offset;
+ }
+ ciphertext->cd_offset = saved_offset;
+ } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) {
+ /*
+ * gcm_encrypt_final() will compute the MAC and append it to the
+ * existing ciphertext, so the left-over length value must be
+ * adjusted accordingly.
+ */
+
+ /* the order of the following two lines MUST NOT be reversed */
+ ciphertext->cd_offset = ciphertext->cd_length;
+ ciphertext->cd_length = saved_length - ciphertext->cd_length;
+ ret = gcm_encrypt_final((gcm_ctx_t *)aes_ctx, ciphertext,
+ AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
+ aes_xor_block);
+ if (ret != CRYPTO_SUCCESS) {
+ return (ret);
+ }
+
+ if (plaintext != ciphertext) {
+ ciphertext->cd_length =
+ ciphertext->cd_offset - saved_offset;
+ }
+ ciphertext->cd_offset = saved_offset;
+ }
+
+ ASSERT(aes_ctx->ac_remainder_len == 0);
+ (void) aes_free_context(ctx);
+
+ return (ret);
+}
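
The sizing logic in aes_encrypt() differs per mode: CCM appends the MAC, GCM appends the tag, GMAC emits the tag alone, and the remaining modes are length-preserving. A hedged sketch of that arithmetic, using illustrative stand-in types rather than the ICP structures:

    #include <stddef.h>

    /* illustrative stand-in modes, not the ICP flag values */
    typedef enum { MODE_PLAIN, MODE_CCM, MODE_GCM, MODE_GMAC } aes_out_mode_t;

    static size_t
    ciphertext_len_needed(aes_out_mode_t mode, size_t pt_len,
        size_t mac_len, size_t tag_len)
    {
        switch (mode) {
        case MODE_CCM:
            return (pt_len + mac_len);  /* MAC appended to ciphertext */
        case MODE_GCM:
            return (pt_len + tag_len);  /* tag appended to ciphertext */
        case MODE_GMAC:
            return (tag_len);           /* no plaintext, tag only */
        default:
            return (pt_len);            /* length-preserving modes */
        }
    }
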
+
+
+static int
+aes_decrypt(crypto_ctx_t *ctx, crypto_data_t *ciphertext,
+ crypto_data_t *plaintext, crypto_req_handle_t req)
+{
+ int ret = CRYPTO_FAILED;
+
+ aes_ctx_t *aes_ctx;
+ off_t saved_offset;
+ size_t saved_length, length_needed;
+
+ ASSERT(ctx->cc_provider_private != NULL);
+ aes_ctx = ctx->cc_provider_private;
+
+ /*
+ * For block ciphers, the ciphertext must be a multiple of the AES
+ * block size. This test is valid only for ciphers whose block size
+ * is a power of 2.
+ */
+ if (((aes_ctx->ac_flags & (CTR_MODE|CCM_MODE|GCM_MODE|GMAC_MODE))
+ == 0) && (ciphertext->cd_length & (AES_BLOCK_LEN - 1)) != 0) {
+ return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE);
+ }
+
+ AES_ARG_INPLACE(ciphertext, plaintext);
+
+ /*
+ * Return length needed to store the output.
+ * Do not destroy context when plaintext buffer is too small.
+ *
+ * CCM: plaintext is MAC len smaller than cipher text
+ * GCM: plaintext is TAG len smaller than cipher text
+ * GMAC: plaintext length must be zero
+ */
+ switch (aes_ctx->ac_flags & (CCM_MODE|GCM_MODE|GMAC_MODE)) {
+ case CCM_MODE:
+ length_needed = aes_ctx->ac_processed_data_len;
+ break;
+ case GCM_MODE:
+ length_needed = ciphertext->cd_length - aes_ctx->ac_tag_len;
+ break;
+ case GMAC_MODE:
+ if (plaintext->cd_length != 0)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ length_needed = 0;
+ break;
+ default:
+ length_needed = ciphertext->cd_length;
+ }
+
+ if (plaintext->cd_length < length_needed) {
+ plaintext->cd_length = length_needed;
+ return (CRYPTO_BUFFER_TOO_SMALL);
+ }
+
+ saved_offset = plaintext->cd_offset;
+ saved_length = plaintext->cd_length;
+
+ /*
+ * Do an update on the specified input data.
+ */
+ ret = aes_decrypt_update(ctx, ciphertext, plaintext, req);
+ if (ret != CRYPTO_SUCCESS) {
+ goto cleanup;
+ }
+
+ if (aes_ctx->ac_flags & CCM_MODE) {
+ ASSERT(aes_ctx->ac_processed_data_len == aes_ctx->ac_data_len);
+ ASSERT(aes_ctx->ac_processed_mac_len == aes_ctx->ac_mac_len);
+
+ /* the order of the following two lines MUST NOT be reversed */
+ plaintext->cd_offset = plaintext->cd_length;
+ plaintext->cd_length = saved_length - plaintext->cd_length;
+
+ ret = ccm_decrypt_final((ccm_ctx_t *)aes_ctx, plaintext,
+ AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
+ aes_xor_block);
+ if (ret == CRYPTO_SUCCESS) {
+ if (plaintext != ciphertext) {
+ plaintext->cd_length =
+ plaintext->cd_offset - saved_offset;
+ }
+ } else {
+ plaintext->cd_length = saved_length;
+ }
+
+ plaintext->cd_offset = saved_offset;
+ } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) {
+ /* the order of the following two lines MUST NOT be reversed */
+ plaintext->cd_offset = plaintext->cd_length;
+ plaintext->cd_length = saved_length - plaintext->cd_length;
+
+ ret = gcm_decrypt_final((gcm_ctx_t *)aes_ctx, plaintext,
+ AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block);
+ if (ret == CRYPTO_SUCCESS) {
+ if (plaintext != ciphertext) {
+ plaintext->cd_length =
+ plaintext->cd_offset - saved_offset;
+ }
+ } else {
+ plaintext->cd_length = saved_length;
+ }
+
+ plaintext->cd_offset = saved_offset;
+ }
+
+ ASSERT(aes_ctx->ac_remainder_len == 0);
+
+cleanup:
+ (void) aes_free_context(ctx);
+
+ return (ret);
+}
+
+
+/* ARGSUSED */
+static int
+aes_encrypt_update(crypto_ctx_t *ctx, crypto_data_t *plaintext,
+ crypto_data_t *ciphertext, crypto_req_handle_t req)
+{
+ off_t saved_offset;
+ size_t saved_length, out_len;
+ int ret = CRYPTO_SUCCESS;
+ aes_ctx_t *aes_ctx;
+
+ ASSERT(ctx->cc_provider_private != NULL);
+ aes_ctx = ctx->cc_provider_private;
+
+ AES_ARG_INPLACE(plaintext, ciphertext);
+
+ /* compute number of bytes that will hold the ciphertext */
+ out_len = aes_ctx->ac_remainder_len;
+ out_len += plaintext->cd_length;
+ out_len &= ~(AES_BLOCK_LEN - 1);
+
+ /* return length needed to store the output */
+ if (ciphertext->cd_length < out_len) {
+ ciphertext->cd_length = out_len;
+ return (CRYPTO_BUFFER_TOO_SMALL);
+ }
+
+ saved_offset = ciphertext->cd_offset;
+ saved_length = ciphertext->cd_length;
+
+ /*
+ * Do the AES update on the specified input data.
+ */
+ switch (plaintext->cd_format) {
+ case CRYPTO_DATA_RAW:
+ ret = crypto_update_iov(ctx->cc_provider_private,
+ plaintext, ciphertext, aes_encrypt_contiguous_blocks,
+ aes_copy_block64);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = crypto_update_uio(ctx->cc_provider_private,
+ plaintext, ciphertext, aes_encrypt_contiguous_blocks,
+ aes_copy_block64);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ /*
+ * Since AES counter mode is a stream cipher, we call
+ * ctr_mode_final() to pick up any remaining bytes.
+ * It is an internal function that does not destroy
+ * the context like *normal* final routines.
+ */
+ if ((aes_ctx->ac_flags & CTR_MODE) && (aes_ctx->ac_remainder_len > 0)) {
+ ret = ctr_mode_final((ctr_ctx_t *)aes_ctx,
+ ciphertext, aes_encrypt_block);
+ }
+
+ if (ret == CRYPTO_SUCCESS) {
+ if (plaintext != ciphertext)
+ ciphertext->cd_length =
+ ciphertext->cd_offset - saved_offset;
+ } else {
+ ciphertext->cd_length = saved_length;
+ }
+ ciphertext->cd_offset = saved_offset;
+
+ return (ret);
+}
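
The update path rounds the buffered-plus-incoming byte count down to a whole number of blocks with a power-of-two mask; the leftover bytes stay in ac_remainder_len for the next call. A small self-contained sketch of the arithmetic, assuming the usual 16-byte AES block:

    #include <assert.h>

    #define AES_BLOCK_LEN 16    /* power of two, so the mask trick applies */

    static unsigned int
    update_out_len(unsigned int buffered, unsigned int incoming,
        unsigned int *remainder)
    {
        unsigned int total = buffered + incoming;
        unsigned int out = total & ~(AES_BLOCK_LEN - 1);    /* round down */

        *remainder = total - out;   /* carried in the ctx for the next call */
        return (out);
    }

    int
    main(void)
    {
        unsigned int rem;

        assert(update_out_len(5, 30, &rem) == 32 && rem == 3);
        assert(update_out_len(0, 16, &rem) == 16 && rem == 0);
        return (0);
    }
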
+
+
+static int
+aes_decrypt_update(crypto_ctx_t *ctx, crypto_data_t *ciphertext,
+ crypto_data_t *plaintext, crypto_req_handle_t req)
+{
+ off_t saved_offset;
+ size_t saved_length, out_len;
+ int ret = CRYPTO_SUCCESS;
+ aes_ctx_t *aes_ctx;
+
+ ASSERT(ctx->cc_provider_private != NULL);
+ aes_ctx = ctx->cc_provider_private;
+
+ AES_ARG_INPLACE(ciphertext, plaintext);
+
+ /*
+ * Compute number of bytes that will hold the plaintext.
+ * This is not necessary for CCM, GCM, and GMAC since these
+ * mechanisms never return plaintext for update operations.
+ */
+ if ((aes_ctx->ac_flags & (CCM_MODE|GCM_MODE|GMAC_MODE)) == 0) {
+ out_len = aes_ctx->ac_remainder_len;
+ out_len += ciphertext->cd_length;
+ out_len &= ~(AES_BLOCK_LEN - 1);
+
+ /* return length needed to store the output */
+ if (plaintext->cd_length < out_len) {
+ plaintext->cd_length = out_len;
+ return (CRYPTO_BUFFER_TOO_SMALL);
+ }
+ }
+
+ saved_offset = plaintext->cd_offset;
+ saved_length = plaintext->cd_length;
+
+ if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE))
+ gcm_set_kmflag((gcm_ctx_t *)aes_ctx, crypto_kmflag(req));
+
+ /*
+ * Do the AES update on the specified input data.
+ */
+ switch (ciphertext->cd_format) {
+ case CRYPTO_DATA_RAW:
+ ret = crypto_update_iov(ctx->cc_provider_private,
+ ciphertext, plaintext, aes_decrypt_contiguous_blocks,
+ aes_copy_block64);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = crypto_update_uio(ctx->cc_provider_private,
+ ciphertext, plaintext, aes_decrypt_contiguous_blocks,
+ aes_copy_block64);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ /*
+ * Since AES counter mode is a stream cipher, we call
+ * ctr_mode_final() to pick up any remaining bytes.
+ * It is an internal function that does not destroy
+ * the context like *normal* final routines.
+ */
+ if ((aes_ctx->ac_flags & CTR_MODE) && (aes_ctx->ac_remainder_len > 0)) {
+ ret = ctr_mode_final((ctr_ctx_t *)aes_ctx, plaintext,
+ aes_encrypt_block);
+ if (ret == CRYPTO_DATA_LEN_RANGE)
+ ret = CRYPTO_ENCRYPTED_DATA_LEN_RANGE;
+ }
+
+ if (ret == CRYPTO_SUCCESS) {
+ if (ciphertext != plaintext)
+ plaintext->cd_length =
+ plaintext->cd_offset - saved_offset;
+ } else {
+ plaintext->cd_length = saved_length;
+ }
+ plaintext->cd_offset = saved_offset;
+
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+aes_encrypt_final(crypto_ctx_t *ctx, crypto_data_t *data,
+ crypto_req_handle_t req)
+{
+ aes_ctx_t *aes_ctx;
+ int ret;
+
+ ASSERT(ctx->cc_provider_private != NULL);
+ aes_ctx = ctx->cc_provider_private;
+
+ if (data->cd_format != CRYPTO_DATA_RAW &&
+ data->cd_format != CRYPTO_DATA_UIO) {
+ return (CRYPTO_ARGUMENTS_BAD);
+ }
+
+ if (aes_ctx->ac_flags & CTR_MODE) {
+ if (aes_ctx->ac_remainder_len > 0) {
+ ret = ctr_mode_final((ctr_ctx_t *)aes_ctx, data,
+ aes_encrypt_block);
+ if (ret != CRYPTO_SUCCESS)
+ return (ret);
+ }
+ } else if (aes_ctx->ac_flags & CCM_MODE) {
+ ret = ccm_encrypt_final((ccm_ctx_t *)aes_ctx, data,
+ AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block);
+ if (ret != CRYPTO_SUCCESS) {
+ return (ret);
+ }
+ } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) {
+ size_t saved_offset = data->cd_offset;
+
+ ret = gcm_encrypt_final((gcm_ctx_t *)aes_ctx, data,
+ AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
+ aes_xor_block);
+ if (ret != CRYPTO_SUCCESS) {
+ return (ret);
+ }
+ data->cd_length = data->cd_offset - saved_offset;
+ data->cd_offset = saved_offset;
+ } else {
+ /*
+ * There must be no unprocessed plaintext; a remainder is left
+ * over when the total data length is not a multiple of the
+ * AES block length.
+ */
+ if (aes_ctx->ac_remainder_len > 0) {
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+ data->cd_length = 0;
+ }
+
+ (void) aes_free_context(ctx);
+
+ return (CRYPTO_SUCCESS);
+}
+
+/* ARGSUSED */
+static int
+aes_decrypt_final(crypto_ctx_t *ctx, crypto_data_t *data,
+ crypto_req_handle_t req)
+{
+ aes_ctx_t *aes_ctx;
+ int ret;
+ off_t saved_offset;
+ size_t saved_length;
+
+ ASSERT(ctx->cc_provider_private != NULL);
+ aes_ctx = ctx->cc_provider_private;
+
+ if (data->cd_format != CRYPTO_DATA_RAW &&
+ data->cd_format != CRYPTO_DATA_UIO) {
+ return (CRYPTO_ARGUMENTS_BAD);
+ }
+
+ /*
+ * There must be no unprocessed ciphertext; a remainder is left
+ * over when the total ciphertext length is not a multiple of
+ * the AES block length.
+ */
+ if (aes_ctx->ac_remainder_len > 0) {
+ if ((aes_ctx->ac_flags & CTR_MODE) == 0)
+ return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE);
+ else {
+ ret = ctr_mode_final((ctr_ctx_t *)aes_ctx, data,
+ aes_encrypt_block);
+ if (ret == CRYPTO_DATA_LEN_RANGE)
+ ret = CRYPTO_ENCRYPTED_DATA_LEN_RANGE;
+ if (ret != CRYPTO_SUCCESS)
+ return (ret);
+ }
+ }
+
+ if (aes_ctx->ac_flags & CCM_MODE) {
+ /*
+ * This is where all the plaintext is returned, make sure
+ * the plaintext buffer is big enough
+ */
+ size_t pt_len = aes_ctx->ac_data_len;
+ if (data->cd_length < pt_len) {
+ data->cd_length = pt_len;
+ return (CRYPTO_BUFFER_TOO_SMALL);
+ }
+
+ ASSERT(aes_ctx->ac_processed_data_len == pt_len);
+ ASSERT(aes_ctx->ac_processed_mac_len == aes_ctx->ac_mac_len);
+ saved_offset = data->cd_offset;
+ saved_length = data->cd_length;
+ ret = ccm_decrypt_final((ccm_ctx_t *)aes_ctx, data,
+ AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
+ aes_xor_block);
+ if (ret == CRYPTO_SUCCESS) {
+ data->cd_length = data->cd_offset - saved_offset;
+ } else {
+ data->cd_length = saved_length;
+ }
+
+ data->cd_offset = saved_offset;
+ if (ret != CRYPTO_SUCCESS) {
+ return (ret);
+ }
+ } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) {
+ /*
+ * This is where all the plaintext is returned, make sure
+ * the plaintext buffer is big enough
+ */
+ gcm_ctx_t *ctx = (gcm_ctx_t *)aes_ctx;
+ size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
+
+ if (data->cd_length < pt_len) {
+ data->cd_length = pt_len;
+ return (CRYPTO_BUFFER_TOO_SMALL);
+ }
+
+ saved_offset = data->cd_offset;
+ saved_length = data->cd_length;
+ ret = gcm_decrypt_final((gcm_ctx_t *)aes_ctx, data,
+ AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block);
+ if (ret == CRYPTO_SUCCESS) {
+ data->cd_length = data->cd_offset - saved_offset;
+ } else {
+ data->cd_length = saved_length;
+ }
+
+ data->cd_offset = saved_offset;
+ if (ret != CRYPTO_SUCCESS) {
+ return (ret);
+ }
+ }
+
+ if ((aes_ctx->ac_flags & (CTR_MODE|CCM_MODE|GCM_MODE|GMAC_MODE)) == 0) {
+ data->cd_length = 0;
+ }
+
+ (void) aes_free_context(ctx);
+
+ return (CRYPTO_SUCCESS);
+}
+
+/* ARGSUSED */
+static int
+aes_encrypt_atomic(crypto_provider_handle_t provider,
+ crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+ crypto_key_t *key, crypto_data_t *plaintext, crypto_data_t *ciphertext,
+ crypto_spi_ctx_template_t template, crypto_req_handle_t req)
+{
+ aes_ctx_t aes_ctx; /* on the stack */
+ off_t saved_offset;
+ size_t saved_length;
+ size_t length_needed;
+ int ret;
+
+ AES_ARG_INPLACE(plaintext, ciphertext);
+
+ /*
+ * CTR, CCM, GCM, and GMAC modes do not require that the plaintext
+ * length be a multiple of the AES block size.
+ */
+ switch (mechanism->cm_type) {
+ case AES_CTR_MECH_INFO_TYPE:
+ case AES_CCM_MECH_INFO_TYPE:
+ case AES_GCM_MECH_INFO_TYPE:
+ case AES_GMAC_MECH_INFO_TYPE:
+ break;
+ default:
+ if ((plaintext->cd_length & (AES_BLOCK_LEN - 1)) != 0)
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ if ((ret = aes_check_mech_param(mechanism, NULL, 0)) != CRYPTO_SUCCESS)
+ return (ret);
+
+ bzero(&aes_ctx, sizeof (aes_ctx_t));
+
+ ret = aes_common_init_ctx(&aes_ctx, template, mechanism, key,
+ crypto_kmflag(req), B_TRUE);
+ if (ret != CRYPTO_SUCCESS)
+ return (ret);
+
+ switch (mechanism->cm_type) {
+ case AES_CCM_MECH_INFO_TYPE:
+ length_needed = plaintext->cd_length + aes_ctx.ac_mac_len;
+ break;
+ case AES_GMAC_MECH_INFO_TYPE:
+ if (plaintext->cd_length != 0)
+ return (CRYPTO_ARGUMENTS_BAD);
+ /* FALLTHRU */
+ case AES_GCM_MECH_INFO_TYPE:
+ length_needed = plaintext->cd_length + aes_ctx.ac_tag_len;
+ break;
+ default:
+ length_needed = plaintext->cd_length;
+ }
+
+ /* return size of buffer needed to store output */
+ if (ciphertext->cd_length < length_needed) {
+ ciphertext->cd_length = length_needed;
+ ret = CRYPTO_BUFFER_TOO_SMALL;
+ goto out;
+ }
+
+ saved_offset = ciphertext->cd_offset;
+ saved_length = ciphertext->cd_length;
+
+ /*
+ * Do an update on the specified input data.
+ */
+ switch (plaintext->cd_format) {
+ case CRYPTO_DATA_RAW:
+ ret = crypto_update_iov(&aes_ctx, plaintext, ciphertext,
+ aes_encrypt_contiguous_blocks, aes_copy_block64);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = crypto_update_uio(&aes_ctx, plaintext, ciphertext,
+ aes_encrypt_contiguous_blocks, aes_copy_block64);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ if (ret == CRYPTO_SUCCESS) {
+ if (mechanism->cm_type == AES_CCM_MECH_INFO_TYPE) {
+ ret = ccm_encrypt_final((ccm_ctx_t *)&aes_ctx,
+ ciphertext, AES_BLOCK_LEN, aes_encrypt_block,
+ aes_xor_block);
+ if (ret != CRYPTO_SUCCESS)
+ goto out;
+ ASSERT(aes_ctx.ac_remainder_len == 0);
+ } else if (mechanism->cm_type == AES_GCM_MECH_INFO_TYPE ||
+ mechanism->cm_type == AES_GMAC_MECH_INFO_TYPE) {
+ ret = gcm_encrypt_final((gcm_ctx_t *)&aes_ctx,
+ ciphertext, AES_BLOCK_LEN, aes_encrypt_block,
+ aes_copy_block, aes_xor_block);
+ if (ret != CRYPTO_SUCCESS)
+ goto out;
+ ASSERT(aes_ctx.ac_remainder_len == 0);
+ } else if (mechanism->cm_type == AES_CTR_MECH_INFO_TYPE) {
+ if (aes_ctx.ac_remainder_len > 0) {
+ ret = ctr_mode_final((ctr_ctx_t *)&aes_ctx,
+ ciphertext, aes_encrypt_block);
+ if (ret != CRYPTO_SUCCESS)
+ goto out;
+ }
+ } else {
+ ASSERT(aes_ctx.ac_remainder_len == 0);
+ }
+
+ if (plaintext != ciphertext) {
+ ciphertext->cd_length =
+ ciphertext->cd_offset - saved_offset;
+ }
+ } else {
+ ciphertext->cd_length = saved_length;
+ }
+ ciphertext->cd_offset = saved_offset;
+
+out:
+ if (aes_ctx.ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) {
+ bzero(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len);
+ kmem_free(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len);
+ }
+
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+aes_decrypt_atomic(crypto_provider_handle_t provider,
+ crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+ crypto_key_t *key, crypto_data_t *ciphertext, crypto_data_t *plaintext,
+ crypto_spi_ctx_template_t template, crypto_req_handle_t req)
+{
+ aes_ctx_t aes_ctx; /* on the stack */
+ off_t saved_offset;
+ size_t saved_length;
+ size_t length_needed;
+ int ret;
+
+ AES_ARG_INPLACE(ciphertext, plaintext);
+
+ /*
+ * CTR, CCM, GCM, and GMAC modes do not require that the ciphertext
+ * length be a multiple of the AES block size.
+ */
+ switch (mechanism->cm_type) {
+ case AES_CTR_MECH_INFO_TYPE:
+ case AES_CCM_MECH_INFO_TYPE:
+ case AES_GCM_MECH_INFO_TYPE:
+ case AES_GMAC_MECH_INFO_TYPE:
+ break;
+ default:
+ if ((ciphertext->cd_length & (AES_BLOCK_LEN - 1)) != 0)
+ return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE);
+ }
+
+ if ((ret = aes_check_mech_param(mechanism, NULL, 0)) != CRYPTO_SUCCESS)
+ return (ret);
+
+ bzero(&aes_ctx, sizeof (aes_ctx_t));
+
+ ret = aes_common_init_ctx(&aes_ctx, template, mechanism, key,
+ crypto_kmflag(req), B_FALSE);
+ if (ret != CRYPTO_SUCCESS)
+ return (ret);
+
+ switch (mechanism->cm_type) {
+ case AES_CCM_MECH_INFO_TYPE:
+ length_needed = aes_ctx.ac_data_len;
+ break;
+ case AES_GCM_MECH_INFO_TYPE:
+ length_needed = ciphertext->cd_length - aes_ctx.ac_tag_len;
+ break;
+ case AES_GMAC_MECH_INFO_TYPE:
+ if (plaintext->cd_length != 0)
+ return (CRYPTO_ARGUMENTS_BAD);
+ length_needed = 0;
+ break;
+ default:
+ length_needed = ciphertext->cd_length;
+ }
+
+ /* return size of buffer needed to store output */
+ if (plaintext->cd_length < length_needed) {
+ plaintext->cd_length = length_needed;
+ ret = CRYPTO_BUFFER_TOO_SMALL;
+ goto out;
+ }
+
+ saved_offset = plaintext->cd_offset;
+ saved_length = plaintext->cd_length;
+
+ if (mechanism->cm_type == AES_GCM_MECH_INFO_TYPE ||
+ mechanism->cm_type == AES_GMAC_MECH_INFO_TYPE)
+ gcm_set_kmflag((gcm_ctx_t *)&aes_ctx, crypto_kmflag(req));
+
+ /*
+ * Do an update on the specified input data.
+ */
+ switch (ciphertext->cd_format) {
+ case CRYPTO_DATA_RAW:
+ ret = crypto_update_iov(&aes_ctx, ciphertext, plaintext,
+ aes_decrypt_contiguous_blocks, aes_copy_block64);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = crypto_update_uio(&aes_ctx, ciphertext, plaintext,
+ aes_decrypt_contiguous_blocks, aes_copy_block64);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ if (ret == CRYPTO_SUCCESS) {
+ if (mechanism->cm_type == AES_CCM_MECH_INFO_TYPE) {
+ ASSERT(aes_ctx.ac_processed_data_len
+ == aes_ctx.ac_data_len);
+ ASSERT(aes_ctx.ac_processed_mac_len
+ == aes_ctx.ac_mac_len);
+ ret = ccm_decrypt_final((ccm_ctx_t *)&aes_ctx,
+ plaintext, AES_BLOCK_LEN, aes_encrypt_block,
+ aes_copy_block, aes_xor_block);
+ ASSERT(aes_ctx.ac_remainder_len == 0);
+ if ((ret == CRYPTO_SUCCESS) &&
+ (ciphertext != plaintext)) {
+ plaintext->cd_length =
+ plaintext->cd_offset - saved_offset;
+ } else {
+ plaintext->cd_length = saved_length;
+ }
+ } else if (mechanism->cm_type == AES_GCM_MECH_INFO_TYPE ||
+ mechanism->cm_type == AES_GMAC_MECH_INFO_TYPE) {
+ ret = gcm_decrypt_final((gcm_ctx_t *)&aes_ctx,
+ plaintext, AES_BLOCK_LEN, aes_encrypt_block,
+ aes_xor_block);
+ ASSERT(aes_ctx.ac_remainder_len == 0);
+ if ((ret == CRYPTO_SUCCESS) &&
+ (ciphertext != plaintext)) {
+ plaintext->cd_length =
+ plaintext->cd_offset - saved_offset;
+ } else {
+ plaintext->cd_length = saved_length;
+ }
+ } else if (mechanism->cm_type != AES_CTR_MECH_INFO_TYPE) {
+ ASSERT(aes_ctx.ac_remainder_len == 0);
+ if (ciphertext != plaintext)
+ plaintext->cd_length =
+ plaintext->cd_offset - saved_offset;
+ } else {
+ if (aes_ctx.ac_remainder_len > 0) {
+ ret = ctr_mode_final((ctr_ctx_t *)&aes_ctx,
+ plaintext, aes_encrypt_block);
+ if (ret == CRYPTO_DATA_LEN_RANGE)
+ ret = CRYPTO_ENCRYPTED_DATA_LEN_RANGE;
+ if (ret != CRYPTO_SUCCESS)
+ goto out;
+ }
+ if (ciphertext != plaintext)
+ plaintext->cd_length =
+ plaintext->cd_offset - saved_offset;
+ }
+ } else {
+ plaintext->cd_length = saved_length;
+ }
+ plaintext->cd_offset = saved_offset;
+
+out:
+ if (aes_ctx.ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) {
+ bzero(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len);
+ kmem_free(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len);
+ }
+
+ if (aes_ctx.ac_flags & CCM_MODE) {
+ if (aes_ctx.ac_pt_buf != NULL) {
+ vmem_free(aes_ctx.ac_pt_buf, aes_ctx.ac_data_len);
+ }
+ } else if (aes_ctx.ac_flags & (GCM_MODE|GMAC_MODE)) {
+ if (((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf != NULL) {
+ vmem_free(((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf,
+ ((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf_len);
+ }
+ }
+
+ return (ret);
+}
+
+/*
+ * KCF software provider context template entry points.
+ */
+/* ARGSUSED */
+static int
+aes_create_ctx_template(crypto_provider_handle_t provider,
+ crypto_mechanism_t *mechanism, crypto_key_t *key,
+ crypto_spi_ctx_template_t *tmpl, size_t *tmpl_size, crypto_req_handle_t req)
+{
+ void *keysched;
+ size_t size;
+ int rv;
+
+ if (mechanism->cm_type != AES_ECB_MECH_INFO_TYPE &&
+ mechanism->cm_type != AES_CBC_MECH_INFO_TYPE &&
+ mechanism->cm_type != AES_CTR_MECH_INFO_TYPE &&
+ mechanism->cm_type != AES_CCM_MECH_INFO_TYPE &&
+ mechanism->cm_type != AES_GCM_MECH_INFO_TYPE &&
+ mechanism->cm_type != AES_GMAC_MECH_INFO_TYPE)
+ return (CRYPTO_MECHANISM_INVALID);
+
+ if ((keysched = aes_alloc_keysched(&size,
+ crypto_kmflag(req))) == NULL) {
+ return (CRYPTO_HOST_MEMORY);
+ }
+
+ /*
+ * Initialize key schedule. Key length information is stored
+ * in the key.
+ */
+ if ((rv = init_keysched(key, keysched)) != CRYPTO_SUCCESS) {
+ bzero(keysched, size);
+ kmem_free(keysched, size);
+ return (rv);
+ }
+
+ *tmpl = keysched;
+ *tmpl_size = size;
+
+ return (CRYPTO_SUCCESS);
+}
+
+
+static int
+aes_free_context(crypto_ctx_t *ctx)
+{
+ aes_ctx_t *aes_ctx = ctx->cc_provider_private;
+
+ if (aes_ctx != NULL) {
+ if (aes_ctx->ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) {
+ ASSERT(aes_ctx->ac_keysched_len != 0);
+ bzero(aes_ctx->ac_keysched, aes_ctx->ac_keysched_len);
+ kmem_free(aes_ctx->ac_keysched,
+ aes_ctx->ac_keysched_len);
+ }
+ crypto_free_mode_ctx(aes_ctx);
+ ctx->cc_provider_private = NULL;
+ }
+
+ return (CRYPTO_SUCCESS);
+}
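
aes_free_context() scrubs the key schedule with bzero() before freeing it whenever the provider owns the schedule. A userspace sketch of the same zeroize-then-free discipline; explicit_bzero() (glibc 2.25+ and the BSDs) is used here because, unlike a plain memset(), it cannot be elided as a dead store:

    #include <stdlib.h>
    #include <string.h>

    static void
    free_key_schedule(void *ks, size_t len)
    {
        if (ks == NULL)
            return;
        explicit_bzero(ks, len);    /* scrub key material before release */
        free(ks);
    }
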
+
+
+static int
+aes_common_init_ctx(aes_ctx_t *aes_ctx, crypto_spi_ctx_template_t *template,
+ crypto_mechanism_t *mechanism, crypto_key_t *key, int kmflag,
+ boolean_t is_encrypt_init)
+{
+ int rv = CRYPTO_SUCCESS;
+ void *keysched;
+ size_t size = 0;
+
+ if (template == NULL) {
+ if ((keysched = aes_alloc_keysched(&size, kmflag)) == NULL)
+ return (CRYPTO_HOST_MEMORY);
+ /*
+ * Initialize key schedule.
+ * Key length is stored in the key.
+ */
+ if ((rv = init_keysched(key, keysched)) != CRYPTO_SUCCESS) {
+ kmem_free(keysched, size);
+ return (rv);
+ }
+
+ aes_ctx->ac_flags |= PROVIDER_OWNS_KEY_SCHEDULE;
+ aes_ctx->ac_keysched_len = size;
+ } else {
+ keysched = template;
+ }
+ aes_ctx->ac_keysched = keysched;
+
+ switch (mechanism->cm_type) {
+ case AES_CBC_MECH_INFO_TYPE:
+ rv = cbc_init_ctx((cbc_ctx_t *)aes_ctx, mechanism->cm_param,
+ mechanism->cm_param_len, AES_BLOCK_LEN, aes_copy_block64);
+ break;
+ case AES_CTR_MECH_INFO_TYPE: {
+ CK_AES_CTR_PARAMS *pp;
+
+ if (mechanism->cm_param == NULL ||
+ mechanism->cm_param_len != sizeof (CK_AES_CTR_PARAMS)) {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+ pp = (CK_AES_CTR_PARAMS *)(void *)mechanism->cm_param;
+ rv = ctr_init_ctx((ctr_ctx_t *)aes_ctx, pp->ulCounterBits,
+ pp->cb, aes_copy_block);
+ break;
+ }
+ case AES_CCM_MECH_INFO_TYPE:
+ if (mechanism->cm_param == NULL ||
+ mechanism->cm_param_len != sizeof (CK_AES_CCM_PARAMS)) {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+ rv = ccm_init_ctx((ccm_ctx_t *)aes_ctx, mechanism->cm_param,
+ kmflag, is_encrypt_init, AES_BLOCK_LEN, aes_encrypt_block,
+ aes_xor_block);
+ break;
+ case AES_GCM_MECH_INFO_TYPE:
+ if (mechanism->cm_param == NULL ||
+ mechanism->cm_param_len != sizeof (CK_AES_GCM_PARAMS)) {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+ rv = gcm_init_ctx((gcm_ctx_t *)aes_ctx, mechanism->cm_param,
+ AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
+ aes_xor_block);
+ break;
+ case AES_GMAC_MECH_INFO_TYPE:
+ if (mechanism->cm_param == NULL ||
+ mechanism->cm_param_len != sizeof (CK_AES_GMAC_PARAMS)) {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+ rv = gmac_init_ctx((gcm_ctx_t *)aes_ctx, mechanism->cm_param,
+ AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
+ aes_xor_block);
+ break;
+ case AES_ECB_MECH_INFO_TYPE:
+ aes_ctx->ac_flags |= ECB_MODE;
+ }
+
+ if (rv != CRYPTO_SUCCESS) {
+ if (aes_ctx->ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) {
+ bzero(keysched, size);
+ kmem_free(keysched, size);
+ }
+ }
+
+ return (rv);
+}
+
+static int
+process_gmac_mech(crypto_mechanism_t *mech, crypto_data_t *data,
+ CK_AES_GCM_PARAMS *gcm_params)
+{
+ /* LINTED: pointer alignment */
+ CK_AES_GMAC_PARAMS *params = (CK_AES_GMAC_PARAMS *)mech->cm_param;
+
+ if (mech->cm_type != AES_GMAC_MECH_INFO_TYPE)
+ return (CRYPTO_MECHANISM_INVALID);
+
+ if (mech->cm_param_len != sizeof (CK_AES_GMAC_PARAMS))
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+
+ if (params->pIv == NULL)
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+
+ gcm_params->pIv = params->pIv;
+ gcm_params->ulIvLen = AES_GMAC_IV_LEN;
+ gcm_params->ulTagBits = AES_GMAC_TAG_BITS;
+
+ if (data == NULL)
+ return (CRYPTO_SUCCESS);
+
+ if (data->cd_format != CRYPTO_DATA_RAW)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ gcm_params->pAAD = (uchar_t *)data->cd_raw.iov_base;
+ gcm_params->ulAADLen = data->cd_length;
+ return (CRYPTO_SUCCESS);
+}
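
process_gmac_mech() expresses GMAC as a degenerate GCM operation: the message travels as AAD, there is no plaintext, and the IV and tag sizes are pinned to AES_GMAC_IV_LEN and AES_GMAC_TAG_BITS. A simplified sketch of that translation, with stand-in structures rather than the real PKCS#11-style types (the 12-byte IV and 128-bit tag are assumed values):

    #include <stddef.h>

    /* simplified stand-ins for the CK_AES_GMAC/GCM_PARAMS structures */
    typedef struct { unsigned char *pIv; } gmac_params_t;
    typedef struct {
        unsigned char *pIv;
        unsigned long ulIvLen;
        unsigned long ulTagBits;
        unsigned char *pAAD;
        unsigned long ulAADLen;
    } gcm_params_t;

    static void
    gmac_to_gcm(const gmac_params_t *in, unsigned char *msg, size_t msg_len,
        gcm_params_t *out)
    {
        out->pIv = in->pIv;
        out->ulIvLen = 12;          /* AES_GMAC_IV_LEN (assumed, 96-bit IV) */
        out->ulTagBits = 128;       /* AES_GMAC_TAG_BITS (assumed, full tag) */
        out->pAAD = msg;            /* the message is authenticated... */
        out->ulAADLen = msg_len;    /* ...but never encrypted */
    }
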
+
+static int
+aes_mac_atomic(crypto_provider_handle_t provider,
+ crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+ crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
+ crypto_spi_ctx_template_t template, crypto_req_handle_t req)
+{
+ CK_AES_GCM_PARAMS gcm_params;
+ crypto_mechanism_t gcm_mech;
+ int rv;
+
+ if ((rv = process_gmac_mech(mechanism, data, &gcm_params))
+ != CRYPTO_SUCCESS)
+ return (rv);
+
+ gcm_mech.cm_type = AES_GCM_MECH_INFO_TYPE;
+ gcm_mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS);
+ gcm_mech.cm_param = (char *)&gcm_params;
+
+ return (aes_encrypt_atomic(provider, session_id, &gcm_mech,
+ key, &null_crypto_data, mac, template, req));
+}
+
+static int
+aes_mac_verify_atomic(crypto_provider_handle_t provider,
+ crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+ crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
+ crypto_spi_ctx_template_t template, crypto_req_handle_t req)
+{
+ CK_AES_GCM_PARAMS gcm_params;
+ crypto_mechanism_t gcm_mech;
+ int rv;
+
+ if ((rv = process_gmac_mech(mechanism, data, &gcm_params))
+ != CRYPTO_SUCCESS)
+ return (rv);
+
+ gcm_mech.cm_type = AES_GCM_MECH_INFO_TYPE;
+ gcm_mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS);
+ gcm_mech.cm_param = (char *)&gcm_params;
+
+ return (aes_decrypt_atomic(provider, session_id, &gcm_mech,
+ key, mac, &null_crypto_data, template, req));
+}
diff --git a/zfs/module/icp/io/edonr_mod.c b/zfs/module/icp/io/edonr_mod.c
new file mode 100644
index 000000000000..cb748a954e25
--- /dev/null
+++ b/zfs/module/icp/io/edonr_mod.c
@@ -0,0 +1,63 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
+
+#include <sys/modctl.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/spi.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#include <sys/edonr.h>
+
+/*
+ * Unlike sha2 or skein, we won't expose edonr via the Kernel Cryptographic
+ * Framework (KCF), because Edon-R is *NOT* suitable for general-purpose
+ * cryptographic use. Users of Edon-R must interface directly to this module.
+ */
+
+static struct modlmisc modlmisc = {
+ &mod_cryptoops,
+ "Edon-R Message-Digest Algorithm"
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, {&modlmisc, NULL}
+};
+
+int
+edonr_mod_init(void)
+{
+ int error;
+
+ if ((error = mod_install(&modlinkage)) != 0)
+ return (error);
+
+ return (0);
+}
+
+int
+edonr_mod_fini(void)
+{
+ return (mod_remove(&modlinkage));
+}
diff --git a/zfs/module/icp/io/sha1_mod.c b/zfs/module/icp/io/sha1_mod.c
new file mode 100644
index 000000000000..a278dac7fc82
--- /dev/null
+++ b/zfs/module/icp/io/sha1_mod.c
@@ -0,0 +1,1239 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/modctl.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/spi.h>
+
+#include <sha1/sha1.h>
+#include <sha1/sha1_impl.h>
+
+/*
+ * The sha1 module is created with two modlinkages:
+ * - a modlmisc that allows consumers to directly call the entry points
+ * SHA1Init, SHA1Update, and SHA1Final.
+ * - a modlcrypto that allows the module to register with the Kernel
+ * Cryptographic Framework (KCF) as a software provider for the SHA1
+ * mechanisms.
+ */
+
+static struct modlcrypto modlcrypto = {
+ &mod_cryptoops,
+ "SHA1 Kernel SW Provider 1.1"
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, { &modlcrypto, NULL }
+};
+
+
+/*
+ * Macros to access the SHA1 or SHA1-HMAC contexts from a context passed
+ * by KCF to one of the entry points.
+ */
+
+#define PROV_SHA1_CTX(ctx) ((sha1_ctx_t *)(ctx)->cc_provider_private)
+#define PROV_SHA1_HMAC_CTX(ctx) ((sha1_hmac_ctx_t *)(ctx)->cc_provider_private)
+
+/* to extract the digest length passed as mechanism parameter */
+#define PROV_SHA1_GET_DIGEST_LEN(m, len) { \
+ if (IS_P2ALIGNED((m)->cm_param, sizeof (ulong_t))) \
+ (len) = (uint32_t)*((ulong_t *)(void *)mechanism->cm_param); \
+ else { \
+ ulong_t tmp_ulong; \
+ bcopy((m)->cm_param, &tmp_ulong, sizeof (ulong_t)); \
+ (len) = (uint32_t)tmp_ulong; \
+ } \
+}
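
PROV_SHA1_GET_DIGEST_LEN() reads the ulong_t mechanism parameter by direct dereference when the buffer is suitably aligned, and stages it through a local otherwise. A function-shaped userspace sketch of the same alignment-safe read, with memcpy() standing in for bcopy():

    #include <stdint.h>
    #include <string.h>

    static uint32_t
    read_ulong_param(const void *param)
    {
        unsigned long v;

        if (((uintptr_t)param & (sizeof (unsigned long) - 1)) == 0) {
            /* aligned: safe to dereference directly */
            v = *(const unsigned long *)param;
        } else {
            /* unaligned: stage through a local (bcopy in the kernel) */
            memcpy(&v, param, sizeof (v));
        }
        return ((uint32_t)v);
    }
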
+
+#define PROV_SHA1_DIGEST_KEY(ctx, key, len, digest) { \
+ SHA1Init(ctx); \
+ SHA1Update(ctx, key, len); \
+ SHA1Final(digest, ctx); \
+}
+
+/*
+ * Mechanism info structure passed to KCF during registration.
+ */
+static crypto_mech_info_t sha1_mech_info_tab[] = {
+ /* SHA1 */
+ {SUN_CKM_SHA1, SHA1_MECH_INFO_TYPE,
+ CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
+ 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+ /* SHA1-HMAC */
+ {SUN_CKM_SHA1_HMAC, SHA1_HMAC_MECH_INFO_TYPE,
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
+ SHA1_HMAC_MIN_KEY_LEN, SHA1_HMAC_MAX_KEY_LEN,
+ CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ /* SHA1-HMAC GENERAL */
+ {SUN_CKM_SHA1_HMAC_GENERAL, SHA1_HMAC_GEN_MECH_INFO_TYPE,
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
+ SHA1_HMAC_MIN_KEY_LEN, SHA1_HMAC_MAX_KEY_LEN,
+ CRYPTO_KEYSIZE_UNIT_IN_BYTES}
+};
+
+static void sha1_provider_status(crypto_provider_handle_t, uint_t *);
+
+static crypto_control_ops_t sha1_control_ops = {
+ sha1_provider_status
+};
+
+static int sha1_digest_init(crypto_ctx_t *, crypto_mechanism_t *,
+ crypto_req_handle_t);
+static int sha1_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *,
+ crypto_req_handle_t);
+static int sha1_digest_update(crypto_ctx_t *, crypto_data_t *,
+ crypto_req_handle_t);
+static int sha1_digest_final(crypto_ctx_t *, crypto_data_t *,
+ crypto_req_handle_t);
+static int sha1_digest_atomic(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_data_t *, crypto_data_t *,
+ crypto_req_handle_t);
+
+static crypto_digest_ops_t sha1_digest_ops = {
+ sha1_digest_init,
+ sha1_digest,
+ sha1_digest_update,
+ NULL,
+ sha1_digest_final,
+ sha1_digest_atomic
+};
+
+static int sha1_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+static int sha1_mac_update(crypto_ctx_t *, crypto_data_t *,
+ crypto_req_handle_t);
+static int sha1_mac_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t);
+static int sha1_mac_atomic(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+static int sha1_mac_verify_atomic(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+
+static crypto_mac_ops_t sha1_mac_ops = {
+ sha1_mac_init,
+ NULL,
+ sha1_mac_update,
+ sha1_mac_final,
+ sha1_mac_atomic,
+ sha1_mac_verify_atomic
+};
+
+static int sha1_create_ctx_template(crypto_provider_handle_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *,
+ size_t *, crypto_req_handle_t);
+static int sha1_free_context(crypto_ctx_t *);
+
+static crypto_ctx_ops_t sha1_ctx_ops = {
+ sha1_create_ctx_template,
+ sha1_free_context
+};
+
+static crypto_ops_t sha1_crypto_ops = {{{{{
+ &sha1_control_ops,
+ &sha1_digest_ops,
+ NULL,
+ &sha1_mac_ops,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ &sha1_ctx_ops,
+}}}}};
+
+static crypto_provider_info_t sha1_prov_info = {{{{
+ CRYPTO_SPI_VERSION_1,
+ "SHA1 Software Provider",
+ CRYPTO_SW_PROVIDER,
+ NULL,
+ &sha1_crypto_ops,
+ sizeof (sha1_mech_info_tab)/sizeof (crypto_mech_info_t),
+ sha1_mech_info_tab
+}}}};
+
+static crypto_kcf_provider_handle_t sha1_prov_handle = 0;
+
+int
+sha1_mod_init(void)
+{
+ int ret;
+
+ if ((ret = mod_install(&modlinkage)) != 0)
+ return (ret);
+
+ /*
+ * Register with KCF. If the registration fails, log an
+ * error but do not uninstall the module, since the functionality
+ * provided by misc/sha1 should still be available.
+ */
+ if ((ret = crypto_register_provider(&sha1_prov_info,
+ &sha1_prov_handle)) != CRYPTO_SUCCESS)
+ cmn_err(CE_WARN, "sha1 _init: "
+ "crypto_register_provider() failed (0x%x)", ret);
+
+ return (0);
+}
+
+int
+sha1_mod_fini(void)
+{
+ int ret;
+
+ if (sha1_prov_handle != 0) {
+ if ((ret = crypto_unregister_provider(sha1_prov_handle)) !=
+ CRYPTO_SUCCESS) {
+ cmn_err(CE_WARN,
+ "sha1 _fini: crypto_unregister_provider() "
+ "failed (0x%x)", ret);
+ return (EBUSY);
+ }
+ sha1_prov_handle = 0;
+ }
+
+ return (mod_remove(&modlinkage));
+}
+
+/*
+ * KCF software provider control entry points.
+ */
+/* ARGSUSED */
+static void
+sha1_provider_status(crypto_provider_handle_t provider, uint_t *status)
+{
+ *status = CRYPTO_PROVIDER_READY;
+}
+
+/*
+ * KCF software provider digest entry points.
+ */
+
+static int
+sha1_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
+ crypto_req_handle_t req)
+{
+ if (mechanism->cm_type != SHA1_MECH_INFO_TYPE)
+ return (CRYPTO_MECHANISM_INVALID);
+
+ /*
+ * Allocate and initialize SHA1 context.
+ */
+ ctx->cc_provider_private = kmem_alloc(sizeof (sha1_ctx_t),
+ crypto_kmflag(req));
+ if (ctx->cc_provider_private == NULL)
+ return (CRYPTO_HOST_MEMORY);
+
+ PROV_SHA1_CTX(ctx)->sc_mech_type = SHA1_MECH_INFO_TYPE;
+ SHA1Init(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx);
+
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * Helper SHA1 digest update function for uio data.
+ */
+static int
+sha1_digest_update_uio(SHA1_CTX *sha1_ctx, crypto_data_t *data)
+{
+ off_t offset = data->cd_offset;
+ size_t length = data->cd_length;
+ uint_t vec_idx;
+ size_t cur_len;
+
+ /* we support only kernel buffers */
+ if (data->cd_uio->uio_segflg != UIO_SYSSPACE)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ /*
+ * Jump to the first iovec containing data to be
+ * digested.
+ */
+ for (vec_idx = 0; vec_idx < data->cd_uio->uio_iovcnt &&
+ offset >= data->cd_uio->uio_iov[vec_idx].iov_len;
+ offset -= data->cd_uio->uio_iov[vec_idx++].iov_len)
+ ;
+ if (vec_idx == data->cd_uio->uio_iovcnt) {
+ /*
+ * The caller specified an offset that is larger than the
+ * total size of the buffers it provided.
+ */
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ /*
+ * Now do the digesting on the iovecs.
+ */
+ while (vec_idx < data->cd_uio->uio_iovcnt && length > 0) {
+ cur_len = MIN(data->cd_uio->uio_iov[vec_idx].iov_len -
+ offset, length);
+
+ SHA1Update(sha1_ctx,
+ (uint8_t *)data->cd_uio->uio_iov[vec_idx].iov_base + offset,
+ cur_len);
+
+ length -= cur_len;
+ vec_idx++;
+ offset = 0;
+ }
+
+ if (vec_idx == data->cd_uio->uio_iovcnt && length > 0) {
+ /*
+ * The end of the specified iovecs was reached but the
+ * requested length could not be processed; i.e., the
+ * caller asked to digest more data than it provided.
+ */
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ return (CRYPTO_SUCCESS);
+}
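
The uio walk above is a generic scatter/gather pattern: skip whole iovecs until the starting offset lands inside one, then process segment by segment, zeroing the intra-segment offset after the first. A standalone sketch against the POSIX struct iovec, with a hypothetical process() callback rather than an ICP interface:

    #include <stddef.h>
    #include <sys/uio.h>

    static int
    walk_iov(const struct iovec *iov, int iovcnt, size_t offset,
        size_t length, void (*process)(const void *, size_t))
    {
        int i = 0;

        /* skip whole iovecs until the starting offset is consumed */
        while (i < iovcnt && offset >= iov[i].iov_len)
            offset -= iov[i++].iov_len;
        if (i == iovcnt)
            return (-1);    /* offset lies past the end of the buffers */

        while (i < iovcnt && length > 0) {
            size_t cur = iov[i].iov_len - offset;

            if (cur > length)
                cur = length;
            process((const char *)iov[i].iov_base + offset, cur);
            length -= cur;
            offset = 0;     /* only the first segment starts mid-iovec */
            i++;
        }
        return (length > 0 ? -1 : 0);   /* -1: more requested than given */
    }
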
+
+/*
+ * Helper SHA1 digest final function for uio data.
+ * digest_len is the length of the desired digest. If digest_len
+ * is smaller than the default SHA1 digest length, the caller
+ * must pass a scratch buffer, digest_scratch, which must
+ * be at least SHA1_DIGEST_LENGTH bytes.
+ */
+static int
+sha1_digest_final_uio(SHA1_CTX *sha1_ctx, crypto_data_t *digest,
+ ulong_t digest_len, uchar_t *digest_scratch)
+{
+ off_t offset = digest->cd_offset;
+ uint_t vec_idx;
+
+ /* we support only kernel buffers */
+ if (digest->cd_uio->uio_segflg != UIO_SYSSPACE)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ /*
+ * Jump to the first iovec containing a pointer to the digest
+ * to be returned.
+ */
+ for (vec_idx = 0; vec_idx < digest->cd_uio->uio_iovcnt &&
+     offset >= digest->cd_uio->uio_iov[vec_idx].iov_len;
+     offset -= digest->cd_uio->uio_iov[vec_idx++].iov_len)
+ ;
+ if (vec_idx == digest->cd_uio->uio_iovcnt) {
+ /*
+ * The caller specified an offset that is
+ * larger than the total size of the buffers
+ * it provided.
+ */
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ if (offset + digest_len <=
+ digest->cd_uio->uio_iov[vec_idx].iov_len) {
+ /*
+ * The computed SHA1 digest will fit in the current
+ * iovec.
+ */
+ if (digest_len != SHA1_DIGEST_LENGTH) {
+ /*
+ * The caller requested a short digest. Digest
+ * into a scratch buffer and return to
+ * the user only what was requested.
+ */
+ SHA1Final(digest_scratch, sha1_ctx);
+ bcopy(digest_scratch, (uchar_t *)digest->
+ cd_uio->uio_iov[vec_idx].iov_base + offset,
+ digest_len);
+ } else {
+ SHA1Final((uchar_t *)digest->
+ cd_uio->uio_iov[vec_idx].iov_base + offset,
+ sha1_ctx);
+ }
+ } else {
+ /*
+ * The computed digest will cross one or more iovecs.
+ * This is bad for performance, but we need to support it.
+ * Allocate a small scratch buffer on the stack and copy
+ * it piecemeal to the specified digest iovecs.
+ */
+ uchar_t digest_tmp[SHA1_DIGEST_LENGTH];
+ off_t scratch_offset = 0;
+ size_t length = digest_len;
+ size_t cur_len;
+
+ SHA1Final(digest_tmp, sha1_ctx);
+
+ while (vec_idx < digest->cd_uio->uio_iovcnt && length > 0) {
+ cur_len = MIN(digest->cd_uio->uio_iov[vec_idx].iov_len -
+ offset, length);
+ bcopy(digest_tmp + scratch_offset,
+ digest->cd_uio->uio_iov[vec_idx].iov_base + offset,
+ cur_len);
+
+ length -= cur_len;
+ vec_idx++;
+ scratch_offset += cur_len;
+ offset = 0;
+ }
+
+ if (vec_idx == digest->cd_uio->uio_iovcnt && length > 0) {
+ /*
+ * The end of the specified iovecs was reached but the
+ * requested length could not be processed; i.e., the
+ * caller asked to digest more data than it provided.
+ */
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+ }
+
+ return (CRYPTO_SUCCESS);
+}
+
+/* ARGSUSED */
+static int
+sha1_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest,
+ crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+
+ ASSERT(ctx->cc_provider_private != NULL);
+
+ /*
+ * For the cases below, just return the length needed to store
+ * the output and do not destroy the context.
+ */
+ if ((digest->cd_length == 0) ||
+ (digest->cd_length < SHA1_DIGEST_LENGTH)) {
+ digest->cd_length = SHA1_DIGEST_LENGTH;
+ return (CRYPTO_BUFFER_TOO_SMALL);
+ }
+
+ /*
+ * Do the SHA1 update on the specified input data.
+ */
+ switch (data->cd_format) {
+ case CRYPTO_DATA_RAW:
+ SHA1Update(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
+ (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
+ data->cd_length);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha1_digest_update_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
+ data);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ if (ret != CRYPTO_SUCCESS) {
+ /* the update failed, free context and bail */
+ kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t));
+ ctx->cc_provider_private = NULL;
+ digest->cd_length = 0;
+ return (ret);
+ }
+
+ /*
+ * Do a SHA1 final; it must be done separately since the digest
+ * type can be different from the input data type.
+ */
+ switch (digest->cd_format) {
+ case CRYPTO_DATA_RAW:
+ SHA1Final((unsigned char *)digest->cd_raw.iov_base +
+ digest->cd_offset, &PROV_SHA1_CTX(ctx)->sc_sha1_ctx);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha1_digest_final_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
+ digest, SHA1_DIGEST_LENGTH, NULL);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ /* all done, free context and return */
+
+ if (ret == CRYPTO_SUCCESS) {
+ digest->cd_length = SHA1_DIGEST_LENGTH;
+ } else {
+ digest->cd_length = 0;
+ }
+
+ kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t));
+ ctx->cc_provider_private = NULL;
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+sha1_digest_update(crypto_ctx_t *ctx, crypto_data_t *data,
+ crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+
+ ASSERT(ctx->cc_provider_private != NULL);
+
+ /*
+ * Do the SHA1 update on the specified input data.
+ */
+ switch (data->cd_format) {
+ case CRYPTO_DATA_RAW:
+ SHA1Update(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
+ (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
+ data->cd_length);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha1_digest_update_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
+ data);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+sha1_digest_final(crypto_ctx_t *ctx, crypto_data_t *digest,
+ crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+
+ ASSERT(ctx->cc_provider_private != NULL);
+
+ /*
+ * For the cases below, just return the length needed to store
+ * the output and do not destroy the context.
+ */
+ if ((digest->cd_length == 0) ||
+ (digest->cd_length < SHA1_DIGEST_LENGTH)) {
+ digest->cd_length = SHA1_DIGEST_LENGTH;
+ return (CRYPTO_BUFFER_TOO_SMALL);
+ }
+
+ /*
+ * Do a SHA1 final.
+ */
+ switch (digest->cd_format) {
+ case CRYPTO_DATA_RAW:
+ SHA1Final((unsigned char *)digest->cd_raw.iov_base +
+ digest->cd_offset, &PROV_SHA1_CTX(ctx)->sc_sha1_ctx);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha1_digest_final_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx,
+ digest, SHA1_DIGEST_LENGTH, NULL);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ /* all done, free context and return */
+
+ if (ret == CRYPTO_SUCCESS) {
+ digest->cd_length = SHA1_DIGEST_LENGTH;
+ } else {
+ digest->cd_length = 0;
+ }
+
+ kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t));
+ ctx->cc_provider_private = NULL;
+
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+sha1_digest_atomic(crypto_provider_handle_t provider,
+ crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+ crypto_data_t *data, crypto_data_t *digest,
+ crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+ SHA1_CTX sha1_ctx;
+
+ if (mechanism->cm_type != SHA1_MECH_INFO_TYPE)
+ return (CRYPTO_MECHANISM_INVALID);
+
+ /*
+ * Do the SHA1 init.
+ */
+ SHA1Init(&sha1_ctx);
+
+ /*
+ * Do the SHA1 update on the specified input data.
+ */
+ switch (data->cd_format) {
+ case CRYPTO_DATA_RAW:
+ SHA1Update(&sha1_ctx,
+ (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
+ data->cd_length);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha1_digest_update_uio(&sha1_ctx, data);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ if (ret != CRYPTO_SUCCESS) {
+ /* the update failed, bail */
+ digest->cd_length = 0;
+ return (ret);
+ }
+
+ /*
+ * Do a SHA1 final; it must be done separately since the digest
+ * type can be different from the input data type.
+ */
+ switch (digest->cd_format) {
+ case CRYPTO_DATA_RAW:
+ SHA1Final((unsigned char *)digest->cd_raw.iov_base +
+ digest->cd_offset, &sha1_ctx);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha1_digest_final_uio(&sha1_ctx, digest,
+ SHA1_DIGEST_LENGTH, NULL);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ if (ret == CRYPTO_SUCCESS) {
+ digest->cd_length = SHA1_DIGEST_LENGTH;
+ } else {
+ digest->cd_length = 0;
+ }
+
+ return (ret);
+}
+
+/*
+ * KCF software provider mac entry points.
+ *
+ * SHA1 HMAC is: SHA1(key XOR opad, SHA1(key XOR ipad, text))
+ *
+ * Init:
+ * The initialization routine initializes what we denote
+ * as the inner and outer contexts by doing
+ * - for inner context: SHA1(key XOR ipad)
+ * - for outer context: SHA1(key XOR opad)
+ *
+ * Update:
+ * Each subsequent SHA1 HMAC update will result in an
+ * update of the inner context with the specified data.
+ *
+ * Final:
+ * The SHA1 HMAC final will do a SHA1 final operation on the
+ * inner context, and the resulting digest will be used
+ * as the data for an update on the outer context. Last
+ * but not least, a SHA1 final on the outer context will
+ * be performed to obtain the SHA1 HMAC digest to return
+ * to the user.
+ */
+
+/*
+ * Initialize a SHA1-HMAC context.
+ */
+static void
+sha1_mac_init_ctx(sha1_hmac_ctx_t *ctx, void *keyval, uint_t length_in_bytes)
+{
+ uint32_t ipad[SHA1_HMAC_INTS_PER_BLOCK];
+ uint32_t opad[SHA1_HMAC_INTS_PER_BLOCK];
+ uint_t i;
+
+ bzero(ipad, SHA1_HMAC_BLOCK_SIZE);
+ bzero(opad, SHA1_HMAC_BLOCK_SIZE);
+
+ bcopy(keyval, ipad, length_in_bytes);
+ bcopy(keyval, opad, length_in_bytes);
+
+ /* XOR key with ipad (0x36) and opad (0x5c) */
+ for (i = 0; i < SHA1_HMAC_INTS_PER_BLOCK; i++) {
+ ipad[i] ^= 0x36363636;
+ opad[i] ^= 0x5c5c5c5c;
+ }
+
+ /* perform SHA1 on ipad */
+ SHA1Init(&ctx->hc_icontext);
+ SHA1Update(&ctx->hc_icontext, (uint8_t *)ipad, SHA1_HMAC_BLOCK_SIZE);
+
+ /* perform SHA1 on opad */
+ SHA1Init(&ctx->hc_ocontext);
+ SHA1Update(&ctx->hc_ocontext, (uint8_t *)opad, SHA1_HMAC_BLOCK_SIZE);
+}
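
For reference, the three phases described above compose into the standard HMAC construction, H((K ^ opad) || H((K ^ ipad) || msg)). A compact sketch of the full flow; it assumes the SHA1Init/SHA1Update/SHA1Final API from this file is linked in and that the key already fits in one 64-byte block (longer keys are digested down first, as sha1_mac_init() does):

    #include <string.h>
    #include <sha1/sha1.h>  /* SHA1_CTX and SHA1Init/Update/Final, as above */

    #define HMAC_BLOCK  64  /* SHA1_HMAC_BLOCK_SIZE */
    #define HMAC_DIGEST 20  /* SHA1_DIGEST_LENGTH */

    static void
    hmac_sha1(const unsigned char *key, size_t klen,    /* klen <= 64 here */
        const unsigned char *msg, size_t mlen,
        unsigned char out[HMAC_DIGEST])
    {
        SHA1_CTX inner, outer;
        unsigned char ipad[HMAC_BLOCK], opad[HMAC_BLOCK];
        unsigned char idig[HMAC_DIGEST];
        int i;

        memset(ipad, 0, sizeof (ipad));
        memcpy(ipad, key, klen);
        memcpy(opad, ipad, sizeof (opad));
        for (i = 0; i < HMAC_BLOCK; i++) {
            ipad[i] ^= 0x36;
            opad[i] ^= 0x5c;
        }

        SHA1Init(&inner);               /* inner = H(K ^ ipad || msg) */
        SHA1Update(&inner, ipad, sizeof (ipad));
        SHA1Update(&inner, msg, mlen);
        SHA1Final(idig, &inner);

        SHA1Init(&outer);               /* outer = H(K ^ opad || inner) */
        SHA1Update(&outer, opad, sizeof (opad));
        SHA1Update(&outer, idig, sizeof (idig));
        SHA1Final(out, &outer);
    }
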
+
+/*
+ * Initialize a SHA1-HMAC operation: derive the inner and outer
+ * contexts from the supplied key, or reuse a context template.
+ */
+static int
+sha1_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
+ crypto_key_t *key, crypto_spi_ctx_template_t ctx_template,
+ crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+ uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
+
+ if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE &&
+ mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)
+ return (CRYPTO_MECHANISM_INVALID);
+
+ /* Add support for key by attributes (RFE 4706552) */
+ if (key->ck_format != CRYPTO_KEY_RAW)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ ctx->cc_provider_private = kmem_alloc(sizeof (sha1_hmac_ctx_t),
+ crypto_kmflag(req));
+ if (ctx->cc_provider_private == NULL)
+ return (CRYPTO_HOST_MEMORY);
+
+ if (ctx_template != NULL) {
+ /* reuse context template */
+ bcopy(ctx_template, PROV_SHA1_HMAC_CTX(ctx),
+ sizeof (sha1_hmac_ctx_t));
+ } else {
+ /* no context template, compute context */
+ if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
+ uchar_t digested_key[SHA1_DIGEST_LENGTH];
+ sha1_hmac_ctx_t *hmac_ctx = ctx->cc_provider_private;
+
+ /*
+ * Hash the passed-in key to get a smaller key.
+ * The inner context is used since it hasn't been
+ * initialized yet.
+ */
+ PROV_SHA1_DIGEST_KEY(&hmac_ctx->hc_icontext,
+ key->ck_data, keylen_in_bytes, digested_key);
+ sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx),
+ digested_key, SHA1_DIGEST_LENGTH);
+ } else {
+ sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx),
+ key->ck_data, keylen_in_bytes);
+ }
+ }
+
+ /*
+ * Get the mechanism parameters, if applicable.
+ */
+ PROV_SHA1_HMAC_CTX(ctx)->hc_mech_type = mechanism->cm_type;
+ if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) {
+ if (mechanism->cm_param == NULL ||
+ mechanism->cm_param_len != sizeof (ulong_t))
+ ret = CRYPTO_MECHANISM_PARAM_INVALID;
+ PROV_SHA1_GET_DIGEST_LEN(mechanism,
+ PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len);
+ if (PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len >
+ SHA1_DIGEST_LENGTH)
+ ret = CRYPTO_MECHANISM_PARAM_INVALID;
+ }
+
+ if (ret != CRYPTO_SUCCESS) {
+ bzero(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
+ kmem_free(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
+ ctx->cc_provider_private = NULL;
+ }
+
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+sha1_mac_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+
+ ASSERT(ctx->cc_provider_private != NULL);
+
+ /*
+ * Do a SHA1 update of the inner context using the specified
+ * data.
+ */
+ switch (data->cd_format) {
+ case CRYPTO_DATA_RAW:
+ SHA1Update(&PROV_SHA1_HMAC_CTX(ctx)->hc_icontext,
+ (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
+ data->cd_length);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha1_digest_update_uio(
+ &PROV_SHA1_HMAC_CTX(ctx)->hc_icontext, data);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+sha1_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+ uchar_t digest[SHA1_DIGEST_LENGTH];
+ uint32_t digest_len = SHA1_DIGEST_LENGTH;
+
+ ASSERT(ctx->cc_provider_private != NULL);
+
+ if (PROV_SHA1_HMAC_CTX(ctx)->hc_mech_type ==
+ SHA1_HMAC_GEN_MECH_INFO_TYPE)
+ digest_len = PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len;
+
+ /*
+ * For the cases below, just return the length needed to store
+ * the output and do not destroy the context.
+ */
+ if ((mac->cd_length == 0) || (mac->cd_length < digest_len)) {
+ mac->cd_length = digest_len;
+ return (CRYPTO_BUFFER_TOO_SMALL);
+ }
+
+ /*
+ * Do a SHA1 final on the inner context.
+ */
+ SHA1Final(digest, &PROV_SHA1_HMAC_CTX(ctx)->hc_icontext);
+
+ /*
+ * Do a SHA1 update on the outer context, feeding the inner
+ * digest as data.
+ */
+ SHA1Update(&PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext, digest,
+ SHA1_DIGEST_LENGTH);
+
+ /*
+ * Do a SHA1 final on the outer context, storing the computed
+ * digest in the user's buffer.
+ */
+ switch (mac->cd_format) {
+ case CRYPTO_DATA_RAW:
+ if (digest_len != SHA1_DIGEST_LENGTH) {
+ /*
+ * The caller requested a short digest. Digest
+ * into a scratch buffer and return to
+ * the user only what was requested.
+ */
+ SHA1Final(digest,
+ &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext);
+ bcopy(digest, (unsigned char *)mac->cd_raw.iov_base +
+ mac->cd_offset, digest_len);
+ } else {
+ SHA1Final((unsigned char *)mac->cd_raw.iov_base +
+ mac->cd_offset,
+ &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext);
+ }
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha1_digest_final_uio(
+ &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext, mac,
+ digest_len, digest);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ if (ret == CRYPTO_SUCCESS) {
+ mac->cd_length = digest_len;
+ } else {
+ mac->cd_length = 0;
+ }
+
+ bzero(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
+ kmem_free(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t));
+ ctx->cc_provider_private = NULL;
+
+ return (ret);
+}
+
+#define SHA1_MAC_UPDATE(data, ctx, ret) { \
+ switch (data->cd_format) { \
+ case CRYPTO_DATA_RAW: \
+ SHA1Update(&(ctx).hc_icontext, \
+ (uint8_t *)data->cd_raw.iov_base + \
+ data->cd_offset, data->cd_length); \
+ break; \
+ case CRYPTO_DATA_UIO: \
+ ret = sha1_digest_update_uio(&(ctx).hc_icontext, data); \
+ break; \
+ default: \
+ ret = CRYPTO_ARGUMENTS_BAD; \
+ } \
+}
+
+/* ARGSUSED */
+static int
+sha1_mac_atomic(crypto_provider_handle_t provider,
+ crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+ crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
+ crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+ uchar_t digest[SHA1_DIGEST_LENGTH];
+ sha1_hmac_ctx_t sha1_hmac_ctx;
+ uint32_t digest_len = SHA1_DIGEST_LENGTH;
+ uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
+
+ if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE &&
+ mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)
+ return (CRYPTO_MECHANISM_INVALID);
+
+ /* Add support for key by attributes (RFE 4706552) */
+ if (key->ck_format != CRYPTO_KEY_RAW)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ if (ctx_template != NULL) {
+ /* reuse context template */
+ bcopy(ctx_template, &sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
+ } else {
+ /* no context template, initialize context */
+ if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
+ /*
+ * Hash the passed-in key to get a smaller key.
+ * The inner context is used since it hasn't been
+ * initialized yet.
+ */
+ PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx.hc_icontext,
+ key->ck_data, keylen_in_bytes, digest);
+ sha1_mac_init_ctx(&sha1_hmac_ctx, digest,
+ SHA1_DIGEST_LENGTH);
+ } else {
+ sha1_mac_init_ctx(&sha1_hmac_ctx, key->ck_data,
+ keylen_in_bytes);
+ }
+ }
+
+ /* get the mechanism parameters, if applicable */
+ if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) {
+ if (mechanism->cm_param == NULL ||
+ mechanism->cm_param_len != sizeof (ulong_t)) {
+ ret = CRYPTO_MECHANISM_PARAM_INVALID;
+ goto bail;
+ }
+ PROV_SHA1_GET_DIGEST_LEN(mechanism, digest_len);
+ if (digest_len > SHA1_DIGEST_LENGTH) {
+ ret = CRYPTO_MECHANISM_PARAM_INVALID;
+ goto bail;
+ }
+ }
+
+ /* do a SHA1 update of the inner context using the specified data */
+ SHA1_MAC_UPDATE(data, sha1_hmac_ctx, ret);
+ if (ret != CRYPTO_SUCCESS)
+ /* the update failed, zeroize the context and bail */
+ goto bail;
+
+ /*
+ * Do a SHA1 final on the inner context.
+ */
+ SHA1Final(digest, &sha1_hmac_ctx.hc_icontext);
+
+ /*
+ * Do a SHA1 update on the outer context, feeding the inner
+ * digest as data.
+ */
+ SHA1Update(&sha1_hmac_ctx.hc_ocontext, digest, SHA1_DIGEST_LENGTH);
+
+ /*
+ * Do a SHA1 final on the outer context, storing the computed
+ * digest in the user's buffer.
+ */
+ switch (mac->cd_format) {
+ case CRYPTO_DATA_RAW:
+ if (digest_len != SHA1_DIGEST_LENGTH) {
+ /*
+ * The caller requested a short digest. Digest
+ * into a scratch buffer and return to
+ * the user only what was requested.
+ */
+ SHA1Final(digest, &sha1_hmac_ctx.hc_ocontext);
+ bcopy(digest, (unsigned char *)mac->cd_raw.iov_base +
+ mac->cd_offset, digest_len);
+ } else {
+ SHA1Final((unsigned char *)mac->cd_raw.iov_base +
+ mac->cd_offset, &sha1_hmac_ctx.hc_ocontext);
+ }
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha1_digest_final_uio(&sha1_hmac_ctx.hc_ocontext, mac,
+ digest_len, digest);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ if (ret == CRYPTO_SUCCESS) {
+ mac->cd_length = digest_len;
+ } else {
+ mac->cd_length = 0;
+ }
+ /* Extra paranoia: zeroize the context on the stack */
+ bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
+
+ return (ret);
+bail:
+ bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
+ mac->cd_length = 0;
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+sha1_mac_verify_atomic(crypto_provider_handle_t provider,
+ crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+ crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
+ crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+ uchar_t digest[SHA1_DIGEST_LENGTH];
+ sha1_hmac_ctx_t sha1_hmac_ctx;
+ uint32_t digest_len = SHA1_DIGEST_LENGTH;
+ uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
+
+ if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE &&
+ mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)
+ return (CRYPTO_MECHANISM_INVALID);
+
+ /* Add support for key by attributes (RFE 4706552) */
+ if (key->ck_format != CRYPTO_KEY_RAW)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ if (ctx_template != NULL) {
+ /* reuse context template */
+ bcopy(ctx_template, &sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
+ } else {
+ /* no context template, initialize context */
+ if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
+ /*
+ * Hash the passed-in key to get a smaller key.
+ * The inner context is used since it hasn't been
+ * initialized yet.
+ */
+ PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx.hc_icontext,
+ key->ck_data, keylen_in_bytes, digest);
+ sha1_mac_init_ctx(&sha1_hmac_ctx, digest,
+ SHA1_DIGEST_LENGTH);
+ } else {
+ sha1_mac_init_ctx(&sha1_hmac_ctx, key->ck_data,
+ keylen_in_bytes);
+ }
+ }
+
+ /* get the mechanism parameters, if applicable */
+ if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) {
+ if (mechanism->cm_param == NULL ||
+ mechanism->cm_param_len != sizeof (ulong_t)) {
+ ret = CRYPTO_MECHANISM_PARAM_INVALID;
+ goto bail;
+ }
+ PROV_SHA1_GET_DIGEST_LEN(mechanism, digest_len);
+ if (digest_len > SHA1_DIGEST_LENGTH) {
+ ret = CRYPTO_MECHANISM_PARAM_INVALID;
+ goto bail;
+ }
+ }
+
+ if (mac->cd_length != digest_len) {
+ ret = CRYPTO_INVALID_MAC;
+ goto bail;
+ }
+
+ /* do a SHA1 update of the inner context using the specified data */
+ SHA1_MAC_UPDATE(data, sha1_hmac_ctx, ret);
+ if (ret != CRYPTO_SUCCESS)
+ /* the update failed, zeroize the context and bail */
+ goto bail;
+
+ /* do a SHA1 final on the inner context */
+ SHA1Final(digest, &sha1_hmac_ctx.hc_icontext);
+
+ /*
+ * Do a SHA1 update on the outer context, feeding the inner
+ * digest as data.
+ */
+ SHA1Update(&sha1_hmac_ctx.hc_ocontext, digest, SHA1_DIGEST_LENGTH);
+
+ /*
+ * Do a SHA1 final on the outer context, storing the computed
+ * digest in the user's buffer.
+ */
+ SHA1Final(digest, &sha1_hmac_ctx.hc_ocontext);
+
+ /*
+ * Compare the computed digest against the expected digest passed
+ * as an argument.
+ */
+
+ switch (mac->cd_format) {
+
+ case CRYPTO_DATA_RAW:
+ if (bcmp(digest, (unsigned char *)mac->cd_raw.iov_base +
+ mac->cd_offset, digest_len) != 0)
+ ret = CRYPTO_INVALID_MAC;
+ break;
+
+ case CRYPTO_DATA_UIO: {
+ off_t offset = mac->cd_offset;
+ uint_t vec_idx;
+ off_t scratch_offset = 0;
+ size_t length = digest_len;
+ size_t cur_len;
+
+ /* we support only kernel buffer */
+ if (mac->cd_uio->uio_segflg != UIO_SYSSPACE)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ /* jump to the first iovec containing the expected digest */
+ for (vec_idx = 0;
+ vec_idx < mac->cd_uio->uio_iovcnt &&
+ offset >= mac->cd_uio->uio_iov[vec_idx].iov_len;
+ offset -= mac->cd_uio->uio_iov[vec_idx++].iov_len)
+ ;
+ if (vec_idx == mac->cd_uio->uio_iovcnt) {
+ /*
+ * The caller specified an offset that is
+ * larger than the total size of the buffers
+ * it provided.
+ */
+ ret = CRYPTO_DATA_LEN_RANGE;
+ break;
+ }
+
+ /* do the comparison of computed digest vs specified one */
+ while (vec_idx < mac->cd_uio->uio_iovcnt && length > 0) {
+ cur_len = MIN(mac->cd_uio->uio_iov[vec_idx].iov_len -
+ offset, length);
+
+ if (bcmp(digest + scratch_offset,
+ mac->cd_uio->uio_iov[vec_idx].iov_base + offset,
+ cur_len) != 0) {
+ ret = CRYPTO_INVALID_MAC;
+ break;
+ }
+
+ length -= cur_len;
+ vec_idx++;
+ scratch_offset += cur_len;
+ offset = 0;
+ }
+ break;
+ }
+
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
+ return (ret);
+bail:
+ bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t));
+ mac->cd_length = 0;
+ return (ret);
+}
+
+/*
+ * KCF software provider context management entry points.
+ */
+
+/* ARGSUSED */
+static int
+sha1_create_ctx_template(crypto_provider_handle_t provider,
+ crypto_mechanism_t *mechanism, crypto_key_t *key,
+ crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size,
+ crypto_req_handle_t req)
+{
+ sha1_hmac_ctx_t *sha1_hmac_ctx_tmpl;
+ uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
+
+ if ((mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE) &&
+ (mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)) {
+ return (CRYPTO_MECHANISM_INVALID);
+ }
+
+ /* Add support for key by attributes (RFE 4706552) */
+ if (key->ck_format != CRYPTO_KEY_RAW)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ /*
+ * Allocate and initialize SHA1 context.
+ */
+ sha1_hmac_ctx_tmpl = kmem_alloc(sizeof (sha1_hmac_ctx_t),
+ crypto_kmflag(req));
+ if (sha1_hmac_ctx_tmpl == NULL)
+ return (CRYPTO_HOST_MEMORY);
+
+ if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) {
+ uchar_t digested_key[SHA1_DIGEST_LENGTH];
+
+ /*
+ * Hash the passed-in key to get a smaller key.
+ * The inner context is used since it hasn't been
+ * initialized yet.
+ */
+ PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx_tmpl->hc_icontext,
+ key->ck_data, keylen_in_bytes, digested_key);
+ sha1_mac_init_ctx(sha1_hmac_ctx_tmpl, digested_key,
+ SHA1_DIGEST_LENGTH);
+ } else {
+ sha1_mac_init_ctx(sha1_hmac_ctx_tmpl, key->ck_data,
+ keylen_in_bytes);
+ }
+
+ sha1_hmac_ctx_tmpl->hc_mech_type = mechanism->cm_type;
+ *ctx_template = (crypto_spi_ctx_template_t)sha1_hmac_ctx_tmpl;
+ *ctx_template_size = sizeof (sha1_hmac_ctx_t);
+
+ return (CRYPTO_SUCCESS);
+}
+
+static int
+sha1_free_context(crypto_ctx_t *ctx)
+{
+ uint_t ctx_len;
+ sha1_mech_type_t mech_type;
+
+ if (ctx->cc_provider_private == NULL)
+ return (CRYPTO_SUCCESS);
+
+ /*
+ * We have to free either SHA1 or SHA1-HMAC contexts, which
+ * have different lengths.
+ */
+
+ mech_type = PROV_SHA1_CTX(ctx)->sc_mech_type;
+ if (mech_type == SHA1_MECH_INFO_TYPE)
+ ctx_len = sizeof (sha1_ctx_t);
+ else {
+ ASSERT(mech_type == SHA1_HMAC_MECH_INFO_TYPE ||
+ mech_type == SHA1_HMAC_GEN_MECH_INFO_TYPE);
+ ctx_len = sizeof (sha1_hmac_ctx_t);
+ }
+
+ bzero(ctx->cc_provider_private, ctx_len);
+ kmem_free(ctx->cc_provider_private, ctx_len);
+ ctx->cc_provider_private = NULL;
+
+ return (CRYPTO_SUCCESS);
+}
diff --git a/zfs/module/icp/io/sha2_mod.c b/zfs/module/icp/io/sha2_mod.c
new file mode 100644
index 000000000000..8a3514c80f70
--- /dev/null
+++ b/zfs/module/icp/io/sha2_mod.c
@@ -0,0 +1,1409 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/modctl.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/spi.h>
+#include <sys/crypto/icp.h>
+#define _SHA2_IMPL
+#include <sys/sha2.h>
+#include <sha2/sha2_impl.h>
+
+/*
+ * The sha2 module is created with two modlinkages:
+ * - a modlmisc that allows consumers to directly call the entry points
+ * SHA2Init, SHA2Update, and SHA2Final.
+ * - a modlcrypto that allows the module to register with the Kernel
+ * Cryptographic Framework (KCF) as a software provider for the SHA2
+ * mechanisms.
+ */
+
+static struct modlcrypto modlcrypto = {
+ &mod_cryptoops,
+ "SHA2 Kernel SW Provider"
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, {&modlcrypto, NULL}
+};
+
+/*
+ * Macros to access the SHA2 or SHA2-HMAC contexts from a context passed
+ * by KCF to one of the entry points.
+ */
+
+#define PROV_SHA2_CTX(ctx) ((sha2_ctx_t *)(ctx)->cc_provider_private)
+#define PROV_SHA2_HMAC_CTX(ctx) ((sha2_hmac_ctx_t *)(ctx)->cc_provider_private)
+
+/* to extract the digest length passed as mechanism parameter */
+#define PROV_SHA2_GET_DIGEST_LEN(m, len) { \
+ if (IS_P2ALIGNED((m)->cm_param, sizeof (ulong_t))) \
+ (len) = (uint32_t)*((ulong_t *)(m)->cm_param); \
+ else { \
+ ulong_t tmp_ulong; \
+ bcopy((m)->cm_param, &tmp_ulong, sizeof (ulong_t)); \
+ (len) = (uint32_t)tmp_ulong; \
+ } \
+}
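The bcopy branch in PROV_SHA2_GET_DIGEST_LEN is what keeps the macro safe when cm_param is not naturally aligned; dereferencing a misaligned ulong_t pointer is undefined behavior and faults outright on some architectures. The same idea in a self-contained userland form (a sketch, not part of the patch):

    #include <string.h>

    /* Read an unsigned long from a possibly unaligned buffer. */
    static unsigned long
    read_ulong_unaligned(const void *p)
    {
            unsigned long v;

            memcpy(&v, p, sizeof (v));      /* legal at any alignment */
            return (v);
    }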
+
+#define PROV_SHA2_DIGEST_KEY(mech, ctx, key, len, digest) { \
+ SHA2Init(mech, ctx); \
+ SHA2Update(ctx, key, len); \
+ SHA2Final(digest, ctx); \
+}
+
+/*
+ * Mechanism info structure passed to KCF during registration.
+ */
+static crypto_mech_info_t sha2_mech_info_tab[] = {
+ /* SHA256 */
+ {SUN_CKM_SHA256, SHA256_MECH_INFO_TYPE,
+ CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
+ 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+ /* SHA256-HMAC */
+ {SUN_CKM_SHA256_HMAC, SHA256_HMAC_MECH_INFO_TYPE,
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
+ SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN,
+ CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ /* SHA256-HMAC GENERAL */
+ {SUN_CKM_SHA256_HMAC_GENERAL, SHA256_HMAC_GEN_MECH_INFO_TYPE,
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
+ SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN,
+ CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ /* SHA384 */
+ {SUN_CKM_SHA384, SHA384_MECH_INFO_TYPE,
+ CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
+ 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+ /* SHA384-HMAC */
+ {SUN_CKM_SHA384_HMAC, SHA384_HMAC_MECH_INFO_TYPE,
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
+ SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN,
+ CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ /* SHA384-HMAC GENERAL */
+ {SUN_CKM_SHA384_HMAC_GENERAL, SHA384_HMAC_GEN_MECH_INFO_TYPE,
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
+ SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN,
+ CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ /* SHA512 */
+ {SUN_CKM_SHA512, SHA512_MECH_INFO_TYPE,
+ CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
+ 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+ /* SHA512-HMAC */
+ {SUN_CKM_SHA512_HMAC, SHA512_HMAC_MECH_INFO_TYPE,
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
+ SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN,
+ CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ /* SHA512-HMAC GENERAL */
+ {SUN_CKM_SHA512_HMAC_GENERAL, SHA512_HMAC_GEN_MECH_INFO_TYPE,
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC,
+ SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN,
+ CRYPTO_KEYSIZE_UNIT_IN_BYTES}
+};
+
+static void sha2_provider_status(crypto_provider_handle_t, uint_t *);
+
+static crypto_control_ops_t sha2_control_ops = {
+ sha2_provider_status
+};
+
+static int sha2_digest_init(crypto_ctx_t *, crypto_mechanism_t *,
+ crypto_req_handle_t);
+static int sha2_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *,
+ crypto_req_handle_t);
+static int sha2_digest_update(crypto_ctx_t *, crypto_data_t *,
+ crypto_req_handle_t);
+static int sha2_digest_final(crypto_ctx_t *, crypto_data_t *,
+ crypto_req_handle_t);
+static int sha2_digest_atomic(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_data_t *, crypto_data_t *,
+ crypto_req_handle_t);
+
+static crypto_digest_ops_t sha2_digest_ops = {
+ sha2_digest_init,
+ sha2_digest,
+ sha2_digest_update,
+ NULL,
+ sha2_digest_final,
+ sha2_digest_atomic
+};
+
+static int sha2_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+static int sha2_mac_update(crypto_ctx_t *, crypto_data_t *,
+ crypto_req_handle_t);
+static int sha2_mac_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t);
+static int sha2_mac_atomic(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+static int sha2_mac_verify_atomic(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+
+static crypto_mac_ops_t sha2_mac_ops = {
+ sha2_mac_init,
+ NULL,
+ sha2_mac_update,
+ sha2_mac_final,
+ sha2_mac_atomic,
+ sha2_mac_verify_atomic
+};
+
+static int sha2_create_ctx_template(crypto_provider_handle_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *,
+ size_t *, crypto_req_handle_t);
+static int sha2_free_context(crypto_ctx_t *);
+
+static crypto_ctx_ops_t sha2_ctx_ops = {
+ sha2_create_ctx_template,
+ sha2_free_context
+};
+
+static crypto_ops_t sha2_crypto_ops = {{{{{
+ &sha2_control_ops,
+ &sha2_digest_ops,
+ NULL,
+ &sha2_mac_ops,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ &sha2_ctx_ops
+}}}}};
+
+static crypto_provider_info_t sha2_prov_info = {{{{
+ CRYPTO_SPI_VERSION_1,
+ "SHA2 Software Provider",
+ CRYPTO_SW_PROVIDER,
+ NULL,
+ &sha2_crypto_ops,
+ sizeof (sha2_mech_info_tab)/sizeof (crypto_mech_info_t),
+ sha2_mech_info_tab
+}}}};
+
+static crypto_kcf_provider_handle_t sha2_prov_handle = 0;
+
+int
+sha2_mod_init(void)
+{
+ int ret;
+
+ if ((ret = mod_install(&modlinkage)) != 0)
+ return (ret);
+
+ /*
+ * Register with KCF. If the registration fails, log an
+ * error but do not uninstall the module, since the functionality
+ * provided by misc/sha2 should still be available.
+ */
+ if ((ret = crypto_register_provider(&sha2_prov_info,
+ &sha2_prov_handle)) != CRYPTO_SUCCESS)
+ cmn_err(CE_WARN, "sha2 _init: "
+ "crypto_register_provider() failed (0x%x)", ret);
+
+ return (0);
+}
+
+int
+sha2_mod_fini(void)
+{
+ int ret;
+
+ if (sha2_prov_handle != 0) {
+ if ((ret = crypto_unregister_provider(sha2_prov_handle)) !=
+ CRYPTO_SUCCESS) {
+ cmn_err(CE_WARN,
+ "sha2 _fini: crypto_unregister_provider() "
+ "failed (0x%x)", ret);
+ return (EBUSY);
+ }
+ sha2_prov_handle = 0;
+ }
+
+ return (mod_remove(&modlinkage));
+}
+
+/*
+ * KCF software provider control entry points.
+ */
+/* ARGSUSED */
+static void
+sha2_provider_status(crypto_provider_handle_t provider, uint_t *status)
+{
+ *status = CRYPTO_PROVIDER_READY;
+}
+
+/*
+ * KCF software provider digest entry points.
+ */
+
+static int
+sha2_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
+ crypto_req_handle_t req)
+{
+
+ /*
+ * Allocate and initialize SHA2 context.
+ */
+ ctx->cc_provider_private = kmem_alloc(sizeof (sha2_ctx_t),
+ crypto_kmflag(req));
+ if (ctx->cc_provider_private == NULL)
+ return (CRYPTO_HOST_MEMORY);
+
+ PROV_SHA2_CTX(ctx)->sc_mech_type = mechanism->cm_type;
+ SHA2Init(mechanism->cm_type, &PROV_SHA2_CTX(ctx)->sc_sha2_ctx);
+
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * Helper SHA2 digest update function for uio data.
+ */
+static int
+sha2_digest_update_uio(SHA2_CTX *sha2_ctx, crypto_data_t *data)
+{
+ off_t offset = data->cd_offset;
+ size_t length = data->cd_length;
+ uint_t vec_idx;
+ size_t cur_len;
+
+ /* we support only kernel buffer */
+ if (data->cd_uio->uio_segflg != UIO_SYSSPACE)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ /*
+ * Jump to the first iovec containing data to be
+ * digested.
+ */
+ for (vec_idx = 0; vec_idx < data->cd_uio->uio_iovcnt &&
+ offset >= data->cd_uio->uio_iov[vec_idx].iov_len;
+ offset -= data->cd_uio->uio_iov[vec_idx++].iov_len)
+ ;
+ if (vec_idx == data->cd_uio->uio_iovcnt) {
+ /*
+ * The caller specified an offset that is larger than the
+ * total size of the buffers it provided.
+ */
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ /*
+ * Now do the digesting on the iovecs.
+ */
+ while (vec_idx < data->cd_uio->uio_iovcnt && length > 0) {
+ cur_len = MIN(data->cd_uio->uio_iov[vec_idx].iov_len -
+ offset, length);
+
+ SHA2Update(sha2_ctx, (uint8_t *)data->cd_uio->
+ uio_iov[vec_idx].iov_base + offset, cur_len);
+ length -= cur_len;
+ vec_idx++;
+ offset = 0;
+ }
+
+ if (vec_idx == data->cd_uio->uio_iovcnt && length > 0) {
+ /*
+ * The end of the specified iovecs was reached but
+ * the length requested could not be processed, i.e.
+ * the caller requested to digest more data than it provided.
+ */
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ return (CRYPTO_SUCCESS);
+}
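The traversal above (skip whole iovecs until the starting offset falls inside one, then consume MIN(bytes left in the iovec, bytes left to process) per step) is the pattern every uio helper in this file follows. A standalone userland sketch of the same walk, assuming only POSIX struct iovec:

    #include <string.h>
    #include <sys/uio.h>

    /*
     * Copy `len' bytes starting at logical `offset' out of an iovec
     * array into `dst'.  Returns 0 on success, -1 if the iovecs are
     * too short.  Mirrors the update/final walks in this file.
     */
    static int
    iovec_read(const struct iovec *iov, int iovcnt, size_t offset,
        void *dst, size_t len)
    {
            int i = 0;
            char *out = dst;

            while (i < iovcnt && offset >= iov[i].iov_len)
                    offset -= iov[i++].iov_len;

            while (i < iovcnt && len > 0) {
                    size_t cur = iov[i].iov_len - offset;

                    if (cur > len)
                            cur = len;
                    memcpy(out, (char *)iov[i].iov_base + offset, cur);
                    out += cur;
                    len -= cur;
                    offset = 0;
                    i++;
            }
            return (len == 0 ? 0 : -1);
    }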
+
+/*
+ * Helper SHA2 digest final function for uio data.
+ * digest_len is the length of the desired digest. If digest_len
+ * is smaller than the default SHA2 digest length, the caller
+ * must pass a scratch buffer, digest_scratch, which must
+ * be at least the algorithm's digest length bytes.
+ */
+static int
+sha2_digest_final_uio(SHA2_CTX *sha2_ctx, crypto_data_t *digest,
+ ulong_t digest_len, uchar_t *digest_scratch)
+{
+ off_t offset = digest->cd_offset;
+ uint_t vec_idx;
+
+ /* we support only kernel buffer */
+ if (digest->cd_uio->uio_segflg != UIO_SYSSPACE)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ /*
+ * Jump to the first iovec containing ptr to the digest to
+ * be returned.
+ */
+ for (vec_idx = 0; vec_idx < digest->cd_uio->uio_iovcnt &&
+ offset >= digest->cd_uio->uio_iov[vec_idx].iov_len;
+ offset -= digest->cd_uio->uio_iov[vec_idx++].iov_len)
+ ;
+ if (vec_idx == digest->cd_uio->uio_iovcnt) {
+ /*
+ * The caller specified an offset that is
+ * larger than the total size of the buffers
+ * it provided.
+ */
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ if (offset + digest_len <=
+ digest->cd_uio->uio_iov[vec_idx].iov_len) {
+ /*
+ * The computed SHA2 digest will fit in the current
+ * iovec.
+ */
+ if (((sha2_ctx->algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) &&
+ (digest_len != SHA256_DIGEST_LENGTH)) ||
+ ((sha2_ctx->algotype > SHA256_HMAC_GEN_MECH_INFO_TYPE) &&
+ (digest_len != SHA512_DIGEST_LENGTH))) {
+ /*
+ * The caller requested a short digest. Digest
+ * into a scratch buffer and return to
+ * the user only what was requested.
+ */
+ SHA2Final(digest_scratch, sha2_ctx);
+
+ bcopy(digest_scratch, (uchar_t *)digest->
+ cd_uio->uio_iov[vec_idx].iov_base + offset,
+ digest_len);
+ } else {
+ SHA2Final((uchar_t *)digest->
+ cd_uio->uio_iov[vec_idx].iov_base + offset,
+ sha2_ctx);
+
+ }
+ } else {
+ /*
+ * The computed digest will cross one or more iovecs.
+ * This is bad performance-wise but we need to support it.
+ * Allocate a small scratch buffer on the stack and
+ * copy it piecemeal to the specified digest iovecs.
+ */
+ uchar_t digest_tmp[SHA512_DIGEST_LENGTH];
+ off_t scratch_offset = 0;
+ size_t length = digest_len;
+ size_t cur_len;
+
+ SHA2Final(digest_tmp, sha2_ctx);
+
+ while (vec_idx < digest->cd_uio->uio_iovcnt && length > 0) {
+ cur_len =
+ MIN(digest->cd_uio->uio_iov[vec_idx].iov_len -
+ offset, length);
+ bcopy(digest_tmp + scratch_offset,
+ digest->cd_uio->uio_iov[vec_idx].iov_base + offset,
+ cur_len);
+
+ length -= cur_len;
+ vec_idx++;
+ scratch_offset += cur_len;
+ offset = 0;
+ }
+
+ if (vec_idx == digest->cd_uio->uio_iovcnt && length > 0) {
+ /*
+ * The end of the specified iovecs was reached but
+ * the length requested could not be processed, i.e.
+ * the caller requested to digest more data than it
+ * provided.
+ */
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+ }
+
+ return (CRYPTO_SUCCESS);
+}
+
+/* ARGSUSED */
+static int
+sha2_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest,
+ crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+ uint_t sha_digest_len;
+
+ ASSERT(ctx->cc_provider_private != NULL);
+
+ switch (PROV_SHA2_CTX(ctx)->sc_mech_type) {
+ case SHA256_MECH_INFO_TYPE:
+ sha_digest_len = SHA256_DIGEST_LENGTH;
+ break;
+ case SHA384_MECH_INFO_TYPE:
+ sha_digest_len = SHA384_DIGEST_LENGTH;
+ break;
+ case SHA512_MECH_INFO_TYPE:
+ sha_digest_len = SHA512_DIGEST_LENGTH;
+ break;
+ default:
+ return (CRYPTO_MECHANISM_INVALID);
+ }
+
+ /*
+ * If the output buffer is absent or too small, just return the
+ * length needed to store the output; do not destroy the context,
+ * so the caller can retry with a larger buffer.
+ */
+ if ((digest->cd_length == 0) ||
+ (digest->cd_length < sha_digest_len)) {
+ digest->cd_length = sha_digest_len;
+ return (CRYPTO_BUFFER_TOO_SMALL);
+ }
+
+ /*
+ * Do the SHA2 update on the specified input data.
+ */
+ switch (data->cd_format) {
+ case CRYPTO_DATA_RAW:
+ SHA2Update(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx,
+ (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
+ data->cd_length);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha2_digest_update_uio(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx,
+ data);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ if (ret != CRYPTO_SUCCESS) {
+ /* the update failed, free context and bail */
+ kmem_free(ctx->cc_provider_private, sizeof (sha2_ctx_t));
+ ctx->cc_provider_private = NULL;
+ digest->cd_length = 0;
+ return (ret);
+ }
+
+ /*
+ * Do a SHA2 final, must be done separately since the digest
+ * type can be different than the input data type.
+ */
+ switch (digest->cd_format) {
+ case CRYPTO_DATA_RAW:
+ SHA2Final((unsigned char *)digest->cd_raw.iov_base +
+ digest->cd_offset, &PROV_SHA2_CTX(ctx)->sc_sha2_ctx);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha2_digest_final_uio(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx,
+ digest, sha_digest_len, NULL);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ /* all done, free context and return */
+
+ if (ret == CRYPTO_SUCCESS)
+ digest->cd_length = sha_digest_len;
+ else
+ digest->cd_length = 0;
+
+ kmem_free(ctx->cc_provider_private, sizeof (sha2_ctx_t));
+ ctx->cc_provider_private = NULL;
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+sha2_digest_update(crypto_ctx_t *ctx, crypto_data_t *data,
+ crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+
+ ASSERT(ctx->cc_provider_private != NULL);
+
+ /*
+ * Do the SHA2 update on the specified input data.
+ */
+ switch (data->cd_format) {
+ case CRYPTO_DATA_RAW:
+ SHA2Update(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx,
+ (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
+ data->cd_length);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha2_digest_update_uio(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx,
+ data);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+sha2_digest_final(crypto_ctx_t *ctx, crypto_data_t *digest,
+ crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+ uint_t sha_digest_len;
+
+ ASSERT(ctx->cc_provider_private != NULL);
+
+ switch (PROV_SHA2_CTX(ctx)->sc_mech_type) {
+ case SHA256_MECH_INFO_TYPE:
+ sha_digest_len = SHA256_DIGEST_LENGTH;
+ break;
+ case SHA384_MECH_INFO_TYPE:
+ sha_digest_len = SHA384_DIGEST_LENGTH;
+ break;
+ case SHA512_MECH_INFO_TYPE:
+ sha_digest_len = SHA512_DIGEST_LENGTH;
+ break;
+ default:
+ return (CRYPTO_MECHANISM_INVALID);
+ }
+
+ /*
+ * If the output buffer is absent or too small, just return the
+ * length needed to store the output; do not destroy the context,
+ * so the caller can retry with a larger buffer.
+ */
+ if ((digest->cd_length == 0) ||
+ (digest->cd_length < sha_digest_len)) {
+ digest->cd_length = sha_digest_len;
+ return (CRYPTO_BUFFER_TOO_SMALL);
+ }
+
+ /*
+ * Do a SHA2 final.
+ */
+ switch (digest->cd_format) {
+ case CRYPTO_DATA_RAW:
+ SHA2Final((unsigned char *)digest->cd_raw.iov_base +
+ digest->cd_offset, &PROV_SHA2_CTX(ctx)->sc_sha2_ctx);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha2_digest_final_uio(&PROV_SHA2_CTX(ctx)->sc_sha2_ctx,
+ digest, sha_digest_len, NULL);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ /* all done, free context and return */
+
+ if (ret == CRYPTO_SUCCESS)
+ digest->cd_length = sha_digest_len;
+ else
+ digest->cd_length = 0;
+
+ kmem_free(ctx->cc_provider_private, sizeof (sha2_ctx_t));
+ ctx->cc_provider_private = NULL;
+
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+sha2_digest_atomic(crypto_provider_handle_t provider,
+ crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+ crypto_data_t *data, crypto_data_t *digest,
+ crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+ SHA2_CTX sha2_ctx;
+ uint32_t sha_digest_len;
+
+ /*
+ * Do the SHA2 init.
+ */
+
+ SHA2Init(mechanism->cm_type, &sha2_ctx);
+
+ /*
+ * Do the SHA2 update on the specified input data.
+ */
+ switch (data->cd_format) {
+ case CRYPTO_DATA_RAW:
+ SHA2Update(&sha2_ctx, (uint8_t *)data->
+ cd_raw.iov_base + data->cd_offset, data->cd_length);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha2_digest_update_uio(&sha2_ctx, data);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ if (ret != CRYPTO_SUCCESS) {
+ /* the update failed, bail */
+ digest->cd_length = 0;
+ return (ret);
+ }
+
+ if (mechanism->cm_type <= SHA256_HMAC_GEN_MECH_INFO_TYPE)
+ sha_digest_len = SHA256_DIGEST_LENGTH;
+ else
+ sha_digest_len = SHA512_DIGEST_LENGTH;
+
+ /*
+ * Do a SHA2 final, must be done separately since the digest
+ * type can be different than the input data type.
+ */
+ switch (digest->cd_format) {
+ case CRYPTO_DATA_RAW:
+ SHA2Final((unsigned char *)digest->cd_raw.iov_base +
+ digest->cd_offset, &sha2_ctx);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha2_digest_final_uio(&sha2_ctx, digest,
+ sha_digest_len, NULL);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ if (ret == CRYPTO_SUCCESS)
+ digest->cd_length = sha_digest_len;
+ else
+ digest->cd_length = 0;
+
+ return (ret);
+}
+
+/*
+ * KCF software provider mac entry points.
+ *
+ * SHA2 HMAC is: SHA2(key XOR opad, SHA2(key XOR ipad, text))
+ *
+ * Init:
+ * The initialization routine initializes what we denote
+ * as the inner and outer contexts by doing
+ * - for inner context: SHA2(key XOR ipad)
+ * - for outer context: SHA2(key XOR opad)
+ *
+ * Update:
+ * Each subsequent SHA2 HMAC update will result in an
+ * update of the inner context with the specified data.
+ *
+ * Final:
+ * The SHA2 HMAC final will do a SHA2 final operation on the
+ * inner context, and the resulting digest will be used
+ * as the data for an update on the outer context. Last
+ * but not least, a SHA2 final on the outer context will
+ * be performed to obtain the SHA2 HMAC digest to return
+ * to the user.
+ */
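The construction described above is standard RFC 2104 HMAC, with the key folded into precomputed inner and outer contexts. A self-contained userland illustration of the same flow (a sketch assuming OpenSSL's legacy SHA256_Init/Update/Final API; it is not part of this patch):

    #include <string.h>
    #include <openssl/sha.h>

    #define HMAC_BLOCK      64      /* SHA-256 block size */

    static void
    hmac_sha256(const unsigned char *key, size_t keylen,
        const unsigned char *msg, size_t msglen,
        unsigned char out[SHA256_DIGEST_LENGTH])
    {
            unsigned char k[HMAC_BLOCK] = { 0 };
            unsigned char pad[HMAC_BLOCK];
            unsigned char inner[SHA256_DIGEST_LENGTH];
            SHA256_CTX ictx, octx;
            int i;

            /* Long keys are digested first, exactly as in the code above. */
            if (keylen > HMAC_BLOCK)
                    SHA256(key, keylen, k);
            else
                    memcpy(k, key, keylen);

            SHA256_Init(&ictx);             /* inner: H(K ^ ipad || msg) */
            for (i = 0; i < HMAC_BLOCK; i++)
                    pad[i] = k[i] ^ 0x36;
            SHA256_Update(&ictx, pad, HMAC_BLOCK);
            SHA256_Update(&ictx, msg, msglen);
            SHA256_Final(inner, &ictx);

            SHA256_Init(&octx);             /* outer: H(K ^ opad || inner) */
            for (i = 0; i < HMAC_BLOCK; i++)
                    pad[i] = k[i] ^ 0x5c;
            SHA256_Update(&octx, pad, HMAC_BLOCK);
            SHA256_Update(&octx, inner, SHA256_DIGEST_LENGTH);
            SHA256_Final(out, &octx);
    }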
+
+/*
+ * Initialize a SHA2-HMAC context.
+ */
+static void
+sha2_mac_init_ctx(sha2_hmac_ctx_t *ctx, void *keyval, uint_t length_in_bytes)
+{
+ uint64_t ipad[SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t)];
+ uint64_t opad[SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t)];
+ int i, block_size, blocks_per_int64;
+
+ /* Determine the block size */
+ if (ctx->hc_mech_type <= SHA256_HMAC_GEN_MECH_INFO_TYPE) {
+ block_size = SHA256_HMAC_BLOCK_SIZE;
+ blocks_per_int64 = SHA256_HMAC_BLOCK_SIZE / sizeof (uint64_t);
+ } else {
+ block_size = SHA512_HMAC_BLOCK_SIZE;
+ blocks_per_int64 = SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t);
+ }
+
+ (void) bzero(ipad, block_size);
+ (void) bzero(opad, block_size);
+ (void) bcopy(keyval, ipad, length_in_bytes);
+ (void) bcopy(keyval, opad, length_in_bytes);
+
+ /* XOR key with ipad (0x36) and opad (0x5c) */
+ for (i = 0; i < blocks_per_int64; i++) {
+ ipad[i] ^= 0x3636363636363636;
+ opad[i] ^= 0x5c5c5c5c5c5c5c5c;
+ }
+
+ /* perform SHA2 on ipad */
+ SHA2Init(ctx->hc_mech_type, &ctx->hc_icontext);
+ SHA2Update(&ctx->hc_icontext, (uint8_t *)ipad, block_size);
+
+ /* perform SHA2 on opad */
+ SHA2Init(ctx->hc_mech_type, &ctx->hc_ocontext);
+ SHA2Update(&ctx->hc_ocontext, (uint8_t *)opad, block_size);
+}
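The 64-bit XOR above is just a vectorized form of the byte-wise pad XOR that HMAC specifies: 0x3636363636363636 repeats the ipad byte in every lane, so one uint64_t operation handles eight bytes and endianness never matters. A quick standalone check of that equivalence (illustrative sketch):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int
    main(void)
    {
            uint8_t bytes[64];
            uint64_t words[8];
            int i;

            for (i = 0; i < 64; i++)
                    bytes[i] = (uint8_t)i;
            memcpy(words, bytes, sizeof (words));

            for (i = 0; i < 64; i++)        /* byte-wise XOR */
                    bytes[i] ^= 0x36;
            for (i = 0; i < 8; i++)         /* word-wise XOR */
                    words[i] ^= 0x3636363636363636ULL;

            assert(memcmp(bytes, words, 64) == 0);
            return (0);
    }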
+
+/*
+ * Initialize a SHA2-HMAC context from the given mechanism and key,
+ * reusing a precomputed context template when one is provided.
+ */
+static int
+sha2_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
+ crypto_key_t *key, crypto_spi_ctx_template_t ctx_template,
+ crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+ uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
+ uint_t sha_digest_len, sha_hmac_block_size;
+
+ /*
+ * Set the digest length and block size to values appropriate to the
+ * mechanism
+ */
+ switch (mechanism->cm_type) {
+ case SHA256_HMAC_MECH_INFO_TYPE:
+ case SHA256_HMAC_GEN_MECH_INFO_TYPE:
+ sha_digest_len = SHA256_DIGEST_LENGTH;
+ sha_hmac_block_size = SHA256_HMAC_BLOCK_SIZE;
+ break;
+ case SHA384_HMAC_MECH_INFO_TYPE:
+ case SHA384_HMAC_GEN_MECH_INFO_TYPE:
+ case SHA512_HMAC_MECH_INFO_TYPE:
+ case SHA512_HMAC_GEN_MECH_INFO_TYPE:
+ sha_digest_len = SHA512_DIGEST_LENGTH;
+ sha_hmac_block_size = SHA512_HMAC_BLOCK_SIZE;
+ break;
+ default:
+ return (CRYPTO_MECHANISM_INVALID);
+ }
+
+ if (key->ck_format != CRYPTO_KEY_RAW)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ ctx->cc_provider_private = kmem_alloc(sizeof (sha2_hmac_ctx_t),
+ crypto_kmflag(req));
+ if (ctx->cc_provider_private == NULL)
+ return (CRYPTO_HOST_MEMORY);
+
+ PROV_SHA2_HMAC_CTX(ctx)->hc_mech_type = mechanism->cm_type;
+ if (ctx_template != NULL) {
+ /* reuse context template */
+ bcopy(ctx_template, PROV_SHA2_HMAC_CTX(ctx),
+ sizeof (sha2_hmac_ctx_t));
+ } else {
+ /* no context template, compute context */
+ if (keylen_in_bytes > sha_hmac_block_size) {
+ uchar_t digested_key[SHA512_DIGEST_LENGTH];
+ sha2_hmac_ctx_t *hmac_ctx = ctx->cc_provider_private;
+
+ /*
+ * Hash the passed-in key to get a smaller key.
+ * The inner context is used since it hasn't been
+ * initialized yet.
+ */
+ PROV_SHA2_DIGEST_KEY(mechanism->cm_type / 3,
+ &hmac_ctx->hc_icontext,
+ key->ck_data, keylen_in_bytes, digested_key);
+ sha2_mac_init_ctx(PROV_SHA2_HMAC_CTX(ctx),
+ digested_key, sha_digest_len);
+ } else {
+ sha2_mac_init_ctx(PROV_SHA2_HMAC_CTX(ctx),
+ key->ck_data, keylen_in_bytes);
+ }
+ }
+
+ /*
+ * Get the mechanism parameters, if applicable.
+ */
+ if (mechanism->cm_type % 3 == 2) {
+ if (mechanism->cm_param == NULL ||
+ mechanism->cm_param_len != sizeof (ulong_t)) {
+ ret = CRYPTO_MECHANISM_PARAM_INVALID;
+ } else {
+ /* don't dereference a NULL cm_param */
+ PROV_SHA2_GET_DIGEST_LEN(mechanism,
+ PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len);
+ if (PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len >
+ sha_digest_len)
+ ret = CRYPTO_MECHANISM_PARAM_INVALID;
+ }
+ }
+
+ if (ret != CRYPTO_SUCCESS) {
+ bzero(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t));
+ kmem_free(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t));
+ ctx->cc_provider_private = NULL;
+ }
+
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+sha2_mac_update(crypto_ctx_t *ctx, crypto_data_t *data,
+ crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+
+ ASSERT(ctx->cc_provider_private != NULL);
+
+ /*
+ * Do a SHA2 update of the inner context using the specified
+ * data.
+ */
+ switch (data->cd_format) {
+ case CRYPTO_DATA_RAW:
+ SHA2Update(&PROV_SHA2_HMAC_CTX(ctx)->hc_icontext,
+ (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
+ data->cd_length);
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha2_digest_update_uio(
+ &PROV_SHA2_HMAC_CTX(ctx)->hc_icontext, data);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+sha2_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+ uchar_t digest[SHA512_DIGEST_LENGTH];
+ uint32_t digest_len, sha_digest_len;
+
+ ASSERT(ctx->cc_provider_private != NULL);
+
+ /* Set the digest lengths to values appropriate to the mechanism */
+ switch (PROV_SHA2_HMAC_CTX(ctx)->hc_mech_type) {
+ case SHA256_HMAC_MECH_INFO_TYPE:
+ sha_digest_len = digest_len = SHA256_DIGEST_LENGTH;
+ break;
+ case SHA384_HMAC_MECH_INFO_TYPE:
+ sha_digest_len = digest_len = SHA384_DIGEST_LENGTH;
+ break;
+ case SHA512_HMAC_MECH_INFO_TYPE:
+ sha_digest_len = digest_len = SHA512_DIGEST_LENGTH;
+ break;
+ case SHA256_HMAC_GEN_MECH_INFO_TYPE:
+ sha_digest_len = SHA256_DIGEST_LENGTH;
+ digest_len = PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len;
+ break;
+ case SHA384_HMAC_GEN_MECH_INFO_TYPE:
+ case SHA512_HMAC_GEN_MECH_INFO_TYPE:
+ sha_digest_len = SHA512_DIGEST_LENGTH;
+ digest_len = PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len;
+ break;
+ default:
+ return (CRYPTO_ARGUMENTS_BAD);
+ }
+
+ /*
+ * If the output buffer is absent or too small, just return the
+ * length needed to store the output; do not destroy the context,
+ * so the caller can retry with a larger buffer.
+ */
+ if ((mac->cd_length == 0) || (mac->cd_length < digest_len)) {
+ mac->cd_length = digest_len;
+ return (CRYPTO_BUFFER_TOO_SMALL);
+ }
+
+ /*
+ * Do a SHA2 final on the inner context.
+ */
+ SHA2Final(digest, &PROV_SHA2_HMAC_CTX(ctx)->hc_icontext);
+
+ /*
+ * Do a SHA2 update on the outer context, feeding the inner
+ * digest as data.
+ */
+ SHA2Update(&PROV_SHA2_HMAC_CTX(ctx)->hc_ocontext, digest,
+ sha_digest_len);
+
+ /*
+ * Do a SHA2 final on the outer context, storing the computed
+ * digest in the user's buffer.
+ */
+ switch (mac->cd_format) {
+ case CRYPTO_DATA_RAW:
+ if (digest_len != sha_digest_len) {
+ /*
+ * The caller requested a short digest. Digest
+ * into a scratch buffer and return to
+ * the user only what was requested.
+ */
+ SHA2Final(digest,
+ &PROV_SHA2_HMAC_CTX(ctx)->hc_ocontext);
+ bcopy(digest, (unsigned char *)mac->cd_raw.iov_base +
+ mac->cd_offset, digest_len);
+ } else {
+ SHA2Final((unsigned char *)mac->cd_raw.iov_base +
+ mac->cd_offset,
+ &PROV_SHA2_HMAC_CTX(ctx)->hc_ocontext);
+ }
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha2_digest_final_uio(
+ &PROV_SHA2_HMAC_CTX(ctx)->hc_ocontext, mac,
+ digest_len, digest);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ if (ret == CRYPTO_SUCCESS)
+ mac->cd_length = digest_len;
+ else
+ mac->cd_length = 0;
+
+ bzero(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t));
+ kmem_free(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t));
+ ctx->cc_provider_private = NULL;
+
+ return (ret);
+}
+
+#define SHA2_MAC_UPDATE(data, ctx, ret) { \
+ switch (data->cd_format) { \
+ case CRYPTO_DATA_RAW: \
+ SHA2Update(&(ctx).hc_icontext, \
+ (uint8_t *)data->cd_raw.iov_base + \
+ data->cd_offset, data->cd_length); \
+ break; \
+ case CRYPTO_DATA_UIO: \
+ ret = sha2_digest_update_uio(&(ctx).hc_icontext, data); \
+ break; \
+ default: \
+ ret = CRYPTO_ARGUMENTS_BAD; \
+ } \
+}
+
+/* ARGSUSED */
+static int
+sha2_mac_atomic(crypto_provider_handle_t provider,
+ crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+ crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
+ crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+ uchar_t digest[SHA512_DIGEST_LENGTH];
+ sha2_hmac_ctx_t sha2_hmac_ctx;
+ uint32_t sha_digest_len, digest_len, sha_hmac_block_size;
+ uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
+
+ /*
+ * Set the digest length and block size to values appropriate to the
+ * mechanism
+ */
+ switch (mechanism->cm_type) {
+ case SHA256_HMAC_MECH_INFO_TYPE:
+ case SHA256_HMAC_GEN_MECH_INFO_TYPE:
+ sha_digest_len = digest_len = SHA256_DIGEST_LENGTH;
+ sha_hmac_block_size = SHA256_HMAC_BLOCK_SIZE;
+ break;
+ case SHA384_HMAC_MECH_INFO_TYPE:
+ case SHA384_HMAC_GEN_MECH_INFO_TYPE:
+ case SHA512_HMAC_MECH_INFO_TYPE:
+ case SHA512_HMAC_GEN_MECH_INFO_TYPE:
+ sha_digest_len = digest_len = SHA512_DIGEST_LENGTH;
+ sha_hmac_block_size = SHA512_HMAC_BLOCK_SIZE;
+ break;
+ default:
+ return (CRYPTO_MECHANISM_INVALID);
+ }
+
+ /* Add support for key by attributes (RFE 4706552) */
+ if (key->ck_format != CRYPTO_KEY_RAW)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ if (ctx_template != NULL) {
+ /* reuse context template */
+ bcopy(ctx_template, &sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t));
+ } else {
+ sha2_hmac_ctx.hc_mech_type = mechanism->cm_type;
+ /* no context template, initialize context */
+ if (keylen_in_bytes > sha_hmac_block_size) {
+ /*
+ * Hash the passed-in key to get a smaller key.
+ * The inner context is used since it hasn't been
+ * initialized yet.
+ */
+ PROV_SHA2_DIGEST_KEY(mechanism->cm_type / 3,
+ &sha2_hmac_ctx.hc_icontext,
+ key->ck_data, keylen_in_bytes, digest);
+ sha2_mac_init_ctx(&sha2_hmac_ctx, digest,
+ sha_digest_len);
+ } else {
+ sha2_mac_init_ctx(&sha2_hmac_ctx, key->ck_data,
+ keylen_in_bytes);
+ }
+ }
+
+ /* get the mechanism parameters, if applicable */
+ if ((mechanism->cm_type % 3) == 2) {
+ if (mechanism->cm_param == NULL ||
+ mechanism->cm_param_len != sizeof (ulong_t)) {
+ ret = CRYPTO_MECHANISM_PARAM_INVALID;
+ goto bail;
+ }
+ PROV_SHA2_GET_DIGEST_LEN(mechanism, digest_len);
+ if (digest_len > sha_digest_len) {
+ ret = CRYPTO_MECHANISM_PARAM_INVALID;
+ goto bail;
+ }
+ }
+
+ /* do a SHA2 update of the inner context using the specified data */
+ SHA2_MAC_UPDATE(data, sha2_hmac_ctx, ret);
+ if (ret != CRYPTO_SUCCESS)
+ /* the update failed, zeroize the context and bail */
+ goto bail;
+
+ /*
+ * Do a SHA2 final on the inner context.
+ */
+ SHA2Final(digest, &sha2_hmac_ctx.hc_icontext);
+
+ /*
+ * Do a SHA2 update on the outer context, feeding the inner
+ * digest as data.
+ *
+ * HMAC-SHA384 needs special handling as the outer hash needs only 48
+ * bytes of the inner hash value.
+ */
+ if (mechanism->cm_type == SHA384_HMAC_MECH_INFO_TYPE ||
+ mechanism->cm_type == SHA384_HMAC_GEN_MECH_INFO_TYPE)
+ SHA2Update(&sha2_hmac_ctx.hc_ocontext, digest,
+ SHA384_DIGEST_LENGTH);
+ else
+ SHA2Update(&sha2_hmac_ctx.hc_ocontext, digest, sha_digest_len);
+
+ /*
+ * Do a SHA2 final on the outer context, storing the computed
+ * digest in the user's buffer.
+ */
+ switch (mac->cd_format) {
+ case CRYPTO_DATA_RAW:
+ if (digest_len != sha_digest_len) {
+ /*
+ * The caller requested a short digest. Digest
+ * into a scratch buffer and return to
+ * the user only what was requested.
+ */
+ SHA2Final(digest, &sha2_hmac_ctx.hc_ocontext);
+ bcopy(digest, (unsigned char *)mac->cd_raw.iov_base +
+ mac->cd_offset, digest_len);
+ } else {
+ SHA2Final((unsigned char *)mac->cd_raw.iov_base +
+ mac->cd_offset, &sha2_hmac_ctx.hc_ocontext);
+ }
+ break;
+ case CRYPTO_DATA_UIO:
+ ret = sha2_digest_final_uio(&sha2_hmac_ctx.hc_ocontext, mac,
+ digest_len, digest);
+ break;
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ if (ret == CRYPTO_SUCCESS) {
+ mac->cd_length = digest_len;
+ /* Extra paranoia: zeroize the key-derived context */
+ bzero(&sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t));
+ return (CRYPTO_SUCCESS);
+ }
+bail:
+ bzero(&sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t));
+ mac->cd_length = 0;
+ return (ret);
+}
+
+/* ARGSUSED */
+static int
+sha2_mac_verify_atomic(crypto_provider_handle_t provider,
+ crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+ crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
+ crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req)
+{
+ int ret = CRYPTO_SUCCESS;
+ uchar_t digest[SHA512_DIGEST_LENGTH];
+ sha2_hmac_ctx_t sha2_hmac_ctx;
+ uint32_t sha_digest_len, digest_len, sha_hmac_block_size;
+ uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
+
+ /*
+ * Set the digest length and block size to values appropriate to the
+ * mechanism
+ */
+ switch (mechanism->cm_type) {
+ case SHA256_HMAC_MECH_INFO_TYPE:
+ case SHA256_HMAC_GEN_MECH_INFO_TYPE:
+ sha_digest_len = digest_len = SHA256_DIGEST_LENGTH;
+ sha_hmac_block_size = SHA256_HMAC_BLOCK_SIZE;
+ break;
+ case SHA384_HMAC_MECH_INFO_TYPE:
+ case SHA384_HMAC_GEN_MECH_INFO_TYPE:
+ case SHA512_HMAC_MECH_INFO_TYPE:
+ case SHA512_HMAC_GEN_MECH_INFO_TYPE:
+ sha_digest_len = digest_len = SHA512_DIGEST_LENGTH;
+ sha_hmac_block_size = SHA512_HMAC_BLOCK_SIZE;
+ break;
+ default:
+ return (CRYPTO_MECHANISM_INVALID);
+ }
+
+ /* Add support for key by attributes (RFE 4706552) */
+ if (key->ck_format != CRYPTO_KEY_RAW)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ if (ctx_template != NULL) {
+ /* reuse context template */
+ bcopy(ctx_template, &sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t));
+ } else {
+ sha2_hmac_ctx.hc_mech_type = mechanism->cm_type;
+ /* no context template, initialize context */
+ if (keylen_in_bytes > sha_hmac_block_size) {
+ /*
+ * Hash the passed-in key to get a smaller key.
+ * The inner context is used since it hasn't been
+ * initialized yet.
+ */
+ PROV_SHA2_DIGEST_KEY(mechanism->cm_type / 3,
+ &sha2_hmac_ctx.hc_icontext,
+ key->ck_data, keylen_in_bytes, digest);
+ sha2_mac_init_ctx(&sha2_hmac_ctx, digest,
+ sha_digest_len);
+ } else {
+ sha2_mac_init_ctx(&sha2_hmac_ctx, key->ck_data,
+ keylen_in_bytes);
+ }
+ }
+
+ /* get the mechanism parameters, if applicable */
+ if (mechanism->cm_type % 3 == 2) {
+ if (mechanism->cm_param == NULL ||
+ mechanism->cm_param_len != sizeof (ulong_t)) {
+ ret = CRYPTO_MECHANISM_PARAM_INVALID;
+ goto bail;
+ }
+ PROV_SHA2_GET_DIGEST_LEN(mechanism, digest_len);
+ if (digest_len > sha_digest_len) {
+ ret = CRYPTO_MECHANISM_PARAM_INVALID;
+ goto bail;
+ }
+ }
+
+ if (mac->cd_length != digest_len) {
+ ret = CRYPTO_INVALID_MAC;
+ goto bail;
+ }
+
+ /* do a SHA2 update of the inner context using the specified data */
+ SHA2_MAC_UPDATE(data, sha2_hmac_ctx, ret);
+ if (ret != CRYPTO_SUCCESS)
+ /* the update failed, zeroize the context and bail */
+ goto bail;
+
+ /* do a SHA2 final on the inner context */
+ SHA2Final(digest, &sha2_hmac_ctx.hc_icontext);
+
+ /*
+ * Do a SHA2 update on the outer context, feeding the inner
+ * digest as data.
+ *
+ * HMAC-SHA384 needs special handling as the outer hash needs only 48
+ * bytes of the inner hash value.
+ */
+ if (mechanism->cm_type == SHA384_HMAC_MECH_INFO_TYPE ||
+ mechanism->cm_type == SHA384_HMAC_GEN_MECH_INFO_TYPE)
+ SHA2Update(&sha2_hmac_ctx.hc_ocontext, digest,
+ SHA384_DIGEST_LENGTH);
+ else
+ SHA2Update(&sha2_hmac_ctx.hc_ocontext, digest, sha_digest_len);
+
+ /*
+ * Do a SHA2 final on the outer context, storing the computed
+ * digest in the user's buffer.
+ */
+ SHA2Final(digest, &sha2_hmac_ctx.hc_ocontext);
+
+ /*
+ * Compare the computed digest against the expected digest passed
+ * as an argument.
+ */
+
+ switch (mac->cd_format) {
+
+ case CRYPTO_DATA_RAW:
+ if (bcmp(digest, (unsigned char *)mac->cd_raw.iov_base +
+ mac->cd_offset, digest_len) != 0)
+ ret = CRYPTO_INVALID_MAC;
+ break;
+
+ case CRYPTO_DATA_UIO: {
+ off_t offset = mac->cd_offset;
+ uint_t vec_idx;
+ off_t scratch_offset = 0;
+ size_t length = digest_len;
+ size_t cur_len;
+
+ /* we support only kernel buffer */
+ if (mac->cd_uio->uio_segflg != UIO_SYSSPACE)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ /* jump to the first iovec containing the expected digest */
+ for (vec_idx = 0;
+ vec_idx < mac->cd_uio->uio_iovcnt &&
+ offset >= mac->cd_uio->uio_iov[vec_idx].iov_len;
+ offset -= mac->cd_uio->uio_iov[vec_idx++].iov_len)
+ ;
+ if (vec_idx == mac->cd_uio->uio_iovcnt) {
+ /*
+ * The caller specified an offset that is
+ * larger than the total size of the buffers
+ * it provided.
+ */
+ ret = CRYPTO_DATA_LEN_RANGE;
+ break;
+ }
+
+ /* do the comparison of computed digest vs specified one */
+ while (vec_idx < mac->cd_uio->uio_iovcnt && length > 0) {
+ cur_len = MIN(mac->cd_uio->uio_iov[vec_idx].iov_len -
+ offset, length);
+
+ if (bcmp(digest + scratch_offset,
+ mac->cd_uio->uio_iov[vec_idx].iov_base + offset,
+ cur_len) != 0) {
+ ret = CRYPTO_INVALID_MAC;
+ break;
+ }
+
+ length -= cur_len;
+ vec_idx++;
+ scratch_offset += cur_len;
+ offset = 0;
+ }
+ break;
+ }
+
+ default:
+ ret = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ bzero(&sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t));
+ return (ret);
+bail:
+ bzero(&sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t));
+ mac->cd_length = 0;
+ return (ret);
+}
+
+/*
+ * KCF software provider context management entry points.
+ */
+
+/* ARGSUSED */
+static int
+sha2_create_ctx_template(crypto_provider_handle_t provider,
+ crypto_mechanism_t *mechanism, crypto_key_t *key,
+ crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size,
+ crypto_req_handle_t req)
+{
+ sha2_hmac_ctx_t *sha2_hmac_ctx_tmpl;
+ uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length);
+ uint32_t sha_digest_len, sha_hmac_block_size;
+
+ /*
+ * Set the digest length and block size to values appropriate to the
+ * mechanism
+ */
+ switch (mechanism->cm_type) {
+ case SHA256_HMAC_MECH_INFO_TYPE:
+ case SHA256_HMAC_GEN_MECH_INFO_TYPE:
+ sha_digest_len = SHA256_DIGEST_LENGTH;
+ sha_hmac_block_size = SHA256_HMAC_BLOCK_SIZE;
+ break;
+ case SHA384_HMAC_MECH_INFO_TYPE:
+ case SHA384_HMAC_GEN_MECH_INFO_TYPE:
+ case SHA512_HMAC_MECH_INFO_TYPE:
+ case SHA512_HMAC_GEN_MECH_INFO_TYPE:
+ sha_digest_len = SHA512_DIGEST_LENGTH;
+ sha_hmac_block_size = SHA512_HMAC_BLOCK_SIZE;
+ break;
+ default:
+ return (CRYPTO_MECHANISM_INVALID);
+ }
+
+ /* Add support for key by attributes (RFE 4706552) */
+ if (key->ck_format != CRYPTO_KEY_RAW)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ /*
+ * Allocate and initialize SHA2 context.
+ */
+ sha2_hmac_ctx_tmpl = kmem_alloc(sizeof (sha2_hmac_ctx_t),
+ crypto_kmflag(req));
+ if (sha2_hmac_ctx_tmpl == NULL)
+ return (CRYPTO_HOST_MEMORY);
+
+ sha2_hmac_ctx_tmpl->hc_mech_type = mechanism->cm_type;
+
+ if (keylen_in_bytes > sha_hmac_block_size) {
+ uchar_t digested_key[SHA512_DIGEST_LENGTH];
+
+ /*
+ * Hash the passed-in key to get a smaller key.
+ * The inner context is used since it hasn't been
+ * initialized yet.
+ */
+ PROV_SHA2_DIGEST_KEY(mechanism->cm_type / 3,
+ &sha2_hmac_ctx_tmpl->hc_icontext,
+ key->ck_data, keylen_in_bytes, digested_key);
+ sha2_mac_init_ctx(sha2_hmac_ctx_tmpl, digested_key,
+ sha_digest_len);
+ } else {
+ sha2_mac_init_ctx(sha2_hmac_ctx_tmpl, key->ck_data,
+ keylen_in_bytes);
+ }
+
+ *ctx_template = (crypto_spi_ctx_template_t)sha2_hmac_ctx_tmpl;
+ *ctx_template_size = sizeof (sha2_hmac_ctx_t);
+
+ return (CRYPTO_SUCCESS);
+}
+
+static int
+sha2_free_context(crypto_ctx_t *ctx)
+{
+ uint_t ctx_len;
+
+ if (ctx->cc_provider_private == NULL)
+ return (CRYPTO_SUCCESS);
+
+ /*
+ * We have to free either SHA2 or SHA2-HMAC contexts, which
+ * have different lengths.
+ *
+ * Note: Below is dependent on the mechanism ordering.
+ */
+
+ if (PROV_SHA2_CTX(ctx)->sc_mech_type % 3 == 0)
+ ctx_len = sizeof (sha2_ctx_t);
+ else
+ ctx_len = sizeof (sha2_hmac_ctx_t);
+
+ bzero(ctx->cc_provider_private, ctx_len);
+ kmem_free(ctx->cc_provider_private, ctx_len);
+ ctx->cc_provider_private = NULL;
+
+ return (CRYPTO_SUCCESS);
+}
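The % 3 and / 3 arithmetic used throughout the SHA2 provider leans on the ordering of sha2_mech_info_tab: mechanisms come in triples per digest size (plain digest, fixed-length HMAC, general-length HMAC), so cm_type % 3 classifies the operation and cm_type / 3 selects the digest family. Assuming the enum follows the table's order, the mapping looks like this (illustrative):

    /*
     * cm_type                          % 3     / 3
     * SHA256_MECH_INFO_TYPE             0       0      plain digest
     * SHA256_HMAC_MECH_INFO_TYPE        1       0      fixed-length HMAC
     * SHA256_HMAC_GEN_MECH_INFO_TYPE    2       0      general-length HMAC
     * SHA384_MECH_INFO_TYPE             0       1
     * ...
     * SHA512_HMAC_GEN_MECH_INFO_TYPE    2       2
     */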
diff --git a/zfs/module/icp/io/skein_mod.c b/zfs/module/icp/io/skein_mod.c
new file mode 100644
index 000000000000..6db31c3559ac
--- /dev/null
+++ b/zfs/module/icp/io/skein_mod.c
@@ -0,0 +1,735 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
+
+#include <sys/modctl.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/spi.h>
+#include <sys/sysmacros.h>
+#include <sys/systm.h>
+#define SKEIN_MODULE_IMPL
+#include <sys/skein.h>
+
+/*
+ * Like the sha2 module, we create the skein module with two modlinkages:
+ * - modlmisc to allow direct calls to Skein_* API functions.
+ * - modlcrypto to integrate well into the Kernel Crypto Framework (KCF).
+ */
+static struct modlmisc modlmisc = {
+ &mod_cryptoops,
+ "Skein Message-Digest Algorithm"
+};
+
+static struct modlcrypto modlcrypto = {
+ &mod_cryptoops,
+ "Skein Kernel SW Provider"
+};
+
+static struct modlinkage modlinkage = {
+ MODREV_1, {&modlmisc, &modlcrypto, NULL}
+};
+
+static crypto_mech_info_t skein_mech_info_tab[] = {
+ {CKM_SKEIN_256, SKEIN_256_MECH_INFO_TYPE,
+ CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
+ 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+ {CKM_SKEIN_256_MAC, SKEIN_256_MAC_MECH_INFO_TYPE,
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX,
+ CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ {CKM_SKEIN_512, SKEIN_512_MECH_INFO_TYPE,
+ CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
+ 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+ {CKM_SKEIN_512_MAC, SKEIN_512_MAC_MECH_INFO_TYPE,
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX,
+ CRYPTO_KEYSIZE_UNIT_IN_BYTES},
+ {CKM_SKEIN1024, SKEIN1024_MECH_INFO_TYPE,
+ CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC,
+ 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS},
+ {CKM_SKEIN1024_MAC, SKEIN1024_MAC_MECH_INFO_TYPE,
+ CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX,
+ CRYPTO_KEYSIZE_UNIT_IN_BYTES}
+};
+
+static void skein_provider_status(crypto_provider_handle_t, uint_t *);
+
+static crypto_control_ops_t skein_control_ops = {
+ skein_provider_status
+};
+
+static int skein_digest_init(crypto_ctx_t *, crypto_mechanism_t *,
+ crypto_req_handle_t);
+static int skein_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *,
+ crypto_req_handle_t);
+static int skein_update(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t);
+static int skein_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t);
+static int skein_digest_atomic(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_data_t *, crypto_data_t *,
+ crypto_req_handle_t);
+
+static crypto_digest_ops_t skein_digest_ops = {
+ skein_digest_init,
+ skein_digest,
+ skein_update,
+ NULL,
+ skein_final,
+ skein_digest_atomic
+};
+
+static int skein_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+static int skein_mac_atomic(crypto_provider_handle_t, crypto_session_id_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *,
+ crypto_spi_ctx_template_t, crypto_req_handle_t);
+
+static crypto_mac_ops_t skein_mac_ops = {
+ skein_mac_init,
+ NULL,
+ skein_update, /* using regular digest update is OK here */
+ skein_final, /* using regular digest final is OK here */
+ skein_mac_atomic,
+ NULL
+};
+
+static int skein_create_ctx_template(crypto_provider_handle_t,
+ crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *,
+ size_t *, crypto_req_handle_t);
+static int skein_free_context(crypto_ctx_t *);
+
+static crypto_ctx_ops_t skein_ctx_ops = {
+ skein_create_ctx_template,
+ skein_free_context
+};
+
+static crypto_ops_t skein_crypto_ops = {{{{{
+ &skein_control_ops,
+ &skein_digest_ops,
+ NULL,
+ &skein_mac_ops,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ &skein_ctx_ops,
+}}}}};
+
+static crypto_provider_info_t skein_prov_info = {{{{
+ CRYPTO_SPI_VERSION_1,
+ "Skein Software Provider",
+ CRYPTO_SW_PROVIDER,
+ NULL,
+ &skein_crypto_ops,
+ sizeof (skein_mech_info_tab) / sizeof (crypto_mech_info_t),
+ skein_mech_info_tab
+}}}};
+
+static crypto_kcf_provider_handle_t skein_prov_handle = 0;
+
+typedef struct skein_ctx {
+ skein_mech_type_t sc_mech_type;
+ size_t sc_digest_bitlen;
+ /*LINTED(E_ANONYMOUS_UNION_DECL)*/
+ union {
+ Skein_256_Ctxt_t sc_256;
+ Skein_512_Ctxt_t sc_512;
+ Skein1024_Ctxt_t sc_1024;
+ };
+} skein_ctx_t;
+#define SKEIN_CTX(_ctx_) ((skein_ctx_t *)((_ctx_)->cc_provider_private))
+#define SKEIN_CTX_LVALUE(_ctx_) (_ctx_)->cc_provider_private
+#define SKEIN_OP(_skein_ctx, _op, ...) \
+ do { \
+ skein_ctx_t *sc = (_skein_ctx); \
+ switch (sc->sc_mech_type) { \
+ case SKEIN_256_MECH_INFO_TYPE: \
+ case SKEIN_256_MAC_MECH_INFO_TYPE: \
+ (void) Skein_256_ ## _op(&sc->sc_256, __VA_ARGS__);\
+ break; \
+ case SKEIN_512_MECH_INFO_TYPE: \
+ case SKEIN_512_MAC_MECH_INFO_TYPE: \
+ (void) Skein_512_ ## _op(&sc->sc_512, __VA_ARGS__);\
+ break; \
+ case SKEIN1024_MECH_INFO_TYPE: \
+ case SKEIN1024_MAC_MECH_INFO_TYPE: \
+ (void) Skein1024_ ## _op(&sc->sc_1024, __VA_ARGS__);\
+ break; \
+ } \
+ _NOTE(CONSTCOND) \
+ } while (0)
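SKEIN_OP builds the callee's name by token pasting, so a single macro covers all three internal state sizes. For a context whose sc_mech_type is SKEIN_512_MECH_INFO_TYPE, for example, SKEIN_OP(ctx, Update, buf, len) selects the Skein_512 arm and expands to (roughly):

    (void) Skein_512_Update(&sc->sc_512, buf, len);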
+
+static int
+skein_get_digest_bitlen(const crypto_mechanism_t *mechanism, size_t *result)
+{
+ if (mechanism->cm_param != NULL) {
+ /*LINTED(E_BAD_PTR_CAST_ALIGN)*/
+ skein_param_t *param = (skein_param_t *)mechanism->cm_param;
+
+ if (mechanism->cm_param_len != sizeof (*param) ||
+ param->sp_digest_bitlen == 0) {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+ *result = param->sp_digest_bitlen;
+ } else {
+ switch (mechanism->cm_type) {
+ case SKEIN_256_MECH_INFO_TYPE:
+ *result = 256;
+ break;
+ case SKEIN_512_MECH_INFO_TYPE:
+ *result = 512;
+ break;
+ case SKEIN1024_MECH_INFO_TYPE:
+ *result = 1024;
+ break;
+ default:
+ return (CRYPTO_MECHANISM_INVALID);
+ }
+ }
+ return (CRYPTO_SUCCESS);
+}
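Callers select a non-default output length by pointing cm_param at a skein_param_t. A minimal sketch of building such a mechanism (the variable names here are hypothetical):

    crypto_mechanism_t mech;
    skein_param_t param;

    param.sp_digest_bitlen = 160;   /* request a 160-bit digest */
    mech.cm_type = SKEIN_512_MECH_INFO_TYPE;
    mech.cm_param = (char *)&param;
    mech.cm_param_len = sizeof (param);
    /* skein_digest_init() will now produce 20-byte outputs */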
+
+int
+skein_mod_init(void)
+{
+ int error;
+
+ if ((error = mod_install(&modlinkage)) != 0)
+ return (error);
+
+ /*
+ * Try to register with KCF - failure shouldn't unload us, since we
+ * still may want to continue providing misc/skein functionality.
+ */
+ (void) crypto_register_provider(&skein_prov_info, &skein_prov_handle);
+
+ return (0);
+}
+
+int
+skein_mod_fini(void)
+{
+ int ret;
+
+ if (skein_prov_handle != 0) {
+ if ((ret = crypto_unregister_provider(skein_prov_handle)) !=
+ CRYPTO_SUCCESS) {
+ cmn_err(CE_WARN,
+ "skein _fini: crypto_unregister_provider() "
+ "failed (0x%x)", ret);
+ return (EBUSY);
+ }
+ skein_prov_handle = 0;
+ }
+
+ return (mod_remove(&modlinkage));
+}
+
+/*
+ * KCF software provider control entry points.
+ */
+/* ARGSUSED */
+static void
+skein_provider_status(crypto_provider_handle_t provider, uint_t *status)
+{
+ *status = CRYPTO_PROVIDER_READY;
+}
+
+/*
+ * General Skein hashing helper functions.
+ */
+
+/*
+ * Performs an Update on a context with uio input data.
+ */
+static int
+skein_digest_update_uio(skein_ctx_t *ctx, const crypto_data_t *data)
+{
+ off_t offset = data->cd_offset;
+ size_t length = data->cd_length;
+ uint_t vec_idx;
+ size_t cur_len;
+ const uio_t *uio = data->cd_uio;
+
+ /* we support only kernel buffer */
+ if (uio->uio_segflg != UIO_SYSSPACE)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ /*
+ * Jump to the first iovec containing data to be
+ * digested.
+ */
+ for (vec_idx = 0; vec_idx < uio->uio_iovcnt &&
+ offset >= uio->uio_iov[vec_idx].iov_len;
+ offset -= uio->uio_iov[vec_idx++].iov_len)
+ ;
+ if (vec_idx == uio->uio_iovcnt) {
+ /*
+ * The caller specified an offset that is larger than the
+ * total size of the buffers it provided.
+ */
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ /*
+ * Now do the digesting on the iovecs.
+ */
+ while (vec_idx < uio->uio_iovcnt && length > 0) {
+ cur_len = MIN(uio->uio_iov[vec_idx].iov_len - offset, length);
+ SKEIN_OP(ctx, Update, (uint8_t *)uio->uio_iov[vec_idx].iov_base
+ + offset, cur_len);
+ length -= cur_len;
+ vec_idx++;
+ offset = 0;
+ }
+
+ if (vec_idx == uio->uio_iovcnt && length > 0) {
+ /*
+ * The end of the specified iovecs was reached but
+ * the length requested could not be processed, i.e.
+ * the caller requested to digest more data than it provided.
+ */
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * Performs a Final on a context and writes to a uio digest output.
+ */
+static int
+skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest,
+ crypto_req_handle_t req)
+{
+ off_t offset = digest->cd_offset;
+ uint_t vec_idx;
+ uio_t *uio = digest->cd_uio;
+
+ /* we support only kernel buffer */
+ if (uio->uio_segflg != UIO_SYSSPACE)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ /*
+ * Jump to the first iovec containing ptr to the digest to be returned.
+ */
+ for (vec_idx = 0; vec_idx < uio->uio_iovcnt &&
+ offset >= uio->uio_iov[vec_idx].iov_len;
+ offset -= uio->uio_iov[vec_idx++].iov_len)
+ ;
+ if (vec_idx == uio->uio_iovcnt) {
+ /*
+ * The caller specified an offset that is larger than the
+ * total size of the buffers it provided.
+ */
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+ if (offset + CRYPTO_BITS2BYTES(ctx->sc_digest_bitlen) <=
+ uio->uio_iov[vec_idx].iov_len) {
+ /* The computed digest will fit in the current iovec. */
+ SKEIN_OP(ctx, Final,
+ (uchar_t *)uio->uio_iov[vec_idx].iov_base + offset);
+ } else {
+ uint8_t *digest_tmp;
+ off_t scratch_offset = 0;
+ size_t length = CRYPTO_BITS2BYTES(ctx->sc_digest_bitlen);
+ size_t cur_len;
+
+ digest_tmp = kmem_alloc(CRYPTO_BITS2BYTES(
+ ctx->sc_digest_bitlen), crypto_kmflag(req));
+ if (digest_tmp == NULL)
+ return (CRYPTO_HOST_MEMORY);
+ SKEIN_OP(ctx, Final, digest_tmp);
+ while (vec_idx < uio->uio_iovcnt && length > 0) {
+ cur_len = MIN(uio->uio_iov[vec_idx].iov_len - offset,
+ length);
+ bcopy(digest_tmp + scratch_offset,
+ uio->uio_iov[vec_idx].iov_base + offset, cur_len);
+
+ length -= cur_len;
+ vec_idx++;
+ scratch_offset += cur_len;
+ offset = 0;
+ }
+ kmem_free(digest_tmp, CRYPTO_BITS2BYTES(ctx->sc_digest_bitlen));
+
+ if (vec_idx == uio->uio_iovcnt && length > 0) {
+ /*
+ * The end of the specified iovecs was reached, but the
+ * requested length could not be processed; i.e., the
+ * caller requested to digest more data than it
+ * provided.
+ */
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+ }
+
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * KCF software provider digest entry points.
+ */
+
+/*
+ * Initializes a skein digest context to the configuration in `mechanism'.
+ * The mechanism cm_type must be one of SKEIN_*_MECH_INFO_TYPE. The cm_param
+ * field may contain a skein_param_t structure indicating the length of the
+ * digest the algorithm should produce. Otherwise the default output lengths
+ * are applied (32 bytes for Skein-256, 64 bytes for Skein-512 and 128 bytes
+ * for Skein-1024).
+ */
+static int
+skein_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
+ crypto_req_handle_t req)
+{
+ int error = CRYPTO_SUCCESS;
+
+ if (!VALID_SKEIN_DIGEST_MECH(mechanism->cm_type))
+ return (CRYPTO_MECHANISM_INVALID);
+
+ SKEIN_CTX_LVALUE(ctx) = kmem_alloc(sizeof (*SKEIN_CTX(ctx)),
+ crypto_kmflag(req));
+ if (SKEIN_CTX(ctx) == NULL)
+ return (CRYPTO_HOST_MEMORY);
+
+ SKEIN_CTX(ctx)->sc_mech_type = mechanism->cm_type;
+ error = skein_get_digest_bitlen(mechanism,
+ &SKEIN_CTX(ctx)->sc_digest_bitlen);
+ if (error != CRYPTO_SUCCESS)
+ goto errout;
+ SKEIN_OP(SKEIN_CTX(ctx), Init, SKEIN_CTX(ctx)->sc_digest_bitlen);
+
+ return (CRYPTO_SUCCESS);
+errout:
+ bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+ kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+ SKEIN_CTX_LVALUE(ctx) = NULL;
+ return (error);
+}
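+
+/*
+ * Illustrative usage sketch (not compiled): requesting a non-default
+ * digest length through cm_param, as described above. The skein_param_t
+ * field name (sp_digest_bitlen) is assumed from the provider's header.
+ */
+#if 0
+ skein_param_t param;
+ crypto_mechanism_t mech;
+
+ param.sp_digest_bitlen = 256; /* 32-byte digest from Skein-512 */
+ mech.cm_type = SKEIN_512_MECH_INFO_TYPE;
+ mech.cm_param = (char *)&param;
+ mech.cm_param_len = sizeof (param);
+#endif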
+
+/*
+ * Executes a skein_update and skein_final on a pre-initialized crypto
+ * context in a single step. See the documentation for those functions
+ * for the expected arguments.
+ */
+static int
+skein_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest,
+ crypto_req_handle_t req)
+{
+ int error = CRYPTO_SUCCESS;
+
+ ASSERT(SKEIN_CTX(ctx) != NULL);
+
+ if (digest->cd_length <
+ CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen)) {
+ digest->cd_length =
+ CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen);
+ return (CRYPTO_BUFFER_TOO_SMALL);
+ }
+
+ error = skein_update(ctx, data, req);
+ if (error != CRYPTO_SUCCESS) {
+ bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+ kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+ SKEIN_CTX_LVALUE(ctx) = NULL;
+ digest->cd_length = 0;
+ return (error);
+ }
+ error = skein_final(ctx, digest, req);
+
+ return (error);
+}
+
+/*
+ * Performs a skein Update with the input message in `data' (successive calls
+ * can push more data). This is used both for digest and MAC operation.
+ * Supported input data formats are raw and uio.
+ */
+/*ARGSUSED*/
+static int
+skein_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req)
+{
+ int error = CRYPTO_SUCCESS;
+
+ ASSERT(SKEIN_CTX(ctx) != NULL);
+
+ switch (data->cd_format) {
+ case CRYPTO_DATA_RAW:
+ SKEIN_OP(SKEIN_CTX(ctx), Update,
+ (uint8_t *)data->cd_raw.iov_base + data->cd_offset,
+ data->cd_length);
+ break;
+ case CRYPTO_DATA_UIO:
+ error = skein_digest_update_uio(SKEIN_CTX(ctx), data);
+ break;
+ default:
+ error = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ return (error);
+}
+
+/*
+ * Performs a skein Final, writing the output to `digest'. This is used both
+ * for digest and MAC operation.
+ * Supported output digest formats are raw and uio.
+ */
+/*ARGSUSED*/
+static int
+skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req)
+{
+ int error = CRYPTO_SUCCESS;
+
+ ASSERT(SKEIN_CTX(ctx) != NULL);
+
+ if (digest->cd_length <
+ CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen)) {
+ digest->cd_length =
+ CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen);
+ return (CRYPTO_BUFFER_TOO_SMALL);
+ }
+
+ switch (digest->cd_format) {
+ case CRYPTO_DATA_RAW:
+ SKEIN_OP(SKEIN_CTX(ctx), Final,
+ (uint8_t *)digest->cd_raw.iov_base + digest->cd_offset);
+ break;
+ case CRYPTO_DATA_UIO:
+ error = skein_digest_final_uio(SKEIN_CTX(ctx), digest, req);
+ break;
+ default:
+ error = CRYPTO_ARGUMENTS_BAD;
+ }
+
+ if (error == CRYPTO_SUCCESS)
+ digest->cd_length =
+ CRYPTO_BITS2BYTES(SKEIN_CTX(ctx)->sc_digest_bitlen);
+ else
+ digest->cd_length = 0;
+
+ bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+ kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+ SKEIN_CTX_LVALUE(ctx) = NULL;
+
+ return (error);
+}
+
+/*
+ * Performs a full skein digest computation in a single call, configuring the
+ * algorithm according to `mechanism', reading the input to be digested from
+ * `data' and writing the output to `digest'.
+ * Supported input/output formats are raw and uio.
+ */
+/*ARGSUSED*/
+static int
+skein_digest_atomic(crypto_provider_handle_t provider,
+ crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+ crypto_data_t *data, crypto_data_t *digest, crypto_req_handle_t req)
+{
+ int error;
+ skein_ctx_t skein_ctx;
+ crypto_ctx_t ctx;
+ SKEIN_CTX_LVALUE(&ctx) = &skein_ctx;
+
+ /* Init */
+ if (!VALID_SKEIN_DIGEST_MECH(mechanism->cm_type))
+ return (CRYPTO_MECHANISM_INVALID);
+ skein_ctx.sc_mech_type = mechanism->cm_type;
+ error = skein_get_digest_bitlen(mechanism, &skein_ctx.sc_digest_bitlen);
+ if (error != CRYPTO_SUCCESS)
+ goto out;
+ SKEIN_OP(&skein_ctx, Init, skein_ctx.sc_digest_bitlen);
+
+ if ((error = skein_update(&ctx, data, req)) != CRYPTO_SUCCESS)
+ goto out;
+ if ((error = skein_final(&ctx, digest, req)) != CRYPTO_SUCCESS)
+ goto out;
+
+out:
+ if (error == CRYPTO_SUCCESS)
+ digest->cd_length =
+ CRYPTO_BITS2BYTES(skein_ctx.sc_digest_bitlen);
+ else
+ digest->cd_length = 0;
+ bzero(&skein_ctx, sizeof (skein_ctx));
+
+ return (error);
+}
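+
+/*
+ * Illustrative usage sketch (not compiled): raw (contiguous)
+ * crypto_data_t descriptors of the kind a test harness could hand to the
+ * atomic entry point above. msg/msglen and digest_buf are placeholders.
+ */
+#if 0
+ crypto_data_t in, out;
+
+ bzero(&in, sizeof (in));
+ in.cd_format = CRYPTO_DATA_RAW;
+ in.cd_raw.iov_base = (char *)msg;
+ in.cd_raw.iov_len = msglen;
+ in.cd_length = msglen;
+
+ bzero(&out, sizeof (out));
+ out.cd_format = CRYPTO_DATA_RAW;
+ out.cd_raw.iov_base = (char *)digest_buf;
+ out.cd_raw.iov_len = sizeof (digest_buf);
+ out.cd_length = sizeof (digest_buf);
+#endif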
+
+/*
+ * Helper function that builds a Skein MAC context from the provided
+ * mechanism and key.
+ */
+static int
+skein_mac_ctx_build(skein_ctx_t *ctx, crypto_mechanism_t *mechanism,
+ crypto_key_t *key)
+{
+ int error;
+
+ if (!VALID_SKEIN_MAC_MECH(mechanism->cm_type))
+ return (CRYPTO_MECHANISM_INVALID);
+ if (key->ck_format != CRYPTO_KEY_RAW)
+ return (CRYPTO_ARGUMENTS_BAD);
+ ctx->sc_mech_type = mechanism->cm_type;
+ error = skein_get_digest_bitlen(mechanism, &ctx->sc_digest_bitlen);
+ if (error != CRYPTO_SUCCESS)
+ return (error);
+ SKEIN_OP(ctx, InitExt, ctx->sc_digest_bitlen, 0, key->ck_data,
+ CRYPTO_BITS2BYTES(key->ck_length));
+
+ return (CRYPTO_SUCCESS);
+}
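+
+/*
+ * Illustrative usage sketch (not compiled): a raw key of the form
+ * consumed by skein_mac_ctx_build() above. Note that ck_length is a bit
+ * count; key_bytes/key_len are placeholders.
+ */
+#if 0
+ crypto_key_t key;
+
+ bzero(&key, sizeof (key));
+ key.ck_format = CRYPTO_KEY_RAW;
+ key.ck_data = key_bytes;
+ key.ck_length = key_len * 8; /* bits, not bytes */
+#endif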
+
+/*
+ * KCF software provider MAC entry points.
+ */
+/*
+ * Initializes a skein MAC context. You may pass a ctx_template, in which
+ * case the template will be reused to make initialization more efficient.
+ * Otherwise a new context will be constructed. The mechanism cm_type must
+ * be one of SKEIN_*_MAC_MECH_INFO_TYPE. Same as in skein_digest_init, you
+ * may pass a skein_param_t in cm_param to configure the length of the
+ * digest. The key must be in raw format.
+ */
+static int
+skein_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism,
+ crypto_key_t *key, crypto_spi_ctx_template_t ctx_template,
+ crypto_req_handle_t req)
+{
+ int error;
+
+ SKEIN_CTX_LVALUE(ctx) = kmem_alloc(sizeof (*SKEIN_CTX(ctx)),
+ crypto_kmflag(req));
+ if (SKEIN_CTX(ctx) == NULL)
+ return (CRYPTO_HOST_MEMORY);
+
+ if (ctx_template != NULL) {
+ bcopy(ctx_template, SKEIN_CTX(ctx),
+ sizeof (*SKEIN_CTX(ctx)));
+ } else {
+ error = skein_mac_ctx_build(SKEIN_CTX(ctx), mechanism, key);
+ if (error != CRYPTO_SUCCESS)
+ goto errout;
+ }
+
+ return (CRYPTO_SUCCESS);
+errout:
+ bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+ kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+ SKEIN_CTX_LVALUE(ctx) = NULL;
+ return (error);
+}
+
+/*
+ * The MAC update and final calls are reused from the regular digest code.
+ */
+
+/*ARGSUSED*/
+/*
+ * Same as skein_digest_atomic, performs an atomic Skein MAC operation in
+ * one step. All the same properties apply to the arguments of this
+ * function as to those of the partial operations above.
+ */
+static int
+skein_mac_atomic(crypto_provider_handle_t provider,
+ crypto_session_id_t session_id, crypto_mechanism_t *mechanism,
+ crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac,
+ crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req)
+{
+ /* faux crypto context just for skein_digest_{update,final} */
+ int error;
+ crypto_ctx_t ctx;
+ skein_ctx_t skein_ctx;
+ SKEIN_CTX_LVALUE(&ctx) = &skein_ctx;
+
+ if (ctx_template != NULL) {
+ bcopy(ctx_template, &skein_ctx, sizeof (skein_ctx));
+ } else {
+ error = skein_mac_ctx_build(&skein_ctx, mechanism, key);
+ if (error != CRYPTO_SUCCESS)
+ goto errout;
+ }
+
+ if ((error = skein_update(&ctx, data, req)) != CRYPTO_SUCCESS)
+ goto errout;
+ if ((error = skein_final(&ctx, mac, req)) != CRYPTO_SUCCESS)
+ goto errout;
+
+ return (CRYPTO_SUCCESS);
+errout:
+ bzero(&skein_ctx, sizeof (skein_ctx));
+ return (error);
+}
+
+/*
+ * KCF software provider context management entry points.
+ */
+
+/*
+ * Constructs a context template for the Skein MAC algorithm. The same
+ * properties apply to the arguments of this function as to those of
+ * skein_mac_init.
+ */
+/*ARGSUSED*/
+static int
+skein_create_ctx_template(crypto_provider_handle_t provider,
+ crypto_mechanism_t *mechanism, crypto_key_t *key,
+ crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size,
+ crypto_req_handle_t req)
+{
+ int error;
+ skein_ctx_t *ctx_tmpl;
+
+ ctx_tmpl = kmem_alloc(sizeof (*ctx_tmpl), crypto_kmflag(req));
+ if (ctx_tmpl == NULL)
+ return (CRYPTO_HOST_MEMORY);
+ error = skein_mac_ctx_build(ctx_tmpl, mechanism, key);
+ if (error != CRYPTO_SUCCESS)
+ goto errout;
+ *ctx_template = ctx_tmpl;
+ *ctx_template_size = sizeof (*ctx_tmpl);
+
+ return (CRYPTO_SUCCESS);
+errout:
+ bzero(ctx_tmpl, sizeof (*ctx_tmpl));
+ kmem_free(ctx_tmpl, sizeof (*ctx_tmpl));
+ return (error);
+}
+
+/*
+ * Frees a skein context in a parent crypto context.
+ */
+static int
+skein_free_context(crypto_ctx_t *ctx)
+{
+ if (SKEIN_CTX(ctx) != NULL) {
+ bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+ kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx)));
+ SKEIN_CTX_LVALUE(ctx) = NULL;
+ }
+
+ return (CRYPTO_SUCCESS);
+}
diff --git a/zfs/module/icp/os/modconf.c b/zfs/module/icp/os/modconf.c
new file mode 100644
index 000000000000..eb50767b74d8
--- /dev/null
+++ b/zfs/module/icp/os/modconf.c
@@ -0,0 +1,171 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/modctl.h>
+
+/*
+ * Null operations; used for uninitialized and "misc" modules.
+ */
+static int mod_null(struct modlmisc *, struct modlinkage *);
+static int mod_infonull(void *, struct modlinkage *, int *);
+
+/*
+ * Cryptographic Modules
+ */
+struct mod_ops mod_cryptoops = {
+ mod_null, mod_null, mod_infonull
+};
+
+/*
+ * Null operation; return 0.
+ */
+static int
+mod_null(struct modlmisc *modl, struct modlinkage *modlp)
+{
+ return (0);
+}
+
+/*
+ * Status for User modules.
+ */
+static int
+mod_infonull(void *modl, struct modlinkage *modlp, int *p0)
+{
+ *p0 = -1; /* for modinfo display */
+ return (0);
+}
+
+/*
+ * Install a module.
+ * (This routine is in the Solaris SPARC DDI/DKI)
+ */
+int
+mod_install(struct modlinkage *modlp)
+{
+ int retval = -1; /* No linkage structures */
+ struct modlmisc **linkpp;
+ struct modlmisc **linkpp1;
+
+ if (modlp->ml_rev != MODREV_1) {
+ cmn_err(CE_WARN, "mod_install: "
+ "modlinkage structure is not MODREV_1\n");
+ return (EINVAL);
+ }
+ linkpp = (struct modlmisc **)&modlp->ml_linkage[0];
+
+ while (*linkpp != NULL) {
+ if ((retval = MODL_INSTALL(*linkpp, modlp)) != 0) {
+ linkpp1 = (struct modlmisc **)&modlp->ml_linkage[0];
+
+ while (linkpp1 != linkpp) {
+ MODL_REMOVE(*linkpp1, modlp); /* clean up */
+ linkpp1++;
+ }
+ break;
+ }
+ linkpp++;
+ }
+ return (retval);
+}
+
+static char *reins_err =
+ "Could not reinstall %s\nReboot to correct the problem";
+
+/*
+ * Remove a module. This is called by the module wrapper routine.
+ * (This routine is in the Solaris SPARC DDI/DKI)
+ */
+int
+mod_remove(struct modlinkage *modlp)
+{
+ int retval = 0;
+ struct modlmisc **linkpp, *last_linkp;
+
+ linkpp = (struct modlmisc **)&modlp->ml_linkage[0];
+
+ while (*linkpp != NULL) {
+ if ((retval = MODL_REMOVE(*linkpp, modlp)) != 0) {
+ last_linkp = *linkpp;
+ linkpp = (struct modlmisc **)&modlp->ml_linkage[0];
+ while (*linkpp != last_linkp) {
+ if (MODL_INSTALL(*linkpp, modlp) != 0) {
+ cmn_err(CE_WARN, reins_err,
+ (*linkpp)->misc_linkinfo);
+ break;
+ }
+ linkpp++;
+ }
+ break;
+ }
+ linkpp++;
+ }
+ return (retval);
+}
+
+/*
+ * Get module status.
+ * (This routine is in the Solaris SPARC DDI/DKI)
+ */
+int
+mod_info(struct modlinkage *modlp, struct modinfo *modinfop)
+{
+ int i;
+ int retval = 0;
+ struct modspecific_info *msip;
+ struct modlmisc **linkpp;
+
+ modinfop->mi_rev = modlp->ml_rev;
+
+ linkpp = (struct modlmisc **)modlp->ml_linkage;
+ msip = &modinfop->mi_msinfo[0];
+
+ for (i = 0; i < MODMAXLINK; i++) {
+ if (*linkpp == NULL) {
+ msip->msi_linkinfo[0] = '\0';
+ } else {
+ (void) strlcpy(msip->msi_linkinfo,
+ (*linkpp)->misc_linkinfo, MODMAXLINKINFOLEN);
+ retval = MODL_INFO(*linkpp, modlp, &msip->msi_p0);
+ if (retval != 0)
+ break;
+ linkpp++;
+ }
+ msip++;
+ }
+
+ if (modinfop->mi_info == MI_INFO_LINKAGE) {
+ /*
+ * Slight kludge used to extract the address of the
+ * modlinkage structure from the module (just after
+ * loading a module for the very first time)
+ */
+ modinfop->mi_base = (void *)modlp;
+ }
+
+ if (retval == 0)
+ return (1);
+ return (0);
+}
diff --git a/zfs/module/icp/os/modhash.c b/zfs/module/icp/os/modhash.c
new file mode 100644
index 000000000000..1ff782afc0ce
--- /dev/null
+++ b/zfs/module/icp/os/modhash.c
@@ -0,0 +1,925 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * mod_hash: flexible hash table implementation.
+ *
+ * This is a reasonably fast, reasonably flexible hash table implementation
+ * which features pluggable hash algorithms to support storing arbitrary keys
+ * and values. It is designed to handle small (< 100,000 items) amounts of
+ * data. The hash uses chaining to resolve collisions, and does not feature a
+ * mechanism to grow the hash. Care must be taken to pick nchains to be large
+ * enough for the application at hand, or lots of time will be wasted searching
+ * hash chains.
+ *
+ * The client of the hash is required to supply a number of items to support
+ * the various hash functions:
+ *
+ * - Destructor functions for the key and value being hashed.
+ * A destructor is responsible for freeing an object when the hash
+ * table is no longer storing it. Since keys and values can be of
+ * arbitrary type, separate destructors for keys & values are used.
+ * These may be mod_hash_null_keydtor and mod_hash_null_valdtor if no
+ * destructor is needed for either a key or value.
+ *
+ * - A hashing algorithm which returns a uint_t representing a hash index
+ * The number returned need _not_ be between 0 and nchains. The mod_hash
+ * code will take care of doing that. The second argument (after the
+ * key) to the hashing function is a void * that represents
+ * hash_alg_data -- this is provided so that the hashing algorithm can
+ * maintain some state across calls, or keep algorithm-specific
+ * constants associated with the hash table.
+ *
+ * A pointer-hashing and a string-hashing algorithm are supplied in
+ * this file.
+ *
+ * - A key comparator (a la qsort).
+ * This is used when searching the hash chain. The key comparator
+ * determines if two keys match. It should follow the return value
+ * semantics of strcmp.
+ *
+ * string and pointer comparators are supplied in this file.
+ *
+ * mod_hash_create_strhash() and mod_hash_create_ptrhash() provide good
+ * examples of how to create a customized hash table.
+ *
+ * Basic hash operations:
+ *
+ * mod_hash_create_strhash(name, nchains, dtor):
+ * create a hash using strings as keys.
+ * NOTE: This creates a hash which automatically cleans up the
+ * string keys it is given.
+ *
+ * mod_hash_create_ptrhash(name, nchains, dtor, key_elem_size):
+ * create a hash using pointers as keys.
+ *
+ * mod_hash_create_extended(name, nchains, kdtor, vdtor,
+ * hash_alg, hash_alg_data,
+ * keycmp, sleep)
+ * create a customized hash table.
+ *
+ * mod_hash_destroy_hash(hash):
+ * destroy the given hash table, calling the key and value destructors
+ * on each key-value pair stored in the hash.
+ *
+ * mod_hash_insert(hash, key, val):
+ * place a key, value pair into the given hash.
+ * duplicate keys are rejected.
+ *
+ * mod_hash_insert_reserve(hash, key, val, handle):
+ * place a key, value pair into the given hash, using handle to indicate
+ * the reserved storage for the pair. (no memory allocation is needed
+ * during a mod_hash_insert_reserve.) duplicate keys are rejected.
+ *
+ * mod_hash_reserve(hash, *handle):
+ * reserve storage for a key-value pair using the memory allocation
+ * policy of 'hash', returning the storage handle in 'handle'.
+ *
+ * mod_hash_reserve_nosleep(hash, *handle): reserve storage for a key-value
+ * pair ignoring the memory allocation policy of 'hash' and always without
+ * sleep, returning the storage handle in 'handle'.
+ *
+ * mod_hash_remove(hash, key, *val):
+ * remove a key-value pair with key 'key' from 'hash', destroying the
+ * stored key, and returning the value in val.
+ *
+ * mod_hash_replace(hash, key, val)
+ * atomically remove an existing key-value pair from a hash, and replace
+ * the key and value with the ones supplied. The removed key and value
+ * (if any) are destroyed.
+ *
+ * mod_hash_destroy(hash, key):
+ * remove a key-value pair with key 'key' from 'hash', destroying both
+ * stored key and stored value.
+ *
+ * mod_hash_find(hash, key, val):
+ * find a value in the hash table corresponding to the given key.
+ *
+ * mod_hash_find_cb(hash, key, val, found_callback)
+ * find a value in the hash table corresponding to the given key.
+ * If a value is found, call specified callback passing key and val to it.
+ * The callback is called with the hash lock held.
+ * It is intended to be used in situations where the act of locating the
+ * data must also modify it - such as in reference counting schemes.
+ *
+ * mod_hash_walk(hash, callback(key, elem, arg), arg)
+ * walks all the elements in the hashtable and invokes the callback
+ * function with the key/value pair for each element. the hashtable
+ * is locked for readers so the callback function should not attempt
+ * to do any updates to the hashtable. the callback function should
+ * return MH_WALK_CONTINUE to continue walking the hashtable or
+ * MH_WALK_TERMINATE to abort the walk of the hashtable.
+ *
+ * mod_hash_clear(hash):
+ * clears the given hash table of entries, calling the key and value
+ * destructors for every element in the hash.
+ */
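+
+/*
+ * Illustrative usage sketch (not compiled): the basic string-hash
+ * lifecycle described above. Keys must be heap-allocated, because the
+ * string key destructor frees them when entries are removed or the hash
+ * is destroyed.
+ */
+#if 0
+ mod_hash_t *h;
+ mod_hash_val_t val;
+ char *key;
+
+ h = mod_hash_create_strhash("example hash", 128, mod_hash_null_valdtor);
+
+ key = kmem_alloc(strlen("alpha") + 1, KM_SLEEP);
+ (void) strcpy(key, "alpha");
+ (void) mod_hash_insert(h, (mod_hash_key_t)key,
+ (mod_hash_val_t)(uintptr_t)42);
+
+ if (mod_hash_find(h, (mod_hash_key_t)"alpha", &val) == 0) {
+ /* val is now (mod_hash_val_t)(uintptr_t)42 */
+ }
+
+ mod_hash_destroy_strhash(h); /* destroys remaining keys and values */
+#endif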
+
+#include <sys/zfs_context.h>
+#include <sys/bitmap.h>
+#include <sys/modhash_impl.h>
+#include <sys/sysmacros.h>
+
+/*
+ * MH_KEY_DESTROY()
+ * Invoke the key destructor.
+ */
+#define MH_KEY_DESTROY(hash, key) ((hash->mh_kdtor)(key))
+
+/*
+ * MH_VAL_DESTROY()
+ * Invoke the value destructor.
+ */
+#define MH_VAL_DESTROY(hash, val) ((hash->mh_vdtor)(val))
+
+/*
+ * MH_KEYCMP()
+ * Call the key comparator for the given hash keys.
+ */
+#define MH_KEYCMP(hash, key1, key2) ((hash->mh_keycmp)(key1, key2))
+
+/*
+ * Cache for struct mod_hash_entry
+ */
+kmem_cache_t *mh_e_cache = NULL;
+mod_hash_t *mh_head = NULL;
+kmutex_t mh_head_lock;
+
+/*
+ * mod_hash_null_keydtor()
+ * mod_hash_null_valdtor()
+ * no-op key and value destructors.
+ */
+/*ARGSUSED*/
+void
+mod_hash_null_keydtor(mod_hash_key_t key)
+{
+}
+
+/*ARGSUSED*/
+void
+mod_hash_null_valdtor(mod_hash_val_t val)
+{
+}
+
+/*
+ * mod_hash_bystr()
+ * mod_hash_strkey_cmp()
+ * mod_hash_strkey_dtor()
+ * mod_hash_strval_dtor()
+ * Hash and key comparison routines for hashes with string keys.
+ *
+ * mod_hash_create_strhash()
+ * Create a hash using strings as keys
+ *
+ * The string hashing algorithm is from the "Dragon Book" --
+ * "Compilers: Principles, Tools & Techniques", by Aho, Sethi, Ullman
+ */
+
+/*ARGSUSED*/
+uint_t
+mod_hash_bystr(void *hash_data, mod_hash_key_t key)
+{
+ uint_t hash = 0;
+ uint_t g;
+ char *p, *k = (char *)key;
+
+ ASSERT(k);
+ for (p = k; *p != '\0'; p++) {
+ hash = (hash << 4) + *p;
+ if ((g = (hash & 0xf0000000)) != 0) {
+ hash ^= (g >> 24);
+ hash ^= g;
+ }
+ }
+ return (hash);
+}
+
+int
+mod_hash_strkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
+{
+ return (strcmp((char *)key1, (char *)key2));
+}
+
+void
+mod_hash_strkey_dtor(mod_hash_key_t key)
+{
+ char *c = (char *)key;
+ kmem_free(c, strlen(c) + 1);
+}
+
+void
+mod_hash_strval_dtor(mod_hash_val_t val)
+{
+ char *c = (char *)val;
+ kmem_free(c, strlen(c) + 1);
+}
+
+mod_hash_t *
+mod_hash_create_strhash_nodtr(char *name, size_t nchains,
+ void (*val_dtor)(mod_hash_val_t))
+{
+ return (mod_hash_create_extended(name, nchains, mod_hash_null_keydtor,
+ val_dtor, mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP));
+}
+
+mod_hash_t *
+mod_hash_create_strhash(char *name, size_t nchains,
+ void (*val_dtor)(mod_hash_val_t))
+{
+ return (mod_hash_create_extended(name, nchains, mod_hash_strkey_dtor,
+ val_dtor, mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP));
+}
+
+void
+mod_hash_destroy_strhash(mod_hash_t *strhash)
+{
+ ASSERT(strhash);
+ mod_hash_destroy_hash(strhash);
+}
+
+
+/*
+ * mod_hash_byptr()
+ * mod_hash_ptrkey_cmp()
+ * Hash and key comparison routines for hashes with pointer keys.
+ *
+ * mod_hash_create_ptrhash()
+ * mod_hash_destroy_ptrhash()
+ * Create a hash that uses pointers as keys. This hash algorithm
+ * picks an appropriate set of middle bits in the address to hash on
+ * based on the size of the hash table and a hint about the size of
+ * the items pointed at.
+ */
+uint_t
+mod_hash_byptr(void *hash_data, mod_hash_key_t key)
+{
+ uintptr_t k = (uintptr_t)key;
+ k >>= (int)(uintptr_t)hash_data;
+
+ return ((uint_t)k);
+}
+
+int
+mod_hash_ptrkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
+{
+ uintptr_t k1 = (uintptr_t)key1;
+ uintptr_t k2 = (uintptr_t)key2;
+ if (k1 > k2)
+ return (-1);
+ else if (k1 < k2)
+ return (1);
+ else
+ return (0);
+}
+
+mod_hash_t *
+mod_hash_create_ptrhash(char *name, size_t nchains,
+ void (*val_dtor)(mod_hash_val_t), size_t key_elem_size)
+{
+ size_t rshift;
+
+ /*
+ * We want to hash on the bits in the middle of the address word
+ * Bits far to the right in the word have little significance, and
+ * are likely to all look the same (for example, an array of
+ * 256-byte structures will have the bottom 8 bits of address
+ * words the same). So we want to right-shift each address to
+ * ignore the bottom bits.
+ *
+ * The high bits, which are also unused, will get taken out when
+ * mod_hash takes hashkey % nchains.
+ */
+ rshift = highbit(key_elem_size);
+
+ return (mod_hash_create_extended(name, nchains, mod_hash_null_keydtor,
+ val_dtor, mod_hash_byptr, (void *)rshift, mod_hash_ptrkey_cmp,
+ KM_SLEEP));
+}
+
+void
+mod_hash_destroy_ptrhash(mod_hash_t *hash)
+{
+ ASSERT(hash);
+ mod_hash_destroy_hash(hash);
+}
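+
+/*
+ * Illustrative usage sketch (not compiled): a pointer-keyed hash for
+ * 256-byte objects. With key_elem_size of 256, highbit(256) is 9, so
+ * pointer keys are right-shifted 9 bits before hashing, discarding the
+ * low-order bits that rarely differ between such objects.
+ */
+#if 0
+ mod_hash_t *h;
+
+ h = mod_hash_create_ptrhash("obj hash", 1024, mod_hash_null_valdtor, 256);
+ /* insert/find/remove with object pointers as keys */
+ mod_hash_destroy_ptrhash(h);
+#endif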
+
+/*
+ * mod_hash_byid()
+ * mod_hash_idkey_cmp()
+ * Hash and key comparison routines for hashes with 32-bit unsigned keys.
+ *
+ * mod_hash_create_idhash()
+ * mod_hash_destroy_idhash()
+ * mod_hash_iddata_gen()
+ * Create a hash that uses numeric keys.
+ *
+ * The hash algorithm is documented in "Introduction to Algorithms"
+ * (Cormen, Leiserson, Rivest); when the hash table is created, it
+ * attempts to find the next largest prime above the number of hash
+ * slots. The hash index is then this number times the key modulo
+ * the hash size, or (key * prime) % nchains.
+ */
+uint_t
+mod_hash_byid(void *hash_data, mod_hash_key_t key)
+{
+ uint_t kval = (uint_t)(uintptr_t)hash_data;
+ return ((uint_t)(uintptr_t)key * (uint_t)kval);
+}
+
+int
+mod_hash_idkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2)
+{
+ return ((uint_t)(uintptr_t)key1 - (uint_t)(uintptr_t)key2);
+}
+
+/*
+ * Generate the next largest prime number greater than nchains; this value
+ * is intended to be later passed in to mod_hash_create_extended() as the
+ * hash_data.
+ */
+uint_t
+mod_hash_iddata_gen(size_t nchains)
+{
+ uint_t kval, i, prime;
+
+ /*
+ * Pick the first (odd) prime greater than nchains. Make sure kval is
+ * odd (so start with nchains +1 or +2 as appropriate).
+ */
+ kval = (nchains % 2 == 0) ? nchains + 1 : nchains + 2;
+
+ for (;;) {
+ prime = 1;
+ for (i = 3; i * i <= kval; i += 2) {
+ if (kval % i == 0)
+ prime = 0;
+ }
+ if (prime == 1)
+ break;
+ kval += 2;
+ }
+ return (kval);
+}
+
+mod_hash_t *
+mod_hash_create_idhash(char *name, size_t nchains,
+ void (*val_dtor)(mod_hash_val_t))
+{
+ uint_t kval = mod_hash_iddata_gen(nchains);
+
+ return (mod_hash_create_extended(name, nchains, mod_hash_null_keydtor,
+ val_dtor, mod_hash_byid, (void *)(uintptr_t)kval,
+ mod_hash_idkey_cmp, KM_SLEEP));
+}
+
+void
+mod_hash_destroy_idhash(mod_hash_t *hash)
+{
+ ASSERT(hash);
+ mod_hash_destroy_hash(hash);
+}
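+
+/*
+ * Illustrative usage sketch (not compiled): an id-keyed hash. For an
+ * nchains of 100, mod_hash_iddata_gen() returns 101 (the next odd
+ * prime); mod_hash_byid() multiplies each key by that constant and the
+ * framework then reduces the result to a chain index.
+ */
+#if 0
+ mod_hash_t *h;
+
+ h = mod_hash_create_idhash("id hash", 100, mod_hash_null_valdtor);
+ (void) mod_hash_insert(h, (mod_hash_key_t)(uintptr_t)7,
+ (mod_hash_val_t)obj); /* obj is a placeholder value */
+ mod_hash_destroy_idhash(h);
+#endif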
+
+void
+mod_hash_fini(void)
+{
+ mutex_destroy(&mh_head_lock);
+
+ if (mh_e_cache) {
+ kmem_cache_destroy(mh_e_cache);
+ mh_e_cache = NULL;
+ }
+}
+
+/*
+ * mod_hash_init()
+ * sets up globals, etc for mod_hash_*
+ */
+void
+mod_hash_init(void)
+{
+ ASSERT(mh_e_cache == NULL);
+ mh_e_cache = kmem_cache_create("mod_hash_entries",
+ sizeof (struct mod_hash_entry), 0, NULL, NULL, NULL, NULL,
+ NULL, 0);
+
+ mutex_init(&mh_head_lock, NULL, MUTEX_DEFAULT, NULL);
+}
+
+/*
+ * mod_hash_create_extended()
+ * The full-blown hash creation function.
+ *
+ * notes:
+ * nchains - how many hash slots to create. More hash slots will
+ * result in shorter hash chains, but will consume
+ * slightly more memory up front.
+ * sleep - should be KM_SLEEP or KM_NOSLEEP, to indicate whether
+ * to sleep for memory, or fail in low-memory conditions.
+ *
+ * Fails only if KM_NOSLEEP was specified, and no memory was available.
+ */
+mod_hash_t *
+mod_hash_create_extended(
+ char *hname, /* descriptive name for hash */
+ size_t nchains, /* number of hash slots */
+ void (*kdtor)(mod_hash_key_t), /* key destructor */
+ void (*vdtor)(mod_hash_val_t), /* value destructor */
+ uint_t (*hash_alg)(void *, mod_hash_key_t), /* hash algorithm */
+ void *hash_alg_data, /* pass-thru arg for hash_alg */
+ int (*keycmp)(mod_hash_key_t, mod_hash_key_t), /* key comparator */
+ int sleep) /* whether to sleep for mem */
+{
+ mod_hash_t *mod_hash;
+ ASSERT(hname && keycmp && hash_alg && vdtor && kdtor);
+
+ if ((mod_hash = kmem_zalloc(MH_SIZE(nchains), sleep)) == NULL)
+ return (NULL);
+
+ mod_hash->mh_name = kmem_alloc(strlen(hname) + 1, sleep);
+ if (mod_hash->mh_name == NULL) {
+ kmem_free(mod_hash, MH_SIZE(nchains));
+ return (NULL);
+ }
+ (void) strcpy(mod_hash->mh_name, hname);
+
+ rw_init(&mod_hash->mh_contents, NULL, RW_DEFAULT, NULL);
+ mod_hash->mh_sleep = sleep;
+ mod_hash->mh_nchains = nchains;
+ mod_hash->mh_kdtor = kdtor;
+ mod_hash->mh_vdtor = vdtor;
+ mod_hash->mh_hashalg = hash_alg;
+ mod_hash->mh_hashalg_data = hash_alg_data;
+ mod_hash->mh_keycmp = keycmp;
+
+ /*
+ * Link the hash up on the list of hashes
+ */
+ mutex_enter(&mh_head_lock);
+ mod_hash->mh_next = mh_head;
+ mh_head = mod_hash;
+ mutex_exit(&mh_head_lock);
+
+ return (mod_hash);
+}
+
+/*
+ * mod_hash_destroy_hash()
+ * destroy a hash table, destroying all of its stored keys and values
+ * as well.
+ */
+void
+mod_hash_destroy_hash(mod_hash_t *hash)
+{
+ mod_hash_t *mhp, *mhpp;
+
+ mutex_enter(&mh_head_lock);
+ /*
+ * Remove the hash from the hash list
+ */
+ if (hash == mh_head) { /* removing 1st list elem */
+ mh_head = mh_head->mh_next;
+ } else {
+ /*
+ * mhpp can start out NULL since we know the 1st elem isn't the
+ * droid we're looking for.
+ */
+ mhpp = NULL;
+ for (mhp = mh_head; mhp != NULL; mhp = mhp->mh_next) {
+ if (mhp == hash) {
+ mhpp->mh_next = mhp->mh_next;
+ break;
+ }
+ mhpp = mhp;
+ }
+ }
+ mutex_exit(&mh_head_lock);
+
+ /*
+ * Clean out keys and values.
+ */
+ mod_hash_clear(hash);
+
+ rw_destroy(&hash->mh_contents);
+ kmem_free(hash->mh_name, strlen(hash->mh_name) + 1);
+ kmem_free(hash, MH_SIZE(hash->mh_nchains));
+}
+
+/*
+ * i_mod_hash()
+ * Call the hashing algorithm for this hash table, with the given key.
+ */
+uint_t
+i_mod_hash(mod_hash_t *hash, mod_hash_key_t key)
+{
+ uint_t h;
+ /*
+ * Prevent div by 0 problems;
+ * Also a nice shortcut when using a hash as a list
+ */
+ if (hash->mh_nchains == 1)
+ return (0);
+
+ h = (hash->mh_hashalg)(hash->mh_hashalg_data, key);
+ return (h % (hash->mh_nchains - 1));
+}
+
+/*
+ * i_mod_hash_insert_nosync()
+ * mod_hash_insert()
+ * mod_hash_insert_reserve()
+ * insert 'val' into the hash table, using 'key' as its key. If 'key' is
+ * already a key in the hash, an error will be returned, and the key-val
+ * pair will not be inserted. i_mod_hash_insert_nosync() supports a simple
+ * handle abstraction, allowing hash entry allocation to be separated from
+ * the hash insertion. this abstraction allows simple use of the mod_hash
+ * structure in situations where mod_hash_insert() with a KM_SLEEP
+ * allocation policy would otherwise be unsafe.
+ */
+int
+i_mod_hash_insert_nosync(mod_hash_t *hash, mod_hash_key_t key,
+ mod_hash_val_t val, mod_hash_hndl_t handle)
+{
+ uint_t hashidx;
+ struct mod_hash_entry *entry;
+
+ ASSERT(hash);
+
+ /*
+ * If we've not been given reserved storage, allocate storage directly,
+ * using the hash's allocation policy.
+ */
+ if (handle == (mod_hash_hndl_t)0) {
+ entry = kmem_cache_alloc(mh_e_cache, hash->mh_sleep);
+ if (entry == NULL) {
+ hash->mh_stat.mhs_nomem++;
+ return (MH_ERR_NOMEM);
+ }
+ } else {
+ entry = (struct mod_hash_entry *)handle;
+ }
+
+ hashidx = i_mod_hash(hash, key);
+ entry->mhe_key = key;
+ entry->mhe_val = val;
+ entry->mhe_next = hash->mh_entries[hashidx];
+
+ hash->mh_entries[hashidx] = entry;
+ hash->mh_stat.mhs_nelems++;
+
+ return (0);
+}
+
+int
+mod_hash_insert(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t val)
+{
+ int res;
+ mod_hash_val_t v;
+
+ rw_enter(&hash->mh_contents, RW_WRITER);
+
+ /*
+ * Disallow duplicate keys in the hash
+ */
+ if (i_mod_hash_find_nosync(hash, key, &v) == 0) {
+ rw_exit(&hash->mh_contents);
+ hash->mh_stat.mhs_coll++;
+ return (MH_ERR_DUPLICATE);
+ }
+
+ res = i_mod_hash_insert_nosync(hash, key, val, (mod_hash_hndl_t)0);
+ rw_exit(&hash->mh_contents);
+
+ return (res);
+}
+
+int
+mod_hash_insert_reserve(mod_hash_t *hash, mod_hash_key_t key,
+ mod_hash_val_t val, mod_hash_hndl_t handle)
+{
+ int res;
+ mod_hash_val_t v;
+
+ rw_enter(&hash->mh_contents, RW_WRITER);
+
+ /*
+ * Disallow duplicate keys in the hash
+ */
+ if (i_mod_hash_find_nosync(hash, key, &v) == 0) {
+ rw_exit(&hash->mh_contents);
+ hash->mh_stat.mhs_coll++;
+ return (MH_ERR_DUPLICATE);
+ }
+ res = i_mod_hash_insert_nosync(hash, key, val, handle);
+ rw_exit(&hash->mh_contents);
+
+ return (res);
+}
+
+/*
+ * mod_hash_reserve()
+ * mod_hash_reserve_nosleep()
+ * mod_hash_cancel()
+ * Make or cancel a mod_hash_entry_t reservation. Reservations are used in
+ * mod_hash_insert_reserve() above.
+ */
+int
+mod_hash_reserve(mod_hash_t *hash, mod_hash_hndl_t *handlep)
+{
+ *handlep = kmem_cache_alloc(mh_e_cache, hash->mh_sleep);
+ if (*handlep == NULL) {
+ hash->mh_stat.mhs_nomem++;
+ return (MH_ERR_NOMEM);
+ }
+
+ return (0);
+}
+
+int
+mod_hash_reserve_nosleep(mod_hash_t *hash, mod_hash_hndl_t *handlep)
+{
+ *handlep = kmem_cache_alloc(mh_e_cache, KM_NOSLEEP);
+ if (*handlep == NULL) {
+ hash->mh_stat.mhs_nomem++;
+ return (MH_ERR_NOMEM);
+ }
+
+ return (0);
+}
+
+/*ARGSUSED*/
+void
+mod_hash_cancel(mod_hash_t *hash, mod_hash_hndl_t *handlep)
+{
+ kmem_cache_free(mh_e_cache, *handlep);
+ *handlep = (mod_hash_hndl_t)0;
+}
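+
+/*
+ * Illustrative usage sketch (not compiled): pre-allocating an entry with
+ * mod_hash_reserve() in a context that may sleep, then inserting it
+ * later where blocking on allocation is unacceptable. h/key/val are
+ * placeholders.
+ */
+#if 0
+ mod_hash_hndl_t hndl;
+
+ if (mod_hash_reserve(h, &hndl) != 0)
+ return (MH_ERR_NOMEM); /* placeholder error path */
+ /* ... later, in a context that must not sleep ... */
+ if (mod_hash_insert_reserve(h, key, val, hndl) != 0)
+ mod_hash_cancel(h, &hndl); /* duplicate key; release the entry */
+#endif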
+
+/*
+ * i_mod_hash_remove_nosync()
+ * mod_hash_remove()
+ * Remove an element from the hash table.
+ */
+int
+i_mod_hash_remove_nosync(mod_hash_t *hash, mod_hash_key_t key,
+ mod_hash_val_t *val)
+{
+ int hashidx;
+ struct mod_hash_entry *e, *ep;
+
+ hashidx = i_mod_hash(hash, key);
+ ep = NULL; /* e's parent */
+
+ for (e = hash->mh_entries[hashidx]; e != NULL; e = e->mhe_next) {
+ if (MH_KEYCMP(hash, e->mhe_key, key) == 0)
+ break;
+ ep = e;
+ }
+
+ if (e == NULL) { /* not found */
+ return (MH_ERR_NOTFOUND);
+ }
+
+ if (ep == NULL) /* special case 1st element in bucket */
+ hash->mh_entries[hashidx] = e->mhe_next;
+ else
+ ep->mhe_next = e->mhe_next;
+
+ /*
+ * Clean up resources used by the node's key.
+ */
+ MH_KEY_DESTROY(hash, e->mhe_key);
+
+ *val = e->mhe_val;
+ kmem_cache_free(mh_e_cache, e);
+ hash->mh_stat.mhs_nelems--;
+
+ return (0);
+}
+
+int
+mod_hash_remove(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val)
+{
+ int res;
+
+ rw_enter(&hash->mh_contents, RW_WRITER);
+ res = i_mod_hash_remove_nosync(hash, key, val);
+ rw_exit(&hash->mh_contents);
+
+ return (res);
+}
+
+/*
+ * mod_hash_replace()
+ * atomically remove an existing key-value pair from a hash, and replace
+ * the key and value with the ones supplied. The removed key and value
+ * (if any) are destroyed.
+ */
+int
+mod_hash_replace(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t val)
+{
+ int res;
+ mod_hash_val_t v;
+
+ rw_enter(&hash->mh_contents, RW_WRITER);
+
+ if (i_mod_hash_remove_nosync(hash, key, &v) == 0) {
+ /*
+ * mod_hash_remove() takes care of freeing up the key resources.
+ */
+ MH_VAL_DESTROY(hash, v);
+ }
+ res = i_mod_hash_insert_nosync(hash, key, val, (mod_hash_hndl_t)0);
+
+ rw_exit(&hash->mh_contents);
+
+ return (res);
+}
+
+/*
+ * mod_hash_destroy()
+ * Remove an element from the hash table matching 'key', and destroy it.
+ */
+int
+mod_hash_destroy(mod_hash_t *hash, mod_hash_key_t key)
+{
+ mod_hash_val_t val;
+ int rv;
+
+ rw_enter(&hash->mh_contents, RW_WRITER);
+
+ if ((rv = i_mod_hash_remove_nosync(hash, key, &val)) == 0) {
+ /*
+ * mod_hash_remove() takes care of freeing up the key resources.
+ */
+ MH_VAL_DESTROY(hash, val);
+ }
+
+ rw_exit(&hash->mh_contents);
+ return (rv);
+}
+
+/*
+ * i_mod_hash_find_nosync()
+ * mod_hash_find()
+ * Find a value in the hash table corresponding to the given key.
+ */
+int
+i_mod_hash_find_nosync(mod_hash_t *hash, mod_hash_key_t key,
+ mod_hash_val_t *val)
+{
+ uint_t hashidx;
+ struct mod_hash_entry *e;
+
+ hashidx = i_mod_hash(hash, key);
+
+ for (e = hash->mh_entries[hashidx]; e != NULL; e = e->mhe_next) {
+ if (MH_KEYCMP(hash, e->mhe_key, key) == 0) {
+ *val = e->mhe_val;
+ hash->mh_stat.mhs_hit++;
+ return (0);
+ }
+ }
+ hash->mh_stat.mhs_miss++;
+ return (MH_ERR_NOTFOUND);
+}
+
+int
+mod_hash_find(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val)
+{
+ int res;
+
+ rw_enter(&hash->mh_contents, RW_READER);
+ res = i_mod_hash_find_nosync(hash, key, val);
+ rw_exit(&hash->mh_contents);
+
+ return (res);
+}
+
+int
+mod_hash_find_cb(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val,
+ void (*find_cb)(mod_hash_key_t, mod_hash_val_t))
+{
+ int res;
+
+ rw_enter(&hash->mh_contents, RW_READER);
+ res = i_mod_hash_find_nosync(hash, key, val);
+ if (res == 0) {
+ find_cb(key, *val);
+ }
+ rw_exit(&hash->mh_contents);
+
+ return (res);
+}
+
+int
+mod_hash_find_cb_rval(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val,
+ int (*find_cb)(mod_hash_key_t, mod_hash_val_t), int *cb_rval)
+{
+ int res;
+
+ rw_enter(&hash->mh_contents, RW_READER);
+ res = i_mod_hash_find_nosync(hash, key, val);
+ if (res == 0) {
+ *cb_rval = find_cb(key, *val);
+ }
+ rw_exit(&hash->mh_contents);
+
+ return (res);
+}
+
+void
+i_mod_hash_walk_nosync(mod_hash_t *hash,
+ uint_t (*callback)(mod_hash_key_t, mod_hash_val_t *, void *), void *arg)
+{
+ struct mod_hash_entry *e;
+ uint_t hashidx;
+ int res = MH_WALK_CONTINUE;
+
+ for (hashidx = 0;
+ (hashidx < (hash->mh_nchains - 1)) && (res == MH_WALK_CONTINUE);
+ hashidx++) {
+ e = hash->mh_entries[hashidx];
+ while ((e != NULL) && (res == MH_WALK_CONTINUE)) {
+ res = callback(e->mhe_key, e->mhe_val, arg);
+ e = e->mhe_next;
+ }
+ }
+}
+
+/*
+ * mod_hash_walk()
+ * Walks all the elements in the hashtable and invokes the callback
+ * function with the key/value pair for each element. The hashtable
+ * is locked for readers so the callback function should not attempt
+ * to do any updates to the hashtable. The callback function should
+ * return MH_WALK_CONTINUE to continue walking the hashtable or
+ * MH_WALK_TERMINATE to abort the walk of the hashtable.
+ */
+void
+mod_hash_walk(mod_hash_t *hash,
+ uint_t (*callback)(mod_hash_key_t, mod_hash_val_t *, void *), void *arg)
+{
+ rw_enter(&hash->mh_contents, RW_READER);
+ i_mod_hash_walk_nosync(hash, callback, arg);
+ rw_exit(&hash->mh_contents);
+}
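+
+/*
+ * Illustrative usage sketch (not compiled): a walk callback that counts
+ * entries and terminates the walk early once a limit is reached.
+ */
+#if 0
+static uint_t
+count_cb(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
+{
+ uint_t *np = arg;
+
+ if (++(*np) >= 100)
+ return (MH_WALK_TERMINATE); /* stop the walk early */
+ return (MH_WALK_CONTINUE);
+}
+
+/* caller: uint_t n = 0; mod_hash_walk(h, count_cb, &n); */
+#endif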
+
+
+/*
+ * i_mod_hash_clear_nosync()
+ * mod_hash_clear()
+ * Clears the given hash table by calling the destructor of every hash
+ * element and freeing up all mod_hash_entry's.
+ */
+void
+i_mod_hash_clear_nosync(mod_hash_t *hash)
+{
+ int i;
+ struct mod_hash_entry *e, *old_e;
+
+ for (i = 0; i < hash->mh_nchains; i++) {
+ e = hash->mh_entries[i];
+ while (e != NULL) {
+ MH_KEY_DESTROY(hash, e->mhe_key);
+ MH_VAL_DESTROY(hash, e->mhe_val);
+ old_e = e;
+ e = e->mhe_next;
+ kmem_cache_free(mh_e_cache, old_e);
+ }
+ hash->mh_entries[i] = NULL;
+ }
+ hash->mh_stat.mhs_nelems = 0;
+}
+
+void
+mod_hash_clear(mod_hash_t *hash)
+{
+ ASSERT(hash);
+ rw_enter(&hash->mh_contents, RW_WRITER);
+ i_mod_hash_clear_nosync(hash);
+ rw_exit(&hash->mh_contents);
+}
diff --git a/zfs/module/icp/spi/kcf_spi.c b/zfs/module/icp/spi/kcf_spi.c
new file mode 100644
index 000000000000..c2c2b54bc8d8
--- /dev/null
+++ b/zfs/module/icp/spi/kcf_spi.c
@@ -0,0 +1,924 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/*
+ * This file is part of the core Kernel Cryptographic Framework.
+ * It implements the SPI functions exported to cryptographic
+ * providers.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/impl.h>
+#include <sys/crypto/sched_impl.h>
+#include <sys/crypto/spi.h>
+
+/*
+ * minalloc and maxalloc values to be used for taskq_create().
+ */
+int crypto_taskq_threads = CRYPTO_TASKQ_THREADS;
+int crypto_taskq_minalloc = CYRPTO_TASKQ_MIN;
+int crypto_taskq_maxalloc = CRYPTO_TASKQ_MAX;
+
+static void remove_provider(kcf_provider_desc_t *);
+static void process_logical_providers(crypto_provider_info_t *,
+ kcf_provider_desc_t *);
+static int init_prov_mechs(crypto_provider_info_t *, kcf_provider_desc_t *);
+static int kcf_prov_kstat_update(kstat_t *, int);
+static void delete_kstat(kcf_provider_desc_t *);
+
+static kcf_prov_stats_t kcf_stats_ks_data_template = {
+ { "kcf_ops_total", KSTAT_DATA_UINT64 },
+ { "kcf_ops_passed", KSTAT_DATA_UINT64 },
+ { "kcf_ops_failed", KSTAT_DATA_UINT64 },
+ { "kcf_ops_returned_busy", KSTAT_DATA_UINT64 }
+};
+
+#define KCF_SPI_COPY_OPS(src, dst, ops) if ((src)->ops != NULL) \
+ *((dst)->ops) = *((src)->ops);
+
+/*
+ * Copy an ops vector from src to dst. Used during provider registration
+ * to copy the ops vector from the provider info structure to the
+ * provider descriptor maintained by KCF.
+ * Copying the ops vector specified by the provider is needed since the
+ * framework does not require the provider info structure to be
+ * persistent.
+ */
+static void
+copy_ops_vector_v1(crypto_ops_t *src_ops, crypto_ops_t *dst_ops)
+{
+ KCF_SPI_COPY_OPS(src_ops, dst_ops, co_control_ops);
+ KCF_SPI_COPY_OPS(src_ops, dst_ops, co_digest_ops);
+ KCF_SPI_COPY_OPS(src_ops, dst_ops, co_cipher_ops);
+ KCF_SPI_COPY_OPS(src_ops, dst_ops, co_mac_ops);
+ KCF_SPI_COPY_OPS(src_ops, dst_ops, co_sign_ops);
+ KCF_SPI_COPY_OPS(src_ops, dst_ops, co_verify_ops);
+ KCF_SPI_COPY_OPS(src_ops, dst_ops, co_dual_ops);
+ KCF_SPI_COPY_OPS(src_ops, dst_ops, co_dual_cipher_mac_ops);
+ KCF_SPI_COPY_OPS(src_ops, dst_ops, co_random_ops);
+ KCF_SPI_COPY_OPS(src_ops, dst_ops, co_session_ops);
+ KCF_SPI_COPY_OPS(src_ops, dst_ops, co_object_ops);
+ KCF_SPI_COPY_OPS(src_ops, dst_ops, co_key_ops);
+ KCF_SPI_COPY_OPS(src_ops, dst_ops, co_provider_ops);
+ KCF_SPI_COPY_OPS(src_ops, dst_ops, co_ctx_ops);
+}
+
+static void
+copy_ops_vector_v2(crypto_ops_t *src_ops, crypto_ops_t *dst_ops)
+{
+ KCF_SPI_COPY_OPS(src_ops, dst_ops, co_mech_ops);
+}
+
+static void
+copy_ops_vector_v3(crypto_ops_t *src_ops, crypto_ops_t *dst_ops)
+{
+ KCF_SPI_COPY_OPS(src_ops, dst_ops, co_nostore_key_ops);
+}
+
+/*
+ * This routine is used to add cryptographic providers to the KCF framework.
+ * Providers pass a crypto_provider_info structure to crypto_register_provider()
+ * and get back a handle. The crypto_provider_info structure contains a
+ * list of mechanisms supported by the provider and an ops vector containing
+ * provider entry points. Hardware providers call this routine in their attach
+ * routines. Software providers call this routine in their _init() routine.
+ */
+int
+crypto_register_provider(crypto_provider_info_t *info,
+ crypto_kcf_provider_handle_t *handle)
+{
+ char ks_name[KSTAT_STRLEN];
+
+ kcf_provider_desc_t *prov_desc = NULL;
+ int ret = CRYPTO_ARGUMENTS_BAD;
+
+ if (info->pi_interface_version > CRYPTO_SPI_VERSION_3)
+ return (CRYPTO_VERSION_MISMATCH);
+
+ /*
+ * Check provider type, must be software, hardware, or logical.
+ */
+ if (info->pi_provider_type != CRYPTO_HW_PROVIDER &&
+ info->pi_provider_type != CRYPTO_SW_PROVIDER &&
+ info->pi_provider_type != CRYPTO_LOGICAL_PROVIDER)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ /*
+ * Allocate and initialize a new provider descriptor. We also
+ * hold it and release it when done.
+ */
+ prov_desc = kcf_alloc_provider_desc(info);
+ KCF_PROV_REFHOLD(prov_desc);
+
+ prov_desc->pd_prov_type = info->pi_provider_type;
+
+ /* provider-private handle, opaque to KCF */
+ prov_desc->pd_prov_handle = info->pi_provider_handle;
+
+ /* copy provider description string */
+ if (info->pi_provider_description != NULL) {
+ /*
+ * pi_provider_description is a string that can contain
+ * up to CRYPTO_PROVIDER_DESCR_MAX_LEN + 1 characters
+ * INCLUDING the terminating null character. A bcopy()
+ * is necessary here as pd_description should not have
+ * a null character. See comments in kcf_alloc_provider_desc()
+ * for details on pd_description field.
+ */
+ bcopy(info->pi_provider_description, prov_desc->pd_description,
+ MIN(strlen(info->pi_provider_description),
+ (size_t)CRYPTO_PROVIDER_DESCR_MAX_LEN));
+ }
+
+ if (info->pi_provider_type != CRYPTO_LOGICAL_PROVIDER) {
+ if (info->pi_ops_vector == NULL) {
+ goto bail;
+ }
+ copy_ops_vector_v1(info->pi_ops_vector,
+ prov_desc->pd_ops_vector);
+ if (info->pi_interface_version >= CRYPTO_SPI_VERSION_2) {
+ copy_ops_vector_v2(info->pi_ops_vector,
+ prov_desc->pd_ops_vector);
+ prov_desc->pd_flags = info->pi_flags;
+ }
+ if (info->pi_interface_version == CRYPTO_SPI_VERSION_3) {
+ copy_ops_vector_v3(info->pi_ops_vector,
+ prov_desc->pd_ops_vector);
+ }
+ }
+
+ /* object_ops and nostore_key_ops are mutually exclusive */
+ if (prov_desc->pd_ops_vector->co_object_ops &&
+ prov_desc->pd_ops_vector->co_nostore_key_ops) {
+ goto bail;
+ }
+
+ /* process the mechanisms supported by the provider */
+ if ((ret = init_prov_mechs(info, prov_desc)) != CRYPTO_SUCCESS)
+ goto bail;
+
+ /*
+ * Add provider to providers tables, also sets the descriptor
+ * pd_prov_id field.
+ */
+ if ((ret = kcf_prov_tab_add_provider(prov_desc)) != CRYPTO_SUCCESS) {
+ undo_register_provider(prov_desc, B_FALSE);
+ goto bail;
+ }
+
+ /*
+ * We create a taskq only for a hardware provider. The global
+ * software queue is used for software providers. We handle ordering
+ * of multi-part requests in the taskq routine. So, it is safe to
+ * have multiple threads for the taskq. We pass TASKQ_PREPOPULATE flag
+ * to keep some entries cached to improve performance.
+ */
+ if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER)
+ prov_desc->pd_sched_info.ks_taskq = taskq_create("kcf_taskq",
+ crypto_taskq_threads, minclsyspri,
+ crypto_taskq_minalloc, crypto_taskq_maxalloc,
+ TASKQ_PREPOPULATE);
+ else
+ prov_desc->pd_sched_info.ks_taskq = NULL;
+
+ /* no kernel session for logical providers */
+ if (prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER) {
+ /*
+ * Open a session for session-oriented providers. This session
+ * is used for all kernel consumers. This is fine as a provider
+ * is required to support multiple thread access to a session.
+ * We can do this only after the taskq has been created as we
+ * do a kcf_submit_request() to open the session.
+ */
+ if (KCF_PROV_SESSION_OPS(prov_desc) != NULL) {
+ kcf_req_params_t params;
+
+ KCF_WRAP_SESSION_OPS_PARAMS(&params,
+ KCF_OP_SESSION_OPEN, &prov_desc->pd_sid, 0,
+ CRYPTO_USER, NULL, 0, prov_desc);
+ ret = kcf_submit_request(prov_desc, NULL, NULL, &params,
+ B_FALSE);
+
+ if (ret != CRYPTO_SUCCESS) {
+ undo_register_provider(prov_desc, B_TRUE);
+ ret = CRYPTO_FAILED;
+ goto bail;
+ }
+ }
+ }
+
+ if (prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER) {
+ /*
+ * Create the kstat for this provider. There is a kstat
+ * installed for each successfully registered provider.
+ * This kstat is deleted when the provider unregisters.
+ */
+ if (prov_desc->pd_prov_type == CRYPTO_SW_PROVIDER) {
+ (void) snprintf(ks_name, KSTAT_STRLEN, "%s_%s",
+ "NONAME", "provider_stats");
+ } else {
+ (void) snprintf(ks_name, KSTAT_STRLEN, "%s_%d_%u_%s",
+ "NONAME", 0,
+ prov_desc->pd_prov_id, "provider_stats");
+ }
+
+ prov_desc->pd_kstat = kstat_create("kcf", 0, ks_name, "crypto",
+ KSTAT_TYPE_NAMED, sizeof (kcf_prov_stats_t) /
+ sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
+
+ if (prov_desc->pd_kstat != NULL) {
+ bcopy(&kcf_stats_ks_data_template,
+ &prov_desc->pd_ks_data,
+ sizeof (kcf_stats_ks_data_template));
+ prov_desc->pd_kstat->ks_data = &prov_desc->pd_ks_data;
+ KCF_PROV_REFHOLD(prov_desc);
+ KCF_PROV_IREFHOLD(prov_desc);
+ prov_desc->pd_kstat->ks_private = prov_desc;
+ prov_desc->pd_kstat->ks_update = kcf_prov_kstat_update;
+ kstat_install(prov_desc->pd_kstat);
+ }
+ }
+
+ if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER)
+ process_logical_providers(info, prov_desc);
+
+ mutex_enter(&prov_desc->pd_lock);
+ prov_desc->pd_state = KCF_PROV_READY;
+ mutex_exit(&prov_desc->pd_lock);
+ kcf_do_notify(prov_desc, B_TRUE);
+
+ *handle = prov_desc->pd_kcf_prov_handle;
+ ret = CRYPTO_SUCCESS;
+
+bail:
+ KCF_PROV_REFRELE(prov_desc);
+ return (ret);
+}
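+
+/*
+ * Illustrative usage sketch (not compiled): the shape of a software
+ * provider's _init()-time registration, as the skein provider earlier in
+ * this patch performs it. The ops vector and mechanism table are
+ * placeholders, and field names are assumed from the SPI headers.
+ */
+#if 0
+ crypto_provider_info_t info;
+ crypto_kcf_provider_handle_t handle;
+
+ bzero(&info, sizeof (info));
+ info.pi_interface_version = CRYPTO_SPI_VERSION_1;
+ info.pi_provider_type = CRYPTO_SW_PROVIDER;
+ info.pi_provider_description = "example software provider";
+ info.pi_ops_vector = &example_ops;
+ info.pi_mechanisms = example_mech_tab;
+ info.pi_mech_list_count = EXAMPLE_MECH_COUNT;
+
+ if (crypto_register_provider(&info, &handle) != CRYPTO_SUCCESS)
+ cmn_err(CE_WARN, "example: provider registration failed");
+#endif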
+
+/*
+ * This routine is used to notify the framework when a provider is being
+ * removed. Hardware providers call this routine in their detach routines.
+ * Software providers call this routine in their _fini() routine.
+ */
+int
+crypto_unregister_provider(crypto_kcf_provider_handle_t handle)
+{
+ uint_t mech_idx;
+ kcf_provider_desc_t *desc;
+ kcf_prov_state_t saved_state;
+
+ /* lookup provider descriptor */
+ if ((desc = kcf_prov_tab_lookup((crypto_provider_id_t)handle)) == NULL)
+ return (CRYPTO_UNKNOWN_PROVIDER);
+
+ mutex_enter(&desc->pd_lock);
+ /*
+ * Check if any other thread is disabling or removing
+ * this provider. We return if this is the case.
+ */
+ if (desc->pd_state >= KCF_PROV_DISABLED) {
+ mutex_exit(&desc->pd_lock);
+ /* Release reference held by kcf_prov_tab_lookup(). */
+ KCF_PROV_REFRELE(desc);
+ return (CRYPTO_BUSY);
+ }
+
+ saved_state = desc->pd_state;
+ desc->pd_state = KCF_PROV_REMOVED;
+
+ if (saved_state == KCF_PROV_BUSY) {
+ /*
+ * The per-provider taskq threads may be waiting. We
+ * signal them so that they can start failing requests.
+ */
+ cv_broadcast(&desc->pd_resume_cv);
+ }
+
+ if (desc->pd_prov_type == CRYPTO_SW_PROVIDER) {
+ /*
+ * Check if this provider is currently being used.
+ * pd_irefcnt is the number of holds from the internal
+ * structures. We add one to account for the above lookup.
+ */
+ if (desc->pd_refcnt > desc->pd_irefcnt + 1) {
+ desc->pd_state = saved_state;
+ mutex_exit(&desc->pd_lock);
+ /* Release reference held by kcf_prov_tab_lookup(). */
+ KCF_PROV_REFRELE(desc);
+ /*
+ * On seeing the busy return value, the administrator
+ * will presumably stop the clients, removing the
+ * holds; a retry will then succeed.
+ */
+ return (CRYPTO_BUSY);
+ }
+ }
+ mutex_exit(&desc->pd_lock);
+
+ if (desc->pd_prov_type != CRYPTO_SW_PROVIDER) {
+ remove_provider(desc);
+ }
+
+ if (desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER) {
+ /* remove the provider from the mechanisms tables */
+ for (mech_idx = 0; mech_idx < desc->pd_mech_list_count;
+ mech_idx++) {
+ kcf_remove_mech_provider(
+ desc->pd_mechanisms[mech_idx].cm_mech_name, desc);
+ }
+ }
+
+ /* remove provider from providers table */
+ if (kcf_prov_tab_rem_provider((crypto_provider_id_t)handle) !=
+ CRYPTO_SUCCESS) {
+ /* Release reference held by kcf_prov_tab_lookup(). */
+ KCF_PROV_REFRELE(desc);
+ return (CRYPTO_UNKNOWN_PROVIDER);
+ }
+
+ delete_kstat(desc);
+
+ if (desc->pd_prov_type == CRYPTO_SW_PROVIDER) {
+ /* Release reference held by kcf_prov_tab_lookup(). */
+ KCF_PROV_REFRELE(desc);
+
+ /*
+ * Wait till the existing requests complete.
+ */
+ mutex_enter(&desc->pd_lock);
+ while (desc->pd_state != KCF_PROV_FREED)
+ cv_wait(&desc->pd_remove_cv, &desc->pd_lock);
+ mutex_exit(&desc->pd_lock);
+ } else {
+ /*
+ * Wait until requests that have been sent to the provider
+ * complete.
+ */
+ mutex_enter(&desc->pd_lock);
+ while (desc->pd_irefcnt > 0)
+ cv_wait(&desc->pd_remove_cv, &desc->pd_lock);
+ mutex_exit(&desc->pd_lock);
+ }
+
+ kcf_do_notify(desc, B_FALSE);
+
+ if (desc->pd_prov_type == CRYPTO_SW_PROVIDER) {
+ /*
+ * This is the only place where kcf_free_provider_desc()
+ * is called directly. KCF_PROV_REFRELE() should free the
+ * structure in all other places.
+ */
+ ASSERT(desc->pd_state == KCF_PROV_FREED &&
+ desc->pd_refcnt == 0);
+ kcf_free_provider_desc(desc);
+ } else {
+ KCF_PROV_REFRELE(desc);
+ }
+
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * This routine is used to notify the framework that the state of
+ * a cryptographic provider has changed. Valid state codes are:
+ *
+ * CRYPTO_PROVIDER_READY
+ * The provider indicates that it can process more requests. A provider
+ * will notify with this event if it previously has notified us with a
+ * CRYPTO_PROVIDER_BUSY.
+ *
+ * CRYPTO_PROVIDER_BUSY
+ * The provider cannot take more requests.
+ *
+ * CRYPTO_PROVIDER_FAILED
+ * The provider encountered an internal error. The framework will not
+ * be sending any more requests to the provider. The provider may notify
+ * with a CRYPTO_PROVIDER_READY, if it is able to recover from the error.
+ *
+ * This routine can be called from user or interrupt context.
+ */
+void
+crypto_provider_notification(crypto_kcf_provider_handle_t handle, uint_t state)
+{
+ kcf_provider_desc_t *pd;
+
+ /* lookup the provider from the given handle */
+ if ((pd = kcf_prov_tab_lookup((crypto_provider_id_t)handle)) == NULL)
+ return;
+
+ mutex_enter(&pd->pd_lock);
+
+ if (pd->pd_state <= KCF_PROV_VERIFICATION_FAILED)
+ goto out;
+
+ if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ cmn_err(CE_WARN, "crypto_provider_notification: "
+ "logical provider (%x) ignored\n", handle);
+ goto out;
+ }
+ switch (state) {
+ case CRYPTO_PROVIDER_READY:
+ switch (pd->pd_state) {
+ case KCF_PROV_BUSY:
+ pd->pd_state = KCF_PROV_READY;
+ /*
+ * Signal the per-provider taskq threads that they
+ * can start submitting requests.
+ */
+ cv_broadcast(&pd->pd_resume_cv);
+ break;
+
+ case KCF_PROV_FAILED:
+ /*
+ * The provider recovered from the error. Let us
+ * use it now.
+ */
+ pd->pd_state = KCF_PROV_READY;
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case CRYPTO_PROVIDER_BUSY:
+ switch (pd->pd_state) {
+ case KCF_PROV_READY:
+ pd->pd_state = KCF_PROV_BUSY;
+ break;
+ default:
+ break;
+ }
+ break;
+
+ case CRYPTO_PROVIDER_FAILED:
+ /*
+ * We note the failure and return. The per-provider taskq
+ * threads check this flag and start failing the
+ * requests, if it is set. See process_req_hwp() for details.
+ */
+ switch (pd->pd_state) {
+ case KCF_PROV_READY:
+ pd->pd_state = KCF_PROV_FAILED;
+ break;
+
+ case KCF_PROV_BUSY:
+ pd->pd_state = KCF_PROV_FAILED;
+ /*
+ * The per-provider taskq threads may be waiting. We
+ * signal them so that they can start failing requests.
+ */
+ cv_broadcast(&pd->pd_resume_cv);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+out:
+ mutex_exit(&pd->pd_lock);
+ KCF_PROV_REFRELE(pd);
+}
+
+/*
+ * This routine is used to notify the framework the result of
+ * an asynchronous request handled by a provider. Valid error
+ * codes are the same as the CRYPTO_* errors defined in common.h.
+ *
+ * This routine can be called from user or interrupt context.
+ */
+void
+crypto_op_notification(crypto_req_handle_t handle, int error)
+{
+ kcf_call_type_t ctype;
+
+ if (handle == NULL)
+ return;
+
+ if ((ctype = GET_REQ_TYPE(handle)) == CRYPTO_SYNCH) {
+ kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)handle;
+
+ if (error != CRYPTO_SUCCESS)
+ sreq->sn_provider->pd_sched_info.ks_nfails++;
+ KCF_PROV_IREFRELE(sreq->sn_provider);
+ kcf_sop_done(sreq, error);
+ } else {
+ kcf_areq_node_t *areq = (kcf_areq_node_t *)handle;
+
+ ASSERT(ctype == CRYPTO_ASYNCH);
+ if (error != CRYPTO_SUCCESS)
+ areq->an_provider->pd_sched_info.ks_nfails++;
+ KCF_PROV_IREFRELE(areq->an_provider);
+ kcf_aop_done(areq, error);
+ }
+}
+
+/*
+ * This routine is used by software providers to determine
+ * whether to use KM_SLEEP or KM_NOSLEEP during memory allocation.
+ * Note that hardware providers can always use KM_SLEEP. So,
+ * they do not need to call this routine.
+ *
+ * This routine can be called from user or interrupt context.
+ */
+int
+crypto_kmflag(crypto_req_handle_t handle)
+{
+ return (REQHNDL2_KMFLAG(handle));
+}
+
+/*
+ * Process the mechanism info structures specified by the provider
+ * during registration. A NULL crypto_provider_info_t indicates
+ * an already initialized provider descriptor.
+ *
+ * Mechanisms are not added to the kernel's mechanism table if the
+ * provider is a logical provider.
+ *
+ * Returns CRYPTO_SUCCESS on success, CRYPTO_ARGUMENTS_BAD if one
+ * of the specified mechanisms was malformed, or CRYPTO_HOST_MEMORY
+ * if the table of mechanisms is full.
+ */
+static int
+init_prov_mechs(crypto_provider_info_t *info, kcf_provider_desc_t *desc)
+{
+ uint_t mech_idx;
+ uint_t cleanup_idx;
+ int err = CRYPTO_SUCCESS;
+ kcf_prov_mech_desc_t *pmd;
+ int desc_use_count = 0;
+ int mcount = desc->pd_mech_list_count;
+
+ if (desc->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) {
+ if (info != NULL) {
+ ASSERT(info->pi_mechanisms != NULL);
+ bcopy(info->pi_mechanisms, desc->pd_mechanisms,
+ sizeof (crypto_mech_info_t) * mcount);
+ }
+ return (CRYPTO_SUCCESS);
+ }
+
+ /*
+ * Copy the mechanism list from the provider info to the provider
+ * descriptor. desc->pd_mechanisms has an extra crypto_mech_info_t
+ * element if the provider has random_ops since we keep an internal
+ * mechanism, SUN_RANDOM, in this case.
+ */
+ if (info != NULL) {
+ if (info->pi_ops_vector->co_random_ops != NULL) {
+ crypto_mech_info_t *rand_mi;
+
+ /*
+ * Need the following check as it is possible to have
+ * a provider that implements just random_ops and has
+ * pi_mechanisms == NULL.
+ */
+ if (info->pi_mechanisms != NULL) {
+ bcopy(info->pi_mechanisms, desc->pd_mechanisms,
+ sizeof (crypto_mech_info_t) * (mcount - 1));
+ }
+ rand_mi = &desc->pd_mechanisms[mcount - 1];
+
+ bzero(rand_mi, sizeof (crypto_mech_info_t));
+ (void) strncpy(rand_mi->cm_mech_name, SUN_RANDOM,
+ CRYPTO_MAX_MECH_NAME);
+ rand_mi->cm_func_group_mask = CRYPTO_FG_RANDOM;
+ } else {
+ ASSERT(info->pi_mechanisms != NULL);
+ bcopy(info->pi_mechanisms, desc->pd_mechanisms,
+ sizeof (crypto_mech_info_t) * mcount);
+ }
+ }
+
+ /*
+ * For each mechanism supported by the provider, add the provider
+ * to the corresponding KCF mechanism mech_entry chain.
+ */
+ for (mech_idx = 0; mech_idx < desc->pd_mech_list_count; mech_idx++) {
+ crypto_mech_info_t *mi = &desc->pd_mechanisms[mech_idx];
+
+ if ((mi->cm_mech_flags & CRYPTO_KEYSIZE_UNIT_IN_BITS) &&
+ (mi->cm_mech_flags & CRYPTO_KEYSIZE_UNIT_IN_BYTES)) {
+ err = CRYPTO_ARGUMENTS_BAD;
+ break;
+ }
+
+ if (desc->pd_flags & CRYPTO_HASH_NO_UPDATE &&
+ mi->cm_func_group_mask & CRYPTO_FG_DIGEST) {
+ /*
+ * We ask the provider to specify the limit
+ * per hash mechanism. But, in practice, a
+ * hardware limitation means all hash mechanisms
+ * will have the same maximum size allowed for
+ * input data. So, we make it a per-provider
+ * limit to keep it simple.
+ */
+ if (mi->cm_max_input_length == 0) {
+ err = CRYPTO_ARGUMENTS_BAD;
+ break;
+ } else {
+ desc->pd_hash_limit = mi->cm_max_input_length;
+ }
+ }
+
+ if ((err = kcf_add_mech_provider(mech_idx, desc, &pmd)) !=
+ KCF_SUCCESS)
+ break;
+
+ if (pmd == NULL)
+ continue;
+
+ /* The provider will be used for this mechanism */
+ desc_use_count++;
+ }
+
+ /*
+ * Don't allow multiple software providers with disabled mechanisms
+ * to register. Subsequent enabling of mechanisms will result in
+ * an unsupported configuration, i.e. multiple software providers
+ * per mechanism.
+ */
+ if (desc_use_count == 0 && desc->pd_prov_type == CRYPTO_SW_PROVIDER)
+ return (CRYPTO_ARGUMENTS_BAD);
+
+ if (err == KCF_SUCCESS)
+ return (CRYPTO_SUCCESS);
+
+ /*
+ * An error occurred while adding a mechanism; clean up
+ * and bail.
+ */
+ for (cleanup_idx = 0; cleanup_idx < mech_idx; cleanup_idx++) {
+ kcf_remove_mech_provider(
+ desc->pd_mechanisms[cleanup_idx].cm_mech_name, desc);
+ }
+
+ if (err == KCF_MECH_TAB_FULL)
+ return (CRYPTO_HOST_MEMORY);
+
+ return (CRYPTO_ARGUMENTS_BAD);
+}
+
+/*
+ * Update routine for the kstat. Only privileged users are allowed to
+ * access this information, since it is sensitive: some cryptographic
+ * attacks (e.g. traffic analysis) can make use of it.
+ */
+static int
+kcf_prov_kstat_update(kstat_t *ksp, int rw)
+{
+ kcf_prov_stats_t *ks_data;
+ kcf_provider_desc_t *pd = (kcf_provider_desc_t *)ksp->ks_private;
+
+ if (rw == KSTAT_WRITE)
+ return (EACCES);
+
+ ks_data = ksp->ks_data;
+
+ ks_data->ps_ops_total.value.ui64 = pd->pd_sched_info.ks_ndispatches;
+ ks_data->ps_ops_failed.value.ui64 = pd->pd_sched_info.ks_nfails;
+ ks_data->ps_ops_busy_rval.value.ui64 = pd->pd_sched_info.ks_nbusy_rval;
+ ks_data->ps_ops_passed.value.ui64 =
+ pd->pd_sched_info.ks_ndispatches -
+ pd->pd_sched_info.ks_nfails -
+ pd->pd_sched_info.ks_nbusy_rval;
+
+ return (0);
+}
+
+
+/*
+ * Utility routine called from failure paths in crypto_register_provider()
+ * and from crypto_load_soft_disabled().
+ */
+void
+undo_register_provider(kcf_provider_desc_t *desc, boolean_t remove_prov)
+{
+ uint_t mech_idx;
+
+ /* remove the provider from the mechanisms tables */
+ for (mech_idx = 0; mech_idx < desc->pd_mech_list_count;
+ mech_idx++) {
+ kcf_remove_mech_provider(
+ desc->pd_mechanisms[mech_idx].cm_mech_name, desc);
+ }
+
+ /* remove provider from providers table */
+ if (remove_prov)
+ (void) kcf_prov_tab_rem_provider(desc->pd_prov_id);
+}
+
+/*
+ * Utility routine called from crypto_load_soft_disabled(). Callers
+ * should have done a prior undo_register_provider().
+ */
+void
+redo_register_provider(kcf_provider_desc_t *pd)
+{
+ /* process the mechanisms supported by the provider */
+ (void) init_prov_mechs(NULL, pd);
+
+ /*
+ * Hold provider in providers table. We should not call
+ * kcf_prov_tab_add_provider() here as the provider descriptor
+ * is still valid, which means it has an entry in the provider
+ * table.
+ */
+ KCF_PROV_REFHOLD(pd);
+ KCF_PROV_IREFHOLD(pd);
+}
+
+/*
+ * Add provider (p1) to another provider's array of providers (p2).
+ * Hardware and logical providers use this array to cross-reference
+ * each other.
+ */
+static void
+add_provider_to_array(kcf_provider_desc_t *p1, kcf_provider_desc_t *p2)
+{
+ kcf_provider_list_t *new;
+
+ new = kmem_alloc(sizeof (kcf_provider_list_t), KM_SLEEP);
+ mutex_enter(&p2->pd_lock);
+ new->pl_next = p2->pd_provider_list;
+ p2->pd_provider_list = new;
+ KCF_PROV_IREFHOLD(p1);
+ new->pl_provider = p1;
+ mutex_exit(&p2->pd_lock);
+}
+
+/*
+ * Remove provider (p1) from another provider's array of providers (p2).
+ * Hardware and logical providers use this array to cross-reference
+ * each other.
+ */
+static void
+remove_provider_from_array(kcf_provider_desc_t *p1, kcf_provider_desc_t *p2)
+{
+ kcf_provider_list_t *pl = NULL, **prev;
+
+ mutex_enter(&p2->pd_lock);
+ for (pl = p2->pd_provider_list, prev = &p2->pd_provider_list;
+ pl != NULL; prev = &pl->pl_next, pl = pl->pl_next) {
+ if (pl->pl_provider == p1) {
+ break;
+ }
+ }
+
+ if (pl == NULL) {
+ mutex_exit(&p2->pd_lock);
+ return;
+ }
+
+ /* detach and free kcf_provider_list structure */
+ KCF_PROV_IREFRELE(p1);
+ *prev = pl->pl_next;
+ kmem_free(pl, sizeof (*pl));
+ mutex_exit(&p2->pd_lock);
+}
+
+/*
+ * Convert an array of logical provider handles (crypto_provider_id)
+ * stored in a crypto_provider_info structure into an array of provider
+ * descriptors (kcf_provider_desc_t) attached to a logical provider.
+ */
+static void
+process_logical_providers(crypto_provider_info_t *info, kcf_provider_desc_t *hp)
+{
+ kcf_provider_desc_t *lp;
+ crypto_provider_id_t handle;
+ int count = info->pi_logical_provider_count;
+ int i;
+
+ /* add hardware provider to each logical provider */
+ for (i = 0; i < count; i++) {
+ handle = info->pi_logical_providers[i];
+ lp = kcf_prov_tab_lookup((crypto_provider_id_t)handle);
+ if (lp == NULL) {
+ continue;
+ }
+ add_provider_to_array(hp, lp);
+ hp->pd_flags |= KCF_LPROV_MEMBER;
+
+ /*
+ * A hardware provider has to have the provider descriptor of
+ * every logical provider it belongs to, so it can be removed
+ * from the logical provider if the hardware provider
+ * unregisters from the framework.
+ */
+ add_provider_to_array(lp, hp);
+ KCF_PROV_REFRELE(lp);
+ }
+}
+
+/*
+ * This routine removes a provider from all of the logical or
+ * hardware providers it belongs to, and frees the provider's
+ * array of pointers to providers.
+ */
+static void
+remove_provider(kcf_provider_desc_t *pp)
+{
+ kcf_provider_desc_t *p;
+ kcf_provider_list_t *e, *next;
+
+ mutex_enter(&pp->pd_lock);
+ for (e = pp->pd_provider_list; e != NULL; e = next) {
+ p = e->pl_provider;
+ remove_provider_from_array(pp, p);
+ if (p->pd_prov_type == CRYPTO_HW_PROVIDER &&
+ p->pd_provider_list == NULL)
+ p->pd_flags &= ~KCF_LPROV_MEMBER;
+ KCF_PROV_IREFRELE(p);
+ next = e->pl_next;
+ kmem_free(e, sizeof (*e));
+ }
+ pp->pd_provider_list = NULL;
+ mutex_exit(&pp->pd_lock);
+}
+
+/*
+ * Dispatch events as needed for a provider. is_added flag tells
+ * whether the provider is registering or unregistering.
+ */
+void
+kcf_do_notify(kcf_provider_desc_t *prov_desc, boolean_t is_added)
+{
+ int i;
+ crypto_notify_event_change_t ec;
+
+ ASSERT(prov_desc->pd_state > KCF_PROV_VERIFICATION_FAILED);
+
+ /*
+ * Inform interested clients of the mechanisms becoming
+ * available/unavailable. We skip this for logical providers
+ * as they do not affect mechanisms.
+ */
+ if (prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER) {
+ ec.ec_provider_type = prov_desc->pd_prov_type;
+ ec.ec_change = is_added ? CRYPTO_MECH_ADDED :
+ CRYPTO_MECH_REMOVED;
+ for (i = 0; i < prov_desc->pd_mech_list_count; i++) {
+ (void) strlcpy(ec.ec_mech_name,
+ prov_desc->pd_mechanisms[i].cm_mech_name,
+ CRYPTO_MAX_MECH_NAME);
+ kcf_walk_ntfylist(CRYPTO_EVENT_MECHS_CHANGED, &ec);
+ }
+
+ }
+
+ /*
+ * Inform interested clients about the new or departing provider.
+ * In the case of a logical provider, we send the event only for the
+ * logical provider itself and not for the underlying providers, which
+ * are identified by the KCF_LPROV_MEMBER bit.
+ */
+ if (prov_desc->pd_prov_type == CRYPTO_LOGICAL_PROVIDER ||
+ (prov_desc->pd_flags & KCF_LPROV_MEMBER) == 0) {
+ kcf_walk_ntfylist(is_added ? CRYPTO_EVENT_PROVIDER_REGISTERED :
+ CRYPTO_EVENT_PROVIDER_UNREGISTERED, prov_desc);
+ }
+}
+
+static void
+delete_kstat(kcf_provider_desc_t *desc)
+{
+ /* destroy the kstat created for this provider */
+ if (desc->pd_kstat != NULL) {
+ kcf_provider_desc_t *kspd = desc->pd_kstat->ks_private;
+
+ /* release reference held by desc->pd_kstat->ks_private */
+ ASSERT(desc == kspd);
+ kstat_delete(kspd->pd_kstat);
+ desc->pd_kstat = NULL;
+ KCF_PROV_REFRELE(kspd);
+ KCF_PROV_IREFRELE(kspd);
+ }
+}
diff --git a/zfs/module/nvpair/nvpair.c b/zfs/module/nvpair/nvpair.c
index 16a0fc3eb571..2e3820981abb 100644
--- a/zfs/module/nvpair/nvpair.c
+++ b/zfs/module/nvpair/nvpair.c
@@ -1260,6 +1260,8 @@ nvpair_type_is_array(nvpair_t *nvp)
static int
nvpair_value_common(nvpair_t *nvp, data_type_t type, uint_t *nelem, void *data)
{
+ int value_sz;
+
if (nvp == NULL || nvpair_type(nvp) != type)
return (EINVAL);
@@ -1289,8 +1291,9 @@ nvpair_value_common(nvpair_t *nvp, data_type_t type, uint_t *nelem, void *data)
#endif
if (data == NULL)
return (EINVAL);
- bcopy(NVP_VALUE(nvp), data,
- (size_t)i_get_value_size(type, NULL, 1));
+ if ((value_sz = i_get_value_size(type, NULL, 1)) < 0)
+ return (EINVAL);
+ bcopy(NVP_VALUE(nvp), data, (size_t)value_sz);
if (nelem != NULL)
*nelem = 1;
break;
@@ -2029,7 +2032,7 @@ typedef struct {
/*
* nvs operations are:
* - nvs_nvlist
- * encoding / decoding of a nvlist header (nvlist_t)
+ * encoding / decoding of an nvlist header (nvlist_t)
* calculates the size used for header and end detection
*
* - nvs_nvpair
@@ -2392,7 +2395,7 @@ nvlist_xpack(nvlist_t *nvl, char **bufp, size_t *buflen, int encoding,
* 1. The nvlist has fixed allocator properties.
* All other nvlist routines (like nvlist_add_*, ...) use
* these properties.
- * 2. When using nvlist_pack() the user can specify his own
+ * 2. When using nvlist_pack() the user can specify their own
* allocator properties (e.g. by using KM_NOSLEEP).
*
* We use the user specified properties (2). A clearer solution
diff --git a/zfs/module/nvpair/nvpair_alloc_fixed.c b/zfs/module/nvpair/nvpair_alloc_fixed.c
index 20081ba56c4d..e3ac12938def 100644
--- a/zfs/module/nvpair/nvpair_alloc_fixed.c
+++ b/zfs/module/nvpair/nvpair_alloc_fixed.c
@@ -42,7 +42,7 @@
* - it uses a pre-allocated buffer for memory allocations.
* - it does _not_ free memory in the pre-allocated buffer.
*
- * The reason for the selected implemention is simplicity.
+ * The reason for the selected implementation is simplicity.
* This allocator is designed for the usage in interrupt context when
* the caller may not wait for free memory.
*/
diff --git a/zfs/module/unicode/u8_textprep.c b/zfs/module/unicode/u8_textprep.c
index 26cc39f3bcd2..0330032fa0ef 100644
--- a/zfs/module/unicode/u8_textprep.c
+++ b/zfs/module/unicode/u8_textprep.c
@@ -567,7 +567,7 @@ do_case_conv(int uv, uchar_t *u8s, uchar_t *s, int sz, boolean_t is_it_toupper)
*/
static int
do_case_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1,
- size_t n2, boolean_t is_it_toupper, int *errnum)
+ size_t n2, boolean_t is_it_toupper, int *errnum)
{
int f;
int sz1;
@@ -745,7 +745,7 @@ combining_class(size_t uv, uchar_t *s, size_t sz)
*/
static size_t
do_decomp(size_t uv, uchar_t *u8s, uchar_t *s, int sz,
- boolean_t canonical_decomposition, u8_normalization_states_t *state)
+ boolean_t canonical_decomposition, u8_normalization_states_t *state)
{
uint16_t b1 = 0;
uint16_t b2 = 0;
@@ -842,7 +842,7 @@ do_decomp(size_t uv, uchar_t *u8s, uchar_t *s, int sz,
}
/*
- * At this point, this rountine does not know what it would get.
+ * At this point, this routine does not know what it would get.
* The caller should sort it out if the state isn't a Hangul one.
*/
*state = U8_STATE_START;
@@ -1057,10 +1057,10 @@ blocked(uchar_t *comb_class, size_t last)
*/
static size_t
do_composition(size_t uv, uchar_t *s, uchar_t *comb_class, uchar_t *start,
- uchar_t *disp, size_t last, uchar_t **os, uchar_t *oslast)
+ uchar_t *disp, size_t last, uchar_t **os, uchar_t *oslast)
{
uchar_t t[U8_STREAM_SAFE_TEXT_MAX + 1];
- uchar_t tc[U8_MB_CUR_MAX];
+ uchar_t tc[U8_MB_CUR_MAX] = { '\0' };
uint8_t saved_marks[U8_MAX_CHARS_A_SEQ];
size_t saved_marks_count;
uchar_t *p;
@@ -1379,12 +1379,10 @@ do_composition(size_t uv, uchar_t *s, uchar_t *comb_class, uchar_t *start,
*/
static size_t
collect_a_seq(size_t uv, uchar_t *u8s, uchar_t **source, uchar_t *slast,
- boolean_t is_it_toupper,
- boolean_t is_it_tolower,
- boolean_t canonical_decomposition,
- boolean_t compatibility_decomposition,
- boolean_t canonical_composition,
- int *errnum, u8_normalization_states_t *state)
+ boolean_t is_it_toupper, boolean_t is_it_tolower,
+ boolean_t canonical_decomposition, boolean_t compatibility_decomposition,
+ boolean_t canonical_composition,
+ int *errnum, u8_normalization_states_t *state)
{
uchar_t *s;
int sz;
@@ -1396,7 +1394,7 @@ collect_a_seq(size_t uv, uchar_t *u8s, uchar_t **source, uchar_t *slast,
uchar_t comb_class[U8_MAX_CHARS_A_SEQ];
uchar_t disp[U8_MAX_CHARS_A_SEQ];
uchar_t start[U8_MAX_CHARS_A_SEQ];
- uchar_t u8t[U8_MB_CUR_MAX];
+ uchar_t u8t[U8_MB_CUR_MAX] = { '\0' };
uchar_t uts[U8_STREAM_SAFE_TEXT_MAX + 1];
uchar_t tc;
size_t last;
@@ -1727,7 +1725,7 @@ collect_a_seq(size_t uv, uchar_t *u8s, uchar_t **source, uchar_t *slast,
*/
static int
do_norm_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1, size_t n2,
- int flag, int *errnum)
+ int flag, int *errnum)
{
int result;
size_t sz1;
@@ -1843,7 +1841,7 @@ do_norm_compare(size_t uv, uchar_t *s1, uchar_t *s2, size_t n1, size_t n2,
*/
int
u8_strcmp(const char *s1, const char *s2, size_t n, int flag, size_t uv,
- int *errnum)
+ int *errnum)
{
int f;
size_t n1;
@@ -1913,7 +1911,7 @@ u8_strcmp(const char *s1, const char *s2, size_t n, int flag, size_t uv,
size_t
u8_textprep_str(char *inarray, size_t *inlen, char *outarray, size_t *outlen,
- int flag, size_t unicode_version, int *errnum)
+ int flag, size_t unicode_version, int *errnum)
{
int f;
int sz;
@@ -2134,7 +2132,8 @@ u8_textprep_str(char *inarray, size_t *inlen, char *outarray, size_t *outlen,
#if defined(_KERNEL) && defined(HAVE_SPL)
static int __init
-unicode_init(void) {
+unicode_init(void)
+{
return (0);
}
diff --git a/zfs/module/zcommon/Makefile.in b/zfs/module/zcommon/Makefile.in
index 67e474ee089d..74bb4378eabb 100644
--- a/zfs/module/zcommon/Makefile.in
+++ b/zfs/module/zcommon/Makefile.in
@@ -15,3 +15,10 @@ $(MODULE)-objs += zfs_comutil.o
$(MODULE)-objs += zfs_fletcher.o
$(MODULE)-objs += zfs_uio.o
$(MODULE)-objs += zpool_prop.o
+$(MODULE)-objs += zfs_fletcher_superscalar.o
+$(MODULE)-objs += zfs_fletcher_superscalar4.o
+
+$(MODULE)-$(CONFIG_X86) += zfs_fletcher_intel.o
+$(MODULE)-$(CONFIG_X86) += zfs_fletcher_sse.o
+$(MODULE)-$(CONFIG_X86) += zfs_fletcher_avx512.o
+$(MODULE)-$(CONFIG_ARM64) += zfs_fletcher_aarch64_neon.o
diff --git a/zfs/module/zcommon/zfs_comutil.c b/zfs/module/zcommon/zfs_comutil.c
index 6d0314fa78d0..52cb7e365559 100644
--- a/zfs/module/zcommon/zfs_comutil.c
+++ b/zfs/module/zcommon/zfs_comutil.c
@@ -40,6 +40,7 @@
#include <sys/int_limits.h>
#include <sys/nvpair.h>
#include "zfs_comutil.h"
+#include <sys/zfs_ratelimit.h>
/*
* Are there allocatable vdevs?
@@ -206,10 +207,85 @@ const char *zfs_history_event_names[ZFS_NUM_LEGACY_HISTORY_EVENTS] = {
"pool split",
};
+/*
+ * Initialize rate limit struct
+ *
+ * rl: zfs_ratelimit_t struct
+ * burst: Number to allow in an interval before rate limiting
+ * interval: Interval time in seconds
+ */
+void
+zfs_ratelimit_init(zfs_ratelimit_t *rl, unsigned int burst,
+ unsigned int interval)
+{
+ rl->count = 0;
+ rl->start = 0;
+ rl->interval = interval;
+ rl->burst = burst;
+ mutex_init(&rl->lock, NULL, MUTEX_DEFAULT, NULL);
+}
+
+/*
+ * Finalize rate limit struct
+ *
+ * rl: zfs_ratelimit_t struct
+ */
+void
+zfs_ratelimit_fini(zfs_ratelimit_t *rl)
+{
+ mutex_destroy(&rl->lock);
+}
+
+/*
+ * Re-implementation of the kernel's __ratelimit() function
+ *
+ * We had to write our own rate limiter because the kernel's __ratelimit()
+ * function annoyingly prints to the kernel log how many times it rate
+ * limited (and there's no way to turn that off):
+ *
+ * __ratelimit: 59 callbacks suppressed
+ *
+ * If the kernel ever allows us to disable these prints, we should go back to
+ * using __ratelimit() instead.
+ *
+ * Return values are the same as __ratelimit():
+ *
+ * 0: if we are rate limiting
+ * 1: if we are not rate limiting
+ */
+int
+zfs_ratelimit(zfs_ratelimit_t *rl)
+{
+ hrtime_t now;
+ hrtime_t elapsed;
+ int rc = 1;
+
+ mutex_enter(&rl->lock);
+
+ now = gethrtime();
+ elapsed = now - rl->start;
+
+ rl->count++;
+ if (NSEC2SEC(elapsed) >= rl->interval) {
+ rl->start = now;
+ rl->count = 0;
+ } else {
+ if (rl->count >= rl->burst) {
+ rc = 0; /* We're ratelimiting */
+ }
+ }
+ mutex_exit(&rl->lock);
+
+ return (rc);
+}
+
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(zfs_allocatable_devs);
EXPORT_SYMBOL(zpool_get_rewind_policy);
EXPORT_SYMBOL(zfs_zpl_version_map);
EXPORT_SYMBOL(zfs_spa_version_map);
EXPORT_SYMBOL(zfs_history_event_names);
+EXPORT_SYMBOL(zfs_ratelimit_init);
+EXPORT_SYMBOL(zfs_ratelimit_fini);
+EXPORT_SYMBOL(zfs_ratelimit);
#endif
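A minimal usage sketch for the rate limiter added above (the names here are
hypothetical):

    static zfs_ratelimit_t sample_rl;

    /* allow up to 20 events per 60-second interval, then suppress */
    zfs_ratelimit_init(&sample_rl, 20, 60);

    if (zfs_ratelimit(&sample_rl)) {
        /* returned 1: not rate limited, post the event */
    }

    zfs_ratelimit_fini(&sample_rl);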
diff --git a/zfs/module/zcommon/zfs_deleg.c b/zfs/module/zcommon/zfs_deleg.c
index f6e41da9d7ea..ce659d7f5cc8 100644
--- a/zfs/module/zcommon/zfs_deleg.c
+++ b/zfs/module/zcommon/zfs_deleg.c
@@ -22,6 +22,7 @@
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2010 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright 2016 Igor Kozhukhov <ikozhukhov at gmail.com>
*/
#include <sys/zfs_context.h>
@@ -62,6 +63,10 @@ zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = {
{ZFS_DELEG_PERM_GROUPQUOTA},
{ZFS_DELEG_PERM_USERUSED},
{ZFS_DELEG_PERM_GROUPUSED},
+ {ZFS_DELEG_PERM_USEROBJQUOTA},
+ {ZFS_DELEG_PERM_GROUPOBJQUOTA},
+ {ZFS_DELEG_PERM_USEROBJUSED},
+ {ZFS_DELEG_PERM_GROUPOBJUSED},
{ZFS_DELEG_PERM_HOLD},
{ZFS_DELEG_PERM_RELEASE},
{NULL}
@@ -177,8 +182,9 @@ zfs_deleg_verify_nvlist(nvlist_t *nvp)
nvpair_name(perm_name));
if (error)
return (-1);
- } while ((perm_name = nvlist_next_nvpair(perms, perm_name)));
- } while ((who = nvlist_next_nvpair(nvp, who)));
+ } while ((perm_name = nvlist_next_nvpair(perms, perm_name))
+ != NULL);
+ } while ((who = nvlist_next_nvpair(nvp, who)) != NULL);
return (0);
}
diff --git a/zfs/module/zcommon/zfs_fletcher.c b/zfs/module/zcommon/zfs_fletcher.c
index edd0cbe6c611..9cdf36d44077 100644
--- a/zfs/module/zcommon/zfs_fletcher.c
+++ b/zfs/module/zcommon/zfs_fletcher.c
@@ -21,6 +21,14 @@
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ */
+
+/*
+ * Copyright (c) 2016 by Delphix. All rights reserved.
*/
/*
@@ -128,17 +136,114 @@
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <sys/byteorder.h>
-#include <sys/zio.h>
#include <sys/spa.h>
+#include <sys/zio_checksum.h>
+#include <sys/zfs_context.h>
+#include <zfs_fletcher.h>
+
+#define FLETCHER_MIN_SIMD_SIZE 64
+
+static void fletcher_4_scalar_init(fletcher_4_ctx_t *ctx);
+static void fletcher_4_scalar_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp);
+static void fletcher_4_scalar_native(fletcher_4_ctx_t *ctx,
+ const void *buf, uint64_t size);
+static void fletcher_4_scalar_byteswap(fletcher_4_ctx_t *ctx,
+ const void *buf, uint64_t size);
+static boolean_t fletcher_4_scalar_valid(void);
+
+static const fletcher_4_ops_t fletcher_4_scalar_ops = {
+ .init_native = fletcher_4_scalar_init,
+ .fini_native = fletcher_4_scalar_fini,
+ .compute_native = fletcher_4_scalar_native,
+ .init_byteswap = fletcher_4_scalar_init,
+ .fini_byteswap = fletcher_4_scalar_fini,
+ .compute_byteswap = fletcher_4_scalar_byteswap,
+ .valid = fletcher_4_scalar_valid,
+ .name = "scalar"
+};
+
+static fletcher_4_ops_t fletcher_4_fastest_impl = {
+ .name = "fastest",
+ .valid = fletcher_4_scalar_valid
+};
+
+static const fletcher_4_ops_t *fletcher_4_impls[] = {
+ &fletcher_4_scalar_ops,
+ &fletcher_4_superscalar_ops,
+ &fletcher_4_superscalar4_ops,
+#if defined(HAVE_SSE2)
+ &fletcher_4_sse2_ops,
+#endif
+#if defined(HAVE_SSE2) && defined(HAVE_SSSE3)
+ &fletcher_4_ssse3_ops,
+#endif
+#if defined(HAVE_AVX) && defined(HAVE_AVX2)
+ &fletcher_4_avx2_ops,
+#endif
+#if defined(__x86_64) && defined(HAVE_AVX512F)
+ &fletcher_4_avx512f_ops,
+#endif
+#if defined(__aarch64__)
+ &fletcher_4_aarch64_neon_ops,
+#endif
+};
+/* Holds all supported implementations */
+static uint32_t fletcher_4_supp_impls_cnt = 0;
+static fletcher_4_ops_t *fletcher_4_supp_impls[ARRAY_SIZE(fletcher_4_impls)];
+
+/* Select fletcher4 implementation */
+#define IMPL_FASTEST (UINT32_MAX)
+#define IMPL_CYCLE (UINT32_MAX - 1)
+#define IMPL_SCALAR (0)
+
+static uint32_t fletcher_4_impl_chosen = IMPL_FASTEST;
+
+#define IMPL_READ(i) (*(volatile uint32_t *) &(i))
+
+static struct fletcher_4_impl_selector {
+ const char *fis_name;
+ uint32_t fis_sel;
+} fletcher_4_impl_selectors[] = {
+#if !defined(_KERNEL)
+ { "cycle", IMPL_CYCLE },
+#endif
+ { "fastest", IMPL_FASTEST },
+ { "scalar", IMPL_SCALAR }
+};
+
+static kstat_t *fletcher_4_kstat;
+
+static struct fletcher_4_kstat {
+ uint64_t native;
+ uint64_t byteswap;
+} fletcher_4_stat_data[ARRAY_SIZE(fletcher_4_impls) + 1];
+
+/* Set once the implementation benchmark has completed */
+static boolean_t fletcher_4_initialized = B_FALSE;
+
+/*ARGSUSED*/
void
-fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
+fletcher_init(zio_cksum_t *zcp)
{
+ ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
+}
+
+int
+fletcher_2_incremental_native(void *buf, size_t size, void *data)
+{
+ zio_cksum_t *zcp = data;
+
const uint64_t *ip = buf;
const uint64_t *ipend = ip + (size / sizeof (uint64_t));
uint64_t a0, b0, a1, b1;
- for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) {
+ a0 = zcp->zc_word[0];
+ a1 = zcp->zc_word[1];
+ b0 = zcp->zc_word[2];
+ b1 = zcp->zc_word[3];
+
+ for (; ip < ipend; ip += 2) {
a0 += ip[0];
a1 += ip[1];
b0 += a0;
@@ -146,16 +251,33 @@ fletcher_2_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
}
ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1);
+ return (0);
}
+/*ARGSUSED*/
void
-fletcher_2_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
+fletcher_2_native(const void *buf, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp)
{
+ fletcher_init(zcp);
+ (void) fletcher_2_incremental_native((void *) buf, size, zcp);
+}
+
+int
+fletcher_2_incremental_byteswap(void *buf, size_t size, void *data)
+{
+ zio_cksum_t *zcp = data;
+
const uint64_t *ip = buf;
const uint64_t *ipend = ip + (size / sizeof (uint64_t));
uint64_t a0, b0, a1, b1;
- for (a0 = b0 = a1 = b1 = 0; ip < ipend; ip += 2) {
+ a0 = zcp->zc_word[0];
+ a1 = zcp->zc_word[1];
+ b0 = zcp->zc_word[2];
+ b1 = zcp->zc_word[3];
+
+ for (; ip < ipend; ip += 2) {
a0 += BSWAP_64(ip[0]);
a1 += BSWAP_64(ip[1]);
b0 += a0;
@@ -163,93 +285,631 @@ fletcher_2_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
}
ZIO_SET_CHECKSUM(zcp, a0, a1, b0, b1);
+ return (0);
}
+/*ARGSUSED*/
void
-fletcher_4_native(const void *buf, uint64_t size, zio_cksum_t *zcp)
+fletcher_2_byteswap(const void *buf, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp)
+{
+ fletcher_init(zcp);
+ (void) fletcher_2_incremental_byteswap((void *) buf, size, zcp);
+}
+
+static void
+fletcher_4_scalar_init(fletcher_4_ctx_t *ctx)
+{
+ ZIO_SET_CHECKSUM(&ctx->scalar, 0, 0, 0, 0);
+}
+
+static void
+fletcher_4_scalar_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
+{
+ memcpy(zcp, &ctx->scalar, sizeof (zio_cksum_t));
+}
+
+static void
+fletcher_4_scalar_native(fletcher_4_ctx_t *ctx, const void *buf,
+ uint64_t size)
{
const uint32_t *ip = buf;
const uint32_t *ipend = ip + (size / sizeof (uint32_t));
uint64_t a, b, c, d;
- for (a = b = c = d = 0; ip < ipend; ip++) {
+ a = ctx->scalar.zc_word[0];
+ b = ctx->scalar.zc_word[1];
+ c = ctx->scalar.zc_word[2];
+ d = ctx->scalar.zc_word[3];
+
+ for (; ip < ipend; ip++) {
a += ip[0];
b += a;
c += b;
d += c;
}
- ZIO_SET_CHECKSUM(zcp, a, b, c, d);
+ ZIO_SET_CHECKSUM(&ctx->scalar, a, b, c, d);
}
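Equivalently, with x_1, ..., x_n denoting the 32-bit input words, the four
accumulators computed by the loop above have the closed forms (all sums taken
modulo 2^64):

    A = \sum_{i=1}^{n} x_i
    B = \sum_{i=1}^{n} (n - i + 1) x_i
    C = \sum_{i=1}^{n} \binom{n - i + 2}{2} x_i
    D = \sum_{i=1}^{n} \binom{n - i + 3}{3} x_i

The incremental combine and the SIMD lane recombinations below all follow from
these closed forms.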
-void
-fletcher_4_byteswap(const void *buf, uint64_t size, zio_cksum_t *zcp)
+static void
+fletcher_4_scalar_byteswap(fletcher_4_ctx_t *ctx, const void *buf,
+ uint64_t size)
{
const uint32_t *ip = buf;
const uint32_t *ipend = ip + (size / sizeof (uint32_t));
uint64_t a, b, c, d;
- for (a = b = c = d = 0; ip < ipend; ip++) {
+ a = ctx->scalar.zc_word[0];
+ b = ctx->scalar.zc_word[1];
+ c = ctx->scalar.zc_word[2];
+ d = ctx->scalar.zc_word[3];
+
+ for (; ip < ipend; ip++) {
a += BSWAP_32(ip[0]);
b += a;
c += b;
d += c;
}
- ZIO_SET_CHECKSUM(zcp, a, b, c, d);
+ ZIO_SET_CHECKSUM(&ctx->scalar, a, b, c, d);
+}
+
+static boolean_t
+fletcher_4_scalar_valid(void)
+{
+ return (B_TRUE);
}
+int
+fletcher_4_impl_set(const char *val)
+{
+ int err = -EINVAL;
+ uint32_t impl = IMPL_READ(fletcher_4_impl_chosen);
+ size_t i, val_len;
+
+ val_len = strlen(val);
+ while ((val_len > 0) && !!isspace(val[val_len-1])) /* trim '\n' */
+ val_len--;
+
+ /* check mandatory implementations */
+ for (i = 0; i < ARRAY_SIZE(fletcher_4_impl_selectors); i++) {
+ const char *name = fletcher_4_impl_selectors[i].fis_name;
+
+ if (val_len == strlen(name) &&
+ strncmp(val, name, val_len) == 0) {
+ impl = fletcher_4_impl_selectors[i].fis_sel;
+ err = 0;
+ break;
+ }
+ }
+
+ if (err != 0 && fletcher_4_initialized) {
+ /* check all supported implementations */
+ for (i = 0; i < fletcher_4_supp_impls_cnt; i++) {
+ const char *name = fletcher_4_supp_impls[i]->name;
+
+ if (val_len == strlen(name) &&
+ strncmp(val, name, val_len) == 0) {
+ impl = i;
+ err = 0;
+ break;
+ }
+ }
+ }
+
+ if (err == 0) {
+ atomic_swap_32(&fletcher_4_impl_chosen, impl);
+ membar_producer();
+ }
+
+ return (err);
+}
+
+static inline const fletcher_4_ops_t *
+fletcher_4_impl_get(void)
+{
+ fletcher_4_ops_t *ops = NULL;
+ const uint32_t impl = IMPL_READ(fletcher_4_impl_chosen);
+
+ switch (impl) {
+ case IMPL_FASTEST:
+ ASSERT(fletcher_4_initialized);
+ ops = &fletcher_4_fastest_impl;
+ break;
+#if !defined(_KERNEL)
+ case IMPL_CYCLE: {
+ ASSERT(fletcher_4_initialized);
+ ASSERT3U(fletcher_4_supp_impls_cnt, >, 0);
+
+ static uint32_t cycle_count = 0;
+ uint32_t idx = (++cycle_count) % fletcher_4_supp_impls_cnt;
+ ops = fletcher_4_supp_impls[idx];
+ }
+ break;
+#endif
+ default:
+ ASSERT3U(fletcher_4_supp_impls_cnt, >, 0);
+ ASSERT3U(impl, <, fletcher_4_supp_impls_cnt);
+
+ ops = fletcher_4_supp_impls[impl];
+ break;
+ }
+
+ ASSERT3P(ops, !=, NULL);
+
+ return (ops);
+}
+
+static inline void
+fletcher_4_native_impl(const void *buf, uint64_t size, zio_cksum_t *zcp)
+{
+ fletcher_4_ctx_t ctx;
+ const fletcher_4_ops_t *ops = fletcher_4_impl_get();
+
+ ops->init_native(&ctx);
+ ops->compute_native(&ctx, buf, size);
+ ops->fini_native(&ctx, zcp);
+}
+
+/*ARGSUSED*/
void
-fletcher_4_incremental_native(const void *buf, uint64_t size,
- zio_cksum_t *zcp)
+fletcher_4_native(const void *buf, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp)
{
- const uint32_t *ip = buf;
- const uint32_t *ipend = ip + (size / sizeof (uint32_t));
- uint64_t a, b, c, d;
+ const uint64_t p2size = P2ALIGN(size, FLETCHER_MIN_SIMD_SIZE);
- a = zcp->zc_word[0];
- b = zcp->zc_word[1];
- c = zcp->zc_word[2];
- d = zcp->zc_word[3];
+ ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t)));
- for (; ip < ipend; ip++) {
- a += ip[0];
- b += a;
- c += b;
- d += c;
+ if (size == 0 || p2size == 0) {
+ ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
+
+ if (size > 0)
+ fletcher_4_scalar_native((fletcher_4_ctx_t *)zcp,
+ buf, size);
+ } else {
+ fletcher_4_native_impl(buf, p2size, zcp);
+
+ if (p2size < size)
+ fletcher_4_scalar_native((fletcher_4_ctx_t *)zcp,
+ (char *)buf + p2size, size - p2size);
}
+}
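For example, a 200-byte buffer (a multiple of sizeof (uint32_t), as the ASSERT
requires) gives p2size = P2ALIGN(200, 64) = 192: the selected SIMD
implementation checksums the first 192 bytes and the scalar code folds in the
trailing 8.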
+
+void
+fletcher_4_native_varsize(const void *buf, uint64_t size, zio_cksum_t *zcp)
+{
+ ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
+ fletcher_4_scalar_native((fletcher_4_ctx_t *)zcp, buf, size);
+}
- ZIO_SET_CHECKSUM(zcp, a, b, c, d);
+static inline void
+fletcher_4_byteswap_impl(const void *buf, uint64_t size, zio_cksum_t *zcp)
+{
+ fletcher_4_ctx_t ctx;
+ const fletcher_4_ops_t *ops = fletcher_4_impl_get();
+
+ ops->init_byteswap(&ctx);
+ ops->compute_byteswap(&ctx, buf, size);
+ ops->fini_byteswap(&ctx, zcp);
}
+/*ARGSUSED*/
void
-fletcher_4_incremental_byteswap(const void *buf, uint64_t size,
+fletcher_4_byteswap(const void *buf, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp)
+{
+ const uint64_t p2size = P2ALIGN(size, FLETCHER_MIN_SIMD_SIZE);
+
+ ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t)));
+
+ if (size == 0 || p2size == 0) {
+ ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
+
+ if (size > 0)
+ fletcher_4_scalar_byteswap((fletcher_4_ctx_t *)zcp,
+ buf, size);
+ } else {
+ fletcher_4_byteswap_impl(buf, p2size, zcp);
+
+ if (p2size < size)
+ fletcher_4_scalar_byteswap((fletcher_4_ctx_t *)zcp,
+ (char *)buf + p2size, size - p2size);
+ }
+}
+
+/* Incremental Fletcher 4 */
+
+#define ZFS_FLETCHER_4_INC_MAX_SIZE (8ULL << 20)
+
+static inline void
+fletcher_4_incremental_combine(zio_cksum_t *zcp, const uint64_t size,
+ const zio_cksum_t *nzcp)
+{
+ const uint64_t c1 = size / sizeof (uint32_t);
+ const uint64_t c2 = c1 * (c1 + 1) / 2;
+ const uint64_t c3 = c2 * (c1 + 2) / 3;
+
+ /*
+ * The value of 'c3' overflows for buffer sizes close to 16 MiB. For that
+ * reason we split the incremental fletcher4 computation of large buffers
+ * into steps of at most ZFS_FLETCHER_4_INC_MAX_SIZE bytes.
+ */
+ ASSERT3U(size, <=, ZFS_FLETCHER_4_INC_MAX_SIZE);
+
+ zcp->zc_word[3] += nzcp->zc_word[3] + c1 * zcp->zc_word[2] +
+ c2 * zcp->zc_word[1] + c3 * zcp->zc_word[0];
+ zcp->zc_word[2] += nzcp->zc_word[2] + c1 * zcp->zc_word[1] +
+ c2 * zcp->zc_word[0];
+ zcp->zc_word[1] += nzcp->zc_word[1] + c1 * zcp->zc_word[0];
+ zcp->zc_word[0] += nzcp->zc_word[0];
+}
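In terms of the closed forms given earlier: if (A_p, B_p, C_p, D_p) is the
checksum of a prefix and (A_s, B_s, C_s, D_s) that of a suffix of c_1 32-bit
words, the checksum of the concatenation is

    A = A_p + A_s
    B = B_p + c_1 A_p + B_s
    C = C_p + c_1 B_p + c_2 A_p + C_s
    D = D_p + c_1 C_p + c_2 B_p + c_3 A_p + D_s

with c_2 = \binom{c_1 + 1}{2} and c_3 = \binom{c_1 + 2}{3}, exactly the
constants computed above. Since c_3 grows like c_1^3 / 6, it approaches 2^64
once c_1 nears 2^22 words (16 MiB of input), hence the 8 MiB step cap.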
+
+static inline void
+fletcher_4_incremental_impl(boolean_t native, const void *buf, uint64_t size,
zio_cksum_t *zcp)
{
- const uint32_t *ip = buf;
- const uint32_t *ipend = ip + (size / sizeof (uint32_t));
- uint64_t a, b, c, d;
+ while (size > 0) {
+ zio_cksum_t nzc;
+ uint64_t len = MIN(size, ZFS_FLETCHER_4_INC_MAX_SIZE);
- a = zcp->zc_word[0];
- b = zcp->zc_word[1];
- c = zcp->zc_word[2];
- d = zcp->zc_word[3];
+ if (native)
+ fletcher_4_native(buf, len, NULL, &nzc);
+ else
+ fletcher_4_byteswap(buf, len, NULL, &nzc);
- for (; ip < ipend; ip++) {
- a += BSWAP_32(ip[0]);
- b += a;
- c += b;
- d += c;
+ fletcher_4_incremental_combine(zcp, len, &nzc);
+
+ size -= len;
+ buf += len;
}
+}
- ZIO_SET_CHECKSUM(zcp, a, b, c, d);
+int
+fletcher_4_incremental_native(void *buf, size_t size, void *data)
+{
+ zio_cksum_t *zcp = data;
+ /* Use scalar impl to directly update cksum of small blocks */
+ if (size < SPA_MINBLOCKSIZE)
+ fletcher_4_scalar_native((fletcher_4_ctx_t *)zcp, buf, size);
+ else
+ fletcher_4_incremental_impl(B_TRUE, buf, size, zcp);
+ return (0);
}
+int
+fletcher_4_incremental_byteswap(void *buf, size_t size, void *data)
+{
+ zio_cksum_t *zcp = data;
+ /* Use scalar impl to directly update cksum of small blocks */
+ if (size < SPA_MINBLOCKSIZE)
+ fletcher_4_scalar_byteswap((fletcher_4_ctx_t *)zcp, buf, size);
+ else
+ fletcher_4_incremental_impl(B_FALSE, buf, size, zcp);
+ return (0);
+}
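A sketch of the intended incremental usage (buffer and sizes illustrative):
checksumming in chunks must match a single whole-buffer call:

    zio_cksum_t zc_inc, zc_one;

    fletcher_init(&zc_inc);
    (void) fletcher_4_incremental_native(buf, 64 * 1024, &zc_inc);
    (void) fletcher_4_incremental_native((char *)buf + 64 * 1024,
        64 * 1024, &zc_inc);

    fletcher_4_native(buf, 128 * 1024, NULL, &zc_one);
    ASSERT0(bcmp(&zc_inc, &zc_one, sizeof (zio_cksum_t)));

The void *data argument (rather than a typed zio_cksum_t *) is what lets these
functions double as ABD iterator callbacks.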
+
+
+/* Fletcher 4 kstats */
+
+static int
+fletcher_4_kstat_headers(char *buf, size_t size)
+{
+ ssize_t off = 0;
+
+ off += snprintf(buf + off, size - off, "%-17s", "implementation");
+ off += snprintf(buf + off, size - off, "%-15s", "native");
+ (void) snprintf(buf + off, size - off, "%-15s\n", "byteswap");
+
+ return (0);
+}
+
+static int
+fletcher_4_kstat_data(char *buf, size_t size, void *data)
+{
+ struct fletcher_4_kstat *fastest_stat =
+ &fletcher_4_stat_data[fletcher_4_supp_impls_cnt];
+ struct fletcher_4_kstat *curr_stat = (struct fletcher_4_kstat *)data;
+ ssize_t off = 0;
+
+ if (curr_stat == fastest_stat) {
+ off += snprintf(buf + off, size - off, "%-17s", "fastest");
+ off += snprintf(buf + off, size - off, "%-15s",
+ fletcher_4_supp_impls[fastest_stat->native]->name);
+ off += snprintf(buf + off, size - off, "%-15s\n",
+ fletcher_4_supp_impls[fastest_stat->byteswap]->name);
+ } else {
+ ptrdiff_t id = curr_stat - fletcher_4_stat_data;
+
+ off += snprintf(buf + off, size - off, "%-17s",
+ fletcher_4_supp_impls[id]->name);
+ off += snprintf(buf + off, size - off, "%-15llu",
+ (u_longlong_t)curr_stat->native);
+ off += snprintf(buf + off, size - off, "%-15llu\n",
+ (u_longlong_t)curr_stat->byteswap);
+ }
+
+ return (0);
+}
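With the formats above, the kstat (exposed by the SPL as
/proc/spl/kstat/zfs/fletcher_4_bench) renders one row per supported
implementation plus the 'fastest' summary row; the throughput values (B/s)
shown here are illustrative only:

    implementation   native         byteswap
    scalar           3514764519     2354862105
    superscalar      5148436532     3305947028
    avx2             12835218105    11426100417
    fastest          avx2           avx2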
+
+static void *
+fletcher_4_kstat_addr(kstat_t *ksp, loff_t n)
+{
+ if (n <= fletcher_4_supp_impls_cnt)
+ ksp->ks_private = (void *) (fletcher_4_stat_data + n);
+ else
+ ksp->ks_private = NULL;
+
+ return (ksp->ks_private);
+}
+
+#define FLETCHER_4_FASTEST_FN_COPY(type, src) \
+{ \
+ fletcher_4_fastest_impl.init_ ## type = src->init_ ## type; \
+ fletcher_4_fastest_impl.fini_ ## type = src->fini_ ## type; \
+ fletcher_4_fastest_impl.compute_ ## type = src->compute_ ## type; \
+}
+
+#define FLETCHER_4_BENCH_NS (MSEC2NSEC(50)) /* 50ms */
+
+typedef void fletcher_checksum_func_t(const void *, uint64_t, const void *,
+ zio_cksum_t *);
+
+static void
+fletcher_4_benchmark_impl(boolean_t native, char *data, uint64_t data_size)
+{
+ struct fletcher_4_kstat *fastest_stat =
+ &fletcher_4_stat_data[fletcher_4_supp_impls_cnt];
+ hrtime_t start;
+ uint64_t run_bw, run_time_ns, best_run = 0;
+ zio_cksum_t zc;
+ uint32_t i, l, sel_save = IMPL_READ(fletcher_4_impl_chosen);
+
+ fletcher_checksum_func_t *fletcher_4_test = native ?
+ fletcher_4_native : fletcher_4_byteswap;
+
+ for (i = 0; i < fletcher_4_supp_impls_cnt; i++) {
+ struct fletcher_4_kstat *stat = &fletcher_4_stat_data[i];
+ uint64_t run_count = 0;
+
+ /* temporarily select an implementation */
+ fletcher_4_impl_chosen = i;
+
+ kpreempt_disable();
+ start = gethrtime();
+ do {
+ for (l = 0; l < 32; l++, run_count++)
+ fletcher_4_test(data, data_size, NULL, &zc);
+
+ run_time_ns = gethrtime() - start;
+ } while (run_time_ns < FLETCHER_4_BENCH_NS);
+ kpreempt_enable();
+
+ run_bw = data_size * run_count * NANOSEC;
+ run_bw /= run_time_ns; /* B/s */
+
+ if (native)
+ stat->native = run_bw;
+ else
+ stat->byteswap = run_bw;
+
+ if (run_bw > best_run) {
+ best_run = run_bw;
+
+ if (native) {
+ fastest_stat->native = i;
+ FLETCHER_4_FASTEST_FN_COPY(native,
+ fletcher_4_supp_impls[i]);
+ } else {
+ fastest_stat->byteswap = i;
+ FLETCHER_4_FASTEST_FN_COPY(byteswap,
+ fletcher_4_supp_impls[i]);
+ }
+ }
+ }
+
+ /* restore original selection */
+ atomic_swap_32(&fletcher_4_impl_chosen, sel_save);
+}
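The bandwidth computation is run_bw = data_size * run_count * NANOSEC /
run_time_ns. For instance, if the 128 KiB buffer is checksummed 12,800 times
in a 50 ms window, run_bw = 131072 * 12800 * 10^9 / (5 * 10^7) ~= 3.36 * 10^10
B/s, i.e. roughly 33.5 GB/s.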
+
+void
+fletcher_4_init(void)
+{
+ static const size_t data_size = 1 << SPA_OLD_MAXBLOCKSHIFT; /* 128kiB */
+ fletcher_4_ops_t *curr_impl;
+ char *databuf;
+ int i, c;
+
+ /* move supported impl into fletcher_4_supp_impls */
+ for (i = 0, c = 0; i < ARRAY_SIZE(fletcher_4_impls); i++) {
+ curr_impl = (fletcher_4_ops_t *)fletcher_4_impls[i];
+
+ if (curr_impl->valid && curr_impl->valid())
+ fletcher_4_supp_impls[c++] = curr_impl;
+ }
+ membar_producer(); /* complete fletcher_4_supp_impls[] init */
+ fletcher_4_supp_impls_cnt = c; /* number of supported impl */
+
+#if !defined(_KERNEL)
+ /* Skip benchmarking and use last implementation as fastest */
+ memcpy(&fletcher_4_fastest_impl,
+ fletcher_4_supp_impls[fletcher_4_supp_impls_cnt-1],
+ sizeof (fletcher_4_fastest_impl));
+ fletcher_4_fastest_impl.name = "fastest";
+ membar_producer();
+
+ fletcher_4_initialized = B_TRUE;
+ return;
+#endif
+ /* Benchmark all supported implementations */
+ databuf = vmem_alloc(data_size, KM_SLEEP);
+ for (i = 0; i < data_size / sizeof (uint64_t); i++)
+ ((uint64_t *)databuf)[i] = (uintptr_t)(databuf+i); /* warm-up */
+
+ fletcher_4_benchmark_impl(B_FALSE, databuf, data_size);
+ fletcher_4_benchmark_impl(B_TRUE, databuf, data_size);
+
+ vmem_free(databuf, data_size);
+
+ /* install kstats for all implementations */
+ fletcher_4_kstat = kstat_create("zfs", 0, "fletcher_4_bench", "misc",
+ KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
+ if (fletcher_4_kstat != NULL) {
+ fletcher_4_kstat->ks_data = NULL;
+ fletcher_4_kstat->ks_ndata = UINT32_MAX;
+ kstat_set_raw_ops(fletcher_4_kstat,
+ fletcher_4_kstat_headers,
+ fletcher_4_kstat_data,
+ fletcher_4_kstat_addr);
+ kstat_install(fletcher_4_kstat);
+ }
+
+ /* Finish initialization */
+ fletcher_4_initialized = B_TRUE;
+}
+
+void
+fletcher_4_fini(void)
+{
+ if (fletcher_4_kstat != NULL) {
+ kstat_delete(fletcher_4_kstat);
+ fletcher_4_kstat = NULL;
+ }
+}
+
+/* ABD adapters */
+
+static void
+abd_fletcher_4_init(zio_abd_checksum_data_t *cdp)
+{
+ const fletcher_4_ops_t *ops = fletcher_4_impl_get();
+ cdp->acd_private = (void *) ops;
+
+ if (cdp->acd_byteorder == ZIO_CHECKSUM_NATIVE)
+ ops->init_native(cdp->acd_ctx);
+ else
+ ops->init_byteswap(cdp->acd_ctx);
+}
+
+static void
+abd_fletcher_4_fini(zio_abd_checksum_data_t *cdp)
+{
+ fletcher_4_ops_t *ops = (fletcher_4_ops_t *)cdp->acd_private;
+
+ ASSERT(ops);
+
+ if (cdp->acd_byteorder == ZIO_CHECKSUM_NATIVE)
+ ops->fini_native(cdp->acd_ctx, cdp->acd_zcp);
+ else
+ ops->fini_byteswap(cdp->acd_ctx, cdp->acd_zcp);
+}
+
+static void
+abd_fletcher_4_simd2scalar(boolean_t native, void *data, size_t size,
+ zio_abd_checksum_data_t *cdp)
+{
+ zio_cksum_t *zcp = cdp->acd_zcp;
+
+ ASSERT3U(size, <, FLETCHER_MIN_SIMD_SIZE);
+
+ abd_fletcher_4_fini(cdp);
+ cdp->acd_private = (void *)&fletcher_4_scalar_ops;
+
+ if (native)
+ fletcher_4_incremental_native(data, size, zcp);
+ else
+ fletcher_4_incremental_byteswap(data, size, zcp);
+}
+
+static int
+abd_fletcher_4_iter(void *data, size_t size, void *private)
+{
+ zio_abd_checksum_data_t *cdp = (zio_abd_checksum_data_t *)private;
+ fletcher_4_ctx_t *ctx = cdp->acd_ctx;
+ fletcher_4_ops_t *ops = (fletcher_4_ops_t *)cdp->acd_private;
+ boolean_t native = cdp->acd_byteorder == ZIO_CHECKSUM_NATIVE;
+ uint64_t asize = P2ALIGN(size, FLETCHER_MIN_SIMD_SIZE);
+
+ ASSERT(IS_P2ALIGNED(size, sizeof (uint32_t)));
+
+ if (asize > 0) {
+ if (native)
+ ops->compute_native(ctx, data, asize);
+ else
+ ops->compute_byteswap(ctx, data, asize);
+
+ size -= asize;
+ data = (char *)data + asize;
+ }
+
+ if (size > 0) {
+ ASSERT3U(size, <, FLETCHER_MIN_SIMD_SIZE);
+ /* At this point we have to switch to scalar impl */
+ abd_fletcher_4_simd2scalar(native, data, size, cdp);
+ }
+
+ return (0);
+}
+
+zio_abd_checksum_func_t fletcher_4_abd_ops = {
+ .acf_init = abd_fletcher_4_init,
+ .acf_fini = abd_fletcher_4_fini,
+ .acf_iter = abd_fletcher_4_iter
+};
+
+
#if defined(_KERNEL) && defined(HAVE_SPL)
+#include <linux/mod_compat.h>
+
+static int
+fletcher_4_param_get(char *buffer, zfs_kernel_param_t *unused)
+{
+ const uint32_t impl = IMPL_READ(fletcher_4_impl_chosen);
+ char *fmt;
+ int i, cnt = 0;
+
+ /* list fastest */
+ fmt = (impl == IMPL_FASTEST) ? "[%s] " : "%s ";
+ cnt += sprintf(buffer + cnt, fmt, "fastest");
+
+ /* list all supported implementations */
+ for (i = 0; i < fletcher_4_supp_impls_cnt; i++) {
+ fmt = (i == impl) ? "[%s] " : "%s ";
+ cnt += sprintf(buffer + cnt, fmt,
+ fletcher_4_supp_impls[i]->name);
+ }
+
+ return (cnt);
+}
+
+static int
+fletcher_4_param_set(const char *val, zfs_kernel_param_t *unused)
+{
+ return (fletcher_4_impl_set(val));
+}
+
+/*
+ * Choose a fletcher 4 implementation in ZFS.
+ * Users can choose "cycle" to exercise all implementations, but this is
+ * for testing purposes; therefore it can only be set in user space.
+ */
+module_param_call(zfs_fletcher_4_impl,
+ fletcher_4_param_set, fletcher_4_param_get, NULL, 0644);
+MODULE_PARM_DESC(zfs_fletcher_4_impl, "Select fletcher 4 implementation.");
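At runtime the parameter lists the available implementations with the selected
one bracketed, as produced by fletcher_4_param_get() above. Assuming fletcher4
is linked into the zcommon module as in the Makefile change above, it appears
as /sys/module/zcommon/parameters/zfs_fletcher_4_impl, and writing a name such
as "avx2" or "fastest" to it switches implementations via
fletcher_4_impl_set().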
+
+EXPORT_SYMBOL(fletcher_init);
+EXPORT_SYMBOL(fletcher_2_incremental_native);
+EXPORT_SYMBOL(fletcher_2_incremental_byteswap);
+EXPORT_SYMBOL(fletcher_4_init);
+EXPORT_SYMBOL(fletcher_4_fini);
EXPORT_SYMBOL(fletcher_2_native);
EXPORT_SYMBOL(fletcher_2_byteswap);
EXPORT_SYMBOL(fletcher_4_native);
+EXPORT_SYMBOL(fletcher_4_native_varsize);
EXPORT_SYMBOL(fletcher_4_byteswap);
EXPORT_SYMBOL(fletcher_4_incremental_native);
EXPORT_SYMBOL(fletcher_4_incremental_byteswap);
+EXPORT_SYMBOL(fletcher_4_abd_ops);
#endif
diff --git a/zfs/module/zcommon/zfs_fletcher_aarch64_neon.c b/zfs/module/zcommon/zfs_fletcher_aarch64_neon.c
new file mode 100644
index 000000000000..b72c1bafd2d1
--- /dev/null
+++ b/zfs/module/zcommon/zfs_fletcher_aarch64_neon.c
@@ -0,0 +1,215 @@
+/*
+ * Implement fast Fletcher4 with NEON instructions. (aarch64)
+ *
+ * Use the 128-bit NEON SIMD instructions and registers to compute
+ * Fletcher4 in two incremental 64-bit parallel accumulator streams,
+ * and then combine the streams to form the final four checksum words.
+ * This implementation is a derivative of the AVX SIMD implementation by
+ * James Guilford and Jinshan Xiong from Intel (see zfs_fletcher_intel.c).
+ *
+ * Copyright (C) 2016 Romain Dolbeau.
+ *
+ * Authors:
+ * Romain Dolbeau <romain.dolbeau at atos.net>
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if defined(__aarch64__)
+
+#include <linux/simd_aarch64.h>
+#include <sys/spa_checksum.h>
+#include <zfs_fletcher.h>
+#include <strings.h>
+
+static void
+fletcher_4_aarch64_neon_init(fletcher_4_ctx_t *ctx)
+{
+ bzero(ctx->aarch64_neon, 4 * sizeof (zfs_fletcher_aarch64_neon_t));
+}
+
+static void
+fletcher_4_aarch64_neon_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
+{
+ uint64_t A, B, C, D;
+ A = ctx->aarch64_neon[0].v[0] + ctx->aarch64_neon[0].v[1];
+ B = 2 * ctx->aarch64_neon[1].v[0] + 2 * ctx->aarch64_neon[1].v[1] -
+ ctx->aarch64_neon[0].v[1];
+ C = 4 * ctx->aarch64_neon[2].v[0] - ctx->aarch64_neon[1].v[0] +
+ 4 * ctx->aarch64_neon[2].v[1] - 3 * ctx->aarch64_neon[1].v[1];
+ D = 8 * ctx->aarch64_neon[3].v[0] - 4 * ctx->aarch64_neon[2].v[0] +
+ 8 * ctx->aarch64_neon[3].v[1] - 8 * ctx->aarch64_neon[2].v[1] +
+ ctx->aarch64_neon[1].v[1];
+ ZIO_SET_CHECKSUM(zcp, A, B, C, D);
+}
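The constants above follow from the closed forms given earlier. The NEON loop
keeps two interleaved lanes, so lane j (j = 0, 1) accumulates its own
(a_j, b_j, c_j, d_j) over the words x_{2k+j+1}. Such a word carries the
whole-buffer weight n - i + 1 = 2(m - k) - j in B, so

    B = \sum_j (2 b_j - j a_j) = 2 b_0 + 2 b_1 - a_1,

matching the code; C and D expand the same way with the binomial weights. The
same technique with eight lanes yields the CcA/CcB/DcA/DcB/DcC coefficient
tables in the AVX512 variant further below.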
+
+#define NEON_INIT_LOOP() \
+ asm("eor %[ZERO].16b,%[ZERO].16b,%[ZERO].16b\n" \
+ "ld1 { %[ACC0].4s }, %[CTX0]\n" \
+ "ld1 { %[ACC1].4s }, %[CTX1]\n" \
+ "ld1 { %[ACC2].4s }, %[CTX2]\n" \
+ "ld1 { %[ACC3].4s }, %[CTX3]\n" \
+ : [ZERO] "=w" (ZERO), \
+ [ACC0] "=w" (ACC0), [ACC1] "=w" (ACC1), \
+ [ACC2] "=w" (ACC2), [ACC3] "=w" (ACC3) \
+ : [CTX0] "Q" (ctx->aarch64_neon[0]), \
+ [CTX1] "Q" (ctx->aarch64_neon[1]), \
+ [CTX2] "Q" (ctx->aarch64_neon[2]), \
+ [CTX3] "Q" (ctx->aarch64_neon[3]))
+
+#define NEON_DO_REVERSE "rev32 %[SRC].16b, %[SRC].16b\n"
+
+#define NEON_DONT_REVERSE ""
+
+#define NEON_MAIN_LOOP(REVERSE) \
+ asm("ld1 { %[SRC].4s }, %[IP]\n" \
+ REVERSE \
+ "zip1 %[TMP1].4s, %[SRC].4s, %[ZERO].4s\n" \
+ "zip2 %[TMP2].4s, %[SRC].4s, %[ZERO].4s\n" \
+ "add %[ACC0].2d, %[ACC0].2d, %[TMP1].2d\n" \
+ "add %[ACC1].2d, %[ACC1].2d, %[ACC0].2d\n" \
+ "add %[ACC2].2d, %[ACC2].2d, %[ACC1].2d\n" \
+ "add %[ACC3].2d, %[ACC3].2d, %[ACC2].2d\n" \
+ "add %[ACC0].2d, %[ACC0].2d, %[TMP2].2d\n" \
+ "add %[ACC1].2d, %[ACC1].2d, %[ACC0].2d\n" \
+ "add %[ACC2].2d, %[ACC2].2d, %[ACC1].2d\n" \
+ "add %[ACC3].2d, %[ACC3].2d, %[ACC2].2d\n" \
+ : [SRC] "=&w" (SRC), \
+ [TMP1] "=&w" (TMP1), [TMP2] "=&w" (TMP2), \
+ [ACC0] "+w" (ACC0), [ACC1] "+w" (ACC1), \
+ [ACC2] "+w" (ACC2), [ACC3] "+w" (ACC3) \
+ : [ZERO] "w" (ZERO), [IP] "Q" (*ip))
+
+#define NEON_FINI_LOOP() \
+ asm("st1 { %[ACC0].4s },%[DST0]\n" \
+ "st1 { %[ACC1].4s },%[DST1]\n" \
+ "st1 { %[ACC2].4s },%[DST2]\n" \
+ "st1 { %[ACC3].4s },%[DST3]\n" \
+ : [DST0] "=Q" (ctx->aarch64_neon[0]), \
+ [DST1] "=Q" (ctx->aarch64_neon[1]), \
+ [DST2] "=Q" (ctx->aarch64_neon[2]), \
+ [DST3] "=Q" (ctx->aarch64_neon[3]) \
+ : [ACC0] "w" (ACC0), [ACC1] "w" (ACC1), \
+ [ACC2] "w" (ACC2), [ACC3] "w" (ACC3))
+
+static void
+fletcher_4_aarch64_neon_native(fletcher_4_ctx_t *ctx,
+ const void *buf, uint64_t size)
+{
+ const uint64_t *ip = buf;
+ const uint64_t *ipend = (uint64_t *)((uint8_t *)ip + size);
+#if defined(_KERNEL)
+register unsigned char ZERO asm("v0") __attribute__((vector_size(16)));
+register unsigned char ACC0 asm("v1") __attribute__((vector_size(16)));
+register unsigned char ACC1 asm("v2") __attribute__((vector_size(16)));
+register unsigned char ACC2 asm("v3") __attribute__((vector_size(16)));
+register unsigned char ACC3 asm("v4") __attribute__((vector_size(16)));
+register unsigned char TMP1 asm("v5") __attribute__((vector_size(16)));
+register unsigned char TMP2 asm("v6") __attribute__((vector_size(16)));
+register unsigned char SRC asm("v7") __attribute__((vector_size(16)));
+#else
+unsigned char ZERO __attribute__((vector_size(16)));
+unsigned char ACC0 __attribute__((vector_size(16)));
+unsigned char ACC1 __attribute__((vector_size(16)));
+unsigned char ACC2 __attribute__((vector_size(16)));
+unsigned char ACC3 __attribute__((vector_size(16)));
+unsigned char TMP1 __attribute__((vector_size(16)));
+unsigned char TMP2 __attribute__((vector_size(16)));
+unsigned char SRC __attribute__((vector_size(16)));
+#endif
+
+ kfpu_begin();
+
+ NEON_INIT_LOOP();
+
+ for (; ip < ipend; ip += 2) {
+ NEON_MAIN_LOOP(NEON_DONT_REVERSE);
+ }
+
+ NEON_FINI_LOOP();
+
+ kfpu_end();
+}
+
+static void
+fletcher_4_aarch64_neon_byteswap(fletcher_4_ctx_t *ctx,
+ const void *buf, uint64_t size)
+{
+ const uint64_t *ip = buf;
+ const uint64_t *ipend = (uint64_t *)((uint8_t *)ip + size);
+#if defined(_KERNEL)
+register unsigned char ZERO asm("v0") __attribute__((vector_size(16)));
+register unsigned char ACC0 asm("v1") __attribute__((vector_size(16)));
+register unsigned char ACC1 asm("v2") __attribute__((vector_size(16)));
+register unsigned char ACC2 asm("v3") __attribute__((vector_size(16)));
+register unsigned char ACC3 asm("v4") __attribute__((vector_size(16)));
+register unsigned char TMP1 asm("v5") __attribute__((vector_size(16)));
+register unsigned char TMP2 asm("v6") __attribute__((vector_size(16)));
+register unsigned char SRC asm("v7") __attribute__((vector_size(16)));
+#else
+unsigned char ZERO __attribute__((vector_size(16)));
+unsigned char ACC0 __attribute__((vector_size(16)));
+unsigned char ACC1 __attribute__((vector_size(16)));
+unsigned char ACC2 __attribute__((vector_size(16)));
+unsigned char ACC3 __attribute__((vector_size(16)));
+unsigned char TMP1 __attribute__((vector_size(16)));
+unsigned char TMP2 __attribute__((vector_size(16)));
+unsigned char SRC __attribute__((vector_size(16)));
+#endif
+
+ kfpu_begin();
+
+ NEON_INIT_LOOP();
+
+ for (; ip < ipend; ip += 2) {
+ NEON_MAIN_LOOP(NEON_DO_REVERSE);
+ }
+
+ NEON_FINI_LOOP();
+
+ kfpu_end();
+}
+
+static boolean_t fletcher_4_aarch64_neon_valid(void)
+{
+ return (B_TRUE);
+}
+
+const fletcher_4_ops_t fletcher_4_aarch64_neon_ops = {
+ .init_native = fletcher_4_aarch64_neon_init,
+ .compute_native = fletcher_4_aarch64_neon_native,
+ .fini_native = fletcher_4_aarch64_neon_fini,
+ .init_byteswap = fletcher_4_aarch64_neon_init,
+ .compute_byteswap = fletcher_4_aarch64_neon_byteswap,
+ .fini_byteswap = fletcher_4_aarch64_neon_fini,
+ .valid = fletcher_4_aarch64_neon_valid,
+ .name = "aarch64_neon"
+};
+
+#endif /* defined(__aarch64__) */
diff --git a/zfs/module/zcommon/zfs_fletcher_avx512.c b/zfs/module/zcommon/zfs_fletcher_avx512.c
new file mode 100644
index 000000000000..2d28ffb112d7
--- /dev/null
+++ b/zfs/module/zcommon/zfs_fletcher_avx512.c
@@ -0,0 +1,171 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#if defined(__x86_64) && defined(HAVE_AVX512F)
+
+#include <linux/simd_x86.h>
+#include <sys/byteorder.h>
+#include <sys/spa_checksum.h>
+#include <zfs_fletcher.h>
+#include <strings.h>
+
+#define __asm __asm__ __volatile__
+
+static void
+fletcher_4_avx512f_init(fletcher_4_ctx_t *ctx)
+{
+ bzero(ctx->avx512, 4 * sizeof (zfs_fletcher_avx512_t));
+}
+
+static void
+fletcher_4_avx512f_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
+{
+ static const uint64_t
+ CcA[] = { 0, 0, 1, 3, 6, 10, 15, 21 },
+ CcB[] = { 28, 36, 44, 52, 60, 68, 76, 84 },
+ DcA[] = { 0, 0, 0, 1, 4, 10, 20, 35 },
+ DcB[] = { 56, 84, 120, 164, 216, 276, 344, 420 },
+ DcC[] = { 448, 512, 576, 640, 704, 768, 832, 896 };
+
+ uint64_t A, B, C, D;
+ uint64_t i;
+
+ A = ctx->avx512[0].v[0];
+ B = 8 * ctx->avx512[1].v[0];
+ C = 64 * ctx->avx512[2].v[0] - CcB[0] * ctx->avx512[1].v[0];
+ D = 512 * ctx->avx512[3].v[0] - DcC[0] * ctx->avx512[2].v[0] +
+ DcB[0] * ctx->avx512[1].v[0];
+
+ for (i = 1; i < 8; i++) {
+ A += ctx->avx512[0].v[i];
+ B += 8 * ctx->avx512[1].v[i] - i * ctx->avx512[0].v[i];
+ C += 64 * ctx->avx512[2].v[i] - CcB[i] * ctx->avx512[1].v[i] +
+ CcA[i] * ctx->avx512[0].v[i];
+ D += 512 * ctx->avx512[3].v[i] - DcC[i] * ctx->avx512[2].v[i] +
+ DcB[i] * ctx->avx512[1].v[i] - DcA[i] * ctx->avx512[0].v[i];
+ }
+
+ ZIO_SET_CHECKSUM(zcp, A, B, C, D);
+}
+
+#define FLETCHER_4_AVX512_RESTORE_CTX(ctx) \
+{ \
+ __asm("vmovdqu64 %0, %%zmm0" :: "m" ((ctx)->avx512[0])); \
+ __asm("vmovdqu64 %0, %%zmm1" :: "m" ((ctx)->avx512[1])); \
+ __asm("vmovdqu64 %0, %%zmm2" :: "m" ((ctx)->avx512[2])); \
+ __asm("vmovdqu64 %0, %%zmm3" :: "m" ((ctx)->avx512[3])); \
+}
+
+#define FLETCHER_4_AVX512_SAVE_CTX(ctx) \
+{ \
+ __asm("vmovdqu64 %%zmm0, %0" : "=m" ((ctx)->avx512[0])); \
+ __asm("vmovdqu64 %%zmm1, %0" : "=m" ((ctx)->avx512[1])); \
+ __asm("vmovdqu64 %%zmm2, %0" : "=m" ((ctx)->avx512[2])); \
+ __asm("vmovdqu64 %%zmm3, %0" : "=m" ((ctx)->avx512[3])); \
+}
+
+static void
+fletcher_4_avx512f_native(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
+{
+ const uint32_t *ip = buf;
+ const uint32_t *ipend = (uint32_t *)((uint8_t *)ip + size);
+
+ kfpu_begin();
+
+ FLETCHER_4_AVX512_RESTORE_CTX(ctx);
+
+ for (; ip < ipend; ip += 8) {
+ __asm("vpmovzxdq %0, %%zmm4"::"m" (*ip));
+ __asm("vpaddq %zmm4, %zmm0, %zmm0");
+ __asm("vpaddq %zmm0, %zmm1, %zmm1");
+ __asm("vpaddq %zmm1, %zmm2, %zmm2");
+ __asm("vpaddq %zmm2, %zmm3, %zmm3");
+ }
+
+ FLETCHER_4_AVX512_SAVE_CTX(ctx);
+
+ kfpu_end();
+}
+
+static void
+fletcher_4_avx512f_byteswap(fletcher_4_ctx_t *ctx, const void *buf,
+ uint64_t size)
+{
+ static const uint64_t byteswap_mask = 0xFFULL;
+ const uint32_t *ip = buf;
+ const uint32_t *ipend = (uint32_t *)((uint8_t *)ip + size);
+
+ kfpu_begin();
+
+ FLETCHER_4_AVX512_RESTORE_CTX(ctx);
+
+ __asm("vpbroadcastq %0, %%zmm8" :: "r" (byteswap_mask));
+ __asm("vpsllq $8, %zmm8, %zmm9");
+ __asm("vpsllq $16, %zmm8, %zmm10");
+ __asm("vpsllq $24, %zmm8, %zmm11");
+
+ for (; ip < ipend; ip += 8) {
+ __asm("vpmovzxdq %0, %%zmm5"::"m" (*ip));
+
+ __asm("vpsrlq $24, %zmm5, %zmm6");
+ __asm("vpandd %zmm8, %zmm6, %zmm6");
+ __asm("vpsrlq $8, %zmm5, %zmm7");
+ __asm("vpandd %zmm9, %zmm7, %zmm7");
+ __asm("vpord %zmm6, %zmm7, %zmm4");
+ __asm("vpsllq $8, %zmm5, %zmm6");
+ __asm("vpandd %zmm10, %zmm6, %zmm6");
+ __asm("vpord %zmm6, %zmm4, %zmm4");
+ __asm("vpsllq $24, %zmm5, %zmm5");
+ __asm("vpandd %zmm11, %zmm5, %zmm5");
+ __asm("vpord %zmm5, %zmm4, %zmm4");
+
+ __asm("vpaddq %zmm4, %zmm0, %zmm0");
+ __asm("vpaddq %zmm0, %zmm1, %zmm1");
+ __asm("vpaddq %zmm1, %zmm2, %zmm2");
+ __asm("vpaddq %zmm2, %zmm3, %zmm3");
+ }
+
+ FLETCHER_4_AVX512_SAVE_CTX(ctx)
+
+ kfpu_end();
+}
+
+static boolean_t
+fletcher_4_avx512f_valid(void)
+{
+ return (zfs_avx512f_available());
+}
+
+const fletcher_4_ops_t fletcher_4_avx512f_ops = {
+ .init_native = fletcher_4_avx512f_init,
+ .fini_native = fletcher_4_avx512f_fini,
+ .compute_native = fletcher_4_avx512f_native,
+ .init_byteswap = fletcher_4_avx512f_init,
+ .fini_byteswap = fletcher_4_avx512f_fini,
+ .compute_byteswap = fletcher_4_avx512f_byteswap,
+ .valid = fletcher_4_avx512f_valid,
+ .name = "avx512f"
+};
+
+#endif /* defined(__x86_64) && defined(HAVE_AVX512F) */
diff --git a/zfs/module/zcommon/zfs_fletcher_intel.c b/zfs/module/zcommon/zfs_fletcher_intel.c
new file mode 100644
index 000000000000..a479b9d5694f
--- /dev/null
+++ b/zfs/module/zcommon/zfs_fletcher_intel.c
@@ -0,0 +1,173 @@
+/*
+ * Implement fast Fletcher4 with AVX2 instructions. (x86_64)
+ *
+ * Use the 256-bit AVX2 SIMD instructions and registers to compute
+ * Fletcher4 in four incremental 64-bit parallel accumulator streams,
+ * and then combine the streams to form the final four checksum words.
+ *
+ * Copyright (C) 2015 Intel Corporation.
+ *
+ * Authors:
+ * James Guilford <james.guilford at intel.com>
+ * Jinshan Xiong <jinshan.xiong at intel.com>
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if defined(HAVE_AVX) && defined(HAVE_AVX2)
+
+#include <linux/simd_x86.h>
+#include <sys/spa_checksum.h>
+#include <zfs_fletcher.h>
+#include <strings.h>
+
+static void
+fletcher_4_avx2_init(fletcher_4_ctx_t *ctx)
+{
+ bzero(ctx->avx, 4 * sizeof (zfs_fletcher_avx_t));
+}
+
+static void
+fletcher_4_avx2_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
+{
+ uint64_t A, B, C, D;
+
+ A = ctx->avx[0].v[0] + ctx->avx[0].v[1] +
+ ctx->avx[0].v[2] + ctx->avx[0].v[3];
+ B = 0 - ctx->avx[0].v[1] - 2 * ctx->avx[0].v[2] - 3 * ctx->avx[0].v[3] +
+ 4 * ctx->avx[1].v[0] + 4 * ctx->avx[1].v[1] + 4 * ctx->avx[1].v[2] +
+ 4 * ctx->avx[1].v[3];
+
+ C = ctx->avx[0].v[2] + 3 * ctx->avx[0].v[3] - 6 * ctx->avx[1].v[0] -
+ 10 * ctx->avx[1].v[1] - 14 * ctx->avx[1].v[2] -
+ 18 * ctx->avx[1].v[3] + 16 * ctx->avx[2].v[0] +
+ 16 * ctx->avx[2].v[1] + 16 * ctx->avx[2].v[2] +
+ 16 * ctx->avx[2].v[3];
+
+ D = 0 - ctx->avx[0].v[3] + 4 * ctx->avx[1].v[0] +
+ 10 * ctx->avx[1].v[1] + 20 * ctx->avx[1].v[2] +
+ 34 * ctx->avx[1].v[3] - 48 * ctx->avx[2].v[0] -
+ 64 * ctx->avx[2].v[1] - 80 * ctx->avx[2].v[2] -
+ 96 * ctx->avx[2].v[3] + 64 * ctx->avx[3].v[0] +
+ 64 * ctx->avx[3].v[1] + 64 * ctx->avx[3].v[2] +
+ 64 * ctx->avx[3].v[3];
+
+ ZIO_SET_CHECKSUM(zcp, A, B, C, D);
+}
+
+#define FLETCHER_4_AVX2_RESTORE_CTX(ctx) \
+{ \
+ asm volatile("vmovdqu %0, %%ymm0" :: "m" ((ctx)->avx[0])); \
+ asm volatile("vmovdqu %0, %%ymm1" :: "m" ((ctx)->avx[1])); \
+ asm volatile("vmovdqu %0, %%ymm2" :: "m" ((ctx)->avx[2])); \
+ asm volatile("vmovdqu %0, %%ymm3" :: "m" ((ctx)->avx[3])); \
+}
+
+#define FLETCHER_4_AVX2_SAVE_CTX(ctx) \
+{ \
+ asm volatile("vmovdqu %%ymm0, %0" : "=m" ((ctx)->avx[0])); \
+ asm volatile("vmovdqu %%ymm1, %0" : "=m" ((ctx)->avx[1])); \
+ asm volatile("vmovdqu %%ymm2, %0" : "=m" ((ctx)->avx[2])); \
+ asm volatile("vmovdqu %%ymm3, %0" : "=m" ((ctx)->avx[3])); \
+}
+
+
+static void
+fletcher_4_avx2_native(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
+{
+ const uint64_t *ip = buf;
+ const uint64_t *ipend = (uint64_t *)((uint8_t *)ip + size);
+
+ kfpu_begin();
+
+ FLETCHER_4_AVX2_RESTORE_CTX(ctx);
+
+ for (; ip < ipend; ip += 2) {
+ asm volatile("vpmovzxdq %0, %%ymm4"::"m" (*ip));
+ asm volatile("vpaddq %ymm4, %ymm0, %ymm0");
+ asm volatile("vpaddq %ymm0, %ymm1, %ymm1");
+ asm volatile("vpaddq %ymm1, %ymm2, %ymm2");
+ asm volatile("vpaddq %ymm2, %ymm3, %ymm3");
+ }
+
+ FLETCHER_4_AVX2_SAVE_CTX(ctx);
+ asm volatile("vzeroupper");
+
+ kfpu_end();
+}
+
+static void
+fletcher_4_avx2_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
+{
+ static const zfs_fletcher_avx_t mask = {
+ .v = { 0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B,
+ 0xFFFFFFFF00010203, 0xFFFFFFFF08090A0B }
+ };
+ const uint64_t *ip = buf;
+ const uint64_t *ipend = (uint64_t *)((uint8_t *)ip + size);
+
+ kfpu_begin();
+
+ FLETCHER_4_AVX2_RESTORE_CTX(ctx);
+
+ asm volatile("vmovdqu %0, %%ymm5" :: "m" (mask));
+
+ for (; ip < ipend; ip += 2) {
+ asm volatile("vpmovzxdq %0, %%ymm4"::"m" (*ip));
+ asm volatile("vpshufb %ymm5, %ymm4, %ymm4");
+
+ asm volatile("vpaddq %ymm4, %ymm0, %ymm0");
+ asm volatile("vpaddq %ymm0, %ymm1, %ymm1");
+ asm volatile("vpaddq %ymm1, %ymm2, %ymm2");
+ asm volatile("vpaddq %ymm2, %ymm3, %ymm3");
+ }
+
+ FLETCHER_4_AVX2_SAVE_CTX(ctx);
+ asm volatile("vzeroupper");
+
+ kfpu_end();
+}
+
+static boolean_t fletcher_4_avx2_valid(void)
+{
+ return (zfs_avx_available() && zfs_avx2_available());
+}
+
+const fletcher_4_ops_t fletcher_4_avx2_ops = {
+ .init_native = fletcher_4_avx2_init,
+ .fini_native = fletcher_4_avx2_fini,
+ .compute_native = fletcher_4_avx2_native,
+ .init_byteswap = fletcher_4_avx2_init,
+ .fini_byteswap = fletcher_4_avx2_fini,
+ .compute_byteswap = fletcher_4_avx2_byteswap,
+ .valid = fletcher_4_avx2_valid,
+ .name = "avx2"
+};
+
+#endif /* defined(HAVE_AVX) && defined(HAVE_AVX2) */
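
The A/B/C/D coefficients in fletcher_4_avx2_fini() fall out of
re-serializing four interleaved accumulator streams. A self-contained
check (illustrative only, not part of the patch) that compares them
against a word-at-a-time reference; all arithmetic wraps mod 2^64 in
both paths, so equality is exact:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define NWORDS 1024                     /* any multiple of 4 */

    int
    main(void)
    {
        uint32_t x[NWORDS];
        uint64_t a = 0, b = 0, c = 0, d = 0;    /* scalar reference */
        uint64_t as[4] = { 0 }, bs[4] = { 0 }, cs[4] = { 0 }, ds[4] = { 0 };
        uint64_t A, B, C, D;
        int i, s;

        for (i = 0; i < NWORDS; i++)
            x[i] = (uint32_t)rand();

        /* Reference: one word at a time. */
        for (i = 0; i < NWORDS; i++) {
            a += x[i]; b += a; c += b; d += c;
        }

        /* Four interleaved streams, as the SIMD loop computes them. */
        for (i = 0; i < NWORDS; i += 4) {
            for (s = 0; s < 4; s++) {
                as[s] += x[i + s];
                bs[s] += as[s];
                cs[s] += bs[s];
                ds[s] += cs[s];
            }
        }

        /* Recombination, same coefficients as fletcher_4_avx2_fini(). */
        A = as[0] + as[1] + as[2] + as[3];
        B = 0 - as[1] - 2 * as[2] - 3 * as[3] +
            4 * (bs[0] + bs[1] + bs[2] + bs[3]);
        C = as[2] + 3 * as[3] - 6 * bs[0] - 10 * bs[1] - 14 * bs[2] -
            18 * bs[3] + 16 * (cs[0] + cs[1] + cs[2] + cs[3]);
        D = 0 - as[3] + 4 * bs[0] + 10 * bs[1] + 20 * bs[2] + 34 * bs[3] -
            48 * cs[0] - 64 * cs[1] - 80 * cs[2] - 96 * cs[3] +
            64 * (ds[0] + ds[1] + ds[2] + ds[3]);

        (void) printf("%s\n", (A == a && B == b && C == c && D == d) ?
            "streams match" : "MISMATCH");
        return (0);
    }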
diff --git a/zfs/module/zcommon/zfs_fletcher_sse.c b/zfs/module/zcommon/zfs_fletcher_sse.c
new file mode 100644
index 000000000000..90b7d7d4ef72
--- /dev/null
+++ b/zfs/module/zcommon/zfs_fletcher_sse.c
@@ -0,0 +1,231 @@
+/*
+ * Implement fast Fletcher4 with SSE2 and SSSE3 instructions. (x86)
+ *
+ * Use the 128-bit SSE2/SSSE3 SIMD instructions and registers to compute
+ * Fletcher4 in two incremental 64-bit parallel accumulator streams,
+ * and then combine the streams to form the final four checksum words.
+ * This implementation is a derivative of the AVX SIMD implementation by
+ * James Guilford and Jinshan Xiong from Intel (see zfs_fletcher_intel.c).
+ *
+ * Copyright (C) 2016 Tyler J. Stachecki.
+ *
+ * Authors:
+ * Tyler J. Stachecki <stachecki.tyler at gmail.com>
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#if defined(HAVE_SSE2)
+
+#include <linux/simd_x86.h>
+#include <sys/spa_checksum.h>
+#include <sys/byteorder.h>
+#include <zfs_fletcher.h>
+#include <strings.h>
+
+static void
+fletcher_4_sse2_init(fletcher_4_ctx_t *ctx)
+{
+ bzero(ctx->sse, 4 * sizeof (zfs_fletcher_sse_t));
+}
+
+static void
+fletcher_4_sse2_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
+{
+ uint64_t A, B, C, D;
+
+ /*
+ * The mixing matrix for checksum calculation is:
+ * a = a0 + a1
+ * b = 2b0 + 2b1 - a1
+ * c = 4c0 - b0 + 4c1 - 3b1
+ * d = 8d0 - 4c0 + 8d1 - 8c1 + b1
+ *
+ * c and d are multiplied by 4 and 8, respectively,
+ * before spilling the vectors out to memory.
+ */
+ A = ctx->sse[0].v[0] + ctx->sse[0].v[1];
+ B = 2 * ctx->sse[1].v[0] + 2 * ctx->sse[1].v[1] - ctx->sse[0].v[1];
+ C = 4 * ctx->sse[2].v[0] - ctx->sse[1].v[0] + 4 * ctx->sse[2].v[1] -
+ 3 * ctx->sse[1].v[1];
+ D = 8 * ctx->sse[3].v[0] - 4 * ctx->sse[2].v[0] + 8 * ctx->sse[3].v[1] -
+ 8 * ctx->sse[2].v[1] + ctx->sse[1].v[1];
+
+ ZIO_SET_CHECKSUM(zcp, A, B, C, D);
+}
+
+#define FLETCHER_4_SSE_RESTORE_CTX(ctx) \
+{ \
+ asm volatile("movdqu %0, %%xmm0" :: "m" ((ctx)->sse[0])); \
+ asm volatile("movdqu %0, %%xmm1" :: "m" ((ctx)->sse[1])); \
+ asm volatile("movdqu %0, %%xmm2" :: "m" ((ctx)->sse[2])); \
+ asm volatile("movdqu %0, %%xmm3" :: "m" ((ctx)->sse[3])); \
+}
+
+#define FLETCHER_4_SSE_SAVE_CTX(ctx) \
+{ \
+ asm volatile("movdqu %%xmm0, %0" : "=m" ((ctx)->sse[0])); \
+ asm volatile("movdqu %%xmm1, %0" : "=m" ((ctx)->sse[1])); \
+ asm volatile("movdqu %%xmm2, %0" : "=m" ((ctx)->sse[2])); \
+ asm volatile("movdqu %%xmm3, %0" : "=m" ((ctx)->sse[3])); \
+}
+
+static void
+fletcher_4_sse2_native(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
+{
+ const uint64_t *ip = buf;
+ const uint64_t *ipend = (uint64_t *)((uint8_t *)ip + size);
+
+ kfpu_begin();
+
+ FLETCHER_4_SSE_RESTORE_CTX(ctx);
+
+ asm volatile("pxor %xmm4, %xmm4");
+
+ for (; ip < ipend; ip += 2) {
+ asm volatile("movdqu %0, %%xmm5" :: "m"(*ip));
+ asm volatile("movdqa %xmm5, %xmm6");
+ asm volatile("punpckldq %xmm4, %xmm5");
+ asm volatile("punpckhdq %xmm4, %xmm6");
+ asm volatile("paddq %xmm5, %xmm0");
+ asm volatile("paddq %xmm0, %xmm1");
+ asm volatile("paddq %xmm1, %xmm2");
+ asm volatile("paddq %xmm2, %xmm3");
+ asm volatile("paddq %xmm6, %xmm0");
+ asm volatile("paddq %xmm0, %xmm1");
+ asm volatile("paddq %xmm1, %xmm2");
+ asm volatile("paddq %xmm2, %xmm3");
+ }
+
+ FLETCHER_4_SSE_SAVE_CTX(ctx);
+
+ kfpu_end();
+}
+
+static void
+fletcher_4_sse2_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
+{
+ const uint32_t *ip = buf;
+ const uint32_t *ipend = (uint32_t *)((uint8_t *)ip + size);
+
+ kfpu_begin();
+
+ FLETCHER_4_SSE_RESTORE_CTX(ctx);
+
+ for (; ip < ipend; ip += 2) {
+ uint32_t scratch1 = BSWAP_32(ip[0]);
+ uint32_t scratch2 = BSWAP_32(ip[1]);
+ asm volatile("movd %0, %%xmm5" :: "r"(scratch1));
+ asm volatile("movd %0, %%xmm6" :: "r"(scratch2));
+ asm volatile("punpcklqdq %xmm6, %xmm5");
+ asm volatile("paddq %xmm5, %xmm0");
+ asm volatile("paddq %xmm0, %xmm1");
+ asm volatile("paddq %xmm1, %xmm2");
+ asm volatile("paddq %xmm2, %xmm3");
+ }
+
+ FLETCHER_4_SSE_SAVE_CTX(ctx);
+
+ kfpu_end();
+}
+
+static boolean_t fletcher_4_sse2_valid(void)
+{
+ return (zfs_sse2_available());
+}
+
+const fletcher_4_ops_t fletcher_4_sse2_ops = {
+ .init_native = fletcher_4_sse2_init,
+ .fini_native = fletcher_4_sse2_fini,
+ .compute_native = fletcher_4_sse2_native,
+ .init_byteswap = fletcher_4_sse2_init,
+ .fini_byteswap = fletcher_4_sse2_fini,
+ .compute_byteswap = fletcher_4_sse2_byteswap,
+ .valid = fletcher_4_sse2_valid,
+ .name = "sse2"
+};
+
+#endif /* defined(HAVE_SSE2) */
+
+#if defined(HAVE_SSE2) && defined(HAVE_SSSE3)
+static void
+fletcher_4_ssse3_byteswap(fletcher_4_ctx_t *ctx, const void *buf, uint64_t size)
+{
+ static const zfs_fletcher_sse_t mask = {
+ .v = { 0x0405060700010203, 0x0C0D0E0F08090A0B }
+ };
+
+ const uint64_t *ip = buf;
+ const uint64_t *ipend = (uint64_t *)((uint8_t *)ip + size);
+
+ kfpu_begin();
+
+ FLETCHER_4_SSE_RESTORE_CTX(ctx);
+
+ asm volatile("movdqu %0, %%xmm7"::"m" (mask));
+ asm volatile("pxor %xmm4, %xmm4");
+
+ for (; ip < ipend; ip += 2) {
+ asm volatile("movdqu %0, %%xmm5"::"m" (*ip));
+ asm volatile("pshufb %xmm7, %xmm5");
+ asm volatile("movdqa %xmm5, %xmm6");
+ asm volatile("punpckldq %xmm4, %xmm5");
+ asm volatile("punpckhdq %xmm4, %xmm6");
+ asm volatile("paddq %xmm5, %xmm0");
+ asm volatile("paddq %xmm0, %xmm1");
+ asm volatile("paddq %xmm1, %xmm2");
+ asm volatile("paddq %xmm2, %xmm3");
+ asm volatile("paddq %xmm6, %xmm0");
+ asm volatile("paddq %xmm0, %xmm1");
+ asm volatile("paddq %xmm1, %xmm2");
+ asm volatile("paddq %xmm2, %xmm3");
+ }
+
+ FLETCHER_4_SSE_SAVE_CTX(ctx);
+
+ kfpu_end();
+}
+
+static boolean_t fletcher_4_ssse3_valid(void)
+{
+ return (zfs_sse2_available() && zfs_ssse3_available());
+}
+
+const fletcher_4_ops_t fletcher_4_ssse3_ops = {
+ .init_native = fletcher_4_sse2_init,
+ .fini_native = fletcher_4_sse2_fini,
+ .compute_native = fletcher_4_sse2_native,
+ .init_byteswap = fletcher_4_sse2_init,
+ .fini_byteswap = fletcher_4_sse2_fini,
+ .compute_byteswap = fletcher_4_ssse3_byteswap,
+ .valid = fletcher_4_ssse3_valid,
+ .name = "ssse3"
+};
+
+#endif /* defined(HAVE_SSE2) && defined(HAVE_SSSE3) */
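
Both the SSSE3 mask above and the AVX2 mask in zfs_fletcher_intel.c rely
on pshufb semantics: each result byte is selected by the low nibble of
the corresponding mask byte, and a mask byte with its high bit set zeroes
the lane (which is how the AVX2 variant's 0xFF bytes clear the high half
of each zero-extended word). A scalar model of one 128-bit step, with an
illustrative helper name (vpshufb on ymm applies this per 128-bit lane):

    #include <stdint.h>

    static void
    pshufb_model(uint8_t dst[16], const uint8_t src[16],
        const uint8_t mask[16])
    {
        int i;

        for (i = 0; i < 16; i++)
            dst[i] = (mask[i] & 0x80) ? 0 : src[mask[i] & 0x0F];
    }

Stored little-endian, 0x0405060700010203 supplies the byte indices
3,2,1,0,7,6,5,4, i.e. a byteswap of each 32-bit word in place.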
diff --git a/zfs/module/zcommon/zfs_fletcher_superscalar.c b/zfs/module/zcommon/zfs_fletcher_superscalar.c
new file mode 100644
index 000000000000..02c5d53c7d99
--- /dev/null
+++ b/zfs/module/zcommon/zfs_fletcher_superscalar.c
@@ -0,0 +1,162 @@
+/*
+ * Implement fast Fletcher4 using superscalar pipelines.
+ *
+ * Use regular C code to compute
+ * Fletcher4 in two incremental 64-bit parallel accumulator streams,
+ * and then combine the streams to form the final four checksum words.
+ * This implementation is a derivative of the AVX SIMD implementation by
+ * James Guilford and Jinshan Xiong from Intel (see zfs_fletcher_intel.c).
+ *
+ * Copyright (C) 2016 Romain Dolbeau.
+ *
+ * Authors:
+ * Romain Dolbeau <romain.dolbeau at atos.net>
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <sys/byteorder.h>
+#include <sys/spa_checksum.h>
+#include <zfs_fletcher.h>
+#include <strings.h>
+
+static void
+fletcher_4_superscalar_init(fletcher_4_ctx_t *ctx)
+{
+ bzero(ctx->superscalar, 4 * sizeof (zfs_fletcher_superscalar_t));
+}
+
+static void
+fletcher_4_superscalar_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
+{
+ uint64_t A, B, C, D;
+ A = ctx->superscalar[0].v[0] + ctx->superscalar[0].v[1];
+ B = 2 * ctx->superscalar[1].v[0] + 2 * ctx->superscalar[1].v[1] -
+ ctx->superscalar[0].v[1];
+ C = 4 * ctx->superscalar[2].v[0] - ctx->superscalar[1].v[0] +
+ 4 * ctx->superscalar[2].v[1] - 3 * ctx->superscalar[1].v[1];
+ D = 8 * ctx->superscalar[3].v[0] - 4 * ctx->superscalar[2].v[0] +
+ 8 * ctx->superscalar[3].v[1] - 8 * ctx->superscalar[2].v[1] +
+ ctx->superscalar[1].v[1];
+ ZIO_SET_CHECKSUM(zcp, A, B, C, D);
+}
+
+static void
+fletcher_4_superscalar_native(fletcher_4_ctx_t *ctx,
+ const void *buf, uint64_t size)
+{
+ const uint32_t *ip = buf;
+ const uint32_t *ipend = ip + (size / sizeof (uint32_t));
+ uint64_t a, b, c, d;
+ uint64_t a2, b2, c2, d2;
+
+ a = ctx->superscalar[0].v[0];
+ b = ctx->superscalar[1].v[0];
+ c = ctx->superscalar[2].v[0];
+ d = ctx->superscalar[3].v[0];
+ a2 = ctx->superscalar[0].v[1];
+ b2 = ctx->superscalar[1].v[1];
+ c2 = ctx->superscalar[2].v[1];
+ d2 = ctx->superscalar[3].v[1];
+
+ for (; ip < ipend; ip += 2) {
+ a += ip[0];
+ a2 += ip[1];
+ b += a;
+ b2 += a2;
+ c += b;
+ c2 += b2;
+ d += c;
+ d2 += c2;
+ }
+
+ ctx->superscalar[0].v[0] = a;
+ ctx->superscalar[1].v[0] = b;
+ ctx->superscalar[2].v[0] = c;
+ ctx->superscalar[3].v[0] = d;
+ ctx->superscalar[0].v[1] = a2;
+ ctx->superscalar[1].v[1] = b2;
+ ctx->superscalar[2].v[1] = c2;
+ ctx->superscalar[3].v[1] = d2;
+}
+
+static void
+fletcher_4_superscalar_byteswap(fletcher_4_ctx_t *ctx,
+ const void *buf, uint64_t size)
+{
+ const uint32_t *ip = buf;
+ const uint32_t *ipend = ip + (size / sizeof (uint32_t));
+ uint64_t a, b, c, d;
+ uint64_t a2, b2, c2, d2;
+
+ a = ctx->superscalar[0].v[0];
+ b = ctx->superscalar[1].v[0];
+ c = ctx->superscalar[2].v[0];
+ d = ctx->superscalar[3].v[0];
+ a2 = ctx->superscalar[0].v[1];
+ b2 = ctx->superscalar[1].v[1];
+ c2 = ctx->superscalar[2].v[1];
+ d2 = ctx->superscalar[3].v[1];
+
+ for (; ip < ipend; ip += 2) {
+ a += BSWAP_32(ip[0]);
+ a2 += BSWAP_32(ip[1]);
+ b += a;
+ b2 += a2;
+ c += b;
+ c2 += b2;
+ d += c;
+ d2 += c2;
+ }
+
+ ctx->superscalar[0].v[0] = a;
+ ctx->superscalar[1].v[0] = b;
+ ctx->superscalar[2].v[0] = c;
+ ctx->superscalar[3].v[0] = d;
+ ctx->superscalar[0].v[1] = a2;
+ ctx->superscalar[1].v[1] = b2;
+ ctx->superscalar[2].v[1] = c2;
+ ctx->superscalar[3].v[1] = d2;
+}
+
+static boolean_t fletcher_4_superscalar_valid(void)
+{
+ return (B_TRUE);
+}
+
+const fletcher_4_ops_t fletcher_4_superscalar_ops = {
+ .init_native = fletcher_4_superscalar_init,
+ .compute_native = fletcher_4_superscalar_native,
+ .fini_native = fletcher_4_superscalar_fini,
+ .init_byteswap = fletcher_4_superscalar_init,
+ .compute_byteswap = fletcher_4_superscalar_byteswap,
+ .fini_byteswap = fletcher_4_superscalar_fini,
+ .valid = fletcher_4_superscalar_valid,
+ .name = "superscalar"
+};
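
The two-stream mixing matrix used by fletcher_4_superscalar_fini() (and
shared with the SSE2 path above) can be verified by hand. Number the
32-bit words x_1 .. x_2n; stream 0 accumulates the odd-indexed words and
stream 1 the even-indexed ones, so a0 + a1 is trivially the first-order
sum. For the second-order sum b = sum_i (2n - i + 1) * x_i, splitting
the sum by parity gives:

    b = sum_j 2(n - j + 1) * x_(2j-1) + sum_j (2(n - j + 1) - 1) * x_(2j)
      = 2*b0 + (2*b1 - a1)

which is exactly the "b = 2b0 + 2b1 - a1" row of the comment in
zfs_fletcher_sse.c; the c and d rows follow by applying one and two
further levels of the same summation.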
diff --git a/zfs/module/zcommon/zfs_fletcher_superscalar4.c b/zfs/module/zcommon/zfs_fletcher_superscalar4.c
new file mode 100644
index 000000000000..4fd37d91c41f
--- /dev/null
+++ b/zfs/module/zcommon/zfs_fletcher_superscalar4.c
@@ -0,0 +1,228 @@
+/*
+ * Implement fast Fletcher4 using superscalar pipelines.
+ *
+ * Use regular C code to compute
+ * Fletcher4 in four incremental 64-bit parallel accumulator streams,
+ * and then combine the streams to form the final four checksum words.
+ * This implementation is a derivative of the AVX SIMD implementation by
+ * James Guilford and Jinshan Xiong from Intel (see zfs_fletcher_intel.c).
+ *
+ * Copyright (C) 2016 Romain Dolbeau.
+ *
+ * Authors:
+ * Romain Dolbeau <romain.dolbeau at atos.net>
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <sys/byteorder.h>
+#include <sys/spa_checksum.h>
+#include <zfs_fletcher.h>
+#include <strings.h>
+
+static void
+fletcher_4_superscalar4_init(fletcher_4_ctx_t *ctx)
+{
+ bzero(ctx->superscalar, 4 * sizeof (zfs_fletcher_superscalar_t));
+}
+
+static void
+fletcher_4_superscalar4_fini(fletcher_4_ctx_t *ctx, zio_cksum_t *zcp)
+{
+ uint64_t A, B, C, D;
+
+ A = ctx->superscalar[0].v[0] + ctx->superscalar[0].v[1] +
+ ctx->superscalar[0].v[2] + ctx->superscalar[0].v[3];
+ B = 0 - ctx->superscalar[0].v[1] - 2 * ctx->superscalar[0].v[2] -
+ 3 * ctx->superscalar[0].v[3] + 4 * ctx->superscalar[1].v[0] +
+ 4 * ctx->superscalar[1].v[1] + 4 * ctx->superscalar[1].v[2] +
+ 4 * ctx->superscalar[1].v[3];
+
+ C = ctx->superscalar[0].v[2] + 3 * ctx->superscalar[0].v[3] -
+ 6 * ctx->superscalar[1].v[0] - 10 * ctx->superscalar[1].v[1] -
+ 14 * ctx->superscalar[1].v[2] - 18 * ctx->superscalar[1].v[3] +
+ 16 * ctx->superscalar[2].v[0] + 16 * ctx->superscalar[2].v[1] +
+ 16 * ctx->superscalar[2].v[2] + 16 * ctx->superscalar[2].v[3];
+
+ D = 0 - ctx->superscalar[0].v[3] + 4 * ctx->superscalar[1].v[0] +
+ 10 * ctx->superscalar[1].v[1] + 20 * ctx->superscalar[1].v[2] +
+ 34 * ctx->superscalar[1].v[3] - 48 * ctx->superscalar[2].v[0] -
+ 64 * ctx->superscalar[2].v[1] - 80 * ctx->superscalar[2].v[2] -
+ 96 * ctx->superscalar[2].v[3] + 64 * ctx->superscalar[3].v[0] +
+ 64 * ctx->superscalar[3].v[1] + 64 * ctx->superscalar[3].v[2] +
+ 64 * ctx->superscalar[3].v[3];
+
+ ZIO_SET_CHECKSUM(zcp, A, B, C, D);
+}
+
+static void
+fletcher_4_superscalar4_native(fletcher_4_ctx_t *ctx,
+ const void *buf, uint64_t size)
+{
+ const uint32_t *ip = buf;
+ const uint32_t *ipend = ip + (size / sizeof (uint32_t));
+ uint64_t a, b, c, d;
+ uint64_t a2, b2, c2, d2;
+ uint64_t a3, b3, c3, d3;
+ uint64_t a4, b4, c4, d4;
+
+ a = ctx->superscalar[0].v[0];
+ b = ctx->superscalar[1].v[0];
+ c = ctx->superscalar[2].v[0];
+ d = ctx->superscalar[3].v[0];
+ a2 = ctx->superscalar[0].v[1];
+ b2 = ctx->superscalar[1].v[1];
+ c2 = ctx->superscalar[2].v[1];
+ d2 = ctx->superscalar[3].v[1];
+ a3 = ctx->superscalar[0].v[2];
+ b3 = ctx->superscalar[1].v[2];
+ c3 = ctx->superscalar[2].v[2];
+ d3 = ctx->superscalar[3].v[2];
+ a4 = ctx->superscalar[0].v[3];
+ b4 = ctx->superscalar[1].v[3];
+ c4 = ctx->superscalar[2].v[3];
+ d4 = ctx->superscalar[3].v[3];
+
+ for (; ip < ipend; ip += 4) {
+ a += ip[0];
+ a2 += ip[1];
+ a3 += ip[2];
+ a4 += ip[3];
+ b += a;
+ b2 += a2;
+ b3 += a3;
+ b4 += a4;
+ c += b;
+ c2 += b2;
+ c3 += b3;
+ c4 += b4;
+ d += c;
+ d2 += c2;
+ d3 += c3;
+ d4 += c4;
+ }
+
+ ctx->superscalar[0].v[0] = a;
+ ctx->superscalar[1].v[0] = b;
+ ctx->superscalar[2].v[0] = c;
+ ctx->superscalar[3].v[0] = d;
+ ctx->superscalar[0].v[1] = a2;
+ ctx->superscalar[1].v[1] = b2;
+ ctx->superscalar[2].v[1] = c2;
+ ctx->superscalar[3].v[1] = d2;
+ ctx->superscalar[0].v[2] = a3;
+ ctx->superscalar[1].v[2] = b3;
+ ctx->superscalar[2].v[2] = c3;
+ ctx->superscalar[3].v[2] = d3;
+ ctx->superscalar[0].v[3] = a4;
+ ctx->superscalar[1].v[3] = b4;
+ ctx->superscalar[2].v[3] = c4;
+ ctx->superscalar[3].v[3] = d4;
+}
+
+static void
+fletcher_4_superscalar4_byteswap(fletcher_4_ctx_t *ctx,
+ const void *buf, uint64_t size)
+{
+ const uint32_t *ip = buf;
+ const uint32_t *ipend = ip + (size / sizeof (uint32_t));
+ uint64_t a, b, c, d;
+ uint64_t a2, b2, c2, d2;
+ uint64_t a3, b3, c3, d3;
+ uint64_t a4, b4, c4, d4;
+
+ a = ctx->superscalar[0].v[0];
+ b = ctx->superscalar[1].v[0];
+ c = ctx->superscalar[2].v[0];
+ d = ctx->superscalar[3].v[0];
+ a2 = ctx->superscalar[0].v[1];
+ b2 = ctx->superscalar[1].v[1];
+ c2 = ctx->superscalar[2].v[1];
+ d2 = ctx->superscalar[3].v[1];
+ a3 = ctx->superscalar[0].v[2];
+ b3 = ctx->superscalar[1].v[2];
+ c3 = ctx->superscalar[2].v[2];
+ d3 = ctx->superscalar[3].v[2];
+ a4 = ctx->superscalar[0].v[3];
+ b4 = ctx->superscalar[1].v[3];
+ c4 = ctx->superscalar[2].v[3];
+ d4 = ctx->superscalar[3].v[3];
+
+ for (; ip < ipend; ip += 4) {
+ a += BSWAP_32(ip[0]);
+ a2 += BSWAP_32(ip[1]);
+ a3 += BSWAP_32(ip[2]);
+ a4 += BSWAP_32(ip[3]);
+ b += a;
+ b2 += a2;
+ b3 += a3;
+ b4 += a4;
+ c += b;
+ c2 += b2;
+ c3 += b3;
+ c4 += b4;
+ d += c;
+ d2 += c2;
+ d3 += c3;
+ d4 += c4;
+ }
+
+ ctx->superscalar[0].v[0] = a;
+ ctx->superscalar[1].v[0] = b;
+ ctx->superscalar[2].v[0] = c;
+ ctx->superscalar[3].v[0] = d;
+ ctx->superscalar[0].v[1] = a2;
+ ctx->superscalar[1].v[1] = b2;
+ ctx->superscalar[2].v[1] = c2;
+ ctx->superscalar[3].v[1] = d2;
+ ctx->superscalar[0].v[2] = a3;
+ ctx->superscalar[1].v[2] = b3;
+ ctx->superscalar[2].v[2] = c3;
+ ctx->superscalar[3].v[2] = d3;
+ ctx->superscalar[0].v[3] = a4;
+ ctx->superscalar[1].v[3] = b4;
+ ctx->superscalar[2].v[3] = c4;
+ ctx->superscalar[3].v[3] = d4;
+}
+
+static boolean_t fletcher_4_superscalar4_valid(void)
+{
+ return (B_TRUE);
+}
+
+const fletcher_4_ops_t fletcher_4_superscalar4_ops = {
+ .init_native = fletcher_4_superscalar4_init,
+ .compute_native = fletcher_4_superscalar4_native,
+ .fini_native = fletcher_4_superscalar4_fini,
+ .init_byteswap = fletcher_4_superscalar4_init,
+ .compute_byteswap = fletcher_4_superscalar4_byteswap,
+ .fini_byteswap = fletcher_4_superscalar4_fini,
+ .valid = fletcher_4_superscalar4_valid,
+ .name = "superscalar4"
+};
diff --git a/zfs/module/zcommon/zfs_namecheck.c b/zfs/module/zcommon/zfs_namecheck.c
index ff724be588cc..e8db93be7bc9 100644
--- a/zfs/module/zcommon/zfs_namecheck.c
+++ b/zfs/module/zcommon/zfs_namecheck.c
@@ -44,6 +44,7 @@
#include <string.h>
#endif
+#include <sys/dsl_dir.h>
#include <sys/param.h>
#include <sys/nvpair.h>
#include "zfs_namecheck.h"
@@ -69,7 +70,7 @@ zfs_component_namecheck(const char *path, namecheck_err_t *why, char *what)
{
const char *loc;
- if (strlen(path) >= MAXNAMELEN) {
+ if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) {
if (why)
*why = NAME_ERR_TOOLONG;
return (-1);
@@ -120,9 +121,9 @@ permset_namecheck(const char *path, namecheck_err_t *why, char *what)
}
/*
- * Dataset names must be of the following form:
+ * Entity names must be of the following form:
*
- * [component][/]*[component][@component]
+ * [component/]*[component][(@|#)component]?
*
* Where each component is made up of alphanumeric characters plus the following
* characters:
@@ -133,34 +134,15 @@ permset_namecheck(const char *path, namecheck_err_t *why, char *what)
* names for temporary clones (for online recv).
*/
int
-dataset_namecheck(const char *path, namecheck_err_t *why, char *what)
+entity_namecheck(const char *path, namecheck_err_t *why, char *what)
{
- const char *loc, *end;
- int found_snapshot;
+ const char *start, *end, *loc;
+ int found_delim;
/*
* Make sure the name is not too long.
- *
- * ZFS_MAXNAMELEN is the maximum dataset length used in the userland
- * which is the same as MAXNAMELEN used in the kernel.
- * If ZFS_MAXNAMELEN value is changed, make sure to cleanup all
- * places using MAXNAMELEN.
- *
- * When HAVE_KOBJ_NAME_LEN is defined the maximum safe kobject name
- * length is 20 bytes. This 20 bytes is broken down as follows to
- * provide a maximum safe <pool>/<dataset>[@snapshot] length of only
- * 18 bytes. To ensure bytes are left for <dataset>[@snapshot] the
- * <pool> portition is futher limited to 9 bytes. For 2.6.27 and
- * newer kernels this limit is set to MAXNAMELEN.
- *
- * <pool>/<dataset> + <partition> + <newline>
- * (18) + (1) + (1)
*/
-#ifdef HAVE_KOBJ_NAME_LEN
- if (strlen(path) > 18) {
-#else
- if (strlen(path) >= MAXNAMELEN) {
-#endif /* HAVE_KOBJ_NAME_LEN */
+ if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) {
if (why)
*why = NAME_ERR_TOOLONG;
return (-1);
@@ -179,12 +161,13 @@ dataset_namecheck(const char *path, namecheck_err_t *why, char *what)
return (-1);
}
- loc = path;
- found_snapshot = 0;
+ start = path;
+ found_delim = 0;
for (;;) {
/* Find the end of this component */
- end = loc;
- while (*end != '/' && *end != '@' && *end != '\0')
+ end = start;
+ while (*end != '/' && *end != '@' && *end != '#' &&
+ *end != '\0')
end++;
if (*end == '\0' && end[-1] == '/') {
@@ -194,25 +177,8 @@ dataset_namecheck(const char *path, namecheck_err_t *why, char *what)
return (-1);
}
- /* Zero-length components are not allowed */
- if (loc == end) {
- if (why) {
- /*
- * Make sure this is really a zero-length
- * component and not a '@@'.
- */
- if (*end == '@' && found_snapshot) {
- *why = NAME_ERR_MULTIPLE_AT;
- } else {
- *why = NAME_ERR_EMPTY_COMPONENT;
- }
- }
-
- return (-1);
- }
-
/* Validate the contents of this component */
- while (loc != end) {
+ for (loc = start; loc != end; loc++) {
if (!valid_char(*loc) && *loc != '%') {
if (why) {
*why = NAME_ERR_INVALCHAR;
@@ -220,43 +186,64 @@ dataset_namecheck(const char *path, namecheck_err_t *why, char *what)
}
return (-1);
}
- loc++;
}
- /* If we've reached the end of the string, we're OK */
- if (*end == '\0')
- return (0);
-
- if (*end == '@') {
- /*
- * If we've found an @ symbol, indicate that we're in
- * the snapshot component, and report a second '@'
- * character as an error.
- */
- if (found_snapshot) {
+ /* Snapshot or bookmark delimiter found */
+ if (*end == '@' || *end == '#') {
+ /* Multiple delimiters are not allowed */
+ if (found_delim != 0) {
if (why)
- *why = NAME_ERR_MULTIPLE_AT;
+ *why = NAME_ERR_MULTIPLE_DELIMITERS;
return (-1);
}
- found_snapshot = 1;
+ found_delim = 1;
}
+ /* Zero-length components are not allowed */
+ if (start == end) {
+ if (why)
+ *why = NAME_ERR_EMPTY_COMPONENT;
+ return (-1);
+ }
+
+ /* If we've reached the end of the string, we're OK */
+ if (*end == '\0')
+ return (0);
+
/*
- * If there is a '/' in a snapshot name
+ * If there is a '/' in a snapshot or bookmark name
* then report an error
*/
- if (*end == '/' && found_snapshot) {
+ if (*end == '/' && found_delim != 0) {
if (why)
*why = NAME_ERR_TRAILING_SLASH;
return (-1);
}
/* Update to the next component */
- loc = end + 1;
+ start = end + 1;
}
}
+/*
+ * A dataset is any entity except a bookmark.
+ */
+int
+dataset_namecheck(const char *path, namecheck_err_t *why, char *what)
+{
+ int ret = entity_namecheck(path, why, what);
+
+ if (ret == 0 && strchr(path, '#') != NULL) {
+ if (why != NULL) {
+ *why = NAME_ERR_INVALCHAR;
+ *what = '#';
+ }
+ return (-1);
+ }
+
+ return (ret);
+}
/*
* mountpoint names must be of the following form:
@@ -289,7 +276,7 @@ mountpoint_namecheck(const char *path, namecheck_err_t *why)
while (*end != '/' && *end != '\0')
end++;
- if (end - start >= MAXNAMELEN) {
+ if (end - start >= ZFS_MAX_DATASET_NAME_LEN) {
if (why)
*why = NAME_ERR_TOOLONG;
return (-1);
@@ -314,27 +301,14 @@ pool_namecheck(const char *pool, namecheck_err_t *why, char *what)
/*
* Make sure the name is not too long.
- *
- * ZPOOL_MAXNAMELEN is the maximum pool length used in the userland
- * which is the same as MAXNAMELEN used in the kernel.
- * If ZPOOL_MAXNAMELEN value is changed, make sure to cleanup all
- * places using MAXNAMELEN.
- *
- * When HAVE_KOBJ_NAME_LEN is defined the maximum safe kobject name
- * length is 20 bytes. This 20 bytes is broken down as follows to
- * provide a maximum safe <pool>/<dataset>[@snapshot] length of only
- * 18 bytes. To ensure bytes are left for <dataset>[@snapshot] the
- * <pool> portition is futher limited to 8 bytes. For 2.6.27 and
- * newer kernels this limit is set to MAXNAMELEN.
- *
- * <pool>/<dataset> + <partition> + <newline>
- * (18) + (1) + (1)
+ * If we're creating a pool with version >= SPA_VERSION_DSL_SCRUB (v11)
+ * we need to account for additional space needed by the origin ds which
+ * will also be snapshotted: "poolname"+"/"+"$ORIGIN"+"@"+"$ORIGIN".
+ * Play it safe and enforce this limit even if the pool version is < 11
+ * so it can be upgraded without issues.
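+ * (Illustratively, assuming ZFS_MAX_DATASET_NAME_LEN is 256 and
+ * ORIGIN_DIR_NAME is the 7-byte "$ORIGIN": 256 - 2 - 14 leaves room
+ * for pool names of up to 239 characters.)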
*/
-#ifdef HAVE_KOBJ_NAME_LEN
- if (strlen(pool) > 8) {
-#else
- if (strlen(pool) >= MAXNAMELEN) {
-#endif /* HAVE_KOBJ_NAME_LEN */
+ if (strlen(pool) >= (ZFS_MAX_DATASET_NAME_LEN - 2 -
+ strlen(ORIGIN_DIR_NAME) * 2)) {
if (why)
*why = NAME_ERR_TOOLONG;
return (-1);
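
To make the new (@|#) grammar concrete, a small illustrative check, not
part of the patch (the helper function is hypothetical), exercising the
routines changed above:

    #include <assert.h>
    #include "zfs_namecheck.h"

    static void
    namecheck_examples(void)
    {
        assert(entity_namecheck("tank/fs", NULL, NULL) == 0);
        assert(entity_namecheck("tank/fs@snap", NULL, NULL) == 0);  /* snapshot */
        assert(entity_namecheck("tank/fs#mark", NULL, NULL) == 0);  /* bookmark */
        assert(entity_namecheck("tank/fs@a@b", NULL, NULL) == -1);  /* NAME_ERR_MULTIPLE_DELIMITERS */
        assert(entity_namecheck("tank//fs", NULL, NULL) == -1);     /* NAME_ERR_EMPTY_COMPONENT */
        assert(dataset_namecheck("tank/fs#mark", NULL, NULL) == -1); /* bookmarks are not datasets */
    }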
diff --git a/zfs/module/zcommon/zfs_prop.c b/zfs/module/zcommon/zfs_prop.c
index aaebab444cfa..93c89e4aa2fd 100644
--- a/zfs/module/zcommon/zfs_prop.c
+++ b/zfs/module/zcommon/zfs_prop.c
@@ -22,6 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+ * Copyright 2016, Joyent, Inc.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -35,6 +36,7 @@
#include "zfs_prop.h"
#include "zfs_deleg.h"
+#include "zfs_fletcher.h"
#if defined(_KERNEL)
#include <sys/systm.h>
@@ -51,7 +53,11 @@ const char *zfs_userquota_prop_prefixes[] = {
"userused@",
"userquota@",
"groupused@",
- "groupquota@"
+ "groupquota@",
+ "userobjused@",
+ "userobjquota@",
+ "groupobjused@",
+ "groupobjquota@"
};
zprop_desc_t *
@@ -69,6 +75,10 @@ zfs_prop_init(void)
{ "fletcher2", ZIO_CHECKSUM_FLETCHER_2 },
{ "fletcher4", ZIO_CHECKSUM_FLETCHER_4 },
{ "sha256", ZIO_CHECKSUM_SHA256 },
+ { "noparity", ZIO_CHECKSUM_NOPARITY },
+ { "sha512", ZIO_CHECKSUM_SHA512 },
+ { "skein", ZIO_CHECKSUM_SKEIN },
+ { "edonr", ZIO_CHECKSUM_EDONR },
{ NULL }
};
@@ -79,6 +89,14 @@ zfs_prop_init(void)
{ "sha256", ZIO_CHECKSUM_SHA256 },
{ "sha256,verify",
ZIO_CHECKSUM_SHA256 | ZIO_CHECKSUM_VERIFY },
+ { "sha512", ZIO_CHECKSUM_SHA512 },
+ { "sha512,verify",
+ ZIO_CHECKSUM_SHA512 | ZIO_CHECKSUM_VERIFY },
+ { "skein", ZIO_CHECKSUM_SKEIN },
+ { "skein,verify",
+ ZIO_CHECKSUM_SKEIN | ZIO_CHECKSUM_VERIFY },
+ { "edonr,verify",
+ ZIO_CHECKSUM_EDONR | ZIO_CHECKSUM_VERIFY },
{ NULL }
};
@@ -126,7 +144,7 @@ zfs_prop_init(void)
{ "noallow", ZFS_ACL_NOALLOW },
{ "restricted", ZFS_ACL_RESTRICTED },
{ "passthrough", ZFS_ACL_PASSTHROUGH },
- { "secure", ZFS_ACL_RESTRICTED }, /* bkwrd compatability */
+ { "secure", ZFS_ACL_RESTRICTED }, /* bkwrd compatibility */
{ "passthrough-x", ZFS_ACL_PASSTHROUGH_X },
{ NULL }
};
@@ -210,12 +228,32 @@ zfs_prop_init(void)
{ NULL }
};
+ static zprop_index_t dnsize_table[] = {
+ { "legacy", ZFS_DNSIZE_LEGACY },
+ { "auto", ZFS_DNSIZE_AUTO },
+ { "1k", ZFS_DNSIZE_1K },
+ { "2k", ZFS_DNSIZE_2K },
+ { "4k", ZFS_DNSIZE_4K },
+ { "8k", ZFS_DNSIZE_8K },
+ { "16k", ZFS_DNSIZE_16K },
+ { NULL }
+ };
+
static zprop_index_t redundant_metadata_table[] = {
{ "all", ZFS_REDUNDANT_METADATA_ALL },
{ "most", ZFS_REDUNDANT_METADATA_MOST },
{ NULL }
};
+ static zprop_index_t volmode_table[] = {
+ { "default", ZFS_VOLMODE_DEFAULT },
+ { "full", ZFS_VOLMODE_GEOM },
+ { "geom", ZFS_VOLMODE_GEOM },
+ { "dev", ZFS_VOLMODE_DEV },
+ { "none", ZFS_VOLMODE_NONE },
+ { NULL }
+ };
+
/* inherit index properties */
zprop_register_index(ZFS_PROP_REDUNDANT_METADATA, "redundant_metadata",
ZFS_REDUNDANT_METADATA_ALL,
@@ -229,12 +267,12 @@ zfs_prop_init(void)
zprop_register_index(ZFS_PROP_CHECKSUM, "checksum",
ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM |
ZFS_TYPE_VOLUME,
- "on | off | fletcher2 | fletcher4 | sha256", "CHECKSUM",
- checksum_table);
+ "on | off | fletcher2 | fletcher4 | sha256 | sha512 | "
+ "skein | edonr", "CHECKSUM", checksum_table);
zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
- "on | off | verify | sha256[,verify]", "DEDUP",
- dedup_table);
+ "on | off | verify | sha256[,verify], sha512[,verify], "
+ "skein[,verify], edonr,verify", "DEDUP", dedup_table);
zprop_register_index(ZFS_PROP_COMPRESSION, "compression",
ZIO_COMPRESS_DEFAULT, PROP_INHERIT,
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
@@ -270,6 +308,13 @@ zfs_prop_init(void)
zprop_register_index(ZFS_PROP_XATTR, "xattr", ZFS_XATTR_DIR,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
"on | off | dir | sa", "XATTR", xattr_table);
+ zprop_register_index(ZFS_PROP_DNODESIZE, "dnodesize",
+ ZFS_DNSIZE_LEGACY, PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
+ "legacy | auto | 1k | 2k | 4k | 8k | 16k", "DNSIZE", dnsize_table);
+ zprop_register_index(ZFS_PROP_VOLMODE, "volmode",
+ ZFS_VOLMODE_DEFAULT, PROP_INHERIT,
+ ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+ "default | full | geom | dev | none", "VOLMODE", volmode_table);
/* inherit index (boolean) properties */
zprop_register_index(ZFS_PROP_ATIME, "atime", 1, PROP_INHERIT,
@@ -360,6 +405,10 @@ zfs_prop_init(void)
zprop_register_string(ZFS_PROP_SELINUX_ROOTCONTEXT, "rootcontext",
"none", PROP_DEFAULT, ZFS_TYPE_DATASET, "<selinux rootcontext>",
"ROOTCONTEXT");
+ zprop_register_string(ZFS_PROP_RECEIVE_RESUME_TOKEN,
+ "receive_resume_token",
+ NULL, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+ "<string token>", "RESUMETOK");
/* readonly number properties */
zprop_register_number(ZFS_PROP_USED, "used", 0, PROP_READONLY,
@@ -397,6 +446,16 @@ zfs_prop_init(void)
PROP_READONLY, ZFS_TYPE_DATASET, "<size>", "LUSED");
zprop_register_number(ZFS_PROP_LOGICALREFERENCED, "logicalreferenced",
0, PROP_READONLY, ZFS_TYPE_DATASET, "<size>", "LREFER");
+ zprop_register_number(ZFS_PROP_FILESYSTEM_COUNT, "filesystem_count",
+ UINT64_MAX, PROP_READONLY, ZFS_TYPE_FILESYSTEM,
+ "<count>", "FSCOUNT");
+ zprop_register_number(ZFS_PROP_SNAPSHOT_COUNT, "snapshot_count",
+ UINT64_MAX, PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
+ "<count>", "SSCOUNT");
+ zprop_register_number(ZFS_PROP_GUID, "guid", 0, PROP_READONLY,
+ ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<uint64>", "GUID");
+ zprop_register_number(ZFS_PROP_CREATETXG, "createtxg", 0, PROP_READONLY,
+ ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "<uint64>", "CREATETXG");
/* default number properties */
zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT,
@@ -417,12 +476,6 @@ zfs_prop_init(void)
zprop_register_number(ZFS_PROP_SNAPSHOT_LIMIT, "snapshot_limit",
UINT64_MAX, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"<count> | none", "SSLIMIT");
- zprop_register_number(ZFS_PROP_FILESYSTEM_COUNT, "filesystem_count",
- UINT64_MAX, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM,
- "<count>", "FSCOUNT");
- zprop_register_number(ZFS_PROP_SNAPSHOT_COUNT, "snapshot_count",
- UINT64_MAX, PROP_DEFAULT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
- "<count>", "SSCOUNT");
/* inherit number properties */
zprop_register_number(ZFS_PROP_RECORDSIZE, "recordsize",
@@ -430,8 +483,6 @@ zfs_prop_init(void)
ZFS_TYPE_FILESYSTEM, "512 to 1M, power of 2", "RECSIZE");
/* hidden properties */
- zprop_register_hidden(ZFS_PROP_CREATETXG, "createtxg", PROP_TYPE_NUMBER,
- PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "CREATETXG");
zprop_register_hidden(ZFS_PROP_NUMCLONES, "numclones", PROP_TYPE_NUMBER,
PROP_READONLY, ZFS_TYPE_SNAPSHOT, "NUMCLONES");
zprop_register_hidden(ZFS_PROP_NAME, "name", PROP_TYPE_STRING,
@@ -441,8 +492,6 @@ zfs_prop_init(void)
zprop_register_hidden(ZFS_PROP_STMF_SHAREINFO, "stmf_sbd_lu",
PROP_TYPE_STRING, PROP_INHERIT, ZFS_TYPE_VOLUME,
"STMF_SBD_LU");
- zprop_register_hidden(ZFS_PROP_GUID, "guid", PROP_TYPE_NUMBER,
- PROP_READONLY, ZFS_TYPE_DATASET | ZFS_TYPE_BOOKMARK, "GUID");
zprop_register_hidden(ZFS_PROP_USERACCOUNTING, "useraccounting",
PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET,
"USERACCOUNTING");
@@ -452,6 +501,8 @@ zfs_prop_init(void)
PROP_READONLY, ZFS_TYPE_DATASET, "OBJSETID");
zprop_register_hidden(ZFS_PROP_INCONSISTENT, "inconsistent",
PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_DATASET, "INCONSISTENT");
+ zprop_register_hidden(ZFS_PROP_PREV_SNAP, "prevsnap", PROP_TYPE_STRING,
+ PROP_READONLY, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "PREVSNAP");
/*
* Property to be removed once libbe is integrated
@@ -693,12 +744,14 @@ zfs_prop_align_right(zfs_prop_t prop)
static int __init
zcommon_init(void)
{
+ fletcher_4_init();
return (0);
}
static void __exit
zcommon_fini(void)
{
+ fletcher_4_fini();
}
module_init(zcommon_init);
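
For context on the zcommon_init() hook above: fletcher_4_init() is what
ties the new checksum files together. At module load it benchmarks each
compiled-in fletcher_4_ops_t and selects the fastest for the running CPU;
as I read this 0.7 code base, the results surface in the fletcher_4_bench
kstat under /proc/spl/kstat/zfs/ and the selection can be pinned with the
zfs_fletcher_4_impl module parameter (e.g. scalar, superscalar4, avx2).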
diff --git a/zfs/module/zcommon/zfs_uio.c b/zfs/module/zcommon/zfs_uio.c
index f78db68e4ea6..7b4175bbeeeb 100644
--- a/zfs/module/zcommon/zfs_uio.c
+++ b/zfs/module/zcommon/zfs_uio.c
@@ -164,7 +164,7 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
caddr_t p;
uint8_t tmp;
int iovcnt;
- size_t skip = uio->uio_skip;
+ size_t skip;
/* no need to fault in kernel pages */
switch (uio->uio_segflg) {
@@ -180,16 +180,20 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
iov = uio->uio_iov;
iovcnt = uio->uio_iovcnt;
+ skip = uio->uio_skip;
- while ((n > 0) && (iovcnt > 0)) {
+ for (; n > 0 && iovcnt > 0; iov++, iovcnt--, skip = 0) {
cnt = MIN(iov->iov_len - skip, n);
+ /* empty iov */
+ if (cnt == 0)
+ continue;
n -= cnt;
/*
* touch each page in this segment.
*/
p = iov->iov_base + skip;
while (cnt) {
- if (fuword8((uint8_t *) p, &tmp))
+ if (fuword8((uint8_t *)p, &tmp))
return;
incr = MIN(cnt, PAGESIZE);
p += incr;
@@ -199,11 +203,8 @@ uio_prefaultpages(ssize_t n, struct uio *uio)
* touch the last byte in case it straddles a page.
*/
p--;
- if (fuword8((uint8_t *) p, &tmp))
+ if (fuword8((uint8_t *)p, &tmp))
return;
- iov++;
- iovcnt--;
- skip = 0;
}
}
EXPORT_SYMBOL(uio_prefaultpages);
diff --git a/zfs/module/zcommon/zpool_prop.c b/zfs/module/zcommon/zpool_prop.c
index 910c56dcc2a9..fd21f31176a5 100644
--- a/zfs/module/zcommon/zpool_prop.c
+++ b/zfs/module/zcommon/zpool_prop.c
@@ -99,15 +99,13 @@ zpool_prop_init(void)
PROP_READONLY, ZFS_TYPE_POOL, "<1.00x or higher if deduped>",
"DEDUP");
- /* readonly onetime number properties */
- zprop_register_number(ZPOOL_PROP_ASHIFT, "ashift", 0, PROP_ONETIME,
- ZFS_TYPE_POOL, "<ashift, 9-13, or 0=default>", "ASHIFT");
-
/* default number properties */
zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION,
PROP_DEFAULT, ZFS_TYPE_POOL, "<version>", "VERSION");
zprop_register_number(ZPOOL_PROP_DEDUPDITTO, "dedupditto", 0,
PROP_DEFAULT, ZFS_TYPE_POOL, "<threshold (min 100)>", "DEDUPDITTO");
+ zprop_register_number(ZPOOL_PROP_ASHIFT, "ashift", 0, PROP_DEFAULT,
+ ZFS_TYPE_POOL, "<ashift, 9-16, or 0=default>", "ASHIFT");
/* default index (boolean) properties */
zprop_register_index(ZPOOL_PROP_DELEGATION, "delegation", 1,
@@ -122,6 +120,9 @@ zpool_prop_init(void)
PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "EXPAND", boolean_table);
zprop_register_index(ZPOOL_PROP_READONLY, "readonly", 0,
PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "RDONLY", boolean_table);
+ zprop_register_index(ZPOOL_PROP_MULTIHOST, "multihost", 0,
+ PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "MULTIHOST",
+ boolean_table);
/* default index properties */
zprop_register_index(ZPOOL_PROP_FAILUREMODE, "failmode",
@@ -135,6 +136,8 @@ zpool_prop_init(void)
PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_POOL, "MAXBLOCKSIZE");
zprop_register_hidden(ZPOOL_PROP_TNAME, "tname", PROP_TYPE_STRING,
PROP_ONETIME, ZFS_TYPE_POOL, "TNAME");
+ zprop_register_hidden(ZPOOL_PROP_MAXDNODESIZE, "maxdnodesize",
+ PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_POOL, "MAXDNODESIZE");
}
/*
diff --git a/zfs/module/zfs/Makefile.in b/zfs/module/zfs/Makefile.in
index 55f8cef16b6d..d6336f3142ca 100644
--- a/zfs/module/zfs/Makefile.in
+++ b/zfs/module/zfs/Makefile.in
@@ -7,6 +7,7 @@ EXTRA_CFLAGS = $(ZFS_MODULE_CFLAGS) @KERNELCPPFLAGS@
obj-$(CONFIG_ZFS) := $(MODULE).o
+$(MODULE)-objs += abd.o
$(MODULE)-objs += arc.o
$(MODULE)-objs += blkptr.o
$(MODULE)-objs += bplist.o
@@ -14,6 +15,7 @@ $(MODULE)-objs += bpobj.o
$(MODULE)-objs += dbuf.o
$(MODULE)-objs += dbuf_stats.o
$(MODULE)-objs += bptree.o
+$(MODULE)-objs += bqueue.o
$(MODULE)-objs += ddt.o
$(MODULE)-objs += ddt_zap.o
$(MODULE)-objs += dmu.o
@@ -35,17 +37,22 @@ $(MODULE)-objs += dsl_pool.o
$(MODULE)-objs += dsl_prop.o
$(MODULE)-objs += dsl_scan.o
$(MODULE)-objs += dsl_synctask.o
+$(MODULE)-objs += edonr_zfs.o
$(MODULE)-objs += fm.o
$(MODULE)-objs += gzip.o
$(MODULE)-objs += lzjb.o
$(MODULE)-objs += lz4.o
$(MODULE)-objs += metaslab.o
+$(MODULE)-objs += mmp.o
$(MODULE)-objs += multilist.o
+$(MODULE)-objs += pathname.o
+$(MODULE)-objs += policy.o
$(MODULE)-objs += range_tree.o
$(MODULE)-objs += refcount.o
$(MODULE)-objs += rrwlock.o
$(MODULE)-objs += sa.o
$(MODULE)-objs += sha256.o
+$(MODULE)-objs += skein_zfs.o
$(MODULE)-objs += spa.o
$(MODULE)-objs += spa_boot.o
$(MODULE)-objs += spa_config.o
@@ -68,6 +75,8 @@ $(MODULE)-objs += vdev_mirror.o
$(MODULE)-objs += vdev_missing.o
$(MODULE)-objs += vdev_queue.o
$(MODULE)-objs += vdev_raidz.o
+$(MODULE)-objs += vdev_raidz_math.o
+$(MODULE)-objs += vdev_raidz_math_scalar.o
$(MODULE)-objs += vdev_root.o
$(MODULE)-objs += zap.o
$(MODULE)-objs += zap_leaf.o
@@ -106,3 +115,13 @@ $(MODULE)-objs += zrlock.o
$(MODULE)-objs += zvol.o
$(MODULE)-objs += dsl_destroy.o
$(MODULE)-objs += dsl_userhold.o
+$(MODULE)-objs += qat_compress.o
+
+$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_sse2.o
+$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_ssse3.o
+$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_avx2.o
+$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_avx512f.o
+$(MODULE)-$(CONFIG_X86) += vdev_raidz_math_avx512bw.o
+
+$(MODULE)-$(CONFIG_ARM64) += vdev_raidz_math_aarch64_neon.o
+$(MODULE)-$(CONFIG_ARM64) += vdev_raidz_math_aarch64_neonx2.o
diff --git a/zfs/module/zfs/abd.c b/zfs/module/zfs/abd.c
new file mode 100644
index 000000000000..765ac7fb72e6
--- /dev/null
+++ b/zfs/module/zfs/abd.c
@@ -0,0 +1,1543 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2014 by Chunwei Chen. All rights reserved.
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ */
+
+/*
+ * ARC buffer data (ABD).
+ *
+ * ABDs are an abstract data structure for the ARC which can use two
+ * different ways of storing the underlying data:
+ *
+ * (a) Linear buffer. In this case, all the data in the ABD is stored in one
+ * contiguous buffer in memory (from a zio_[data_]buf_* kmem cache).
+ *
+ * +-------------------+
+ * | ABD (linear) |
+ * | abd_flags = ... |
+ * | abd_size = ... | +--------------------------------+
+ * | abd_buf ------------->| raw buffer of size abd_size |
+ * +-------------------+ +--------------------------------+
+ * no abd_chunks
+ *
+ * (b) Scattered buffer. In this case, the data in the ABD is split into
+ * equal-sized chunks (from the abd_chunk_cache kmem_cache), with pointers
+ * to the chunks recorded in an array at the end of the ABD structure.
+ *
+ * +-------------------+
+ * | ABD (scattered) |
+ * | abd_flags = ... |
+ * | abd_size = ... |
+ * | abd_offset = 0 | +-----------+
+ * | abd_chunks[0] ----------------------------->| chunk 0 |
+ * | abd_chunks[1] ---------------------+ +-----------+
+ * | ... | | +-----------+
+ * | abd_chunks[N-1] ---------+ +------->| chunk 1 |
+ * +-------------------+ | +-----------+
+ * | ...
+ * | +-----------+
+ * +----------------->| chunk N-1 |
+ * +-----------+
+ *
+ * Linear buffers act exactly like normal buffers and are always mapped into the
+ * kernel's virtual memory space, while scattered ABD data chunks are allocated
+ * as physical pages and then mapped in only while they are actually being
+ * accessed through one of the abd_* library functions. Using scattered ABDs
+ * provides several benefits:
+ *
+ * (1) They avoid use of kmem_*, preventing performance problems where running
+ * kmem_reap on very large memory systems never finishes and causes
+ * constant TLB shootdowns.
+ *
+ * (2) Fragmentation is less of an issue since when we are at the limit of
+ * allocatable space, we won't have to search around for a long free
+ * hole in the VA space for large ARC allocations. Each chunk is mapped in
+ * individually, so even if we weren't using segkpm (see next point) we
+ * wouldn't need to worry about finding a contiguous address range.
+ *
+ * (3) Use of segkpm will avoid the need for map / unmap / TLB shootdown costs
+ * on each ABD access. (If segkpm isn't available then we use all linear
+ * ABDs to avoid this penalty.) See seg_kpm.c for more details.
+ *
+ * It is possible to make all ABDs linear by setting zfs_abd_scatter_enabled to
+ * B_FALSE. However, it is not possible to use scattered ABDs if segkpm is not
+ * available, which is the case on all 32-bit systems and any 64-bit systems
+ * where kpm_enable is turned off.
+ *
+ * In addition to directly allocating a linear or scattered ABD, it is also
+ * possible to create an ABD by requesting the "sub-ABD" starting at an offset
+ * within an existing ABD. In linear buffers this is simple (set abd_buf of
+ * the new ABD to the starting point within the original raw buffer), but
+ * scattered ABDs are a little more complex. The new ABD makes a copy of the
+ * relevant abd_chunks pointers (but not the underlying data). However, to
+ * provide arbitrary rather than only chunk-aligned starting offsets, it also
+ * tracks an abd_offset field which represents the starting point of the data
+ * within the first chunk in abd_chunks. For both linear and scattered ABDs,
+ * creating an offset ABD marks the original ABD as the offset's parent, and the
+ * original ABD's abd_children refcount is incremented. This data allows us to
+ * ensure the root ABD isn't deleted before its children.
+ *
+ * Most consumers should never need to know what type of ABD they're using --
+ * the ABD public API ensures that it's possible to transparently switch from
+ * using a linear ABD to a scattered one when doing so would be beneficial.
+ *
+ * If you need to use the data within an ABD directly, if you know it's linear
+ * (because you allocated it) you can use abd_to_buf() to access the underlying
+ * raw buffer. Otherwise, you should use one of the abd_borrow_buf* functions
+ * which will allocate a raw buffer if necessary. Use the abd_return_buf*
+ * functions to return any raw buffers that are no longer necessary when you're
+ * done using them.
+ *
+ * There are a variety of ABD APIs that implement basic buffer operations:
+ * compare, copy, read, write, and fill with zeroes. If you need a custom
+ * function which progressively accesses the whole ABD, use the abd_iterate_*
+ * functions.
+ */
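+
+/*
+ * Hedged usage sketch (illustrative, not part of the implementation):
+ * a consumer that needs a flat view of a possibly-scattered ABD would
+ * do roughly:
+ *
+ * abd_t *abd = abd_alloc(size, B_FALSE);
+ * void *buf = abd_borrow_buf_copy(abd, size);
+ * ... use buf as a contiguous buffer ...
+ * abd_return_buf_copy(abd, buf, size);
+ * abd_free(abd);
+ */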
+
+#include <sys/abd.h>
+#include <sys/param.h>
+#include <sys/zio.h>
+#include <sys/zfs_context.h>
+#include <sys/zfs_znode.h>
+#ifdef _KERNEL
+#include <linux/scatterlist.h>
+#include <linux/kmap_compat.h>
+#else
+#define MAX_ORDER 1
+#endif
+
+typedef struct abd_stats {
+ kstat_named_t abdstat_struct_size;
+ kstat_named_t abdstat_linear_cnt;
+ kstat_named_t abdstat_linear_data_size;
+ kstat_named_t abdstat_scatter_cnt;
+ kstat_named_t abdstat_scatter_data_size;
+ kstat_named_t abdstat_scatter_chunk_waste;
+ kstat_named_t abdstat_scatter_orders[MAX_ORDER];
+ kstat_named_t abdstat_scatter_page_multi_chunk;
+ kstat_named_t abdstat_scatter_page_multi_zone;
+ kstat_named_t abdstat_scatter_page_alloc_retry;
+ kstat_named_t abdstat_scatter_sg_table_retry;
+} abd_stats_t;
+
+static abd_stats_t abd_stats = {
+ /* Amount of memory occupied by all of the abd_t struct allocations */
+ { "struct_size", KSTAT_DATA_UINT64 },
+ /*
+ * The number of linear ABDs which are currently allocated, excluding
+ * ABDs which don't own their data (for instance the ones which were
+ * allocated through abd_get_offset() and abd_get_from_buf()). If an
+ * ABD takes ownership of its buf then it will become tracked.
+ */
+ { "linear_cnt", KSTAT_DATA_UINT64 },
+ /* Amount of data stored in all linear ABDs tracked by linear_cnt */
+ { "linear_data_size", KSTAT_DATA_UINT64 },
+ /*
+ * The number of scatter ABDs which are currently allocated, excluding
+ * ABDs which don't own their data (for instance the ones which were
+ * allocated through abd_get_offset()).
+ */
+ { "scatter_cnt", KSTAT_DATA_UINT64 },
+ /* Amount of data stored in all scatter ABDs tracked by scatter_cnt */
+ { "scatter_data_size", KSTAT_DATA_UINT64 },
+ /*
+ * The amount of space wasted at the end of the last chunk across all
+ * scatter ABDs tracked by scatter_cnt.
+ */
+ { "scatter_chunk_waste", KSTAT_DATA_UINT64 },
+ /*
+ * The number of compound allocations of a given order. These
+ * allocations are spread over all currently allocated ABDs, and
+ * act as a measure of memory fragmentation.
+ */
+ { { "scatter_order_N", KSTAT_DATA_UINT64 } },
+ /*
+ * The number of scatter ABDs which contain multiple chunks.
+ * ABDs are preferentially allocated from the minimum number of
+ * contiguous multi-page chunks; a single chunk is optimal.
+ */
+ { "scatter_page_multi_chunk", KSTAT_DATA_UINT64 },
+ /*
+ * The number of scatter ABDs which are split across memory zones.
+ * ABDs are preferentially allocated using pages from a single zone.
+ */
+ { "scatter_page_multi_zone", KSTAT_DATA_UINT64 },
+ /*
+ * The total number of retries encountered when attempting to
+ * allocate the pages to populate the scatter ABD.
+ */
+ { "scatter_page_alloc_retry", KSTAT_DATA_UINT64 },
+ /*
+ * The total number of retries encountered when attempting to
+ * allocate the sg table for an ABD.
+ */
+ { "scatter_sg_table_retry", KSTAT_DATA_UINT64 },
+};
+
+#define ABDSTAT(stat) (abd_stats.stat.value.ui64)
+#define ABDSTAT_INCR(stat, val) \
+ atomic_add_64(&abd_stats.stat.value.ui64, (val))
+#define ABDSTAT_BUMP(stat) ABDSTAT_INCR(stat, 1)
+#define ABDSTAT_BUMPDOWN(stat) ABDSTAT_INCR(stat, -1)
+
+#define ABD_SCATTER(abd) (abd->abd_u.abd_scatter)
+#define ABD_BUF(abd) (abd->abd_u.abd_linear.abd_buf)
+#define abd_for_each_sg(abd, sg, n, i) \
+ for_each_sg(ABD_SCATTER(abd).abd_sgl, sg, n, i)
+
+/* see block comment above for description */
+int zfs_abd_scatter_enabled = B_TRUE;
+unsigned zfs_abd_scatter_max_order = MAX_ORDER - 1;
+
+static kmem_cache_t *abd_cache = NULL;
+static kstat_t *abd_ksp;
+
+static inline size_t
+abd_chunkcnt_for_bytes(size_t size)
+{
+ return (P2ROUNDUP(size, PAGESIZE) / PAGESIZE);
+}
+
+#ifdef _KERNEL
+#ifndef CONFIG_HIGHMEM
+
+#ifndef __GFP_RECLAIM
+#define __GFP_RECLAIM __GFP_WAIT
+#endif
+
+static unsigned long
+abd_alloc_chunk(int nid, gfp_t gfp, unsigned int order)
+{
+ struct page *page;
+
+ page = alloc_pages_node(nid, gfp, order);
+ if (!page)
+ return (0);
+
+ return ((unsigned long) page_address(page));
+}
+
+/*
+ * The goal is to minimize fragmentation by preferentially populating ABDs
+ * with higher order compound pages from a single zone. Allocation size is
+ * progressively decreased until it can be satisfied without performing
+ * reclaim or compaction. When necessary this function will degenerate to
+ * allocating individual pages and allowing reclaim to satisfy allocations.
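+ *
+ * For example (illustrative numbers): a 128 KiB request on a system
+ * with 4 KiB pages needs 32 pages, so the first attempt asks for one
+ * order-5 (32-page) compound chunk and steps down through orders
+ * 4, 3, ... 0 as allocations fail.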
+ */
+static void
+abd_alloc_pages(abd_t *abd, size_t size)
+{
+ struct list_head pages;
+ struct sg_table table;
+ struct scatterlist *sg;
+ struct page *page, *tmp_page;
+ gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
+ gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM;
+ int max_order = MIN(zfs_abd_scatter_max_order, MAX_ORDER - 1);
+ int nr_pages = abd_chunkcnt_for_bytes(size);
+ int chunks = 0, zones = 0;
+ size_t remaining_size;
+ int nid = NUMA_NO_NODE;
+ int alloc_pages = 0;
+ int order;
+
+ INIT_LIST_HEAD(&pages);
+
+ while (alloc_pages < nr_pages) {
+ unsigned long paddr;
+ unsigned chunk_pages;
+
+ order = MIN(highbit64(nr_pages - alloc_pages) - 1, max_order);
+ chunk_pages = (1U << order);
+
+ paddr = abd_alloc_chunk(nid, order ? gfp_comp : gfp, order);
+ if (paddr == 0) {
+ if (order == 0) {
+ ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
+ schedule_timeout_interruptible(1);
+ } else {
+ max_order = MAX(0, order - 1);
+ }
+ continue;
+ }
+
+ page = virt_to_page(paddr);
+ list_add_tail(&page->lru, &pages);
+
+ if ((nid != NUMA_NO_NODE) && (page_to_nid(page) != nid))
+ zones++;
+
+ nid = page_to_nid(page);
+ ABDSTAT_BUMP(abdstat_scatter_orders[order]);
+ chunks++;
+ alloc_pages += chunk_pages;
+ }
+
+ ASSERT3S(alloc_pages, ==, nr_pages);
+
+ while (sg_alloc_table(&table, chunks, gfp)) {
+ ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
+ schedule_timeout_interruptible(1);
+ }
+
+ sg = table.sgl;
+ remaining_size = size;
+ list_for_each_entry_safe(page, tmp_page, &pages, lru) {
+ size_t sg_size = MIN(PAGESIZE << compound_order(page),
+ remaining_size);
+ sg_set_page(sg, page, sg_size, 0);
+ remaining_size -= sg_size;
+
+ sg = sg_next(sg);
+ list_del(&page->lru);
+ }
+
+ if (chunks > 1) {
+ ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
+ abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;
+
+ if (zones) {
+ ABDSTAT_BUMP(abdstat_scatter_page_multi_zone);
+ abd->abd_flags |= ABD_FLAG_MULTI_ZONE;
+ }
+ }
+
+ ABD_SCATTER(abd).abd_sgl = table.sgl;
+ ABD_SCATTER(abd).abd_nents = table.nents;
+}
+#else
+/*
+ * Allocate N individual pages to construct a scatter ABD. This function
+ * makes no attempt to request contiguous pages and requires the minimal
+ * number of kernel interfaces. It's designed for maximum compatibility.
+ */
+static void
+abd_alloc_pages(abd_t *abd, size_t size)
+{
+ struct scatterlist *sg;
+ struct sg_table table;
+ struct page *page;
+ gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
+ int nr_pages = abd_chunkcnt_for_bytes(size);
+ int i;
+
+ while (sg_alloc_table(&table, nr_pages, gfp)) {
+ ABDSTAT_BUMP(abdstat_scatter_sg_table_retry);
+ schedule_timeout_interruptible(1);
+ }
+
+ ASSERT3U(table.nents, ==, nr_pages);
+ ABD_SCATTER(abd).abd_sgl = table.sgl;
+ ABD_SCATTER(abd).abd_nents = nr_pages;
+
+ abd_for_each_sg(abd, sg, nr_pages, i) {
+ while ((page = __page_cache_alloc(gfp)) == NULL) {
+ ABDSTAT_BUMP(abdstat_scatter_page_alloc_retry);
+ schedule_timeout_interruptible(1);
+ }
+
+ ABDSTAT_BUMP(abdstat_scatter_orders[0]);
+ sg_set_page(sg, page, PAGESIZE, 0);
+ }
+
+ if (nr_pages > 1) {
+ ABDSTAT_BUMP(abdstat_scatter_page_multi_chunk);
+ abd->abd_flags |= ABD_FLAG_MULTI_CHUNK;
+ }
+}
+#endif /* !CONFIG_HIGHMEM */
+
+static void
+abd_free_pages(abd_t *abd)
+{
+ struct scatterlist *sg;
+ struct sg_table table;
+ struct page *page;
+ int nr_pages = ABD_SCATTER(abd).abd_nents;
+ int order, i;
+
+ if (abd->abd_flags & ABD_FLAG_MULTI_ZONE)
+ ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_zone);
+
+ if (abd->abd_flags & ABD_FLAG_MULTI_CHUNK)
+ ABDSTAT_BUMPDOWN(abdstat_scatter_page_multi_chunk);
+
+ abd_for_each_sg(abd, sg, nr_pages, i) {
+ page = sg_page(sg);
+ order = compound_order(page);
+ __free_pages(page, order);
+ ASSERT3U(sg->length, <=, PAGE_SIZE << order);
+ ABDSTAT_BUMPDOWN(abdstat_scatter_orders[order]);
+ }
+
+ table.sgl = ABD_SCATTER(abd).abd_sgl;
+ table.nents = table.orig_nents = nr_pages;
+ sg_free_table(&table);
+}
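+
+/*
+ * Note (illustrative): a temporary sg_table is reassembled on the stack
+ * above because only the scatterlist pointer and entry count were kept
+ * in the ABD; sg_free_table() needs a table in order to release the
+ * chain.
+ */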
+
+#else /* _KERNEL */
+
+#ifndef PAGE_SHIFT
+#define PAGE_SHIFT (highbit64(PAGESIZE)-1)
+#endif
+
+struct page;
+
+#define kpm_enable 1
+#define abd_alloc_chunk(o) \
+ ((struct page *)umem_alloc_aligned(PAGESIZE << (o), 64, KM_SLEEP))
+#define abd_free_chunk(chunk, o) umem_free(chunk, PAGESIZE << (o))
+#define zfs_kmap_atomic(chunk, km) ((void *)chunk)
+#define zfs_kunmap_atomic(addr, km) do { (void)(addr); } while (0)
+#define local_irq_save(flags) do { (void)(flags); } while (0)
+#define local_irq_restore(flags) do { (void)(flags); } while (0)
+#define nth_page(pg, i) \
+ ((struct page *)((void *)(pg) + (i) * PAGESIZE))
+
+struct scatterlist {
+ struct page *page;
+ int length;
+ int end;
+};
+
+static void
+sg_init_table(struct scatterlist *sg, int nr)
+{
+ memset(sg, 0, nr * sizeof (struct scatterlist));
+ sg[nr - 1].end = 1;
+}
+
+#define for_each_sg(sgl, sg, nr, i) \
+ for ((i) = 0, (sg) = (sgl); (i) < (nr); (i)++, (sg) = sg_next(sg))
+
+static inline void
+sg_set_page(struct scatterlist *sg, struct page *page, unsigned int len,
+ unsigned int offset)
+{
+ /* currently we don't use offset */
+ ASSERT(offset == 0);
+ sg->page = page;
+ sg->length = len;
+}
+
+static inline struct page *
+sg_page(struct scatterlist *sg)
+{
+ return (sg->page);
+}
+
+static inline struct scatterlist *
+sg_next(struct scatterlist *sg)
+{
+ if (sg->end)
+ return (NULL);
+
+ return (sg + 1);
+}
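+
+/*
+ * With the shims above, user-space iteration mirrors the kernel API.
+ * A hypothetical sketch ("pages" is an illustrative array, not part of
+ * this file):
+ *
+ *	struct scatterlist sgl[4];
+ *	struct scatterlist *sg;
+ *	int i;
+ *
+ *	sg_init_table(sgl, 4);
+ *	for_each_sg(sgl, sg, 4, i)
+ *		sg_set_page(sg, pages[i], PAGESIZE, 0);
+ */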
+
+static void
+abd_alloc_pages(abd_t *abd, size_t size)
+{
+ unsigned nr_pages = abd_chunkcnt_for_bytes(size);
+ struct scatterlist *sg;
+ int i;
+
+ ABD_SCATTER(abd).abd_sgl = vmem_alloc(nr_pages *
+ sizeof (struct scatterlist), KM_SLEEP);
+ sg_init_table(ABD_SCATTER(abd).abd_sgl, nr_pages);
+
+ abd_for_each_sg(abd, sg, nr_pages, i) {
+ struct page *p = abd_alloc_chunk(0);
+ sg_set_page(sg, p, PAGESIZE, 0);
+ }
+ ABD_SCATTER(abd).abd_nents = nr_pages;
+}
+
+static void
+abd_free_pages(abd_t *abd)
+{
+ int i, n = ABD_SCATTER(abd).abd_nents;
+ struct scatterlist *sg;
+ int j;
+
+ abd_for_each_sg(abd, sg, n, i) {
+ for (j = 0; j < sg->length; j += PAGESIZE) {
+ struct page *p = nth_page(sg_page(sg), j >> PAGE_SHIFT);
+ abd_free_chunk(p, 0);
+ }
+ }
+
+ vmem_free(ABD_SCATTER(abd).abd_sgl, n * sizeof (struct scatterlist));
+}
+
+#endif /* _KERNEL */
+
+void
+abd_init(void)
+{
+ int i;
+
+ abd_cache = kmem_cache_create("abd_t", sizeof (abd_t),
+ 0, NULL, NULL, NULL, NULL, NULL, 0);
+
+ abd_ksp = kstat_create("zfs", 0, "abdstats", "misc", KSTAT_TYPE_NAMED,
+ sizeof (abd_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
+ if (abd_ksp != NULL) {
+ abd_ksp->ks_data = &abd_stats;
+ kstat_install(abd_ksp);
+
+ for (i = 0; i < MAX_ORDER; i++) {
+ snprintf(abd_stats.abdstat_scatter_orders[i].name,
+ KSTAT_STRLEN, "scatter_order_%d", i);
+ abd_stats.abdstat_scatter_orders[i].data_type =
+ KSTAT_DATA_UINT64;
+ }
+ }
+}
+
+void
+abd_fini(void)
+{
+ if (abd_ksp != NULL) {
+ kstat_delete(abd_ksp);
+ abd_ksp = NULL;
+ }
+
+ if (abd_cache) {
+ kmem_cache_destroy(abd_cache);
+ abd_cache = NULL;
+ }
+}
+
+static inline void
+abd_verify(abd_t *abd)
+{
+ ASSERT3U(abd->abd_size, >, 0);
+ ASSERT3U(abd->abd_size, <=, SPA_MAXBLOCKSIZE);
+ ASSERT3U(abd->abd_flags, ==, abd->abd_flags & (ABD_FLAG_LINEAR |
+ ABD_FLAG_OWNER | ABD_FLAG_META | ABD_FLAG_MULTI_ZONE |
+ ABD_FLAG_MULTI_CHUNK));
+ IMPLY(abd->abd_parent != NULL, !(abd->abd_flags & ABD_FLAG_OWNER));
+ IMPLY(abd->abd_flags & ABD_FLAG_META, abd->abd_flags & ABD_FLAG_OWNER);
+ if (abd_is_linear(abd)) {
+ ASSERT3P(abd->abd_u.abd_linear.abd_buf, !=, NULL);
+ } else {
+ size_t n;
+ int i;
+ struct scatterlist *sg;
+
+ ASSERT3U(ABD_SCATTER(abd).abd_nents, >, 0);
+ ASSERT3U(ABD_SCATTER(abd).abd_offset, <,
+ ABD_SCATTER(abd).abd_sgl->length);
+ n = ABD_SCATTER(abd).abd_nents;
+ abd_for_each_sg(abd, sg, n, i) {
+ ASSERT3P(sg_page(sg), !=, NULL);
+ }
+ }
+}
+
+static inline abd_t *
+abd_alloc_struct(void)
+{
+ abd_t *abd = kmem_cache_alloc(abd_cache, KM_PUSHPAGE);
+
+ ASSERT3P(abd, !=, NULL);
+ ABDSTAT_INCR(abdstat_struct_size, sizeof (abd_t));
+
+ return (abd);
+}
+
+static inline void
+abd_free_struct(abd_t *abd)
+{
+ kmem_cache_free(abd_cache, abd);
+ ABDSTAT_INCR(abdstat_struct_size, -sizeof (abd_t));
+}
+
+/*
+ * Allocate an ABD, along with its own underlying data buffers. Use this if you
+ * don't care whether the ABD is linear or not.
+ */
+abd_t *
+abd_alloc(size_t size, boolean_t is_metadata)
+{
+ abd_t *abd;
+
+ if (!zfs_abd_scatter_enabled || size <= PAGESIZE)
+ return (abd_alloc_linear(size, is_metadata));
+
+ VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);
+
+ abd = abd_alloc_struct();
+ abd->abd_flags = ABD_FLAG_OWNER;
+ abd_alloc_pages(abd, size);
+
+ if (is_metadata) {
+ abd->abd_flags |= ABD_FLAG_META;
+ }
+ abd->abd_size = size;
+ abd->abd_parent = NULL;
+ refcount_create(&abd->abd_children);
+
+ abd->abd_u.abd_scatter.abd_offset = 0;
+
+ ABDSTAT_BUMP(abdstat_scatter_cnt);
+ ABDSTAT_INCR(abdstat_scatter_data_size, size);
+ ABDSTAT_INCR(abdstat_scatter_chunk_waste,
+ P2ROUNDUP(size, PAGESIZE) - size);
+
+ return (abd);
+}
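+
+/*
+ * Typical lifecycle, as a hypothetical sketch ("src" is an illustrative
+ * caller buffer): allocate, fill, and free an ABD without caring whether
+ * it ends up linear or scattered:
+ *
+ *	abd_t *abd = abd_alloc(size, B_FALSE);
+ *	abd_copy_from_buf(abd, src, size);
+ *	...
+ *	abd_free(abd);
+ */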
+
+static void
+abd_free_scatter(abd_t *abd)
+{
+ abd_free_pages(abd);
+
+ refcount_destroy(&abd->abd_children);
+ ABDSTAT_BUMPDOWN(abdstat_scatter_cnt);
+ ABDSTAT_INCR(abdstat_scatter_data_size, -(int)abd->abd_size);
+ ABDSTAT_INCR(abdstat_scatter_chunk_waste,
+ abd->abd_size - P2ROUNDUP(abd->abd_size, PAGESIZE));
+
+ abd_free_struct(abd);
+}
+
+/*
+ * Allocate an ABD that must be linear, along with its own underlying data
+ * buffer. Only use this when it would be very annoying to write your ABD
+ * consumer with a scattered ABD.
+ */
+abd_t *
+abd_alloc_linear(size_t size, boolean_t is_metadata)
+{
+ abd_t *abd = abd_alloc_struct();
+
+ VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);
+
+ abd->abd_flags = ABD_FLAG_LINEAR | ABD_FLAG_OWNER;
+ if (is_metadata) {
+ abd->abd_flags |= ABD_FLAG_META;
+ }
+ abd->abd_size = size;
+ abd->abd_parent = NULL;
+ refcount_create(&abd->abd_children);
+
+ if (is_metadata) {
+ abd->abd_u.abd_linear.abd_buf = zio_buf_alloc(size);
+ } else {
+ abd->abd_u.abd_linear.abd_buf = zio_data_buf_alloc(size);
+ }
+
+ ABDSTAT_BUMP(abdstat_linear_cnt);
+ ABDSTAT_INCR(abdstat_linear_data_size, size);
+
+ return (abd);
+}
+
+static void
+abd_free_linear(abd_t *abd)
+{
+ if (abd->abd_flags & ABD_FLAG_META) {
+ zio_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size);
+ } else {
+ zio_data_buf_free(abd->abd_u.abd_linear.abd_buf, abd->abd_size);
+ }
+
+ refcount_destroy(&abd->abd_children);
+ ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
+ ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
+
+ abd_free_struct(abd);
+}
+
+/*
+ * Free an ABD. Only use this on ABDs allocated with abd_alloc() or
+ * abd_alloc_linear().
+ */
+void
+abd_free(abd_t *abd)
+{
+ abd_verify(abd);
+ ASSERT3P(abd->abd_parent, ==, NULL);
+ ASSERT(abd->abd_flags & ABD_FLAG_OWNER);
+ if (abd_is_linear(abd))
+ abd_free_linear(abd);
+ else
+ abd_free_scatter(abd);
+}
+
+/*
+ * Allocate an ABD of the same format (same metadata flag, same scatterize
+ * setting) as another ABD.
+ */
+abd_t *
+abd_alloc_sametype(abd_t *sabd, size_t size)
+{
+ boolean_t is_metadata = (sabd->abd_flags & ABD_FLAG_META) != 0;
+ if (abd_is_linear(sabd)) {
+ return (abd_alloc_linear(size, is_metadata));
+ } else {
+ return (abd_alloc(size, is_metadata));
+ }
+}
+
+/*
+ * If we're going to use this ABD for doing I/O using the block layer, the
+ * consumer of the ABD data doesn't care if it's scattered or not, and we don't
+ * plan to store this ABD in memory for a long period of time, we should
+ * allocate the ABD type that requires the least data copying to do the I/O.
+ *
+ * On Illumos this means linear ABDs; however, if ldi_strategy() can ever
+ * issue I/Os using a scatter/gather list, we should switch to that and
+ * replace this call with vanilla abd_alloc().
+ *
+ * On Linux the optimal thing to do would be to use abd_get_offset() and
+ * construct a new ABD which shares the original pages thereby eliminating
+ * the copy. But for the moment a new linear ABD is allocated until this
+ * performance optimization can be implemented.
+ */
+abd_t *
+abd_alloc_for_io(size_t size, boolean_t is_metadata)
+{
+ return (abd_alloc(size, is_metadata));
+}
+
+/*
+ * Allocate a new ABD to point to offset off of sabd. It shares the underlying
+ * buffer data with sabd. Use abd_put() to free. sabd must not be freed while
+ * any derived ABDs exist.
+ */
+static inline abd_t *
+abd_get_offset_impl(abd_t *sabd, size_t off, size_t size)
+{
+ abd_t *abd;
+
+ abd_verify(sabd);
+ ASSERT3U(off, <=, sabd->abd_size);
+
+ if (abd_is_linear(sabd)) {
+ abd = abd_alloc_struct();
+
+ /*
+ * Even if this buf is filesystem metadata, we only track that
+ * if we own the underlying data buffer, which is not true in
+ * this case. Therefore, we don't ever use ABD_FLAG_META here.
+ */
+ abd->abd_flags = ABD_FLAG_LINEAR;
+
+ abd->abd_u.abd_linear.abd_buf =
+ (char *)sabd->abd_u.abd_linear.abd_buf + off;
+ } else {
+ int i;
+ struct scatterlist *sg;
+ size_t new_offset = sabd->abd_u.abd_scatter.abd_offset + off;
+
+ abd = abd_alloc_struct();
+
+ /*
+ * Even if this buf is filesystem metadata, we only track that
+ * if we own the underlying data buffer, which is not true in
+ * this case. Therefore, we don't ever use ABD_FLAG_META here.
+ */
+ abd->abd_flags = 0;
+
+ abd_for_each_sg(sabd, sg, ABD_SCATTER(sabd).abd_nents, i) {
+ if (new_offset < sg->length)
+ break;
+ new_offset -= sg->length;
+ }
+
+ ABD_SCATTER(abd).abd_sgl = sg;
+ ABD_SCATTER(abd).abd_offset = new_offset;
+ ABD_SCATTER(abd).abd_nents = ABD_SCATTER(sabd).abd_nents - i;
+ }
+
+ abd->abd_size = size;
+ abd->abd_parent = sabd;
+ refcount_create(&abd->abd_children);
+ (void) refcount_add_many(&sabd->abd_children, abd->abd_size, abd);
+
+ return (abd);
+}
+
+abd_t *
+abd_get_offset(abd_t *sabd, size_t off)
+{
+ size_t size = sabd->abd_size > off ? sabd->abd_size - off : 0;
+
+ VERIFY3U(size, >, 0);
+
+ return (abd_get_offset_impl(sabd, off, size));
+}
+
+abd_t *
+abd_get_offset_size(abd_t *sabd, size_t off, size_t size)
+{
+ ASSERT3U(off + size, <=, sabd->abd_size);
+
+ return (abd_get_offset_impl(sabd, off, size));
+}
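+
+/*
+ * Sketch of a derived view (hypothetical caller): expose the second half
+ * of an ABD without copying. The view must be released with abd_put()
+ * before the parent is freed:
+ *
+ *	abd_t *half = abd_get_offset(abd, abd->abd_size / 2);
+ *	...
+ *	abd_put(half);
+ */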
+
+/*
+ * Allocate a linear ABD structure for buf. You must free this with abd_put()
+ * since the resulting ABD doesn't own its own buffer.
+ */
+abd_t *
+abd_get_from_buf(void *buf, size_t size)
+{
+ abd_t *abd = abd_alloc_struct();
+
+ VERIFY3U(size, <=, SPA_MAXBLOCKSIZE);
+
+ /*
+ * Even if this buf is filesystem metadata, we only track that if we
+ * own the underlying data buffer, which is not true in this case.
+ * Therefore, we don't ever use ABD_FLAG_META here.
+ */
+ abd->abd_flags = ABD_FLAG_LINEAR;
+ abd->abd_size = size;
+ abd->abd_parent = NULL;
+ refcount_create(&abd->abd_children);
+
+ abd->abd_u.abd_linear.abd_buf = buf;
+
+ return (abd);
+}
+
+/*
+ * Free an ABD allocated from abd_get_offset() or abd_get_from_buf(). Will not
+ * free the underlying scatterlist or buffer.
+ */
+void
+abd_put(abd_t *abd)
+{
+ abd_verify(abd);
+ ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER));
+
+ if (abd->abd_parent != NULL) {
+ (void) refcount_remove_many(&abd->abd_parent->abd_children,
+ abd->abd_size, abd);
+ }
+
+ refcount_destroy(&abd->abd_children);
+ abd_free_struct(abd);
+}
+
+/*
+ * Get the raw buffer associated with a linear ABD.
+ */
+void *
+abd_to_buf(abd_t *abd)
+{
+ ASSERT(abd_is_linear(abd));
+ abd_verify(abd);
+ return (abd->abd_u.abd_linear.abd_buf);
+}
+
+/*
+ * Borrow a raw buffer from an ABD without copying the contents of the ABD
+ * into the buffer. If the ABD is scattered, this will allocate a raw buffer
+ * whose contents are undefined. To copy over the existing data in the ABD, use
+ * abd_borrow_buf_copy() instead.
+ */
+void *
+abd_borrow_buf(abd_t *abd, size_t n)
+{
+ void *buf;
+ abd_verify(abd);
+ ASSERT3U(abd->abd_size, >=, n);
+ if (abd_is_linear(abd)) {
+ buf = abd_to_buf(abd);
+ } else {
+ buf = zio_buf_alloc(n);
+ }
+ (void) refcount_add_many(&abd->abd_children, n, buf);
+
+ return (buf);
+}
+
+void *
+abd_borrow_buf_copy(abd_t *abd, size_t n)
+{
+ void *buf = abd_borrow_buf(abd, n);
+ if (!abd_is_linear(abd)) {
+ abd_copy_to_buf(buf, abd, n);
+ }
+ return (buf);
+}
+
+/*
+ * Return a borrowed raw buffer to an ABD. If the ABD is scattered, this will
+ * not change the contents of the ABD and will ASSERT that you didn't modify
+ * the buffer since it was borrowed. If you want any changes you made to buf to
+ * be copied back to abd, use abd_return_buf_copy() instead.
+ */
+void
+abd_return_buf(abd_t *abd, void *buf, size_t n)
+{
+ abd_verify(abd);
+ ASSERT3U(abd->abd_size, >=, n);
+ if (abd_is_linear(abd)) {
+ ASSERT3P(buf, ==, abd_to_buf(abd));
+ } else {
+ ASSERT0(abd_cmp_buf(abd, buf, n));
+ zio_buf_free(buf, n);
+ }
+ (void) refcount_remove_many(&abd->abd_children, n, buf);
+}
+
+void
+abd_return_buf_copy(abd_t *abd, void *buf, size_t n)
+{
+ if (!abd_is_linear(abd)) {
+ abd_copy_from_buf(abd, buf, n);
+ }
+ abd_return_buf(abd, buf, n);
+}
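+
+/*
+ * Borrow/return usage sketch (hypothetical; "modify" stands in for any
+ * consumer that needs a contiguous buffer):
+ *
+ *	void *buf = abd_borrow_buf_copy(abd, n);
+ *	modify(buf, n);
+ *	abd_return_buf_copy(abd, buf, n);
+ */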
+
+/*
+ * Give this ABD ownership of the buffer that it's storing. Can only be used on
+ * linear ABDs which were allocated via abd_get_from_buf(), or ones allocated
+ * with abd_alloc_linear() which subsequently released ownership of their buf
+ * with abd_release_ownership_of_buf().
+ */
+void
+abd_take_ownership_of_buf(abd_t *abd, boolean_t is_metadata)
+{
+ ASSERT(abd_is_linear(abd));
+ ASSERT(!(abd->abd_flags & ABD_FLAG_OWNER));
+ abd_verify(abd);
+
+ abd->abd_flags |= ABD_FLAG_OWNER;
+ if (is_metadata) {
+ abd->abd_flags |= ABD_FLAG_META;
+ }
+
+ ABDSTAT_BUMP(abdstat_linear_cnt);
+ ABDSTAT_INCR(abdstat_linear_data_size, abd->abd_size);
+}
+
+void
+abd_release_ownership_of_buf(abd_t *abd)
+{
+ ASSERT(abd_is_linear(abd));
+ ASSERT(abd->abd_flags & ABD_FLAG_OWNER);
+ abd_verify(abd);
+
+ abd->abd_flags &= ~ABD_FLAG_OWNER;
+ /* Disable this flag since we no longer own the data buffer */
+ abd->abd_flags &= ~ABD_FLAG_META;
+
+ ABDSTAT_BUMPDOWN(abdstat_linear_cnt);
+ ABDSTAT_INCR(abdstat_linear_data_size, -(int)abd->abd_size);
+}
+
+#ifndef HAVE_1ARG_KMAP_ATOMIC
+#define NR_KM_TYPE (6)
+#ifdef _KERNEL
+int km_table[NR_KM_TYPE] = {
+ KM_USER0,
+ KM_USER1,
+ KM_BIO_SRC_IRQ,
+ KM_BIO_DST_IRQ,
+ KM_PTE0,
+ KM_PTE1,
+};
+#endif
+#endif
+
+struct abd_iter {
+ /* public interface */
+ void *iter_mapaddr; /* addr corresponding to iter_pos */
+ size_t iter_mapsize; /* length of data valid at mapaddr */
+
+ /* private */
+ abd_t *iter_abd; /* ABD being iterated through */
+ size_t iter_pos;
+ size_t iter_offset; /* offset in current sg/abd_buf, */
+ /* abd_offset included */
+ struct scatterlist *iter_sg; /* current sg */
+#ifndef HAVE_1ARG_KMAP_ATOMIC
+ int iter_km; /* KM_* for kmap_atomic */
+#endif
+};
+
+/*
+ * Initialize the abd_iter.
+ */
+static void
+abd_iter_init(struct abd_iter *aiter, abd_t *abd, int km_type)
+{
+ abd_verify(abd);
+ aiter->iter_abd = abd;
+ aiter->iter_mapaddr = NULL;
+ aiter->iter_mapsize = 0;
+ aiter->iter_pos = 0;
+ if (abd_is_linear(abd)) {
+ aiter->iter_offset = 0;
+ aiter->iter_sg = NULL;
+ } else {
+ aiter->iter_offset = ABD_SCATTER(abd).abd_offset;
+ aiter->iter_sg = ABD_SCATTER(abd).abd_sgl;
+ }
+#ifndef HAVE_1ARG_KMAP_ATOMIC
+ ASSERT3U(km_type, <, NR_KM_TYPE);
+ aiter->iter_km = km_type;
+#endif
+}
+
+/*
+ * Advance the iterator by a certain amount. Cannot be called when a chunk is
+ * in use. This can be safely called when the aiter has already been
+ * exhausted, in which case it does nothing.
+ */
+static void
+abd_iter_advance(struct abd_iter *aiter, size_t amount)
+{
+ ASSERT3P(aiter->iter_mapaddr, ==, NULL);
+ ASSERT0(aiter->iter_mapsize);
+
+ /* There's nothing left to advance to, so do nothing */
+ if (aiter->iter_pos == aiter->iter_abd->abd_size)
+ return;
+
+ aiter->iter_pos += amount;
+ aiter->iter_offset += amount;
+ if (!abd_is_linear(aiter->iter_abd)) {
+ while (aiter->iter_offset >= aiter->iter_sg->length) {
+ aiter->iter_offset -= aiter->iter_sg->length;
+ aiter->iter_sg = sg_next(aiter->iter_sg);
+ if (aiter->iter_sg == NULL) {
+ ASSERT0(aiter->iter_offset);
+ break;
+ }
+ }
+ }
+}
+
+/*
+ * Map the current chunk into aiter. This can be safely called when the
+ * aiter has already been exhausted, in which case it does nothing.
+ */
+static void
+abd_iter_map(struct abd_iter *aiter)
+{
+ void *paddr;
+ size_t offset = 0;
+
+ ASSERT3P(aiter->iter_mapaddr, ==, NULL);
+ ASSERT0(aiter->iter_mapsize);
+
+ /* There's nothing left to iterate over, so do nothing */
+ if (aiter->iter_pos == aiter->iter_abd->abd_size)
+ return;
+
+ if (abd_is_linear(aiter->iter_abd)) {
+ ASSERT3U(aiter->iter_pos, ==, aiter->iter_offset);
+ offset = aiter->iter_offset;
+ aiter->iter_mapsize = aiter->iter_abd->abd_size - offset;
+ paddr = aiter->iter_abd->abd_u.abd_linear.abd_buf;
+ } else {
+ offset = aiter->iter_offset;
+ aiter->iter_mapsize = MIN(aiter->iter_sg->length - offset,
+ aiter->iter_abd->abd_size - aiter->iter_pos);
+
+ paddr = zfs_kmap_atomic(sg_page(aiter->iter_sg),
+ km_table[aiter->iter_km]);
+ }
+
+ aiter->iter_mapaddr = (char *)paddr + offset;
+}
+
+/*
+ * Unmap the current chunk from aiter. This can be safely called when the
+ * aiter has already been exhausted, in which case it does nothing.
+ */
+static void
+abd_iter_unmap(struct abd_iter *aiter)
+{
+ /* There's nothing left to unmap, so do nothing */
+ if (aiter->iter_pos == aiter->iter_abd->abd_size)
+ return;
+
+ if (!abd_is_linear(aiter->iter_abd)) {
+ /* LINTED E_FUNC_SET_NOT_USED */
+ zfs_kunmap_atomic(aiter->iter_mapaddr - aiter->iter_offset,
+ km_table[aiter->iter_km]);
+ }
+
+ ASSERT3P(aiter->iter_mapaddr, !=, NULL);
+ ASSERT3U(aiter->iter_mapsize, >, 0);
+
+ aiter->iter_mapaddr = NULL;
+ aiter->iter_mapsize = 0;
+}
+
+int
+abd_iterate_func(abd_t *abd, size_t off, size_t size,
+ abd_iter_func_t *func, void *private)
+{
+ int ret = 0;
+ struct abd_iter aiter;
+
+ abd_verify(abd);
+ ASSERT3U(off + size, <=, abd->abd_size);
+
+ abd_iter_init(&aiter, abd, 0);
+ abd_iter_advance(&aiter, off);
+
+ while (size > 0) {
+ size_t len;
+ abd_iter_map(&aiter);
+
+ len = MIN(aiter.iter_mapsize, size);
+ ASSERT3U(len, >, 0);
+
+ ret = func(aiter.iter_mapaddr, len, private);
+
+ abd_iter_unmap(&aiter);
+
+ if (ret != 0)
+ break;
+
+ size -= len;
+ abd_iter_advance(&aiter, len);
+ }
+
+ return (ret);
+}
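+
+/*
+ * Example iterator callback, as a hypothetical sketch: XOR every byte of
+ * the ABD into a one-byte accumulator. A nonzero return value aborts the
+ * iteration early, which is how abd_cmp_buf_off_cb() below short-circuits.
+ *
+ *	static int
+ *	xor_cb(void *buf, size_t size, void *private)
+ *	{
+ *		uint8_t *acc = private;
+ *		size_t i;
+ *
+ *		for (i = 0; i < size; i++)
+ *			*acc ^= ((uint8_t *)buf)[i];
+ *		return (0);
+ *	}
+ */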
+
+struct buf_arg {
+ void *arg_buf;
+};
+
+static int
+abd_copy_to_buf_off_cb(void *buf, size_t size, void *private)
+{
+ struct buf_arg *ba_ptr = private;
+
+ (void) memcpy(ba_ptr->arg_buf, buf, size);
+ ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size;
+
+ return (0);
+}
+
+/*
+ * Copy abd to buf. (off is the offset in abd.)
+ */
+void
+abd_copy_to_buf_off(void *buf, abd_t *abd, size_t off, size_t size)
+{
+ struct buf_arg ba_ptr = { buf };
+
+ (void) abd_iterate_func(abd, off, size, abd_copy_to_buf_off_cb,
+ &ba_ptr);
+}
+
+static int
+abd_cmp_buf_off_cb(void *buf, size_t size, void *private)
+{
+ int ret;
+ struct buf_arg *ba_ptr = private;
+
+ ret = memcmp(buf, ba_ptr->arg_buf, size);
+ ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size;
+
+ return (ret);
+}
+
+/*
+ * Compare the contents of abd to buf. (off is the offset in abd.)
+ */
+int
+abd_cmp_buf_off(abd_t *abd, const void *buf, size_t off, size_t size)
+{
+ struct buf_arg ba_ptr = { (void *) buf };
+
+ return (abd_iterate_func(abd, off, size, abd_cmp_buf_off_cb, &ba_ptr));
+}
+
+static int
+abd_copy_from_buf_off_cb(void *buf, size_t size, void *private)
+{
+ struct buf_arg *ba_ptr = private;
+
+ (void) memcpy(buf, ba_ptr->arg_buf, size);
+ ba_ptr->arg_buf = (char *)ba_ptr->arg_buf + size;
+
+ return (0);
+}
+
+/*
+ * Copy from buf to abd. (off is the offset in abd.)
+ */
+void
+abd_copy_from_buf_off(abd_t *abd, const void *buf, size_t off, size_t size)
+{
+ struct buf_arg ba_ptr = { (void *) buf };
+
+ (void) abd_iterate_func(abd, off, size, abd_copy_from_buf_off_cb,
+ &ba_ptr);
+}
+
+/*ARGSUSED*/
+static int
+abd_zero_off_cb(void *buf, size_t size, void *private)
+{
+ (void) memset(buf, 0, size);
+ return (0);
+}
+
+/*
+ * Zero out the abd from a particular offset to the end.
+ */
+void
+abd_zero_off(abd_t *abd, size_t off, size_t size)
+{
+ (void) abd_iterate_func(abd, off, size, abd_zero_off_cb, NULL);
+}
+
+/*
+ * Iterate over two ABDs and call func incrementally on the two ABDs' data in
+ * equal-sized chunks (passed to func as raw buffers). func could be called many
+ * times during this iteration.
+ */
+int
+abd_iterate_func2(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff,
+ size_t size, abd_iter_func2_t *func, void *private)
+{
+ int ret = 0;
+ struct abd_iter daiter, saiter;
+
+ abd_verify(dabd);
+ abd_verify(sabd);
+
+ ASSERT3U(doff + size, <=, dabd->abd_size);
+ ASSERT3U(soff + size, <=, sabd->abd_size);
+
+ abd_iter_init(&daiter, dabd, 0);
+ abd_iter_init(&saiter, sabd, 1);
+ abd_iter_advance(&daiter, doff);
+ abd_iter_advance(&saiter, soff);
+
+ while (size > 0) {
+ size_t dlen, slen, len;
+ abd_iter_map(&daiter);
+ abd_iter_map(&saiter);
+
+ dlen = MIN(daiter.iter_mapsize, size);
+ slen = MIN(saiter.iter_mapsize, size);
+ len = MIN(dlen, slen);
+ ASSERT(dlen > 0 || slen > 0);
+
+ ret = func(daiter.iter_mapaddr, saiter.iter_mapaddr, len,
+ private);
+
+ abd_iter_unmap(&saiter);
+ abd_iter_unmap(&daiter);
+
+ if (ret != 0)
+ break;
+
+ size -= len;
+ abd_iter_advance(&daiter, len);
+ abd_iter_advance(&saiter, len);
+ }
+
+ return (ret);
+}
+
+/*ARGSUSED*/
+static int
+abd_copy_off_cb(void *dbuf, void *sbuf, size_t size, void *private)
+{
+ (void) memcpy(dbuf, sbuf, size);
+ return (0);
+}
+
+/*
+ * Copy from sabd to dabd starting from soff and doff.
+ */
+void
+abd_copy_off(abd_t *dabd, abd_t *sabd, size_t doff, size_t soff, size_t size)
+{
+ (void) abd_iterate_func2(dabd, sabd, doff, soff, size,
+ abd_copy_off_cb, NULL);
+}
+
+/*ARGSUSED*/
+static int
+abd_cmp_cb(void *bufa, void *bufb, size_t size, void *private)
+{
+ return (memcmp(bufa, bufb, size));
+}
+
+/*
+ * Compare the contents of two ABDs.
+ */
+int
+abd_cmp(abd_t *dabd, abd_t *sabd)
+{
+ ASSERT3U(dabd->abd_size, ==, sabd->abd_size);
+ return (abd_iterate_func2(dabd, sabd, 0, 0, dabd->abd_size,
+ abd_cmp_cb, NULL));
+}
+
+/*
+ * Iterate over code ABDs and a data ABD and call @func_raidz_gen.
+ *
+ * @cabds parity ABDs, must have equal size
+ * @dabd data ABD. Can be NULL (in this case @dsize = 0)
+ * @func_raidz_gen should be implemented so that it behaves the same
+ * whether it is given linear or scatter buffers
+ */
+void
+abd_raidz_gen_iterate(abd_t **cabds, abd_t *dabd,
+ ssize_t csize, ssize_t dsize, const unsigned parity,
+ void (*func_raidz_gen)(void **, const void *, size_t, size_t))
+{
+ int i;
+ ssize_t len, dlen;
+ struct abd_iter caiters[3];
+ struct abd_iter daiter = {0};
+ void *caddrs[3];
+ unsigned long flags;
+
+ ASSERT3U(parity, <=, 3);
+
+ for (i = 0; i < parity; i++)
+ abd_iter_init(&caiters[i], cabds[i], i);
+
+ if (dabd)
+ abd_iter_init(&daiter, dabd, i);
+
+ ASSERT3S(dsize, >=, 0);
+
+ local_irq_save(flags);
+ while (csize > 0) {
+ len = csize;
+
+ if (dabd && dsize > 0)
+ abd_iter_map(&daiter);
+
+ for (i = 0; i < parity; i++) {
+ abd_iter_map(&caiters[i]);
+ caddrs[i] = caiters[i].iter_mapaddr;
+ }
+
+ switch (parity) {
+ case 3:
+ len = MIN(caiters[2].iter_mapsize, len);
+ /* FALLTHROUGH */
+ case 2:
+ len = MIN(caiters[1].iter_mapsize, len);
+ /* FALLTHROUGH */
+ case 1:
+ len = MIN(caiters[0].iter_mapsize, len);
+ }
+
+ /* must be progressive */
+ ASSERT3S(len, >, 0);
+
+ if (dabd && dsize > 0) {
+ /* this needs precise iter.length */
+ len = MIN(daiter.iter_mapsize, len);
+ dlen = len;
+ } else
+ dlen = 0;
+
+ /* must be progressive */
+ ASSERT3S(len, >, 0);
+ /*
+ * The iterated function is unlikely to work correctly unless each
+ * segment, except possibly the last, is a multiple of 512 bytes (raidz).
+ */
+ ASSERT3U(((uint64_t)len & 511ULL), ==, 0);
+
+ func_raidz_gen(caddrs, daiter.iter_mapaddr, len, dlen);
+
+ for (i = parity-1; i >= 0; i--) {
+ abd_iter_unmap(&caiters[i]);
+ abd_iter_advance(&caiters[i], len);
+ }
+
+ if (dabd && dsize > 0) {
+ abd_iter_unmap(&daiter);
+ abd_iter_advance(&daiter, dlen);
+ dsize -= dlen;
+ }
+
+ csize -= len;
+
+ ASSERT3S(dsize, >=, 0);
+ ASSERT3S(csize, >=, 0);
+ }
+ local_irq_restore(flags);
+}
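+
+/*
+ * Note (illustrative): with parity = 2 and a non-NULL dabd, each pass of
+ * the loop above can hold three atomic kmaps at once (two code columns
+ * plus the data column), which is why each iterator is initialized with
+ * a distinct KM type index.
+ */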
+
+/*
+ * Iterate over code ABDs and data reconstruction target ABDs and call
+ * @func_raidz_rec. Function maps at most 6 pages atomically.
+ *
+ * @cabds parity ABDs, must have equal size
+ * @tabds rec target ABDs, at most 3
+ * @tsize size of data target columns
+ * @func_raidz_rec expects syndrome data in target columns. Function
+ * reconstructs data and overwrites target columns.
+ */
+void
+abd_raidz_rec_iterate(abd_t **cabds, abd_t **tabds,
+ ssize_t tsize, const unsigned parity,
+ void (*func_raidz_rec)(void **t, const size_t tsize, void **c,
+ const unsigned *mul),
+ const unsigned *mul)
+{
+ int i;
+ ssize_t len;
+ struct abd_iter citers[3];
+ struct abd_iter xiters[3];
+ void *caddrs[3], *xaddrs[3];
+ unsigned long flags;
+
+ ASSERT3U(parity, <=, 3);
+
+ for (i = 0; i < parity; i++) {
+ abd_iter_init(&citers[i], cabds[i], 2*i);
+ abd_iter_init(&xiters[i], tabds[i], 2*i+1);
+ }
+
+ local_irq_save(flags);
+ while (tsize > 0) {
+
+ for (i = 0; i < parity; i++) {
+ abd_iter_map(&citers[i]);
+ abd_iter_map(&xiters[i]);
+ caddrs[i] = citers[i].iter_mapaddr;
+ xaddrs[i] = xiters[i].iter_mapaddr;
+ }
+
+ len = tsize;
+ switch (parity) {
+ case 3:
+ len = MIN(xiters[2].iter_mapsize, len);
+ len = MIN(citers[2].iter_mapsize, len);
+ /* FALLTHROUGH */
+ case 2:
+ len = MIN(xiters[1].iter_mapsize, len);
+ len = MIN(citers[1].iter_mapsize, len);
+ /* FALLTHROUGH */
+ case 1:
+ len = MIN(xiters[0].iter_mapsize, len);
+ len = MIN(citers[0].iter_mapsize, len);
+ }
+ /* must be progressive */
+ ASSERT3S(len, >, 0);
+ /*
+ * The iterated function is unlikely to work correctly unless each
+ * segment, except possibly the last, is a multiple of 512 bytes (raidz).
+ */
+ ASSERT3U(((uint64_t)len & 511ULL), ==, 0);
+
+ func_raidz_rec(xaddrs, len, caddrs, mul);
+
+ for (i = parity-1; i >= 0; i--) {
+ abd_iter_unmap(&xiters[i]);
+ abd_iter_unmap(&citers[i]);
+ abd_iter_advance(&xiters[i], len);
+ abd_iter_advance(&citers[i], len);
+ }
+
+ tsize -= len;
+ ASSERT3S(tsize, >=, 0);
+ }
+ local_irq_restore(flags);
+}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+/*
+ * bio_nr_pages for ABD.
+ * @off is the offset in @abd
+ */
+unsigned long
+abd_nr_pages_off(abd_t *abd, unsigned int size, size_t off)
+{
+ unsigned long pos;
+
+ if (abd_is_linear(abd))
+ pos = (unsigned long)abd_to_buf(abd) + off;
+ else
+ pos = abd->abd_u.abd_scatter.abd_offset + off;
+
+ return ((pos + size + PAGESIZE - 1) >> PAGE_SHIFT) -
+ (pos >> PAGE_SHIFT);
+}
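+
+/*
+ * Worked example (assuming 4 KiB pages): with abd_offset + off = 1000
+ * and size = 9000 the span covers bytes 1000..9999, so the result is
+ * ((1000 + 9000 + 4095) >> 12) - (1000 >> 12) = 3 pages.
+ */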
+
+/*
+ * bio_map for scatter ABD.
+ * @off is the offset in @abd
+ * Remaining IO size is returned
+ */
+unsigned int
+abd_scatter_bio_map_off(struct bio *bio, abd_t *abd,
+ unsigned int io_size, size_t off)
+{
+ int i;
+ struct abd_iter aiter;
+
+ ASSERT(!abd_is_linear(abd));
+ ASSERT3U(io_size, <=, abd->abd_size - off);
+
+ abd_iter_init(&aiter, abd, 0);
+ abd_iter_advance(&aiter, off);
+
+ for (i = 0; i < bio->bi_max_vecs; i++) {
+ struct page *pg;
+ size_t len, sgoff, pgoff;
+ struct scatterlist *sg;
+
+ if (io_size <= 0)
+ break;
+
+ sg = aiter.iter_sg;
+ sgoff = aiter.iter_offset;
+ pgoff = sgoff & (PAGESIZE - 1);
+ len = MIN(io_size, PAGESIZE - pgoff);
+ ASSERT(len > 0);
+
+ pg = nth_page(sg_page(sg), sgoff >> PAGE_SHIFT);
+ if (bio_add_page(bio, pg, len, pgoff) != len)
+ break;
+
+ io_size -= len;
+ abd_iter_advance(&aiter, len);
+ }
+
+ return (io_size);
+}
+
+/* Tunable Parameters */
+module_param(zfs_abd_scatter_enabled, int, 0644);
+MODULE_PARM_DESC(zfs_abd_scatter_enabled,
+ "Toggle whether ABD allocations must be linear.");
+/* CSTYLED */
+module_param(zfs_abd_scatter_max_order, uint, 0644);
+MODULE_PARM_DESC(zfs_abd_scatter_max_order,
+ "Maximum order allocation used for a scatter ABD.");
+#endif
diff --git a/zfs/module/zfs/arc.c b/zfs/module/zfs/arc.c
index c72ced758563..48950245e295 100644
--- a/zfs/module/zfs/arc.c
+++ b/zfs/module/zfs/arc.c
@@ -21,9 +21,9 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
- * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
*/
/*
@@ -77,10 +77,10 @@
* A new reference to a cache buffer can be obtained in two
* ways: 1) via a hash table lookup using the DVA as a key,
* or 2) via one of the ARC lists. The arc_read() interface
- * uses method 1, while the internal arc algorithms for
+ * uses method 1, while the internal ARC algorithms for
* adjusting the cache use method 2. We therefore provide two
* types of locks: 1) the hash table lock array, and 2) the
- * arc list locks.
+ * ARC list locks.
*
* Buffers do not have their own mutexes, rather they rely on the
* hash table mutexes for the bulk of their protection (i.e. most
@@ -93,21 +93,12 @@
* buf_hash_remove() expects the appropriate hash mutex to be
* already held before it is invoked.
*
- * Each arc state also has a mutex which is used to protect the
+ * Each ARC state also has a mutex which is used to protect the
* buffer list associated with the state. When attempting to
- * obtain a hash table lock while holding an arc list lock you
+ * obtain a hash table lock while holding an ARC list lock you
* must use: mutex_tryenter() to avoid deadlock. Also note that
* the active state mutex must be held before the ghost state mutex.
*
- * Arc buffers may have an associated eviction callback function.
- * This function will be invoked prior to removing the buffer (e.g.
- * in arc_do_user_evicts()). Note however that the data associated
- * with the buffer may be evicted prior to the callback. The callback
- * must be made with *no locks held* (to prevent deadlock). Additionally,
- * the users of callbacks must ensure that their private data is
- * protected from simultaneous callbacks from arc_clear_callback()
- * and arc_do_user_evicts().
- *
* It is also possible to register a callback which is run when the
* arc_meta_limit is reached and no buffers can be safely evicted. In
* this case the arc user should drop a reference on some arc buffers so
@@ -128,16 +119,161 @@
* - ARC header release, as it removes from L2ARC buflists
*/
+/*
+ * ARC operation:
+ *
+ * Every block that is in the ARC is tracked by an arc_buf_hdr_t structure.
+ * This structure can point either to a block that is still in the cache or to
+ * one that is only accessible in an L2 ARC device, or it can provide
+ * information about a block that was recently evicted. If a block is
+ * only accessible in the L2ARC, then the arc_buf_hdr_t only has enough
+ * information to retrieve it from the L2ARC device. This information is
+ * stored in the l2arc_buf_hdr_t sub-structure of the arc_buf_hdr_t. The
+ * data of a block in this state cannot be accessed directly.
+ *
+ * Blocks that are actively being referenced or have not been evicted
+ * are cached in the L1ARC. The L1ARC (l1arc_buf_hdr_t) is a structure within
+ * the arc_buf_hdr_t that will point to the data block in memory. A block can
+ * only be read by a consumer if it has an l1arc_buf_hdr_t. The L1ARC
+ * caches data in two ways -- in a list of ARC buffers (arc_buf_t) and
+ * also in the arc_buf_hdr_t's private physical data block pointer (b_pabd).
+ *
+ * The L1ARC's data pointer may or may not be uncompressed. The ARC has the
+ * ability to store the physical data (b_pabd) associated with the DVA of the
+ * arc_buf_hdr_t. Since the b_pabd is a copy of the on-disk physical block,
+ * it will match its on-disk compression characteristics. This behavior can be
+ * disabled by setting 'zfs_compressed_arc_enabled' to B_FALSE. When the
+ * compressed ARC functionality is disabled, the b_pabd will point to an
+ * uncompressed version of the on-disk data.
+ *
+ * Data in the L1ARC is not accessed by consumers of the ARC directly. Each
+ * arc_buf_hdr_t can have multiple ARC buffers (arc_buf_t) which reference it.
+ * Each ARC buffer (arc_buf_t) is being actively accessed by a specific ARC
+ * consumer. The ARC will provide references to this data and will keep it
+ * cached until it is no longer in use. The ARC caches only the L1ARC's physical
+ * data block and will evict any arc_buf_t that is no longer referenced. The
+ * amount of memory consumed by the arc_buf_ts' data buffers can be seen via the
+ * "overhead_size" kstat.
+ *
+ * Depending on the consumer, an arc_buf_t can be requested in uncompressed or
+ * compressed form. The typical case is that consumers will want uncompressed
+ * data, and when that happens a new data buffer is allocated where the data is
+ * decompressed for them to use. Currently the only consumer that wants
+ * compressed arc_buf_t's is "zfs send", when it streams data exactly as it
+ * exists on disk. When this happens, the arc_buf_t's data buffer is shared
+ * with the arc_buf_hdr_t.
+ *
+ * Here is a diagram showing an arc_buf_hdr_t referenced by two arc_buf_t's. The
+ * first one is owned by a compressed send consumer (and therefore references
+ * the same compressed data buffer as the arc_buf_hdr_t) and the second could be
+ * used by any other consumer (and has its own uncompressed copy of the data
+ * buffer).
+ *
+ * arc_buf_hdr_t
+ * +-----------+
+ * | fields |
+ * | common to |
+ * | L1- and |
+ * | L2ARC |
+ * +-----------+
+ * | l2arc_buf_hdr_t
+ * | |
+ * +-----------+
+ * | l1arc_buf_hdr_t
+ * | | arc_buf_t
+ * | b_buf +------------>+-----------+ arc_buf_t
+ * | b_pabd +-+ |b_next +---->+-----------+
+ * +-----------+ | |-----------| |b_next +-->NULL
+ * | |b_comp = T | +-----------+
+ * | |b_data +-+ |b_comp = F |
+ * | +-----------+ | |b_data +-+
+ * +->+------+ | +-----------+ |
+ * compressed | | | |
+ * data | |<--------------+ | uncompressed
+ * +------+ compressed, | data
+ * shared +-->+------+
+ * data | |
+ * | |
+ * +------+
+ *
+ * When a consumer reads a block, the ARC must first look to see if the
+ * arc_buf_hdr_t is cached. If the hdr is cached then the ARC allocates a new
+ * arc_buf_t and either copies uncompressed data into a new data buffer from an
+ * existing uncompressed arc_buf_t, decompresses the hdr's b_pabd buffer into a
+ * new data buffer, or shares the hdr's b_pabd buffer, depending on whether the
+ * hdr is compressed and the desired compression characteristics of the
+ * arc_buf_t consumer. If the arc_buf_t ends up sharing data with the
+ * arc_buf_hdr_t and both of them are uncompressed then the arc_buf_t must be
+ * the last buffer in the hdr's b_buf list; a shared compressed buf,
+ * however, can be anywhere in the hdr's list.
+ *
+ * The diagram below shows an example of an uncompressed ARC hdr that is
+ * sharing its data with an arc_buf_t (note that the shared uncompressed buf is
+ * the last element in the buf list):
+ *
+ * arc_buf_hdr_t
+ * +-----------+
+ * | |
+ * | |
+ * | |
+ * +-----------+
+ * l2arc_buf_hdr_t| |
+ * | |
+ * +-----------+
+ * l1arc_buf_hdr_t| |
+ * | | arc_buf_t (shared)
+ * | b_buf +------------>+---------+ arc_buf_t
+ * | | |b_next +---->+---------+
+ * | b_pabd +-+ |---------| |b_next +-->NULL
+ * +-----------+ | | | +---------+
+ * | |b_data +-+ | |
+ * | +---------+ | |b_data +-+
+ * +->+------+ | +---------+ |
+ * | | | |
+ * uncompressed | | | |
+ * data +------+ | |
+ * ^ +->+------+ |
+ * | uncompressed | | |
+ * | data | | |
+ * | +------+ |
+ * +---------------------------------+
+ *
+ * Writing to the ARC requires that the ARC first discard the hdr's b_pabd
+ * since the physical block is about to be rewritten. The new data contents
+ * will be contained in the arc_buf_t. As the I/O pipeline performs the write,
+ * it may compress the data before writing it to disk. The ARC will be called
+ * with the transformed data and will bcopy the transformed on-disk block into
+ * a newly allocated b_pabd. Writes are always done into buffers which have
+ * either been loaned (and hence are new and don't have other readers) or
+ * buffers which have been released (and hence have their own hdr, if there
+ * were originally other readers of the buf's original hdr). This ensures that
+ * the ARC only needs to update a single buf and its hdr after a write occurs.
+ *
+ * When the L2ARC is in use, it will also take advantage of the b_pabd. The
+ * L2ARC will always write the contents of b_pabd to the L2ARC. This means
+ * that when compressed ARC is enabled, the L2ARC blocks are identical
+ * to the on-disk block in the main data pool. This provides a significant
+ * advantage since the ARC can leverage the bp's checksum when reading from the
+ * L2ARC to determine if the contents are valid. However, if the compressed
+ * ARC is disabled, then the L2ARC's block must be transformed to look
+ * like the physical block in the main data pool before comparing the
+ * checksum and determining its validity.
+ */
+
#include <sys/spa.h>
#include <sys/zio.h>
+#include <sys/spa_impl.h>
#include <sys/zio_compress.h>
+#include <sys/zio_checksum.h>
#include <sys/zfs_context.h>
#include <sys/arc.h>
#include <sys/refcount.h>
#include <sys/vdev.h>
#include <sys/vdev_impl.h>
#include <sys/dsl_pool.h>
+#include <sys/zio_checksum.h>
#include <sys/multilist.h>
+#include <sys/abd.h>
#ifdef _KERNEL
#include <sys/vmsystm.h>
#include <vm/anon.h>
@@ -162,10 +298,6 @@ static kcondvar_t arc_reclaim_thread_cv;
static boolean_t arc_reclaim_thread_exit;
static kcondvar_t arc_reclaim_waiters_cv;
-static kmutex_t arc_user_evicts_lock;
-static kcondvar_t arc_user_evicts_cv;
-static boolean_t arc_user_evicts_thread_exit;
-
/*
* The number of headers to evict in arc_evict_state_impl() before
* dropping the sublist lock and evicting from another sublist. A lower
@@ -175,17 +307,10 @@ static boolean_t arc_user_evicts_thread_exit;
*/
int zfs_arc_evict_batch_limit = 10;
-/*
- * The number of sublists used for each of the arc state lists. If this
- * is not set to a suitable value by the user, it will be configured to
- * the number of CPUs on the system in arc_init().
- */
-int zfs_arc_num_sublists_per_state = 0;
-
/* number of seconds before growing cache again */
static int arc_grow_retry = 5;
-/* shift of arc_c for calculating overflow limit in arc_get_data_buf */
+/* shift of arc_c for calculating overflow limit in arc_get_data_impl */
int zfs_arc_overflow_shift = 8;
/* shift of arc_c for calculating both min and max arc_p */
@@ -194,6 +319,11 @@ static int arc_p_min_shift = 4;
/* log2(fraction of arc to reclaim) */
static int arc_shrink_shift = 7;
+/* percent of pagecache to reclaim arc to */
+#ifdef _KERNEL
+static uint_t zfs_arc_pc_percent = 0;
+#endif
+
/*
* log2(fraction of ARC which must be free to allow growing).
* I.e. If there is less than arc_c >> arc_no_grow_shift free memory,
@@ -224,6 +354,11 @@ static int arc_dead;
*/
static boolean_t arc_warm;
+/*
+ * log2 fraction of the zio arena to keep free.
+ */
+int arc_zio_arena_free_shift = 2;
+
/*
* These tunables are for performance analysis.
*/
@@ -231,12 +366,26 @@ unsigned long zfs_arc_max = 0;
unsigned long zfs_arc_min = 0;
unsigned long zfs_arc_meta_limit = 0;
unsigned long zfs_arc_meta_min = 0;
+unsigned long zfs_arc_dnode_limit = 0;
+unsigned long zfs_arc_dnode_reduce_percent = 10;
int zfs_arc_grow_retry = 0;
int zfs_arc_shrink_shift = 0;
int zfs_arc_p_min_shift = 0;
-int zfs_disable_dup_eviction = 0;
int zfs_arc_average_blocksize = 8 * 1024; /* 8KB */
+int zfs_compressed_arc_enabled = B_TRUE;
+
+/*
+ * ARC will evict meta buffers that exceed arc_meta_limit. This
+ * tunable makes arc_meta_limit adjustable for different workloads.
+ */
+unsigned long zfs_arc_meta_limit_percent = 75;
+
+/*
+ * Percentage that can be consumed by dnodes of ARC meta buffers.
+ */
+unsigned long zfs_arc_dnode_limit_percent = 10;
+
/*
* These tunables are Linux specific
*/
@@ -305,6 +454,26 @@ typedef struct arc_stats {
kstat_named_t arcstat_c_min;
kstat_named_t arcstat_c_max;
kstat_named_t arcstat_size;
+ /*
+ * Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd.
+ * Note that the compressed bytes may match the uncompressed bytes
+ * if the block is either not compressed or compressed arc is disabled.
+ */
+ kstat_named_t arcstat_compressed_size;
+ /*
+ * Uncompressed size of the data stored in b_pabd. If compressed
+ * arc is disabled then this value will be identical to the stat
+ * above.
+ */
+ kstat_named_t arcstat_uncompressed_size;
+ /*
+ * Number of bytes stored in all the arc_buf_t's. This is classified
+ * as "overhead" since this data is typically short-lived and will
+ * be evicted from the arc when it becomes unreferenced unless the
+ * zfs_keep_uncompressed_metadata or zfs_keep_uncompressed_level
+ * values have been set (see comment in dbuf.c for more information).
+ */
+ kstat_named_t arcstat_overhead_size;
/*
* Number of bytes consumed by internal ARC structures necessary
* for tracking purposes; these structures are not actually
@@ -328,13 +497,17 @@ typedef struct arc_stats {
*/
kstat_named_t arcstat_metadata_size;
/*
- * Number of bytes consumed by various buffers and structures
- * not actually backed with ARC buffers. This includes bonus
- * buffers (allocated directly via zio_buf_* functions),
- * dmu_buf_impl_t structures (allocated via dmu_buf_impl_t
- * cache), and dnode_t structures (allocated via dnode_t cache).
+ * Number of bytes consumed by dmu_buf_impl_t objects.
+ */
+ kstat_named_t arcstat_dbuf_size;
+ /*
+ * Number of bytes consumed by dnode_t objects.
+ */
+ kstat_named_t arcstat_dnode_size;
+ /*
+ * Number of bytes consumed by bonus buffers.
*/
- kstat_named_t arcstat_other_size;
+ kstat_named_t arcstat_bonus_size;
/*
* Total number of bytes consumed by ARC buffers residing in the
* arc_anon state. This includes *all* buffers in the arc_anon
@@ -450,30 +623,29 @@ typedef struct arc_stats {
kstat_named_t arcstat_l2_evict_reading;
kstat_named_t arcstat_l2_evict_l1cached;
kstat_named_t arcstat_l2_free_on_write;
- kstat_named_t arcstat_l2_cdata_free_on_write;
kstat_named_t arcstat_l2_abort_lowmem;
kstat_named_t arcstat_l2_cksum_bad;
kstat_named_t arcstat_l2_io_error;
- kstat_named_t arcstat_l2_size;
- kstat_named_t arcstat_l2_asize;
+ kstat_named_t arcstat_l2_lsize;
+ kstat_named_t arcstat_l2_psize;
kstat_named_t arcstat_l2_hdr_size;
- kstat_named_t arcstat_l2_compress_successes;
- kstat_named_t arcstat_l2_compress_zeros;
- kstat_named_t arcstat_l2_compress_failures;
kstat_named_t arcstat_memory_throttle_count;
- kstat_named_t arcstat_duplicate_buffers;
- kstat_named_t arcstat_duplicate_buffers_size;
- kstat_named_t arcstat_duplicate_reads;
kstat_named_t arcstat_memory_direct_count;
kstat_named_t arcstat_memory_indirect_count;
+ kstat_named_t arcstat_memory_all_bytes;
+ kstat_named_t arcstat_memory_free_bytes;
+ kstat_named_t arcstat_memory_available_bytes;
kstat_named_t arcstat_no_grow;
kstat_named_t arcstat_tempreserve;
kstat_named_t arcstat_loaned_bytes;
kstat_named_t arcstat_prune;
kstat_named_t arcstat_meta_used;
kstat_named_t arcstat_meta_limit;
+ kstat_named_t arcstat_dnode_limit;
kstat_named_t arcstat_meta_max;
kstat_named_t arcstat_meta_min;
+ kstat_named_t arcstat_sync_wait_for_async;
+ kstat_named_t arcstat_demand_hit_predictive_prefetch;
kstat_named_t arcstat_need_free;
kstat_named_t arcstat_sys_free;
} arc_stats_t;
@@ -511,10 +683,15 @@ static arc_stats_t arc_stats = {
{ "c_min", KSTAT_DATA_UINT64 },
{ "c_max", KSTAT_DATA_UINT64 },
{ "size", KSTAT_DATA_UINT64 },
+ { "compressed_size", KSTAT_DATA_UINT64 },
+ { "uncompressed_size", KSTAT_DATA_UINT64 },
+ { "overhead_size", KSTAT_DATA_UINT64 },
{ "hdr_size", KSTAT_DATA_UINT64 },
{ "data_size", KSTAT_DATA_UINT64 },
{ "metadata_size", KSTAT_DATA_UINT64 },
- { "other_size", KSTAT_DATA_UINT64 },
+ { "dbuf_size", KSTAT_DATA_UINT64 },
+ { "dnode_size", KSTAT_DATA_UINT64 },
+ { "bonus_size", KSTAT_DATA_UINT64 },
{ "anon_size", KSTAT_DATA_UINT64 },
{ "anon_evictable_data", KSTAT_DATA_UINT64 },
{ "anon_evictable_metadata", KSTAT_DATA_UINT64 },
@@ -544,30 +721,29 @@ static arc_stats_t arc_stats = {
{ "l2_evict_reading", KSTAT_DATA_UINT64 },
{ "l2_evict_l1cached", KSTAT_DATA_UINT64 },
{ "l2_free_on_write", KSTAT_DATA_UINT64 },
- { "l2_cdata_free_on_write", KSTAT_DATA_UINT64 },
{ "l2_abort_lowmem", KSTAT_DATA_UINT64 },
{ "l2_cksum_bad", KSTAT_DATA_UINT64 },
{ "l2_io_error", KSTAT_DATA_UINT64 },
{ "l2_size", KSTAT_DATA_UINT64 },
{ "l2_asize", KSTAT_DATA_UINT64 },
{ "l2_hdr_size", KSTAT_DATA_UINT64 },
- { "l2_compress_successes", KSTAT_DATA_UINT64 },
- { "l2_compress_zeros", KSTAT_DATA_UINT64 },
- { "l2_compress_failures", KSTAT_DATA_UINT64 },
{ "memory_throttle_count", KSTAT_DATA_UINT64 },
- { "duplicate_buffers", KSTAT_DATA_UINT64 },
- { "duplicate_buffers_size", KSTAT_DATA_UINT64 },
- { "duplicate_reads", KSTAT_DATA_UINT64 },
{ "memory_direct_count", KSTAT_DATA_UINT64 },
{ "memory_indirect_count", KSTAT_DATA_UINT64 },
+ { "memory_all_bytes", KSTAT_DATA_UINT64 },
+ { "memory_free_bytes", KSTAT_DATA_UINT64 },
+ { "memory_available_bytes", KSTAT_DATA_INT64 },
{ "arc_no_grow", KSTAT_DATA_UINT64 },
{ "arc_tempreserve", KSTAT_DATA_UINT64 },
{ "arc_loaned_bytes", KSTAT_DATA_UINT64 },
{ "arc_prune", KSTAT_DATA_UINT64 },
{ "arc_meta_used", KSTAT_DATA_UINT64 },
{ "arc_meta_limit", KSTAT_DATA_UINT64 },
+ { "arc_dnode_limit", KSTAT_DATA_UINT64 },
{ "arc_meta_max", KSTAT_DATA_UINT64 },
{ "arc_meta_min", KSTAT_DATA_UINT64 },
+ { "sync_wait_for_async", KSTAT_DATA_UINT64 },
+ { "demand_hit_predictive_prefetch", KSTAT_DATA_UINT64 },
{ "arc_need_free", KSTAT_DATA_UINT64 },
{ "arc_sys_free", KSTAT_DATA_UINT64 }
};
@@ -631,24 +807,30 @@ static arc_state_t *arc_l2c_only;
#define arc_c ARCSTAT(arcstat_c) /* target size of cache */
#define arc_c_min ARCSTAT(arcstat_c_min) /* min target cache size */
#define arc_c_max ARCSTAT(arcstat_c_max) /* max target cache size */
-#define arc_no_grow ARCSTAT(arcstat_no_grow)
+#define arc_no_grow ARCSTAT(arcstat_no_grow) /* do not grow cache size */
#define arc_tempreserve ARCSTAT(arcstat_tempreserve)
#define arc_loaned_bytes ARCSTAT(arcstat_loaned_bytes)
#define arc_meta_limit ARCSTAT(arcstat_meta_limit) /* max size for metadata */
+#define arc_dnode_limit ARCSTAT(arcstat_dnode_limit) /* max size for dnodes */
#define arc_meta_min ARCSTAT(arcstat_meta_min) /* min size for metadata */
#define arc_meta_used ARCSTAT(arcstat_meta_used) /* size of metadata */
#define arc_meta_max ARCSTAT(arcstat_meta_max) /* max size of metadata */
+#define arc_dbuf_size ARCSTAT(arcstat_dbuf_size) /* dbuf metadata */
+#define arc_dnode_size ARCSTAT(arcstat_dnode_size) /* dnode metadata */
+#define arc_bonus_size ARCSTAT(arcstat_bonus_size) /* bonus buffer metadata */
#define arc_need_free ARCSTAT(arcstat_need_free) /* bytes to be freed */
#define arc_sys_free ARCSTAT(arcstat_sys_free) /* target system free bytes */
-#define L2ARC_IS_VALID_COMPRESS(_c_) \
- ((_c_) == ZIO_COMPRESS_LZ4 || (_c_) == ZIO_COMPRESS_EMPTY)
+/* compressed size of entire arc */
+#define arc_compressed_size ARCSTAT(arcstat_compressed_size)
+/* uncompressed size of entire arc */
+#define arc_uncompressed_size ARCSTAT(arcstat_uncompressed_size)
+/* number of bytes in the arc from arc_buf_t's */
+#define arc_overhead_size ARCSTAT(arcstat_overhead_size)
static list_t arc_prune_list;
static kmutex_t arc_prune_mtx;
static taskq_t *arc_prune_taskq;
-static arc_buf_t *arc_eviction_list;
-static arc_buf_hdr_t arc_eviction_hdr;
#define GHOST_STATE(state) \
((state) == arc_mru_ghost || (state) == arc_mfu_ghost || \
@@ -658,25 +840,37 @@ static arc_buf_hdr_t arc_eviction_hdr;
#define HDR_IO_IN_PROGRESS(hdr) ((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS)
#define HDR_IO_ERROR(hdr) ((hdr)->b_flags & ARC_FLAG_IO_ERROR)
#define HDR_PREFETCH(hdr) ((hdr)->b_flags & ARC_FLAG_PREFETCH)
-#define HDR_FREED_IN_READ(hdr) ((hdr)->b_flags & ARC_FLAG_FREED_IN_READ)
-#define HDR_BUF_AVAILABLE(hdr) ((hdr)->b_flags & ARC_FLAG_BUF_AVAILABLE)
+#define HDR_COMPRESSION_ENABLED(hdr) \
+ ((hdr)->b_flags & ARC_FLAG_COMPRESSED_ARC)
#define HDR_L2CACHE(hdr) ((hdr)->b_flags & ARC_FLAG_L2CACHE)
-#define HDR_L2COMPRESS(hdr) ((hdr)->b_flags & ARC_FLAG_L2COMPRESS)
#define HDR_L2_READING(hdr) \
- (((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS) && \
- ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR))
+ (((hdr)->b_flags & ARC_FLAG_IO_IN_PROGRESS) && \
+ ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR))
#define HDR_L2_WRITING(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITING)
#define HDR_L2_EVICTED(hdr) ((hdr)->b_flags & ARC_FLAG_L2_EVICTED)
#define HDR_L2_WRITE_HEAD(hdr) ((hdr)->b_flags & ARC_FLAG_L2_WRITE_HEAD)
+#define HDR_SHARED_DATA(hdr) ((hdr)->b_flags & ARC_FLAG_SHARED_DATA)
#define HDR_ISTYPE_METADATA(hdr) \
- ((hdr)->b_flags & ARC_FLAG_BUFC_METADATA)
+ ((hdr)->b_flags & ARC_FLAG_BUFC_METADATA)
#define HDR_ISTYPE_DATA(hdr) (!HDR_ISTYPE_METADATA(hdr))
#define HDR_HAS_L1HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L1HDR)
#define HDR_HAS_L2HDR(hdr) ((hdr)->b_flags & ARC_FLAG_HAS_L2HDR)
+/* For storing compression mode in b_flags */
+#define HDR_COMPRESS_OFFSET (highbit64(ARC_FLAG_COMPRESS_0) - 1)
+
+#define HDR_GET_COMPRESS(hdr) ((enum zio_compress)BF32_GET((hdr)->b_flags, \
+ HDR_COMPRESS_OFFSET, SPA_COMPRESSBITS))
+#define HDR_SET_COMPRESS(hdr, cmp) BF32_SET((hdr)->b_flags, \
+ HDR_COMPRESS_OFFSET, SPA_COMPRESSBITS, (cmp));
+
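+/*
+ * Usage sketch (hypothetical): the compression mode lives in b_flags, so
+ * callers go through these accessors rather than touching the bitfield
+ * directly:
+ *
+ *	HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_LZ4);
+ *	ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_LZ4);
+ */
+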
+#define ARC_BUF_LAST(buf) ((buf)->b_next == NULL)
+#define ARC_BUF_SHARED(buf) ((buf)->b_flags & ARC_BUF_FLAG_SHARED)
+#define ARC_BUF_COMPRESSED(buf) ((buf)->b_flags & ARC_BUF_FLAG_COMPRESSED)
+
/*
* Other sizes
*/
@@ -722,6 +916,7 @@ uint64_t zfs_crc64_table[256];
#define L2ARC_WRITE_SIZE (8 * 1024 * 1024) /* initial write max */
#define L2ARC_HEADROOM 2 /* num of writes */
+
/*
* If we discover during ARC scan any buffers to be compressed, we boost
* our headroom for the next scanning cycle by this percentage multiple.
@@ -731,14 +926,10 @@ uint64_t zfs_crc64_table[256];
#define L2ARC_FEED_MIN_MS 200 /* min caching interval ms */
/*
- * Used to distinguish headers that are being process by
- * l2arc_write_buffers(), but have yet to be assigned to a l2arc disk
- * address. This can happen when the header is added to the l2arc's list
- * of buffers to write in the first stage of l2arc_write_buffers(), but
- * has not yet been written out which happens in the second stage of
- * l2arc_write_buffers().
+ * We can feed L2ARC from two states of ARC buffers, mru and mfu,
+ * and each of these states has two types: data and metadata.
*/
-#define L2ARC_ADDR_UNSET ((uint64_t)(-1))
+#define L2ARC_FEED_TYPES 4
#define l2arc_writes_sent ARCSTAT(arcstat_l2_writes_sent)
#define l2arc_writes_done ARCSTAT(arcstat_l2_writes_done)
@@ -751,7 +942,6 @@ unsigned long l2arc_headroom_boost = L2ARC_HEADROOM_BOOST;
unsigned long l2arc_feed_secs = L2ARC_FEED_SECS; /* interval seconds */
unsigned long l2arc_feed_min_ms = L2ARC_FEED_MIN_MS; /* min interval msecs */
int l2arc_noprefetch = B_TRUE; /* don't cache prefetch bufs */
-int l2arc_nocompress = B_FALSE; /* don't compress bufs */
int l2arc_feed_again = B_TRUE; /* turbo warmup */
int l2arc_norw = B_FALSE; /* no reads during writes */
@@ -768,19 +958,18 @@ static kmutex_t l2arc_free_on_write_mtx; /* mutex for list */
static uint64_t l2arc_ndev; /* number of devices */
typedef struct l2arc_read_callback {
- arc_buf_t *l2rcb_buf; /* read buffer */
- spa_t *l2rcb_spa; /* spa */
+ arc_buf_hdr_t *l2rcb_hdr; /* read header */
blkptr_t l2rcb_bp; /* original blkptr */
zbookmark_phys_t l2rcb_zb; /* original bookmark */
int l2rcb_flags; /* original flags */
- enum zio_compress l2rcb_compress; /* applied compress */
+ abd_t *l2rcb_abd; /* temporary buffer */
} l2arc_read_callback_t;
typedef struct l2arc_data_free {
/* protected by l2arc_free_on_write_mtx */
- void *l2df_data;
+ abd_t *l2df_abd;
size_t l2df_size;
- void (*l2df_func)(void *, size_t);
+ arc_buf_contents_t l2df_type;
list_node_t l2df_list_node;
} l2arc_data_free_t;
@@ -788,22 +977,29 @@ static kmutex_t l2arc_feed_thr_lock;
static kcondvar_t l2arc_feed_thr_cv;
static uint8_t l2arc_thread_exit;
-static void arc_get_data_buf(arc_buf_t *);
+static abd_t *arc_get_data_abd(arc_buf_hdr_t *, uint64_t, void *);
+static void *arc_get_data_buf(arc_buf_hdr_t *, uint64_t, void *);
+static void arc_get_data_impl(arc_buf_hdr_t *, uint64_t, void *);
+static void arc_free_data_abd(arc_buf_hdr_t *, abd_t *, uint64_t, void *);
+static void arc_free_data_buf(arc_buf_hdr_t *, void *, uint64_t, void *);
+static void arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag);
+static void arc_hdr_free_pabd(arc_buf_hdr_t *);
+static void arc_hdr_alloc_pabd(arc_buf_hdr_t *);
static void arc_access(arc_buf_hdr_t *, kmutex_t *);
static boolean_t arc_is_overflowing(void);
static void arc_buf_watch(arc_buf_t *);
static void arc_tuning_update(void);
+static void arc_prune_async(int64_t);
+static uint64_t arc_all_memory(void);
static arc_buf_contents_t arc_buf_type(arc_buf_hdr_t *);
static uint32_t arc_bufc_to_flags(arc_buf_contents_t);
+static inline void arc_hdr_set_flags(arc_buf_hdr_t *hdr, arc_flags_t flags);
+static inline void arc_hdr_clear_flags(arc_buf_hdr_t *hdr, arc_flags_t flags);
static boolean_t l2arc_write_eligible(uint64_t, arc_buf_hdr_t *);
static void l2arc_read_done(zio_t *);
-static boolean_t l2arc_compress_buf(arc_buf_hdr_t *);
-static void l2arc_decompress_zio(zio_t *, arc_buf_hdr_t *, enum zio_compress);
-static void l2arc_release_cdata_buf(arc_buf_hdr_t *);
-
static uint64_t
buf_hash(uint64_t spa, const dva_t *dva, uint64_t birth)
{
@@ -821,14 +1017,14 @@ buf_hash(uint64_t spa, const dva_t *dva, uint64_t birth)
return (crc);
}
-#define BUF_EMPTY(buf) \
- ((buf)->b_dva.dva_word[0] == 0 && \
- (buf)->b_dva.dva_word[1] == 0)
+#define HDR_EMPTY(hdr) \
+ ((hdr)->b_dva.dva_word[0] == 0 && \
+ (hdr)->b_dva.dva_word[1] == 0)
-#define BUF_EQUAL(spa, dva, birth, buf) \
- ((buf)->b_dva.dva_word[0] == (dva)->dva_word[0]) && \
- ((buf)->b_dva.dva_word[1] == (dva)->dva_word[1]) && \
- ((buf)->b_birth == birth) && ((buf)->b_spa == spa)
+#define HDR_EQUAL(spa, dva, birth, hdr) \
+ ((hdr)->b_dva.dva_word[0] == (dva)->dva_word[0]) && \
+ ((hdr)->b_dva.dva_word[1] == (dva)->dva_word[1]) && \
+ ((hdr)->b_birth == birth) && ((hdr)->b_spa == spa)
static void
buf_discard_identity(arc_buf_hdr_t *hdr)
@@ -850,7 +1046,7 @@ buf_hash_find(uint64_t spa, const blkptr_t *bp, kmutex_t **lockp)
mutex_enter(hash_lock);
for (hdr = buf_hash_table.ht_table[idx]; hdr != NULL;
hdr = hdr->b_hash_next) {
- if (BUF_EQUAL(spa, dva, birth, hdr)) {
+ if (HDR_EQUAL(spa, dva, birth, hdr)) {
*lockp = hash_lock;
return (hdr);
}
@@ -888,13 +1084,13 @@ buf_hash_insert(arc_buf_hdr_t *hdr, kmutex_t **lockp)
for (fhdr = buf_hash_table.ht_table[idx], i = 0; fhdr != NULL;
fhdr = fhdr->b_hash_next, i++) {
- if (BUF_EQUAL(hdr->b_spa, &hdr->b_dva, hdr->b_birth, fhdr))
+ if (HDR_EQUAL(hdr->b_spa, &hdr->b_dva, hdr->b_birth, fhdr))
return (fhdr);
}
hdr->b_hash_next = buf_hash_table.ht_table[idx];
buf_hash_table.ht_table[idx] = hdr;
- hdr->b_flags |= ARC_FLAG_IN_HASH_TABLE;
+ arc_hdr_set_flags(hdr, ARC_FLAG_IN_HASH_TABLE);
/* collect some hash table performance data */
if (i > 0) {
@@ -922,12 +1118,12 @@ buf_hash_remove(arc_buf_hdr_t *hdr)
hdrp = &buf_hash_table.ht_table[idx];
while ((fhdr = *hdrp) != hdr) {
- ASSERT(fhdr != NULL);
+ ASSERT3P(fhdr, !=, NULL);
hdrp = &fhdr->b_hash_next;
}
*hdrp = hdr->b_hash_next;
hdr->b_hash_next = NULL;
- hdr->b_flags &= ~ARC_FLAG_IN_HASH_TABLE;
+ arc_hdr_clear_flags(hdr, ARC_FLAG_IN_HASH_TABLE);
/* collect some hash table performance data */
ARCSTAT_BUMPDOWN(arcstat_hash_elements);
@@ -1024,7 +1220,7 @@ hdr_full_dest(void *vbuf, void *unused)
{
arc_buf_hdr_t *hdr = vbuf;
- ASSERT(BUF_EMPTY(hdr));
+ ASSERT(HDR_EMPTY(hdr));
cv_destroy(&hdr->b_l1hdr.b_cv);
refcount_destroy(&hdr->b_l1hdr.b_refcnt);
mutex_destroy(&hdr->b_l1hdr.b_freeze_lock);
@@ -1038,7 +1234,7 @@ hdr_l2only_dest(void *vbuf, void *unused)
{
ASSERTV(arc_buf_hdr_t *hdr = vbuf);
- ASSERT(BUF_EMPTY(hdr));
+ ASSERT(HDR_EMPTY(hdr));
arc_space_return(HDR_L2ONLY_SIZE, ARC_SPACE_L2HDRS);
}
@@ -1071,7 +1267,7 @@ hdr_recl(void *unused)
static void
buf_init(void)
{
- uint64_t *ct;
+ uint64_t *ct = NULL;
uint64_t hsize = 1ULL << 12;
int i, j;
@@ -1081,7 +1277,7 @@ buf_init(void)
* By default, the table will take up
* totalmem * sizeof(void*) / 8K (1MB per GB with 8-byte pointers).
*/
- while (hsize * zfs_arc_average_blocksize < physmem * PAGESIZE)
+ while (hsize * zfs_arc_average_blocksize < arc_all_memory())
hsize <<= 1;
retry:
buf_hash_table.ht_mask = hsize - 1;
@@ -1120,159 +1316,220 @@ buf_init(void)
}
}
+#define ARC_MINTIME (hz>>4) /* 62 ms */
+
/*
- * Transition between the two allocation states for the arc_buf_hdr struct.
- * The arc_buf_hdr struct can be allocated with (hdr_full_cache) or without
- * (hdr_l2only_cache) the fields necessary for the L1 cache - the smaller
- * version is used when a cache buffer is only in the L2ARC in order to reduce
- * memory usage.
+ * This is the size that the buf occupies in memory. If the buf is compressed,
+ * it will correspond to the compressed size. You should use this method of
+ * getting the buf size unless you explicitly need the logical size.
*/
-static arc_buf_hdr_t *
-arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
+uint64_t
+arc_buf_size(arc_buf_t *buf)
{
- arc_buf_hdr_t *nhdr;
- l2arc_dev_t *dev;
-
- ASSERT(HDR_HAS_L2HDR(hdr));
- ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) ||
- (old == hdr_l2only_cache && new == hdr_full_cache));
-
- dev = hdr->b_l2hdr.b_dev;
- nhdr = kmem_cache_alloc(new, KM_PUSHPAGE);
-
- ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
- buf_hash_remove(hdr);
-
- bcopy(hdr, nhdr, HDR_L2ONLY_SIZE);
-
- if (new == hdr_full_cache) {
- nhdr->b_flags |= ARC_FLAG_HAS_L1HDR;
- /*
- * arc_access and arc_change_state need to be aware that a
- * header has just come out of L2ARC, so we set its state to
- * l2c_only even though it's about to change.
- */
- nhdr->b_l1hdr.b_state = arc_l2c_only;
-
- /* Verify previous threads set to NULL before freeing */
- ASSERT3P(nhdr->b_l1hdr.b_tmp_cdata, ==, NULL);
- } else {
- ASSERT(hdr->b_l1hdr.b_buf == NULL);
- ASSERT0(hdr->b_l1hdr.b_datacnt);
-
- /*
- * If we've reached here, We must have been called from
- * arc_evict_hdr(), as such we should have already been
- * removed from any ghost list we were previously on
- * (which protects us from racing with arc_evict_state),
- * thus no locking is needed during this check.
- */
- ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
-
- /*
- * A buffer must not be moved into the arc_l2c_only
- * state if it's not finished being written out to the
- * l2arc device. Otherwise, the b_l1hdr.b_tmp_cdata field
- * might try to be accessed, even though it was removed.
- */
- VERIFY(!HDR_L2_WRITING(hdr));
- VERIFY3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
-
- nhdr->b_flags &= ~ARC_FLAG_HAS_L1HDR;
- }
- /*
- * The header has been reallocated so we need to re-insert it into any
- * lists it was on.
- */
- (void) buf_hash_insert(nhdr, NULL);
-
- ASSERT(list_link_active(&hdr->b_l2hdr.b_l2node));
+ return (ARC_BUF_COMPRESSED(buf) ?
+ HDR_GET_PSIZE(buf->b_hdr) : HDR_GET_LSIZE(buf->b_hdr));
+}
- mutex_enter(&dev->l2ad_mtx);
+uint64_t
+arc_buf_lsize(arc_buf_t *buf)
+{
+ return (HDR_GET_LSIZE(buf->b_hdr));
+}
- /*
- * We must place the realloc'ed header back into the list at
- * the same spot. Otherwise, if it's placed earlier in the list,
- * l2arc_write_buffers() could find it during the function's
- * write phase, and try to write it out to the l2arc.
- */
- list_insert_after(&dev->l2ad_buflist, hdr, nhdr);
- list_remove(&dev->l2ad_buflist, hdr);
+enum zio_compress
+arc_get_compression(arc_buf_t *buf)
+{
+ return (ARC_BUF_COMPRESSED(buf) ?
+ HDR_GET_COMPRESS(buf->b_hdr) : ZIO_COMPRESS_OFF);
+}
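/*
 * A user-space sketch of the size-reporting rule above, using stand-in
 * types rather than the kernel structures (every name below is
 * illustrative): a compressed buf reports its physical (on-disk) size,
 * an uncompressed buf its logical size.
 */
#include <stdint.h>

struct mock_buf {
	uint64_t	lsize;		/* logical (uncompressed) size */
	uint64_t	psize;		/* physical (compressed) size */
	int		compressed;	/* models ARC_BUF_COMPRESSED() */
};

static uint64_t
mock_buf_size(const struct mock_buf *b)
{
	return (b->compressed ? b->psize : b->lsize);
}

/* A 128K record compressed to 16K reports 16384; uncompressed, 131072. */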
- mutex_exit(&dev->l2ad_mtx);
+static inline boolean_t
+arc_buf_is_shared(arc_buf_t *buf)
+{
+ boolean_t shared = (buf->b_data != NULL &&
+ buf->b_hdr->b_l1hdr.b_pabd != NULL &&
+ abd_is_linear(buf->b_hdr->b_l1hdr.b_pabd) &&
+ buf->b_data == abd_to_buf(buf->b_hdr->b_l1hdr.b_pabd));
+ IMPLY(shared, HDR_SHARED_DATA(buf->b_hdr));
+ IMPLY(shared, ARC_BUF_SHARED(buf));
+ IMPLY(shared, ARC_BUF_COMPRESSED(buf) || ARC_BUF_LAST(buf));
/*
- * Since we're using the pointer address as the tag when
- * incrementing and decrementing the l2ad_alloc refcount, we
- * must remove the old pointer (that we're about to destroy) and
- * add the new pointer to the refcount. Otherwise we'd remove
- * the wrong pointer address when calling arc_hdr_destroy() later.
+ * It would be nice to assert arc_can_share() too, but the "hdr isn't
+ * already being shared" requirement prevents us from doing that.
*/
- (void) refcount_remove_many(&dev->l2ad_alloc,
- hdr->b_l2hdr.b_asize, hdr);
-
- (void) refcount_add_many(&dev->l2ad_alloc,
- nhdr->b_l2hdr.b_asize, nhdr);
-
- buf_discard_identity(hdr);
- hdr->b_freeze_cksum = NULL;
- kmem_cache_free(old, hdr);
+ return (shared);
+}
- return (nhdr);
+/*
+ * Free the checksum associated with this header. If there is no checksum, this
+ * is a no-op.
+ */
+static inline void
+arc_cksum_free(arc_buf_hdr_t *hdr)
+{
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ mutex_enter(&hdr->b_l1hdr.b_freeze_lock);
+ if (hdr->b_l1hdr.b_freeze_cksum != NULL) {
+ kmem_free(hdr->b_l1hdr.b_freeze_cksum, sizeof (zio_cksum_t));
+ hdr->b_l1hdr.b_freeze_cksum = NULL;
+ }
+ mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
}
+/*
+ * Return true iff at least one of the bufs on hdr is not compressed.
+ */
+static boolean_t
+arc_hdr_has_uncompressed_buf(arc_buf_hdr_t *hdr)
+{
+ for (arc_buf_t *b = hdr->b_l1hdr.b_buf; b != NULL; b = b->b_next) {
+ if (!ARC_BUF_COMPRESSED(b)) {
+ return (B_TRUE);
+ }
+ }
+ return (B_FALSE);
+}
-#define ARC_MINTIME (hz>>4) /* 62 ms */
+/*
+ * If we've turned on the ZFS_DEBUG_MODIFY flag, verify that the buf's data
+ * matches the checksum that is stored in the hdr. If there is no checksum,
+ * or if the buf is compressed, this is a no-op.
+ */
static void
arc_cksum_verify(arc_buf_t *buf)
{
+ arc_buf_hdr_t *hdr = buf->b_hdr;
zio_cksum_t zc;
if (!(zfs_flags & ZFS_DEBUG_MODIFY))
return;
- mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
- if (buf->b_hdr->b_freeze_cksum == NULL || HDR_IO_ERROR(buf->b_hdr)) {
- mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
+ if (ARC_BUF_COMPRESSED(buf)) {
+ ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
+ arc_hdr_has_uncompressed_buf(hdr));
+ return;
+ }
+
+ ASSERT(HDR_HAS_L1HDR(hdr));
+
+ mutex_enter(&hdr->b_l1hdr.b_freeze_lock);
+ if (hdr->b_l1hdr.b_freeze_cksum == NULL || HDR_IO_ERROR(hdr)) {
+ mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
return;
}
- fletcher_2_native(buf->b_data, buf->b_hdr->b_size, &zc);
- if (!ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc))
+
+ fletcher_2_native(buf->b_data, arc_buf_size(buf), NULL, &zc);
+ if (!ZIO_CHECKSUM_EQUAL(*hdr->b_l1hdr.b_freeze_cksum, zc))
panic("buffer modified while frozen!");
- mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
+ mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
}
-static int
-arc_cksum_equal(arc_buf_t *buf)
+static boolean_t
+arc_cksum_is_equal(arc_buf_hdr_t *hdr, zio_t *zio)
{
- zio_cksum_t zc;
- int equal;
+ enum zio_compress compress = BP_GET_COMPRESS(zio->io_bp);
+ boolean_t valid_cksum;
- mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
- fletcher_2_native(buf->b_data, buf->b_hdr->b_size, &zc);
- equal = ZIO_CHECKSUM_EQUAL(*buf->b_hdr->b_freeze_cksum, zc);
- mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
+ ASSERT(!BP_IS_EMBEDDED(zio->io_bp));
+ VERIFY3U(BP_GET_PSIZE(zio->io_bp), ==, HDR_GET_PSIZE(hdr));
+
+ /*
+ * We rely on the blkptr's checksum to determine if the block
+ * is valid or not. When compressed arc is enabled, the l2arc
+ * writes the block to the l2arc just as it appears in the pool.
+ * This allows us to use the blkptr's checksum to validate the
+ * data that we just read off of the l2arc without having to store
+ * a separate checksum in the arc_buf_hdr_t. However, if compressed
+ * arc is disabled, then the data written to the l2arc is always
+ * uncompressed and won't match the block as it exists in the main
+ * pool. When this is the case, we must first compress it if it is
+ * compressed on the main pool before we can validate the checksum.
+ */
+ if (!HDR_COMPRESSION_ENABLED(hdr) && compress != ZIO_COMPRESS_OFF) {
+ uint64_t lsize;
+ uint64_t csize;
+ void *cbuf;
+ ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF);
+
+ cbuf = zio_buf_alloc(HDR_GET_PSIZE(hdr));
+ lsize = HDR_GET_LSIZE(hdr);
+ csize = zio_compress_data(compress, zio->io_abd, cbuf, lsize);
+
+ ASSERT3U(csize, <=, HDR_GET_PSIZE(hdr));
+ if (csize < HDR_GET_PSIZE(hdr)) {
+ /*
+ * Compressed blocks are always a multiple of the
+ * smallest ashift in the pool. Ideally, we would
+ * like to round up the csize to the next
+ * spa_min_ashift but that value may have changed
+ * since the block was last written. Instead,
+ * we rely on the fact that the hdr's psize
+ * was set to the psize of the block when it was
+ * last written. We set the csize to that value
+ * and zero out any part that should not contain
+ * data.
+ */
+ bzero((char *)cbuf + csize, HDR_GET_PSIZE(hdr) - csize);
+ csize = HDR_GET_PSIZE(hdr);
+ }
+ zio_push_transform(zio, cbuf, csize, HDR_GET_PSIZE(hdr), NULL);
+ }
- return (equal);
+ /*
+ * Block pointers always store the checksum for the logical data.
+ * If the block pointer has the gang bit set, then the checksum
+ * it represents is for the reconstituted data and not for an
+ * individual gang member. The zio pipeline, however, must be able to
+ * determine the checksum of each of the gang constituents so it
+ * treats the checksum comparison differently than what we need
+ * for l2arc blocks. This prevents us from using the
+ * zio_checksum_error() interface directly. Instead we must call the
+ * zio_checksum_error_impl() so that we can ensure the checksum is
+ * generated using the correct checksum algorithm and accounts for the
+ * logical I/O size and not just a gang fragment.
+ */
+ valid_cksum = (zio_checksum_error_impl(zio->io_spa, zio->io_bp,
+ BP_GET_CHECKSUM(zio->io_bp), zio->io_abd, zio->io_size,
+ zio->io_offset, NULL) == 0);
+ zio_pop_transforms(zio);
+ return (valid_cksum);
}
+/*
+ * Given a buf full of data, if ZFS_DEBUG_MODIFY is enabled this computes a
+ * checksum and attaches it to the buf's hdr so that we can ensure that the buf
+ * isn't modified later on. If buf is compressed or there is already a checksum
+ * on the hdr, this is a no-op (we only checksum uncompressed bufs).
+ */
static void
-arc_cksum_compute(arc_buf_t *buf, boolean_t force)
+arc_cksum_compute(arc_buf_t *buf)
{
- if (!force && !(zfs_flags & ZFS_DEBUG_MODIFY))
+ arc_buf_hdr_t *hdr = buf->b_hdr;
+
+ if (!(zfs_flags & ZFS_DEBUG_MODIFY))
return;
+ ASSERT(HDR_HAS_L1HDR(hdr));
+
mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
- if (buf->b_hdr->b_freeze_cksum != NULL) {
- mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
+ if (hdr->b_l1hdr.b_freeze_cksum != NULL) {
+ ASSERT(arc_hdr_has_uncompressed_buf(hdr));
+ mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
+ return;
+ } else if (ARC_BUF_COMPRESSED(buf)) {
+ mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
return;
}
- buf->b_hdr->b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t), KM_SLEEP);
- fletcher_2_native(buf->b_data, buf->b_hdr->b_size,
- buf->b_hdr->b_freeze_cksum);
- mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
+
+ ASSERT(!ARC_BUF_COMPRESSED(buf));
+ hdr->b_l1hdr.b_freeze_cksum = kmem_alloc(sizeof (zio_cksum_t),
+ KM_SLEEP);
+ fletcher_2_native(buf->b_data, arc_buf_size(buf), NULL,
+ hdr->b_l1hdr.b_freeze_cksum);
+ mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
arc_buf_watch(buf);
}
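/*
 * The ZFS_DEBUG_MODIFY protocol above in miniature: freezing a buffer
 * records a checksum, verification recomputes and compares it, and
 * thawing (declaring intent to modify) discards it. A trivial additive
 * checksum stands in for fletcher_2_native; all names here are
 * illustrative, not the kernel API.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>

struct mock_freeze_state {
	uint64_t	cksum;
	int		frozen;
};

static uint64_t
mock_cksum(const uint8_t *data, size_t len)
{
	uint64_t c = 0;
	size_t i;

	for (i = 0; i < len; i++)
		c += data[i];
	return (c);
}

static void
mock_freeze(struct mock_freeze_state *f, const uint8_t *d, size_t n)
{
	f->cksum = mock_cksum(d, n);	/* plays the role of arc_cksum_compute() */
	f->frozen = 1;
}

static void
mock_verify(const struct mock_freeze_state *f, const uint8_t *d, size_t n)
{
	if (f->frozen)	/* no recorded checksum means verification is a no-op */
		assert(f->cksum == mock_cksum(d, n)); /* "modified while frozen" */
}

static void
mock_thaw(struct mock_freeze_state *f)
{
	f->frozen = 0;			/* plays the role of arc_cksum_free() */
}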
@@ -1280,7 +1537,7 @@ arc_cksum_compute(arc_buf_t *buf, boolean_t force)
void
arc_buf_sigsegv(int sig, siginfo_t *si, void *unused)
{
- panic("Got SIGSEGV at address: 0x%lx\n", (long) si->si_addr);
+ panic("Got SIGSEGV at address: 0x%lx\n", (long)si->si_addr);
}
#endif
@@ -1290,7 +1547,7 @@ arc_buf_unwatch(arc_buf_t *buf)
{
#ifndef _KERNEL
if (arc_watch) {
- ASSERT0(mprotect(buf->b_data, buf->b_hdr->b_size,
+ ASSERT0(mprotect(buf->b_data, arc_buf_size(buf),
PROT_READ | PROT_WRITE));
}
#endif
@@ -1302,18 +1559,28 @@ arc_buf_watch(arc_buf_t *buf)
{
#ifndef _KERNEL
if (arc_watch)
- ASSERT0(mprotect(buf->b_data, buf->b_hdr->b_size, PROT_READ));
+ ASSERT0(mprotect(buf->b_data, arc_buf_size(buf),
+ PROT_READ));
#endif
}
static arc_buf_contents_t
arc_buf_type(arc_buf_hdr_t *hdr)
{
+ arc_buf_contents_t type;
if (HDR_ISTYPE_METADATA(hdr)) {
- return (ARC_BUFC_METADATA);
+ type = ARC_BUFC_METADATA;
} else {
- return (ARC_BUFC_DATA);
+ type = ARC_BUFC_DATA;
}
+ VERIFY3U(hdr->b_type, ==, type);
+ return (type);
+}
+
+boolean_t
+arc_is_metadata(arc_buf_t *buf)
+{
+ return (HDR_ISTYPE_METADATA(buf->b_hdr) != 0);
}
static uint32_t
@@ -1335,83 +1602,387 @@ arc_bufc_to_flags(arc_buf_contents_t type)
void
arc_buf_thaw(arc_buf_t *buf)
{
- if (zfs_flags & ZFS_DEBUG_MODIFY) {
- if (buf->b_hdr->b_l1hdr.b_state != arc_anon)
- panic("modifying non-anon buffer!");
- if (HDR_IO_IN_PROGRESS(buf->b_hdr))
- panic("modifying buffer while i/o in progress!");
- arc_cksum_verify(buf);
- }
+ arc_buf_hdr_t *hdr = buf->b_hdr;
- mutex_enter(&buf->b_hdr->b_l1hdr.b_freeze_lock);
- if (buf->b_hdr->b_freeze_cksum != NULL) {
- kmem_free(buf->b_hdr->b_freeze_cksum, sizeof (zio_cksum_t));
- buf->b_hdr->b_freeze_cksum = NULL;
- }
+ ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
+ ASSERT(!HDR_IO_IN_PROGRESS(hdr));
- mutex_exit(&buf->b_hdr->b_l1hdr.b_freeze_lock);
+ arc_cksum_verify(buf);
+
+ /*
+ * Compressed buffers do not manipulate the b_freeze_cksum or
+ * allocate b_thawed.
+ */
+ if (ARC_BUF_COMPRESSED(buf)) {
+ ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
+ arc_hdr_has_uncompressed_buf(hdr));
+ return;
+ }
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ arc_cksum_free(hdr);
arc_buf_unwatch(buf);
}
void
arc_buf_freeze(arc_buf_t *buf)
{
+ arc_buf_hdr_t *hdr = buf->b_hdr;
kmutex_t *hash_lock;
if (!(zfs_flags & ZFS_DEBUG_MODIFY))
return;
- hash_lock = HDR_LOCK(buf->b_hdr);
+ if (ARC_BUF_COMPRESSED(buf)) {
+ ASSERT(hdr->b_l1hdr.b_freeze_cksum == NULL ||
+ arc_hdr_has_uncompressed_buf(hdr));
+ return;
+ }
+
+ hash_lock = HDR_LOCK(hdr);
mutex_enter(hash_lock);
- ASSERT(buf->b_hdr->b_freeze_cksum != NULL ||
- buf->b_hdr->b_l1hdr.b_state == arc_anon);
- arc_cksum_compute(buf, B_FALSE);
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ ASSERT(hdr->b_l1hdr.b_freeze_cksum != NULL ||
+ hdr->b_l1hdr.b_state == arc_anon);
+ arc_cksum_compute(buf);
mutex_exit(hash_lock);
+}
+
+/*
+ * The arc_buf_hdr_t's b_flags should never be modified directly. Instead,
+ * the following functions should be used to ensure that the flags are
+ * updated in a thread-safe way. When manipulating the flags either
+ * the hash_lock must be held or the hdr must be undiscoverable. This
+ * ensures that we're not racing with any other threads when updating
+ * the flags.
+ */
+static inline void
+arc_hdr_set_flags(arc_buf_hdr_t *hdr, arc_flags_t flags)
+{
+ ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
+ hdr->b_flags |= flags;
+}
+static inline void
+arc_hdr_clear_flags(arc_buf_hdr_t *hdr, arc_flags_t flags)
+{
+ ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
+ hdr->b_flags &= ~flags;
}
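/*
 * A pthreads sketch of the locking discipline described above (stand-in
 * types, not the kernel mutex API): b_flags is only ever read-modify-
 * written while a lock serializes access, so concurrent updaters cannot
 * lose each other's bits.
 */
#include <pthread.h>
#include <stdint.h>

struct mock_hdr {
	pthread_mutex_t	lock;	/* stands in for the hash lock */
	uint32_t	flags;
};

static void
mock_hdr_set_flags(struct mock_hdr *h, uint32_t flags)
{
	/* caller must hold h->lock, mirroring the ASSERT above */
	h->flags |= flags;
}

static void
mock_hdr_clear_flags(struct mock_hdr *h, uint32_t flags)
{
	h->flags &= ~flags;
}

/*
 * Usage:
 *	pthread_mutex_lock(&h->lock);
 *	mock_hdr_set_flags(h, 0x1);
 *	pthread_mutex_unlock(&h->lock);
 */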
+/*
+ * Setting the compression bits in the arc_buf_hdr_t's b_flags is
+ * done in a special way since we have to clear and set bits
+ * at the same time. Consumers that wish to set the compression bits
+ * must use this function to ensure that the flags are updated in
+ * thread-safe manner.
+ */
static void
-add_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag)
+arc_hdr_set_compress(arc_buf_hdr_t *hdr, enum zio_compress cmp)
{
- arc_state_t *state;
+ ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
- ASSERT(HDR_HAS_L1HDR(hdr));
- ASSERT(MUTEX_HELD(hash_lock));
+ /*
+ * Holes and embedded blocks will always have a psize = 0, so
+ * we ignore the compression of the blkptr and simply mark
+ * them as uncompressed.
+ */
+ if (!zfs_compressed_arc_enabled || HDR_GET_PSIZE(hdr) == 0) {
+ arc_hdr_clear_flags(hdr, ARC_FLAG_COMPRESSED_ARC);
+ HDR_SET_COMPRESS(hdr, ZIO_COMPRESS_OFF);
+ ASSERT(!HDR_COMPRESSION_ENABLED(hdr));
+ ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF);
+ } else {
+ arc_hdr_set_flags(hdr, ARC_FLAG_COMPRESSED_ARC);
+ HDR_SET_COMPRESS(hdr, cmp);
+ ASSERT3U(HDR_GET_COMPRESS(hdr), ==, cmp);
+ ASSERT(HDR_COMPRESSION_ENABLED(hdr));
+ }
+}
- state = hdr->b_l1hdr.b_state;
+/*
+ * Looks for another buf on the same hdr which has the data decompressed, copies
+ * from it, and returns true. If no such buf exists, returns false.
+ */
+static boolean_t
+arc_buf_try_copy_decompressed_data(arc_buf_t *buf)
+{
+ arc_buf_hdr_t *hdr = buf->b_hdr;
+ boolean_t copied = B_FALSE;
- if ((refcount_add(&hdr->b_l1hdr.b_refcnt, tag) == 1) &&
- (state != arc_anon)) {
- /* We don't use the L2-only state list. */
- if (state != arc_l2c_only) {
- arc_buf_contents_t type = arc_buf_type(hdr);
- uint64_t delta = hdr->b_size * hdr->b_l1hdr.b_datacnt;
- multilist_t *list = &state->arcs_list[type];
- uint64_t *size = &state->arcs_lsize[type];
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ ASSERT3P(buf->b_data, !=, NULL);
+ ASSERT(!ARC_BUF_COMPRESSED(buf));
- multilist_remove(list, hdr);
+ for (arc_buf_t *from = hdr->b_l1hdr.b_buf; from != NULL;
+ from = from->b_next) {
+ /* can't use our own data buffer */
+ if (from == buf) {
+ continue;
+ }
- if (GHOST_STATE(state)) {
- ASSERT0(hdr->b_l1hdr.b_datacnt);
- ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
- delta = hdr->b_size;
- }
- ASSERT(delta > 0);
- ASSERT3U(*size, >=, delta);
- atomic_add_64(size, -delta);
+ if (!ARC_BUF_COMPRESSED(from)) {
+ bcopy(from->b_data, buf->b_data, arc_buf_size(buf));
+ copied = B_TRUE;
+ break;
}
- /* remove the prefetch flag if we get a reference */
- hdr->b_flags &= ~ARC_FLAG_PREFETCH;
}
+
+ /*
+ * There were no decompressed bufs, so there should not be a
+ * checksum on the hdr either.
+ */
+ EQUIV(!copied, hdr->b_l1hdr.b_freeze_cksum == NULL);
+
+ return (copied);
}
+/*
+ * Given a buf that has a data buffer attached to it, this function will
+ * efficiently fill the buf with data of the specified compression setting from
+ * the hdr and update the hdr's b_freeze_cksum if necessary. If the buf and hdr
+ * are already sharing a data buf, no copy is performed.
+ *
+ * If the buf is marked as compressed but uncompressed data was requested, this
+ * will allocate a new data buffer for the buf, remove that flag, and fill the
+ * buf with uncompressed data. You can't request a compressed buf on a hdr with
+ * uncompressed data, and (since we haven't added support for it yet) if you
+ * want compressed data your buf must already be marked as compressed and have
+ * the correct-sized data buffer.
+ */
static int
-remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag)
+arc_buf_fill(arc_buf_t *buf, boolean_t compressed)
{
- int cnt;
- arc_state_t *state = hdr->b_l1hdr.b_state;
+ arc_buf_hdr_t *hdr = buf->b_hdr;
+ boolean_t hdr_compressed = (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF);
+ dmu_object_byteswap_t bswap = hdr->b_l1hdr.b_byteswap;
+
+ ASSERT3P(buf->b_data, !=, NULL);
+ IMPLY(compressed, hdr_compressed);
+ IMPLY(compressed, ARC_BUF_COMPRESSED(buf));
+
+ if (hdr_compressed == compressed) {
+ if (!arc_buf_is_shared(buf)) {
+ abd_copy_to_buf(buf->b_data, hdr->b_l1hdr.b_pabd,
+ arc_buf_size(buf));
+ }
+ } else {
+ ASSERT(hdr_compressed);
+ ASSERT(!compressed);
+ ASSERT3U(HDR_GET_LSIZE(hdr), !=, HDR_GET_PSIZE(hdr));
+
+ /*
+ * If the buf is sharing its data with the hdr, unlink it and
+ * allocate a new data buffer for the buf.
+ */
+ if (arc_buf_is_shared(buf)) {
+ ASSERT(ARC_BUF_COMPRESSED(buf));
+
+ /* We need to give the buf its own b_data */
+ buf->b_flags &= ~ARC_BUF_FLAG_SHARED;
+ buf->b_data =
+ arc_get_data_buf(hdr, HDR_GET_LSIZE(hdr), buf);
+ arc_hdr_clear_flags(hdr, ARC_FLAG_SHARED_DATA);
+
+ /* Previously overhead was 0; just add new overhead */
+ ARCSTAT_INCR(arcstat_overhead_size, HDR_GET_LSIZE(hdr));
+ } else if (ARC_BUF_COMPRESSED(buf)) {
+ /* We need to reallocate the buf's b_data */
+ arc_free_data_buf(hdr, buf->b_data, HDR_GET_PSIZE(hdr),
+ buf);
+ buf->b_data =
+ arc_get_data_buf(hdr, HDR_GET_LSIZE(hdr), buf);
+
+ /* We increased the size of b_data; update overhead */
+ ARCSTAT_INCR(arcstat_overhead_size,
+ HDR_GET_LSIZE(hdr) - HDR_GET_PSIZE(hdr));
+ }
+
+ /*
+ * Regardless of the buf's previous compression settings, it
+ * should not be compressed at the end of this function.
+ */
+ buf->b_flags &= ~ARC_BUF_FLAG_COMPRESSED;
+
+ /*
+ * Try copying the data from another buf which already has a
+ * decompressed version. If that's not possible, it's time to
+ * bite the bullet and decompress the data from the hdr.
+ */
+ if (arc_buf_try_copy_decompressed_data(buf)) {
+ /* Skip byteswapping and checksumming (already done) */
+ ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, !=, NULL);
+ return (0);
+ } else {
+ int error = zio_decompress_data(HDR_GET_COMPRESS(hdr),
+ hdr->b_l1hdr.b_pabd, buf->b_data,
+ HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr));
+
+ /*
+ * Absent hardware errors or software bugs, this should
+ * be impossible, but log it anyway so we can debug it.
+ */
+ if (error != 0) {
+ zfs_dbgmsg(
+ "hdr %p, compress %d, psize %d, lsize %d",
+ hdr, HDR_GET_COMPRESS(hdr),
+ HDR_GET_PSIZE(hdr), HDR_GET_LSIZE(hdr));
+ return (SET_ERROR(EIO));
+ }
+ }
+ }
+
+ /* Byteswap the buf's data if necessary */
+ if (bswap != DMU_BSWAP_NUMFUNCS) {
+ ASSERT(!HDR_SHARED_DATA(hdr));
+ ASSERT3U(bswap, <, DMU_BSWAP_NUMFUNCS);
+ dmu_ot_byteswap[bswap].ob_func(buf->b_data, HDR_GET_LSIZE(hdr));
+ }
+
+ /* Compute the hdr's checksum if necessary */
+ arc_cksum_compute(buf);
+
+ return (0);
+}
+
+int
+arc_decompress(arc_buf_t *buf)
+{
+ return (arc_buf_fill(buf, B_FALSE));
+}
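/*
 * The decision arc_buf_fill() makes, condensed into user space (names
 * are illustrative): matching compression settings mean a straight copy
 * from the hdr's data; a compressed hdr feeding an uncompressed buf
 * means decompression; the remaining combination is disallowed by the
 * IMPLY assertions above.
 */
enum mock_fill_action {
	FILL_COPY,		/* the abd_copy_to_buf() path */
	FILL_DECOMPRESS,	/* the zio_decompress_data() path */
	FILL_INVALID		/* uncompressed hdr, compressed buf */
};

static enum mock_fill_action
mock_fill_action(int hdr_compressed, int want_compressed)
{
	if (hdr_compressed == want_compressed)
		return (FILL_COPY);
	if (hdr_compressed && !want_compressed)
		return (FILL_DECOMPRESS);
	return (FILL_INVALID);
}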
+
+/*
+ * Return the size of the block, b_pabd, that is stored in the arc_buf_hdr_t.
+ */
+static uint64_t
+arc_hdr_size(arc_buf_hdr_t *hdr)
+{
+ uint64_t size;
+
+ if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF &&
+ HDR_GET_PSIZE(hdr) > 0) {
+ size = HDR_GET_PSIZE(hdr);
+ } else {
+ ASSERT3U(HDR_GET_LSIZE(hdr), !=, 0);
+ size = HDR_GET_LSIZE(hdr);
+ }
+ return (size);
+}
+
+/*
+ * Increment the amount of evictable space in the arc_state_t's refcount.
+ * We account for the space used by the hdr and the arc buf individually
+ * so that we can add and remove them from the refcount individually.
+ */
+static void
+arc_evictable_space_increment(arc_buf_hdr_t *hdr, arc_state_t *state)
+{
+ arc_buf_contents_t type = arc_buf_type(hdr);
+ arc_buf_t *buf;
+
+ ASSERT(HDR_HAS_L1HDR(hdr));
+
+ if (GHOST_STATE(state)) {
+ ASSERT0(hdr->b_l1hdr.b_bufcnt);
+ ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+ ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ (void) refcount_add_many(&state->arcs_esize[type],
+ HDR_GET_LSIZE(hdr), hdr);
+ return;
+ }
+
+ ASSERT(!GHOST_STATE(state));
+ if (hdr->b_l1hdr.b_pabd != NULL) {
+ (void) refcount_add_many(&state->arcs_esize[type],
+ arc_hdr_size(hdr), hdr);
+ }
+ for (buf = hdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) {
+ if (arc_buf_is_shared(buf))
+ continue;
+ (void) refcount_add_many(&state->arcs_esize[type],
+ arc_buf_size(buf), buf);
+ }
+}
+
+/*
+ * Decrement the amount of evictable space in the arc_state_t's refcount.
+ * We account for the space used by the hdr and the arc buf individually
+ * so that we can add and remove them from the refcount individually.
+ */
+static void
+arc_evictable_space_decrement(arc_buf_hdr_t *hdr, arc_state_t *state)
+{
+ arc_buf_contents_t type = arc_buf_type(hdr);
+ arc_buf_t *buf;
+
+ ASSERT(HDR_HAS_L1HDR(hdr));
+
+ if (GHOST_STATE(state)) {
+ ASSERT0(hdr->b_l1hdr.b_bufcnt);
+ ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+ ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ (void) refcount_remove_many(&state->arcs_esize[type],
+ HDR_GET_LSIZE(hdr), hdr);
+ return;
+ }
+
+ ASSERT(!GHOST_STATE(state));
+ if (hdr->b_l1hdr.b_pabd != NULL) {
+ (void) refcount_remove_many(&state->arcs_esize[type],
+ arc_hdr_size(hdr), hdr);
+ }
+ for (buf = hdr->b_l1hdr.b_buf; buf != NULL; buf = buf->b_next) {
+ if (arc_buf_is_shared(buf))
+ continue;
+ (void) refcount_remove_many(&state->arcs_esize[type],
+ arc_buf_size(buf), buf);
+ }
+}
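/*
 * The evictable-space rule above, condensed (stand-in parameters, not
 * the kernel types): a ghost header contributes its logical size; a
 * resident header contributes its own data buffer plus every buf that
 * is not sharing that buffer.
 */
#include <stdint.h>

static uint64_t
mock_evictable_bytes(int ghost, uint64_t lsize, uint64_t hdr_size,
    const uint64_t *buf_sizes, const int *buf_shared, int nbufs)
{
	uint64_t total;
	int i;

	if (ghost)
		return (lsize);

	total = hdr_size;
	for (i = 0; i < nbufs; i++)
		if (!buf_shared[i])
			total += buf_sizes[i];
	return (total);
}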
+
+/*
+ * Add a reference to this hdr indicating that someone is actively
+ * referencing that memory. When the refcount transitions from 0 to 1,
+ * we remove it from the respective arc_state_t list to indicate that
+ * it is not evictable.
+ */
+static void
+add_reference(arc_buf_hdr_t *hdr, void *tag)
+{
+ arc_state_t *state;
+
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ if (!MUTEX_HELD(HDR_LOCK(hdr))) {
+ ASSERT(hdr->b_l1hdr.b_state == arc_anon);
+ ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
+ ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+ }
+
+ state = hdr->b_l1hdr.b_state;
+
+ if ((refcount_add(&hdr->b_l1hdr.b_refcnt, tag) == 1) &&
+ (state != arc_anon)) {
+ /* We don't use the L2-only state list. */
+ if (state != arc_l2c_only) {
+ multilist_remove(state->arcs_list[arc_buf_type(hdr)],
+ hdr);
+ arc_evictable_space_decrement(hdr, state);
+ }
+ /* remove the prefetch flag if we get a reference */
+ arc_hdr_clear_flags(hdr, ARC_FLAG_PREFETCH);
+ }
+}
+
+/*
+ * Remove a reference from this hdr. When the reference transitions from
+ * 1 to 0 and we're not anonymous, then we add this hdr to the arc_state_t's
+ * list, making it eligible for eviction.
+ */
+static int
+remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag)
+{
+ int cnt;
+ arc_state_t *state = hdr->b_l1hdr.b_state;
ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT(state == arc_anon || MUTEX_HELD(hash_lock));
@@ -1423,15 +1994,9 @@ remove_reference(arc_buf_hdr_t *hdr, kmutex_t *hash_lock, void *tag)
*/
if (((cnt = refcount_remove(&hdr->b_l1hdr.b_refcnt, tag)) == 0) &&
(state != arc_anon)) {
- arc_buf_contents_t type = arc_buf_type(hdr);
- multilist_t *list = &state->arcs_list[type];
- uint64_t *size = &state->arcs_lsize[type];
-
- multilist_insert(list, hdr);
-
- ASSERT(hdr->b_l1hdr.b_datacnt > 0);
- atomic_add_64(size, hdr->b_size *
- hdr->b_l1hdr.b_datacnt);
+ multilist_insert(state->arcs_list[arc_buf_type(hdr)], hdr);
+ ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0);
+ arc_evictable_space_increment(hdr, state);
}
return (cnt);
}
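/*
 * The add/remove reference pairing in miniature (illustrative names,
 * not the kernel refcount API): the first reference makes a header
 * unevictable, the last removal makes it evictable again.
 */
#include <stdint.h>

struct mock_refhdr {
	int64_t	refcnt;
	int	evictable;	/* on the state's multilist */
};

static void
mock_add_reference(struct mock_refhdr *r)
{
	if (++r->refcnt == 1)
		r->evictable = 0;	/* multilist_remove() + esize decrement */
}

static void
mock_remove_reference(struct mock_refhdr *r)
{
	if (--r->refcnt == 0)
		r->evictable = 1;	/* multilist_insert() + esize increment */
}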
@@ -1466,7 +2031,7 @@ arc_buf_info(arc_buf_t *ab, arc_buf_info_t *abi, int state_index)
l2hdr = &hdr->b_l2hdr;
if (l1hdr) {
- abi->abi_datacnt = l1hdr->b_datacnt;
+ abi->abi_bufcnt = l1hdr->b_bufcnt;
abi->abi_access = l1hdr->b_arc_access;
abi->abi_mru_hits = l1hdr->b_mru_hits;
abi->abi_mru_ghost_hits = l1hdr->b_mru_ghost_hits;
@@ -1477,14 +2042,12 @@ arc_buf_info(arc_buf_t *ab, arc_buf_info_t *abi, int state_index)
if (l2hdr) {
abi->abi_l2arc_dattr = l2hdr->b_daddr;
- abi->abi_l2arc_asize = l2hdr->b_asize;
- abi->abi_l2arc_compress = l2hdr->b_compress;
abi->abi_l2arc_hits = l2hdr->b_hits;
}
abi->abi_state_type = state ? state->arcs_state : ARC_STATE_ANON;
abi->abi_state_contents = arc_buf_type(hdr);
- abi->abi_size = hdr->b_size;
+ abi->abi_size = arc_hdr_size(hdr);
}
/*
@@ -1497,8 +2060,8 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
{
arc_state_t *old_state;
int64_t refcnt;
- uint32_t datacnt;
- uint64_t from_delta, to_delta;
+ uint32_t bufcnt;
+ boolean_t update_old, update_new;
arc_buf_contents_t buftype = arc_buf_type(hdr);
/*
@@ -1511,20 +2074,20 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
if (HDR_HAS_L1HDR(hdr)) {
old_state = hdr->b_l1hdr.b_state;
refcnt = refcount_count(&hdr->b_l1hdr.b_refcnt);
- datacnt = hdr->b_l1hdr.b_datacnt;
+ bufcnt = hdr->b_l1hdr.b_bufcnt;
+ update_old = (bufcnt > 0 || hdr->b_l1hdr.b_pabd != NULL);
} else {
old_state = arc_l2c_only;
refcnt = 0;
- datacnt = 0;
+ bufcnt = 0;
+ update_old = B_FALSE;
}
+ update_new = update_old;
ASSERT(MUTEX_HELD(hash_lock));
ASSERT3P(new_state, !=, old_state);
- ASSERT(refcnt == 0 || datacnt > 0);
- ASSERT(!GHOST_STATE(new_state) || datacnt == 0);
- ASSERT(old_state != arc_anon || datacnt <= 1);
-
- from_delta = to_delta = datacnt * hdr->b_size;
+ ASSERT(!GHOST_STATE(new_state) || bufcnt == 0);
+ ASSERT(old_state != arc_anon || bufcnt <= 1);
/*
* If this buffer is evictable, transfer it from the
@@ -1532,26 +2095,17 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
*/
if (refcnt == 0) {
if (old_state != arc_anon && old_state != arc_l2c_only) {
- uint64_t *size = &old_state->arcs_lsize[buftype];
-
ASSERT(HDR_HAS_L1HDR(hdr));
- multilist_remove(&old_state->arcs_list[buftype], hdr);
+ multilist_remove(old_state->arcs_list[buftype], hdr);
- /*
- * If prefetching out of the ghost cache,
- * we will have a non-zero datacnt.
- */
- if (GHOST_STATE(old_state) && datacnt == 0) {
- /* ghost elements have a ghost size */
- ASSERT(hdr->b_l1hdr.b_buf == NULL);
- from_delta = hdr->b_size;
+ if (GHOST_STATE(old_state)) {
+ ASSERT0(bufcnt);
+ ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+ update_old = B_TRUE;
}
- ASSERT3U(*size, >=, from_delta);
- atomic_add_64(size, -from_delta);
+ arc_evictable_space_decrement(hdr, old_state);
}
if (new_state != arc_anon && new_state != arc_l2c_only) {
- uint64_t *size = &new_state->arcs_lsize[buftype];
-
/*
* An L1 header always exists here, since if we're
* moving to some L1-cached state (i.e. not l2c_only or
@@ -1559,41 +2113,41 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
* beforehand.
*/
ASSERT(HDR_HAS_L1HDR(hdr));
- multilist_insert(&new_state->arcs_list[buftype], hdr);
+ multilist_insert(new_state->arcs_list[buftype], hdr);
- /* ghost elements have a ghost size */
if (GHOST_STATE(new_state)) {
- ASSERT0(datacnt);
- ASSERT(hdr->b_l1hdr.b_buf == NULL);
- to_delta = hdr->b_size;
+ ASSERT0(bufcnt);
+ ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+ update_new = B_TRUE;
}
- atomic_add_64(size, to_delta);
+ arc_evictable_space_increment(hdr, new_state);
}
}
- ASSERT(!BUF_EMPTY(hdr));
+ ASSERT(!HDR_EMPTY(hdr));
if (new_state == arc_anon && HDR_IN_HASH_TABLE(hdr))
buf_hash_remove(hdr);
/* adjust state sizes (ignore arc_l2c_only) */
- if (to_delta && new_state != arc_l2c_only) {
+ if (update_new && new_state != arc_l2c_only) {
ASSERT(HDR_HAS_L1HDR(hdr));
if (GHOST_STATE(new_state)) {
- ASSERT0(datacnt);
+ ASSERT0(bufcnt);
/*
- * We moving a header to a ghost state, we first
+ * When moving a header to a ghost state, we first
* remove all arc buffers. Thus, we'll have a
- * datacnt of zero, and no arc buffer to use for
+ * bufcnt of zero, and no arc buffer to use for
* the reference. As a result, we use the arc
* header pointer for the reference.
*/
(void) refcount_add_many(&new_state->arcs_size,
- hdr->b_size, hdr);
+ HDR_GET_LSIZE(hdr), hdr);
+ ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
} else {
arc_buf_t *buf;
- ASSERT3U(datacnt, !=, 0);
+ uint32_t buffers = 0;
/*
* Each individual buffer holds a unique reference,
@@ -1602,35 +2156,52 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
*/
for (buf = hdr->b_l1hdr.b_buf; buf != NULL;
buf = buf->b_next) {
+ ASSERT3U(bufcnt, !=, 0);
+ buffers++;
+
+ /*
+ * When the arc_buf_t is sharing the data
+ * block with the hdr, the owner of the
+ * reference belongs to the hdr. Only
+ * add to the refcount if the arc_buf_t is
+ * not shared.
+ */
+ if (arc_buf_is_shared(buf))
+ continue;
+
(void) refcount_add_many(&new_state->arcs_size,
- hdr->b_size, buf);
+ arc_buf_size(buf), buf);
+ }
+ ASSERT3U(bufcnt, ==, buffers);
+
+ if (hdr->b_l1hdr.b_pabd != NULL) {
+ (void) refcount_add_many(&new_state->arcs_size,
+ arc_hdr_size(hdr), hdr);
+ } else {
+ ASSERT(GHOST_STATE(old_state));
}
}
}
- if (from_delta && old_state != arc_l2c_only) {
+ if (update_old && old_state != arc_l2c_only) {
ASSERT(HDR_HAS_L1HDR(hdr));
if (GHOST_STATE(old_state)) {
+ ASSERT0(bufcnt);
+ ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+
/*
* When moving a header off of a ghost state,
- * there's the possibility for datacnt to be
- * non-zero. This is because we first add the
- * arc buffer to the header prior to changing
- * the header's state. Since we used the header
- * for the reference when putting the header on
- * the ghost state, we must balance that and use
- * the header when removing off the ghost state
- * (even though datacnt is non zero).
+ * the header will not contain any arc buffers.
+ * We use the arc header pointer for the reference
+ * which is exactly what we did when we put the
+ * header on the ghost state.
*/
- IMPLY(datacnt == 0, new_state == arc_anon ||
- new_state == arc_l2c_only);
-
(void) refcount_remove_many(&old_state->arcs_size,
- hdr->b_size, hdr);
+ HDR_GET_LSIZE(hdr), hdr);
} else {
arc_buf_t *buf;
- ASSERT3U(datacnt, !=, 0);
+ uint32_t buffers = 0;
/*
* Each individual buffer holds a unique reference,
@@ -1639,9 +2210,27 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
*/
for (buf = hdr->b_l1hdr.b_buf; buf != NULL;
buf = buf->b_next) {
+ ASSERT3U(bufcnt, !=, 0);
+ buffers++;
+
+ /*
+ * When the arc_buf_t is sharing the data
+ * block with the hdr, the owner of the
+ * reference belongs to the hdr. Only
+ * add to the refcount if the arc_buf_t is
+ * not shared.
+ */
+ if (arc_buf_is_shared(buf))
+ continue;
+
(void) refcount_remove_many(
- &old_state->arcs_size, hdr->b_size, buf);
+ &old_state->arcs_size, arc_buf_size(buf),
+ buf);
}
+ ASSERT3U(bufcnt, ==, buffers);
+ ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+ (void) refcount_remove_many(
+ &old_state->arcs_size, arc_hdr_size(hdr), hdr);
}
}
@@ -1652,8 +2241,8 @@ arc_change_state(arc_state_t *new_state, arc_buf_hdr_t *hdr,
* L2 headers should never be on the L2 state list since they don't
* have L1 headers allocated.
*/
- ASSERT(multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]) &&
- multilist_is_empty(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]));
+ ASSERT(multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_DATA]) &&
+ multilist_is_empty(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]));
}
void
@@ -1670,8 +2259,14 @@ arc_space_consume(uint64_t space, arc_space_type_t type)
case ARC_SPACE_META:
ARCSTAT_INCR(arcstat_metadata_size, space);
break;
- case ARC_SPACE_OTHER:
- ARCSTAT_INCR(arcstat_other_size, space);
+ case ARC_SPACE_BONUS:
+ ARCSTAT_INCR(arcstat_bonus_size, space);
+ break;
+ case ARC_SPACE_DNODE:
+ ARCSTAT_INCR(arcstat_dnode_size, space);
+ break;
+ case ARC_SPACE_DBUF:
+ ARCSTAT_INCR(arcstat_dbuf_size, space);
break;
case ARC_SPACE_HDRS:
ARCSTAT_INCR(arcstat_hdr_size, space);
@@ -1701,8 +2296,14 @@ arc_space_return(uint64_t space, arc_space_type_t type)
case ARC_SPACE_META:
ARCSTAT_INCR(arcstat_metadata_size, -space);
break;
- case ARC_SPACE_OTHER:
- ARCSTAT_INCR(arcstat_other_size, -space);
+ case ARC_SPACE_BONUS:
+ ARCSTAT_INCR(arcstat_bonus_size, -space);
+ break;
+ case ARC_SPACE_DNODE:
+ ARCSTAT_INCR(arcstat_dnode_size, -space);
+ break;
+ case ARC_SPACE_DBUF:
+ ARCSTAT_INCR(arcstat_dbuf_size, -space);
break;
case ARC_SPACE_HDRS:
ARCSTAT_INCR(arcstat_hdr_size, -space);
@@ -1723,49 +2324,141 @@ arc_space_return(uint64_t space, arc_space_type_t type)
atomic_add_64(&arc_size, -space);
}
-arc_buf_t *
-arc_buf_alloc(spa_t *spa, uint64_t size, void *tag, arc_buf_contents_t type)
+/*
+ * Given a hdr and a buf, returns whether that buf can share its b_data buffer
+ * with the hdr's b_pabd.
+ */
+static boolean_t
+arc_can_share(arc_buf_hdr_t *hdr, arc_buf_t *buf)
+{
+ /*
+ * The criteria for sharing a hdr's data are:
+ * 1. the hdr's compression matches the buf's compression
+ * 2. the hdr doesn't need to be byteswapped
+ * 3. the hdr isn't already being shared
+ * 4. the buf is either compressed or it is the last buf in the hdr list
+ *
+ * Criterion #4 maintains the invariant that shared uncompressed
+ * bufs must be the final buf in the hdr's b_buf list. Reading this, you
+ * might ask, "if a compressed buf is allocated first, won't that be the
+ * last thing in the list?", but in that case it's impossible to create
+ * a shared uncompressed buf anyway (because the hdr must be compressed
+ * to have the compressed buf). You might also think that #3 is
+ * sufficient to make this guarantee, however it's possible
+ * (specifically in the rare L2ARC write race mentioned in
+ * arc_buf_alloc_impl()) there will be an existing uncompressed buf that
+ * is sharable, but wasn't at the time of its allocation. Rather than
+ * allow a new shared uncompressed buf to be created and then shuffle
+ * the list around to make it the last element, this simply disallows
+ * sharing if the new buf isn't the first to be added.
+ */
+ ASSERT3P(buf->b_hdr, ==, hdr);
+ boolean_t hdr_compressed = HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF;
+ boolean_t buf_compressed = ARC_BUF_COMPRESSED(buf) != 0;
+ return (buf_compressed == hdr_compressed &&
+ hdr->b_l1hdr.b_byteswap == DMU_BSWAP_NUMFUNCS &&
+ !HDR_SHARED_DATA(hdr) &&
+ (ARC_BUF_LAST(buf) || ARC_BUF_COMPRESSED(buf)));
+}
+
+/*
+ * Allocate a buf for this hdr. If you care about the data that's in the hdr,
+ * or if you want a compressed buffer, pass those flags in. Returns 0 if the
+ * copy was made successfully, or an error code otherwise.
+ */
+static int
+arc_buf_alloc_impl(arc_buf_hdr_t *hdr, void *tag, boolean_t compressed,
+ boolean_t fill, arc_buf_t **ret)
{
- arc_buf_hdr_t *hdr;
arc_buf_t *buf;
- VERIFY3U(size, <=, spa_maxblocksize(spa));
- hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
- ASSERT(BUF_EMPTY(hdr));
- ASSERT3P(hdr->b_freeze_cksum, ==, NULL);
- hdr->b_size = size;
- hdr->b_spa = spa_load_guid(spa);
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ ASSERT3U(HDR_GET_LSIZE(hdr), >, 0);
+ VERIFY(hdr->b_type == ARC_BUFC_DATA ||
+ hdr->b_type == ARC_BUFC_METADATA);
+ ASSERT3P(ret, !=, NULL);
+ ASSERT3P(*ret, ==, NULL);
+
hdr->b_l1hdr.b_mru_hits = 0;
hdr->b_l1hdr.b_mru_ghost_hits = 0;
hdr->b_l1hdr.b_mfu_hits = 0;
hdr->b_l1hdr.b_mfu_ghost_hits = 0;
hdr->b_l1hdr.b_l2_hits = 0;
- buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
+ buf = *ret = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
buf->b_hdr = hdr;
buf->b_data = NULL;
- buf->b_efunc = NULL;
- buf->b_private = NULL;
- buf->b_next = NULL;
+ buf->b_next = hdr->b_l1hdr.b_buf;
+ buf->b_flags = 0;
+
+ add_reference(hdr, tag);
- hdr->b_flags = arc_bufc_to_flags(type);
- hdr->b_flags |= ARC_FLAG_HAS_L1HDR;
+ /*
+ * We're about to change the hdr's b_flags. We must either
+ * hold the hash_lock or be undiscoverable.
+ */
+ ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
+
+ /*
+ * Only honor requests for compressed bufs if the hdr is actually
+ * compressed.
+ */
+ if (compressed && HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF)
+ buf->b_flags |= ARC_BUF_FLAG_COMPRESSED;
+
+ /*
+ * If the hdr's data can be shared then we share the data buffer and
+ * set the appropriate bit in the hdr's b_flags to indicate that the
+ * hdr is sharing its data with the arc_buf_t. Otherwise, we allocate
+ * a new buffer to store the buf's data.
+ *
+ * There are two additional restrictions here because we're sharing
+ * hdr -> buf instead of the usual buf -> hdr. First, the hdr can't be
+ * actively involved in an L2ARC write, because if this buf is used by
+ * an arc_write() then the hdr's data buffer will be released when the
+ * write completes, even though the L2ARC write might still be using it.
+ * Second, the hdr's ABD must be linear so that the buf's user doesn't
+ * need to be ABD-aware.
+ */
+ boolean_t can_share = arc_can_share(hdr, buf) && !HDR_L2_WRITING(hdr) &&
+ abd_is_linear(hdr->b_l1hdr.b_pabd);
+
+ /* Set up b_data and sharing */
+ if (can_share) {
+ buf->b_data = abd_to_buf(hdr->b_l1hdr.b_pabd);
+ buf->b_flags |= ARC_BUF_FLAG_SHARED;
+ arc_hdr_set_flags(hdr, ARC_FLAG_SHARED_DATA);
+ } else {
+ buf->b_data =
+ arc_get_data_buf(hdr, arc_buf_size(buf), buf);
+ ARCSTAT_INCR(arcstat_overhead_size, arc_buf_size(buf));
+ }
+ VERIFY3P(buf->b_data, !=, NULL);
hdr->b_l1hdr.b_buf = buf;
- hdr->b_l1hdr.b_state = arc_anon;
- hdr->b_l1hdr.b_arc_access = 0;
- hdr->b_l1hdr.b_datacnt = 1;
- hdr->b_l1hdr.b_tmp_cdata = NULL;
+ hdr->b_l1hdr.b_bufcnt += 1;
- arc_get_data_buf(buf);
- ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
- (void) refcount_add(&hdr->b_l1hdr.b_refcnt, tag);
+ /*
+ * If the user wants the data from the hdr, we need to either copy or
+ * decompress the data.
+ */
+ if (fill) {
+ return (arc_buf_fill(buf, ARC_BUF_COMPRESSED(buf) != 0));
+ }
- return (buf);
+ return (0);
}
static char *arc_onloan_tag = "onloan";
+static inline void
+arc_loaned_bytes_update(int64_t delta)
+{
+ atomic_add_64(&arc_loaned_bytes, delta);
+
+ /* assert that it did not wrap around */
+ ASSERT3S(atomic_add_64_nv(&arc_loaned_bytes, 0), >=, 0);
+}
+
/*
* Loan out an anonymous arc buffer. Loaned buffers are not counted as in
* flight data by arc_tempreserve_space() until they are "returned". Loaned
@@ -1773,16 +2466,29 @@ static char *arc_onloan_tag = "onloan";
* freed.
*/
arc_buf_t *
-arc_loan_buf(spa_t *spa, uint64_t size)
+arc_loan_buf(spa_t *spa, boolean_t is_metadata, int size)
{
- arc_buf_t *buf;
+ arc_buf_t *buf = arc_alloc_buf(spa, arc_onloan_tag,
+ is_metadata ? ARC_BUFC_METADATA : ARC_BUFC_DATA, size);
- buf = arc_buf_alloc(spa, size, arc_onloan_tag, ARC_BUFC_DATA);
+ arc_loaned_bytes_update(size);
- atomic_add_64(&arc_loaned_bytes, size);
return (buf);
}
+arc_buf_t *
+arc_loan_compressed_buf(spa_t *spa, uint64_t psize, uint64_t lsize,
+ enum zio_compress compression_type)
+{
+ arc_buf_t *buf = arc_alloc_compressed_buf(spa, arc_onloan_tag,
+ psize, lsize, compression_type);
+
+ arc_loaned_bytes_update(psize);
+
+ return (buf);
+}
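/*
 * A sketch of the loan accounting, with a C11 atomic standing in for
 * arc_loaned_bytes and the SPL's atomic_add_64 (illustrative, not the
 * kernel API): loaning a buf adds its size, returning it subtracts the
 * same amount, and the running total must never go negative.
 */
#include <assert.h>
#include <stdatomic.h>
#include <stdint.h>

static _Atomic int64_t mock_loaned_bytes;

static void
mock_loaned_bytes_update(int64_t delta)
{
	int64_t nv = atomic_fetch_add(&mock_loaned_bytes, delta) + delta;

	assert(nv >= 0);	/* mirrors the ASSERT3S above */
}

/* loan:   mock_loaned_bytes_update(size);   */
/* return: mock_loaned_bytes_update(-size);  */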
+
/*
* Return a loaned arc buffer to the arc.
*/
@@ -1791,12 +2497,12 @@ arc_return_buf(arc_buf_t *buf, void *tag)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
- ASSERT(buf->b_data != NULL);
+ ASSERT3P(buf->b_data, !=, NULL);
ASSERT(HDR_HAS_L1HDR(hdr));
(void) refcount_add(&hdr->b_l1hdr.b_refcnt, tag);
(void) refcount_remove(&hdr->b_l1hdr.b_refcnt, arc_onloan_tag);
- atomic_add_64(&arc_loaned_bytes, -hdr->b_size);
+ arc_loaned_bytes_update(-arc_buf_size(buf));
}
/* Detach an arc_buf from a dbuf (tag) */
@@ -1805,244 +2511,478 @@ arc_loan_inuse_buf(arc_buf_t *buf, void *tag)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
- ASSERT(buf->b_data != NULL);
+ ASSERT3P(buf->b_data, !=, NULL);
ASSERT(HDR_HAS_L1HDR(hdr));
(void) refcount_add(&hdr->b_l1hdr.b_refcnt, arc_onloan_tag);
(void) refcount_remove(&hdr->b_l1hdr.b_refcnt, tag);
- buf->b_efunc = NULL;
- buf->b_private = NULL;
- atomic_add_64(&arc_loaned_bytes, hdr->b_size);
+ arc_loaned_bytes_update(arc_buf_size(buf));
}
-static arc_buf_t *
-arc_buf_clone(arc_buf_t *from)
+static void
+l2arc_free_abd_on_write(abd_t *abd, size_t size, arc_buf_contents_t type)
{
- arc_buf_t *buf;
- arc_buf_hdr_t *hdr = from->b_hdr;
- uint64_t size = hdr->b_size;
+ l2arc_data_free_t *df = kmem_alloc(sizeof (*df), KM_SLEEP);
+
+ df->l2df_abd = abd;
+ df->l2df_size = size;
+ df->l2df_type = type;
+ mutex_enter(&l2arc_free_on_write_mtx);
+ list_insert_head(l2arc_free_on_write, df);
+ mutex_exit(&l2arc_free_on_write_mtx);
+}
+
+static void
+arc_hdr_free_on_write(arc_buf_hdr_t *hdr)
+{
+ arc_state_t *state = hdr->b_l1hdr.b_state;
+ arc_buf_contents_t type = arc_buf_type(hdr);
+ uint64_t size = arc_hdr_size(hdr);
+
+ /* protected by hash lock, if in the hash table */
+ if (multilist_link_active(&hdr->b_l1hdr.b_arc_node)) {
+ ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
+ ASSERT(state != arc_anon && state != arc_l2c_only);
+
+ (void) refcount_remove_many(&state->arcs_esize[type],
+ size, hdr);
+ }
+ (void) refcount_remove_many(&state->arcs_size, size, hdr);
+ if (type == ARC_BUFC_METADATA) {
+ arc_space_return(size, ARC_SPACE_META);
+ } else {
+ ASSERT(type == ARC_BUFC_DATA);
+ arc_space_return(size, ARC_SPACE_DATA);
+ }
+
+ l2arc_free_abd_on_write(hdr->b_l1hdr.b_pabd, size, type);
+}
+
+/*
+ * Share the arc_buf_t's data with the hdr. Whenever we are sharing the
+ * data buffer, we transfer the refcount ownership to the hdr and update
+ * the appropriate kstats.
+ */
+static void
+arc_share_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf)
+{
+ ASSERT(arc_can_share(hdr, buf));
+ ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
+
+ /*
+ * Start sharing the data buffer. We transfer the
+ * refcount ownership to the hdr since it always owns
+ * the refcount whenever an arc_buf_t is shared.
+ */
+ refcount_transfer_ownership(&hdr->b_l1hdr.b_state->arcs_size, buf, hdr);
+ hdr->b_l1hdr.b_pabd = abd_get_from_buf(buf->b_data, arc_buf_size(buf));
+ abd_take_ownership_of_buf(hdr->b_l1hdr.b_pabd,
+ HDR_ISTYPE_METADATA(hdr));
+ arc_hdr_set_flags(hdr, ARC_FLAG_SHARED_DATA);
+ buf->b_flags |= ARC_BUF_FLAG_SHARED;
+ /*
+ * Since we've transferred ownership to the hdr we need
+ * to increment its compressed and uncompressed kstats and
+ * decrement the overhead size.
+ */
+ ARCSTAT_INCR(arcstat_compressed_size, arc_hdr_size(hdr));
+ ARCSTAT_INCR(arcstat_uncompressed_size, HDR_GET_LSIZE(hdr));
+ ARCSTAT_INCR(arcstat_overhead_size, -arc_buf_size(buf));
+}
+
+static void
+arc_unshare_buf(arc_buf_hdr_t *hdr, arc_buf_t *buf)
+{
+ ASSERT(arc_buf_is_shared(buf));
+ ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+ ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
+
+ /*
+ * We are no longer sharing this buffer so we need
+ * to transfer its ownership to the rightful owner.
+ */
+ refcount_transfer_ownership(&hdr->b_l1hdr.b_state->arcs_size, hdr, buf);
+ arc_hdr_clear_flags(hdr, ARC_FLAG_SHARED_DATA);
+ abd_release_ownership_of_buf(hdr->b_l1hdr.b_pabd);
+ abd_put(hdr->b_l1hdr.b_pabd);
+ hdr->b_l1hdr.b_pabd = NULL;
+ buf->b_flags &= ~ARC_BUF_FLAG_SHARED;
+
+ /*
+ * Since the buffer is no longer shared between
+ * the arc buf and the hdr, count it as overhead.
+ */
+ ARCSTAT_INCR(arcstat_compressed_size, -arc_hdr_size(hdr));
+ ARCSTAT_INCR(arcstat_uncompressed_size, -HDR_GET_LSIZE(hdr));
+ ARCSTAT_INCR(arcstat_overhead_size, arc_buf_size(buf));
+}
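/*
 * The kstat movement performed by the share/unshare pair above, with
 * plain counters standing in for ARCSTAT_INCR (illustrative only):
 * sharing counts the block under the compressed/uncompressed sizes and
 * stops counting the buf as overhead; unsharing reverses all three.
 */
#include <stdint.h>

static int64_t mock_compressed_size, mock_uncompressed_size, mock_overhead_size;

static void
mock_share(uint64_t hdr_size, uint64_t lsize, uint64_t buf_size)
{
	mock_compressed_size += hdr_size;
	mock_uncompressed_size += lsize;
	mock_overhead_size -= buf_size;
}

static void
mock_unshare(uint64_t hdr_size, uint64_t lsize, uint64_t buf_size)
{
	mock_compressed_size -= hdr_size;
	mock_uncompressed_size -= lsize;
	mock_overhead_size += buf_size;
}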
+
+/*
+ * Remove an arc_buf_t from the hdr's buf list and return the last
+ * arc_buf_t on the list. If no buffers remain on the list then return
+ * NULL.
+ */
+static arc_buf_t *
+arc_buf_remove(arc_buf_hdr_t *hdr, arc_buf_t *buf)
+{
ASSERT(HDR_HAS_L1HDR(hdr));
- ASSERT(hdr->b_l1hdr.b_state != arc_anon);
+ ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
- buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
- buf->b_hdr = hdr;
- buf->b_data = NULL;
- buf->b_efunc = NULL;
- buf->b_private = NULL;
- buf->b_next = hdr->b_l1hdr.b_buf;
- hdr->b_l1hdr.b_buf = buf;
- arc_get_data_buf(buf);
- bcopy(from->b_data, buf->b_data, size);
+ arc_buf_t **bufp = &hdr->b_l1hdr.b_buf;
+ arc_buf_t *lastbuf = NULL;
/*
- * This buffer already exists in the arc so create a duplicate
- * copy for the caller. If the buffer is associated with user data
- * then track the size and number of duplicates. These stats will be
- * updated as duplicate buffers are created and destroyed.
+ * Remove the buf from the hdr list and locate the last
+ * remaining buffer on the list.
*/
- if (HDR_ISTYPE_DATA(hdr)) {
- ARCSTAT_BUMP(arcstat_duplicate_buffers);
- ARCSTAT_INCR(arcstat_duplicate_buffers_size, size);
+ while (*bufp != NULL) {
+ if (*bufp == buf)
+ *bufp = buf->b_next;
+
+ /*
+ * If we've removed a buffer in the middle of
+ * the list then update the lastbuf and update
+ * bufp.
+ */
+ if (*bufp != NULL) {
+ lastbuf = *bufp;
+ bufp = &(*bufp)->b_next;
+ }
}
- hdr->b_l1hdr.b_datacnt += 1;
- return (buf);
+ buf->b_next = NULL;
+ ASSERT3P(lastbuf, !=, buf);
+ IMPLY(hdr->b_l1hdr.b_bufcnt > 0, lastbuf != NULL);
+ IMPLY(hdr->b_l1hdr.b_bufcnt > 0, hdr->b_l1hdr.b_buf != NULL);
+ IMPLY(lastbuf != NULL, ARC_BUF_LAST(lastbuf));
+
+ return (lastbuf);
}
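/*
 * The list walk in arc_buf_remove() reduced to its essentials: unlink a
 * node from a singly linked list while remembering the last node that
 * survives (stand-in types, not arc_buf_t).
 */
#include <stddef.h>

struct mock_node {
	struct mock_node *next;
};

static struct mock_node *
mock_remove(struct mock_node **headp, struct mock_node *target)
{
	struct mock_node **np = headp;
	struct mock_node *last = NULL;

	while (*np != NULL) {
		if (*np == target)
			*np = target->next;	/* unlink */
		if (*np != NULL) {		/* may have just advanced */
			last = *np;
			np = &(*np)->next;
		}
	}
	target->next = NULL;
	return (last);	/* NULL when the list is now empty */
}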
-void
-arc_buf_add_ref(arc_buf_t *buf, void* tag)
+/*
+ * Free up buf->b_data and pull the arc_buf_t off of the arc_buf_hdr_t's
+ * list and free it.
+ */
+static void
+arc_buf_destroy_impl(arc_buf_t *buf)
{
- arc_buf_hdr_t *hdr;
- kmutex_t *hash_lock;
+ arc_buf_hdr_t *hdr = buf->b_hdr;
/*
- * Check to see if this buffer is evicted. Callers
- * must verify b_data != NULL to know if the add_ref
- * was successful.
+ * Free up the data associated with the buf but only if we're not
+ * sharing this with the hdr. If we are sharing it with the hdr, the
+ * hdr is responsible for doing the free.
*/
- mutex_enter(&buf->b_evict_lock);
- if (buf->b_data == NULL) {
- mutex_exit(&buf->b_evict_lock);
- return;
+ if (buf->b_data != NULL) {
+ /*
+ * We're about to change the hdr's b_flags. We must either
+ * hold the hash_lock or be undiscoverable.
+ */
+ ASSERT(MUTEX_HELD(HDR_LOCK(hdr)) || HDR_EMPTY(hdr));
+
+ arc_cksum_verify(buf);
+ arc_buf_unwatch(buf);
+
+ if (arc_buf_is_shared(buf)) {
+ arc_hdr_clear_flags(hdr, ARC_FLAG_SHARED_DATA);
+ } else {
+ uint64_t size = arc_buf_size(buf);
+ arc_free_data_buf(hdr, buf->b_data, size, buf);
+ ARCSTAT_INCR(arcstat_overhead_size, -size);
+ }
+ buf->b_data = NULL;
+
+ ASSERT(hdr->b_l1hdr.b_bufcnt > 0);
+ hdr->b_l1hdr.b_bufcnt -= 1;
}
- hash_lock = HDR_LOCK(buf->b_hdr);
- mutex_enter(hash_lock);
- hdr = buf->b_hdr;
- ASSERT(HDR_HAS_L1HDR(hdr));
- ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
- mutex_exit(&buf->b_evict_lock);
- ASSERT(hdr->b_l1hdr.b_state == arc_mru ||
- hdr->b_l1hdr.b_state == arc_mfu);
+ arc_buf_t *lastbuf = arc_buf_remove(hdr, buf);
- add_reference(hdr, hash_lock, tag);
- DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
- arc_access(hdr, hash_lock);
- mutex_exit(hash_lock);
- ARCSTAT_BUMP(arcstat_hits);
- ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr),
- demand, prefetch, !HDR_ISTYPE_METADATA(hdr),
- data, metadata, hits);
+ if (ARC_BUF_SHARED(buf) && !ARC_BUF_COMPRESSED(buf)) {
+ /*
+ * If the current arc_buf_t is sharing its data buffer with the
+ * hdr, then reassign the hdr's b_pabd to share it with the new
+ * buffer at the end of the list. The shared buffer is always
+ * the last one on the hdr's buffer list.
+ *
+ * There is an equivalent case for compressed bufs, but since
+ * they aren't guaranteed to be the last buf in the list and
+ * that is an exceedingly rare case, we just allow that space to be
+ * wasted temporarily.
+ */
+ if (lastbuf != NULL) {
+ /* Only one buf can be shared at once */
+ VERIFY(!arc_buf_is_shared(lastbuf));
+ /* hdr is uncompressed so can't have compressed buf */
+ VERIFY(!ARC_BUF_COMPRESSED(lastbuf));
+
+ ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+ arc_hdr_free_pabd(hdr);
+
+ /*
+ * We must setup a new shared block between the
+ * last buffer and the hdr. The data would have
+ * been allocated by the arc buf so we need to transfer
+ * ownership to the hdr since it's now being shared.
+ */
+ arc_share_buf(hdr, lastbuf);
+ }
+ } else if (HDR_SHARED_DATA(hdr)) {
+ /*
+ * Uncompressed shared buffers are always at the end
+ * of the list. Compressed buffers don't have the
+ * same requirements. This makes it hard to
+ * simply assert that the lastbuf is shared so
+ * we rely on the hdr's compression flags to determine
+ * if we have a compressed, shared buffer.
+ */
+ ASSERT3P(lastbuf, !=, NULL);
+ ASSERT(arc_buf_is_shared(lastbuf) ||
+ HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF);
+ }
+
+ /*
+ * Free the checksum if we're removing the last uncompressed buf from
+ * this hdr.
+ */
+ if (!arc_hdr_has_uncompressed_buf(hdr)) {
+ arc_cksum_free(hdr);
+ }
+
+ /* clean up the buf */
+ buf->b_hdr = NULL;
+ kmem_cache_free(buf_cache, buf);
}
static void
-arc_buf_free_on_write(void *data, size_t size,
- void (*free_func)(void *, size_t))
+arc_hdr_alloc_pabd(arc_buf_hdr_t *hdr)
{
- l2arc_data_free_t *df;
+ ASSERT3U(HDR_GET_LSIZE(hdr), >, 0);
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ ASSERT(!HDR_SHARED_DATA(hdr));
- df = kmem_alloc(sizeof (*df), KM_SLEEP);
- df->l2df_data = data;
- df->l2df_size = size;
- df->l2df_func = free_func;
- mutex_enter(&l2arc_free_on_write_mtx);
- list_insert_head(l2arc_free_on_write, df);
- mutex_exit(&l2arc_free_on_write_mtx);
+ ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ hdr->b_l1hdr.b_pabd = arc_get_data_abd(hdr, arc_hdr_size(hdr), hdr);
+ hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
+ ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+
+ ARCSTAT_INCR(arcstat_compressed_size, arc_hdr_size(hdr));
+ ARCSTAT_INCR(arcstat_uncompressed_size, HDR_GET_LSIZE(hdr));
}
-/*
- * Free the arc data buffer. If it is an l2arc write in progress,
- * the buffer is placed on l2arc_free_on_write to be freed later.
- */
static void
-arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t))
+arc_hdr_free_pabd(arc_buf_hdr_t *hdr)
{
- arc_buf_hdr_t *hdr = buf->b_hdr;
+ ASSERT(HDR_HAS_L1HDR(hdr));
+ ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+ /*
+ * If the hdr is currently being written to the l2arc then
+ * we defer freeing the data by adding it to the l2arc_free_on_write
+ * list. The l2arc will free the data once it's finished
+ * writing it to the l2arc device.
+ */
if (HDR_L2_WRITING(hdr)) {
- arc_buf_free_on_write(buf->b_data, hdr->b_size, free_func);
+ arc_hdr_free_on_write(hdr);
ARCSTAT_BUMP(arcstat_l2_free_on_write);
} else {
- free_func(buf->b_data, hdr->b_size);
+ arc_free_data_abd(hdr, hdr->b_l1hdr.b_pabd,
+ arc_hdr_size(hdr), hdr);
}
+ hdr->b_l1hdr.b_pabd = NULL;
+ hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
+
+ ARCSTAT_INCR(arcstat_compressed_size, -arc_hdr_size(hdr));
+ ARCSTAT_INCR(arcstat_uncompressed_size, -HDR_GET_LSIZE(hdr));
}
-static void
-arc_buf_l2_cdata_free(arc_buf_hdr_t *hdr)
+static arc_buf_hdr_t *
+arc_hdr_alloc(uint64_t spa, int32_t psize, int32_t lsize,
+ enum zio_compress compression_type, arc_buf_contents_t type)
{
- ASSERT(HDR_HAS_L2HDR(hdr));
- ASSERT(MUTEX_HELD(&hdr->b_l2hdr.b_dev->l2ad_mtx));
+ arc_buf_hdr_t *hdr;
+
+ VERIFY(type == ARC_BUFC_DATA || type == ARC_BUFC_METADATA);
+
+ hdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
+ ASSERT(HDR_EMPTY(hdr));
+ ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
+ HDR_SET_PSIZE(hdr, psize);
+ HDR_SET_LSIZE(hdr, lsize);
+ hdr->b_spa = spa;
+ hdr->b_type = type;
+ hdr->b_flags = 0;
+ arc_hdr_set_flags(hdr, arc_bufc_to_flags(type) | ARC_FLAG_HAS_L1HDR);
+ arc_hdr_set_compress(hdr, compression_type);
+
+ hdr->b_l1hdr.b_state = arc_anon;
+ hdr->b_l1hdr.b_arc_access = 0;
+ hdr->b_l1hdr.b_bufcnt = 0;
+ hdr->b_l1hdr.b_buf = NULL;
/*
- * The b_tmp_cdata field is linked off of the b_l1hdr, so if
- * that doesn't exist, the header is in the arc_l2c_only state,
- * and there isn't anything to free (it's already been freed).
+ * Allocate the hdr's buffer. This will contain either
+ * the compressed or uncompressed data depending on the block
+ * it references and compressed arc enablement.
*/
- if (!HDR_HAS_L1HDR(hdr))
- return;
+ arc_hdr_alloc_pabd(hdr);
+ ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
+
+ return (hdr);
+}
+
+/*
+ * Transition between the two allocation states for the arc_buf_hdr struct.
+ * The arc_buf_hdr struct can be allocated with (hdr_full_cache) or without
+ * (hdr_l2only_cache) the fields necessary for the L1 cache - the smaller
+ * version is used when a cache buffer is only in the L2ARC in order to reduce
+ * memory usage.
+ */
+static arc_buf_hdr_t *
+arc_hdr_realloc(arc_buf_hdr_t *hdr, kmem_cache_t *old, kmem_cache_t *new)
+{
+ arc_buf_hdr_t *nhdr;
+ l2arc_dev_t *dev = hdr->b_l2hdr.b_dev;
+
+ ASSERT(HDR_HAS_L2HDR(hdr));
+ ASSERT((old == hdr_full_cache && new == hdr_l2only_cache) ||
+ (old == hdr_l2only_cache && new == hdr_full_cache));
+
+ nhdr = kmem_cache_alloc(new, KM_PUSHPAGE);
+
+ ASSERT(MUTEX_HELD(HDR_LOCK(hdr)));
+ buf_hash_remove(hdr);
+
+ bcopy(hdr, nhdr, HDR_L2ONLY_SIZE);
+
+ if (new == hdr_full_cache) {
+ arc_hdr_set_flags(nhdr, ARC_FLAG_HAS_L1HDR);
+ /*
+ * arc_access and arc_change_state need to be aware that a
+ * header has just come out of L2ARC, so we set its state to
+ * l2c_only even though it's about to change.
+ */
+ nhdr->b_l1hdr.b_state = arc_l2c_only;
+
+ /* Verify previous threads set to NULL before freeing */
+ ASSERT3P(nhdr->b_l1hdr.b_pabd, ==, NULL);
+ } else {
+ ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+ ASSERT0(hdr->b_l1hdr.b_bufcnt);
+ ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
+
+ /*
+ * If we've reached here, we must have been called from
+ * arc_evict_hdr(), as such we should have already been
+ * removed from any ghost list we were previously on
+ * (which protects us from racing with arc_evict_state),
+ * thus no locking is needed during this check.
+ */
+ ASSERT(!multilist_link_active(&hdr->b_l1hdr.b_arc_node));
+
+ /*
+ * A buffer must not be moved into the arc_l2c_only
+ * state if it's not finished being written out to the
+ * l2arc device. Otherwise, the b_l1hdr.b_pabd field
+ * might try to be accessed, even though it was removed.
+ */
+ VERIFY(!HDR_L2_WRITING(hdr));
+ VERIFY3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ arc_hdr_clear_flags(nhdr, ARC_FLAG_HAS_L1HDR);
+ }
/*
- * The header isn't being written to the l2arc device, thus it
- * shouldn't have a b_tmp_cdata to free.
+ * The header has been reallocated so we need to re-insert it into any
+ * lists it was on.
*/
- if (!HDR_L2_WRITING(hdr)) {
- ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
- return;
- }
+ (void) buf_hash_insert(nhdr, NULL);
+
+ ASSERT(list_link_active(&hdr->b_l2hdr.b_l2node));
+
+ mutex_enter(&dev->l2ad_mtx);
/*
- * The header does not have compression enabled. This can be due
- * to the buffer not being compressible, or because we're
- * freeing the buffer before the second phase of
- * l2arc_write_buffer() has started (which does the compression
- * step). In either case, b_tmp_cdata does not point to a
- * separately compressed buffer, so there's nothing to free (it
- * points to the same buffer as the arc_buf_t's b_data field).
+ * We must place the realloc'ed header back into the list at
+ * the same spot. Otherwise, if it's placed earlier in the list,
+	 * l2arc_write_buffers() could find it during its write phase
+	 * and try to write it out to the l2arc.
*/
- if (hdr->b_l2hdr.b_compress == ZIO_COMPRESS_OFF) {
- hdr->b_l1hdr.b_tmp_cdata = NULL;
- return;
- }
+ list_insert_after(&dev->l2ad_buflist, hdr, nhdr);
+ list_remove(&dev->l2ad_buflist, hdr);
+
+ mutex_exit(&dev->l2ad_mtx);
/*
- * There's nothing to free since the buffer was all zero's and
- * compressed to a zero length buffer.
+ * Since we're using the pointer address as the tag when
+ * incrementing and decrementing the l2ad_alloc refcount, we
+ * must remove the old pointer (that we're about to destroy) and
+ * add the new pointer to the refcount. Otherwise we'd remove
+ * the wrong pointer address when calling arc_hdr_destroy() later.
*/
- if (hdr->b_l2hdr.b_compress == ZIO_COMPRESS_EMPTY) {
- ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
- return;
- }
- ASSERT(L2ARC_IS_VALID_COMPRESS(hdr->b_l2hdr.b_compress));
+ (void) refcount_remove_many(&dev->l2ad_alloc, arc_hdr_size(hdr), hdr);
+ (void) refcount_add_many(&dev->l2ad_alloc, arc_hdr_size(nhdr), nhdr);
- arc_buf_free_on_write(hdr->b_l1hdr.b_tmp_cdata,
- hdr->b_size, zio_data_buf_free);
+ buf_discard_identity(hdr);
+ kmem_cache_free(old, hdr);
- ARCSTAT_BUMP(arcstat_l2_cdata_free_on_write);
- hdr->b_l1hdr.b_tmp_cdata = NULL;
+ return (nhdr);
}
/*
- * Free up buf->b_data and if 'remove' is set, then pull the
- * arc_buf_t off of the the arc_buf_hdr_t's list and free it.
+ * Allocate a new arc_buf_hdr_t and arc_buf_t and return the buf to the caller.
+ * The buf is returned thawed since we expect the consumer to modify it.
*/
-static void
-arc_buf_destroy(arc_buf_t *buf, boolean_t remove)
+arc_buf_t *
+arc_alloc_buf(spa_t *spa, void *tag, arc_buf_contents_t type, int32_t size)
{
- arc_buf_t **bufp;
-
- /* free up data associated with the buf */
- if (buf->b_data != NULL) {
- arc_state_t *state = buf->b_hdr->b_l1hdr.b_state;
- uint64_t size = buf->b_hdr->b_size;
- arc_buf_contents_t type = arc_buf_type(buf->b_hdr);
-
- arc_cksum_verify(buf);
- arc_buf_unwatch(buf);
+ arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), size, size,
+ ZIO_COMPRESS_OFF, type);
+ ASSERT(!MUTEX_HELD(HDR_LOCK(hdr)));
- if (type == ARC_BUFC_METADATA) {
- arc_buf_data_free(buf, zio_buf_free);
- arc_space_return(size, ARC_SPACE_META);
- } else {
- ASSERT(type == ARC_BUFC_DATA);
- arc_buf_data_free(buf, zio_data_buf_free);
- arc_space_return(size, ARC_SPACE_DATA);
- }
+ arc_buf_t *buf = NULL;
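+	/* B_FALSE, B_FALSE: an uncompressed buf that is not filled from the hdr. */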
+ VERIFY0(arc_buf_alloc_impl(hdr, tag, B_FALSE, B_FALSE, &buf));
+ arc_buf_thaw(buf);
- /* protected by hash lock, if in the hash table */
- if (multilist_link_active(&buf->b_hdr->b_l1hdr.b_arc_node)) {
- uint64_t *cnt = &state->arcs_lsize[type];
+ return (buf);
+}
- ASSERT(refcount_is_zero(
- &buf->b_hdr->b_l1hdr.b_refcnt));
- ASSERT(state != arc_anon && state != arc_l2c_only);
+/*
+ * Allocate a compressed buf in the same manner as arc_alloc_buf. Don't use this
+ * for bufs containing metadata.
+ */
+arc_buf_t *
+arc_alloc_compressed_buf(spa_t *spa, void *tag, uint64_t psize, uint64_t lsize,
+ enum zio_compress compression_type)
+{
+ ASSERT3U(lsize, >, 0);
+ ASSERT3U(lsize, >=, psize);
+ ASSERT(compression_type > ZIO_COMPRESS_OFF);
+ ASSERT(compression_type < ZIO_COMPRESS_FUNCTIONS);
- ASSERT3U(*cnt, >=, size);
- atomic_add_64(cnt, -size);
- }
+ arc_buf_hdr_t *hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
+ compression_type, ARC_BUFC_DATA);
+ ASSERT(!MUTEX_HELD(HDR_LOCK(hdr)));
- (void) refcount_remove_many(&state->arcs_size, size, buf);
- buf->b_data = NULL;
+ arc_buf_t *buf = NULL;
+ VERIFY0(arc_buf_alloc_impl(hdr, tag, B_TRUE, B_FALSE, &buf));
+ arc_buf_thaw(buf);
+ ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
+ if (!arc_buf_is_shared(buf)) {
/*
- * If we're destroying a duplicate buffer make sure
- * that the appropriate statistics are updated.
+ * To ensure that the hdr has the correct data in it if we call
+ * arc_decompress() on this buf before it's been written to
+ * disk, it's easiest if we just set up sharing between the
+ * buf and the hdr.
*/
- if (buf->b_hdr->b_l1hdr.b_datacnt > 1 &&
- HDR_ISTYPE_DATA(buf->b_hdr)) {
- ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
- ARCSTAT_INCR(arcstat_duplicate_buffers_size, -size);
- }
- ASSERT(buf->b_hdr->b_l1hdr.b_datacnt > 0);
- buf->b_hdr->b_l1hdr.b_datacnt -= 1;
+ ASSERT(!abd_is_linear(hdr->b_l1hdr.b_pabd));
+ arc_hdr_free_pabd(hdr);
+ arc_share_buf(hdr, buf);
}
- /* only remove the buf if requested */
- if (!remove)
- return;
-
- /* remove the buf from the hdr list */
- for (bufp = &buf->b_hdr->b_l1hdr.b_buf; *bufp != buf;
- bufp = &(*bufp)->b_next)
- continue;
- *bufp = buf->b_next;
- buf->b_next = NULL;
-
- ASSERT(buf->b_efunc == NULL);
-
- /* clean up the buf */
- buf->b_hdr = NULL;
- kmem_cache_free(buf_cache, buf);
+ return (buf);
}
static void
@@ -2050,50 +2990,20 @@ arc_hdr_l2hdr_destroy(arc_buf_hdr_t *hdr)
{
l2arc_buf_hdr_t *l2hdr = &hdr->b_l2hdr;
l2arc_dev_t *dev = l2hdr->b_dev;
+ uint64_t psize = arc_hdr_size(hdr);
ASSERT(MUTEX_HELD(&dev->l2ad_mtx));
ASSERT(HDR_HAS_L2HDR(hdr));
list_remove(&dev->l2ad_buflist, hdr);
- /*
- * We don't want to leak the b_tmp_cdata buffer that was
- * allocated in l2arc_write_buffers()
- */
- arc_buf_l2_cdata_free(hdr);
-
- /*
- * If the l2hdr's b_daddr is equal to L2ARC_ADDR_UNSET, then
- * this header is being processed by l2arc_write_buffers() (i.e.
- * it's in the first stage of l2arc_write_buffers()).
- * Re-affirming that truth here, just to serve as a reminder. If
- * b_daddr does not equal L2ARC_ADDR_UNSET, then the header may or
- * may not have its HDR_L2_WRITING flag set. (the write may have
- * completed, in which case HDR_L2_WRITING will be false and the
- * b_daddr field will point to the address of the buffer on disk).
- */
- IMPLY(l2hdr->b_daddr == L2ARC_ADDR_UNSET, HDR_L2_WRITING(hdr));
-
- /*
- * If b_daddr is equal to L2ARC_ADDR_UNSET, we're racing with
- * l2arc_write_buffers(). Since we've just removed this header
- * from the l2arc buffer list, this header will never reach the
- * second stage of l2arc_write_buffers(), which increments the
- * accounting stats for this header. Thus, we must be careful
- * not to decrement them for this header either.
- */
- if (l2hdr->b_daddr != L2ARC_ADDR_UNSET) {
- ARCSTAT_INCR(arcstat_l2_asize, -l2hdr->b_asize);
- ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
+ ARCSTAT_INCR(arcstat_l2_psize, -psize);
+ ARCSTAT_INCR(arcstat_l2_lsize, -HDR_GET_LSIZE(hdr));
- vdev_space_update(dev->l2ad_vdev,
- -l2hdr->b_asize, 0, 0);
-
- (void) refcount_remove_many(&dev->l2ad_alloc,
- l2hdr->b_asize, hdr);
- }
+ vdev_space_update(dev->l2ad_vdev, -psize, 0, 0);
- hdr->b_flags &= ~ARC_FLAG_HAS_L2HDR;
+ (void) refcount_remove_many(&dev->l2ad_alloc, psize, hdr);
+ arc_hdr_clear_flags(hdr, ARC_FLAG_HAS_L2HDR);
}
static void
@@ -2101,13 +3011,16 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
{
if (HDR_HAS_L1HDR(hdr)) {
ASSERT(hdr->b_l1hdr.b_buf == NULL ||
- hdr->b_l1hdr.b_datacnt > 0);
+ hdr->b_l1hdr.b_bufcnt > 0);
ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
ASSERT3P(hdr->b_l1hdr.b_state, ==, arc_anon);
}
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
ASSERT(!HDR_IN_HASH_TABLE(hdr));
+ if (!HDR_EMPTY(hdr))
+ buf_discard_identity(hdr);
+
if (HDR_HAS_L2HDR(hdr)) {
l2arc_dev_t *dev = hdr->b_l2hdr.b_dev;
boolean_t buflist_held = MUTEX_HELD(&dev->l2ad_mtx);
@@ -2131,34 +3044,14 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
mutex_exit(&dev->l2ad_mtx);
}
- if (!BUF_EMPTY(hdr))
- buf_discard_identity(hdr);
+ if (HDR_HAS_L1HDR(hdr)) {
+ arc_cksum_free(hdr);
- if (hdr->b_freeze_cksum != NULL) {
- kmem_free(hdr->b_freeze_cksum, sizeof (zio_cksum_t));
- hdr->b_freeze_cksum = NULL;
- }
+ while (hdr->b_l1hdr.b_buf != NULL)
+ arc_buf_destroy_impl(hdr->b_l1hdr.b_buf);
- if (HDR_HAS_L1HDR(hdr)) {
- while (hdr->b_l1hdr.b_buf) {
- arc_buf_t *buf = hdr->b_l1hdr.b_buf;
-
- if (buf->b_efunc != NULL) {
- mutex_enter(&arc_user_evicts_lock);
- mutex_enter(&buf->b_evict_lock);
- ASSERT(buf->b_hdr != NULL);
- arc_buf_destroy(hdr->b_l1hdr.b_buf, FALSE);
- hdr->b_l1hdr.b_buf = buf->b_next;
- buf->b_hdr = &arc_eviction_hdr;
- buf->b_next = arc_eviction_list;
- arc_eviction_list = buf;
- mutex_exit(&buf->b_evict_lock);
- cv_signal(&arc_user_evicts_cv);
- mutex_exit(&arc_user_evicts_lock);
- } else {
- arc_buf_destroy(hdr->b_l1hdr.b_buf, TRUE);
- }
- }
+ if (hdr->b_l1hdr.b_pabd != NULL)
+ arc_hdr_free_pabd(hdr);
}
ASSERT3P(hdr->b_hash_next, ==, NULL);
@@ -2172,133 +3065,29 @@ arc_hdr_destroy(arc_buf_hdr_t *hdr)
}
void
-arc_buf_free(arc_buf_t *buf, void *tag)
-{
- arc_buf_hdr_t *hdr = buf->b_hdr;
- int hashed = hdr->b_l1hdr.b_state != arc_anon;
-
- ASSERT(buf->b_efunc == NULL);
- ASSERT(buf->b_data != NULL);
-
- if (hashed) {
- kmutex_t *hash_lock = HDR_LOCK(hdr);
-
- mutex_enter(hash_lock);
- hdr = buf->b_hdr;
- ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
-
- (void) remove_reference(hdr, hash_lock, tag);
- if (hdr->b_l1hdr.b_datacnt > 1) {
- arc_buf_destroy(buf, TRUE);
- } else {
- ASSERT(buf == hdr->b_l1hdr.b_buf);
- ASSERT(buf->b_efunc == NULL);
- hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE;
- }
- mutex_exit(hash_lock);
- } else if (HDR_IO_IN_PROGRESS(hdr)) {
- int destroy_hdr;
- /*
- * We are in the middle of an async write. Don't destroy
- * this buffer unless the write completes before we finish
- * decrementing the reference count.
- */
- mutex_enter(&arc_user_evicts_lock);
- (void) remove_reference(hdr, NULL, tag);
- ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
- destroy_hdr = !HDR_IO_IN_PROGRESS(hdr);
- mutex_exit(&arc_user_evicts_lock);
- if (destroy_hdr)
- arc_hdr_destroy(hdr);
- } else {
- if (remove_reference(hdr, NULL, tag) > 0)
- arc_buf_destroy(buf, TRUE);
- else
- arc_hdr_destroy(hdr);
- }
-}
-
-boolean_t
-arc_buf_remove_ref(arc_buf_t *buf, void* tag)
+arc_buf_destroy(arc_buf_t *buf, void* tag)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
kmutex_t *hash_lock = HDR_LOCK(hdr);
- boolean_t no_callback = (buf->b_efunc == NULL);
if (hdr->b_l1hdr.b_state == arc_anon) {
- ASSERT(hdr->b_l1hdr.b_datacnt == 1);
- arc_buf_free(buf, tag);
- return (no_callback);
+ ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1);
+ ASSERT(!HDR_IO_IN_PROGRESS(hdr));
+ VERIFY0(remove_reference(hdr, NULL, tag));
+ arc_hdr_destroy(hdr);
+ return;
}
mutex_enter(hash_lock);
- hdr = buf->b_hdr;
- ASSERT(hdr->b_l1hdr.b_datacnt > 0);
+ ASSERT3P(hdr, ==, buf->b_hdr);
+ ASSERT(hdr->b_l1hdr.b_bufcnt > 0);
ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
- ASSERT(hdr->b_l1hdr.b_state != arc_anon);
- ASSERT(buf->b_data != NULL);
+ ASSERT3P(hdr->b_l1hdr.b_state, !=, arc_anon);
+ ASSERT3P(buf->b_data, !=, NULL);
(void) remove_reference(hdr, hash_lock, tag);
- if (hdr->b_l1hdr.b_datacnt > 1) {
- if (no_callback)
- arc_buf_destroy(buf, TRUE);
- } else if (no_callback) {
- ASSERT(hdr->b_l1hdr.b_buf == buf && buf->b_next == NULL);
- ASSERT(buf->b_efunc == NULL);
- hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE;
- }
- ASSERT(no_callback || hdr->b_l1hdr.b_datacnt > 1 ||
- refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
+ arc_buf_destroy_impl(buf);
mutex_exit(hash_lock);
- return (no_callback);
-}
-
-uint64_t
-arc_buf_size(arc_buf_t *buf)
-{
- return (buf->b_hdr->b_size);
-}
-
-/*
- * Called from the DMU to determine if the current buffer should be
- * evicted. In order to ensure proper locking, the eviction must be initiated
- * from the DMU. Return true if the buffer is associated with user data and
- * duplicate buffers still exist.
- */
-boolean_t
-arc_buf_eviction_needed(arc_buf_t *buf)
-{
- arc_buf_hdr_t *hdr;
- boolean_t evict_needed = B_FALSE;
-
- if (zfs_disable_dup_eviction)
- return (B_FALSE);
-
- mutex_enter(&buf->b_evict_lock);
- hdr = buf->b_hdr;
- if (hdr == NULL) {
- /*
- * We are in arc_do_user_evicts(); let that function
- * perform the eviction.
- */
- ASSERT(buf->b_data == NULL);
- mutex_exit(&buf->b_evict_lock);
- return (B_FALSE);
- } else if (buf->b_data == NULL) {
- /*
- * We have already been added to the arc eviction list;
- * recommend eviction.
- */
- ASSERT3P(hdr, ==, &arc_eviction_hdr);
- mutex_exit(&buf->b_evict_lock);
- return (B_TRUE);
- }
-
- if (hdr->b_l1hdr.b_datacnt > 1 && HDR_ISTYPE_DATA(hdr))
- evict_needed = B_TRUE;
-
- mutex_exit(&buf->b_evict_lock);
- return (evict_needed);
}
/*
@@ -2325,11 +3114,11 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
state = hdr->b_l1hdr.b_state;
if (GHOST_STATE(state)) {
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
- ASSERT(hdr->b_l1hdr.b_buf == NULL);
+ ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
/*
* l2arc_write_buffers() relies on a header's L1 portion
- * (i.e. its b_tmp_cdata field) during its write phase.
+	 * (i.e. its b_pabd field) during its write phase.
* Thus, we cannot push a header onto the arc_l2c_only
* state (removing its L1 piece) until the header is
* done being written to the l2arc.
@@ -2340,11 +3129,12 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
}
ARCSTAT_BUMP(arcstat_deleted);
- bytes_evicted += hdr->b_size;
+ bytes_evicted += HDR_GET_LSIZE(hdr);
DTRACE_PROBE1(arc__delete, arc_buf_hdr_t *, hdr);
if (HDR_HAS_L2HDR(hdr)) {
+ ASSERT(hdr->b_l1hdr.b_pabd == NULL);
/*
* This buffer is cached on the 2nd Level ARC;
* don't destroy the header.
@@ -2376,7 +3166,6 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
}
ASSERT0(refcount_count(&hdr->b_l1hdr.b_refcnt));
- ASSERT3U(hdr->b_l1hdr.b_datacnt, >, 0);
while (hdr->b_l1hdr.b_buf) {
arc_buf_t *buf = hdr->b_l1hdr.b_buf;
if (!mutex_tryenter(&buf->b_evict_lock)) {
@@ -2384,37 +3173,39 @@ arc_evict_hdr(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
break;
}
if (buf->b_data != NULL)
- bytes_evicted += hdr->b_size;
- if (buf->b_efunc != NULL) {
- mutex_enter(&arc_user_evicts_lock);
- arc_buf_destroy(buf, FALSE);
- hdr->b_l1hdr.b_buf = buf->b_next;
- buf->b_hdr = &arc_eviction_hdr;
- buf->b_next = arc_eviction_list;
- arc_eviction_list = buf;
- cv_signal(&arc_user_evicts_cv);
- mutex_exit(&arc_user_evicts_lock);
- mutex_exit(&buf->b_evict_lock);
- } else {
- mutex_exit(&buf->b_evict_lock);
- arc_buf_destroy(buf, TRUE);
- }
+ bytes_evicted += HDR_GET_LSIZE(hdr);
+ mutex_exit(&buf->b_evict_lock);
+ arc_buf_destroy_impl(buf);
}
if (HDR_HAS_L2HDR(hdr)) {
- ARCSTAT_INCR(arcstat_evict_l2_cached, hdr->b_size);
+ ARCSTAT_INCR(arcstat_evict_l2_cached, HDR_GET_LSIZE(hdr));
} else {
- if (l2arc_write_eligible(hdr->b_spa, hdr))
- ARCSTAT_INCR(arcstat_evict_l2_eligible, hdr->b_size);
- else
- ARCSTAT_INCR(arcstat_evict_l2_ineligible, hdr->b_size);
+ if (l2arc_write_eligible(hdr->b_spa, hdr)) {
+ ARCSTAT_INCR(arcstat_evict_l2_eligible,
+ HDR_GET_LSIZE(hdr));
+ } else {
+ ARCSTAT_INCR(arcstat_evict_l2_ineligible,
+ HDR_GET_LSIZE(hdr));
+ }
}
- if (hdr->b_l1hdr.b_datacnt == 0) {
+ if (hdr->b_l1hdr.b_bufcnt == 0) {
+ arc_cksum_free(hdr);
+
+ bytes_evicted += arc_hdr_size(hdr);
+
+ /*
+ * If this hdr is being evicted and has a compressed
+ * buffer then we discard it here before we change states.
+ * This ensures that the accounting is updated correctly
+ * in arc_free_data_impl().
+ */
+ arc_hdr_free_pabd(hdr);
+
arc_change_state(evicted_state, hdr, hash_lock);
ASSERT(HDR_IN_HASH_TABLE(hdr));
- hdr->b_flags |= ARC_FLAG_IN_HASH_TABLE;
- hdr->b_flags &= ~ARC_FLAG_BUF_AVAILABLE;
+ arc_hdr_set_flags(hdr, ARC_FLAG_IN_HASH_TABLE);
DTRACE_PROBE1(arc__evict, arc_buf_hdr_t *, hdr);
}
@@ -2508,7 +3299,7 @@ arc_evict_state_impl(multilist_t *ml, int idx, arc_buf_hdr_t *marker,
* thread. If we used cv_broadcast, we could
* wake up "too many" threads causing arc_size
* to significantly overflow arc_c; since
- * arc_get_data_buf() doesn't check for overflow
+ * arc_get_data_impl() doesn't check for overflow
* when it's woken up (it doesn't because it's
* possible for the ARC to be overflowing while
* full of un-evictable buffers, and the
@@ -2550,7 +3341,7 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
arc_buf_contents_t type)
{
uint64_t total_evicted = 0;
- multilist_t *ml = &state->arcs_list[type];
+ multilist_t *ml = state->arcs_list[type];
int num_sublists;
arc_buf_hdr_t **markers;
int i;
@@ -2589,6 +3380,18 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
* we're evicting all available buffers.
*/
while (total_evicted < bytes || bytes == ARC_EVICT_ALL) {
+ int sublist_idx = multilist_get_random_index(ml);
+ uint64_t scan_evicted = 0;
+
+ /*
+ * Try to reduce pinned dnodes with a floor of arc_dnode_limit.
+ * Request that 10% of the LRUs be scanned by the superblock
+ * shrinker.
+ */
+ if (type == ARC_BUFC_DATA && arc_dnode_size > arc_dnode_limit)
+ arc_prune_async((arc_dnode_size - arc_dnode_limit) /
+ sizeof (dnode_t) / zfs_arc_dnode_reduce_percent);
+
/*
* Start eviction using a randomly selected sublist,
* this is to try and evenly balance eviction across all
@@ -2596,9 +3399,6 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
* (e.g. index 0) would cause evictions to favor certain
* sublists over others.
*/
- int sublist_idx = multilist_get_random_index(ml);
- uint64_t scan_evicted = 0;
-
for (i = 0; i < num_sublists; i++) {
uint64_t bytes_remaining;
uint64_t bytes_evicted;
@@ -2661,12 +3461,12 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
* Flush all "evictable" data of the given type from the arc state
* specified. This will not evict any "active" buffers (i.e. referenced).
*
- * When 'retry' is set to FALSE, the function will make a single pass
+ * When 'retry' is set to B_FALSE, the function will make a single pass
* over the state and evict any buffers that it can. Since it doesn't
* continually retry the eviction, it might end up leaving some buffers
* in the ARC due to lock misses.
*
- * When 'retry' is set to TRUE, the function will continually retry the
+ * When 'retry' is set to B_TRUE, the function will continually retry the
* eviction until *all* evictable buffers have been removed from the
* state. As a result, if concurrent insertions into the state are
* allowed (e.g. if the ARC isn't shutting down), this function might
@@ -2678,7 +3478,7 @@ arc_flush_state(arc_state_t *state, uint64_t spa, arc_buf_contents_t type,
{
uint64_t evicted = 0;
- while (state->arcs_lsize[type] != 0) {
+ while (refcount_count(&state->arcs_esize[type]) != 0) {
evicted += arc_evict_state(state, spa, ARC_EVICT_ALL, type);
if (!retry)
@@ -2729,7 +3529,11 @@ arc_prune_async(int64_t adjust)
refcount_add(&ap->p_refcnt, ap->p_pfunc);
ap->p_adjust = adjust;
- taskq_dispatch(arc_prune_taskq, arc_prune_task, ap, TQ_SLEEP);
+ if (taskq_dispatch(arc_prune_taskq, arc_prune_task,
+ ap, TQ_SLEEP) == TASKQID_INVALID) {
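+			/* Dispatch failed; drop the reference taken above. */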
+ refcount_remove(&ap->p_refcnt, ap->p_pfunc);
+ continue;
+ }
ARCSTAT_BUMP(arcstat_prune);
}
mutex_exit(&arc_prune_mtx);
@@ -2749,8 +3553,8 @@ arc_adjust_impl(arc_state_t *state, uint64_t spa, int64_t bytes,
{
int64_t delta;
- if (bytes > 0 && state->arcs_lsize[type] > 0) {
- delta = MIN(state->arcs_lsize[type], bytes);
+ if (bytes > 0 && refcount_count(&state->arcs_esize[type]) > 0) {
+ delta = MIN(refcount_count(&state->arcs_esize[type]), bytes);
return (arc_evict_state(state, spa, delta, type));
}
@@ -2777,7 +3581,7 @@ arc_adjust_impl(arc_state_t *state, uint64_t spa, int64_t bytes,
static uint64_t
arc_adjust_meta_balanced(void)
{
- int64_t adjustmnt, delta, prune = 0;
+ int64_t delta, prune = 0, adjustmnt;
uint64_t total_evicted = 0;
arc_buf_contents_t type = ARC_BUFC_DATA;
int restarts = MAX(zfs_arc_meta_adjust_restarts, 0);
@@ -2793,8 +3597,9 @@ arc_adjust_meta_balanced(void)
*/
adjustmnt = arc_meta_used - arc_meta_limit;
- if (adjustmnt > 0 && arc_mru->arcs_lsize[type] > 0) {
- delta = MIN(arc_mru->arcs_lsize[type], adjustmnt);
+ if (adjustmnt > 0 && refcount_count(&arc_mru->arcs_esize[type]) > 0) {
+ delta = MIN(refcount_count(&arc_mru->arcs_esize[type]),
+ adjustmnt);
total_evicted += arc_adjust_impl(arc_mru, 0, delta, type);
adjustmnt -= delta;
}
@@ -2809,23 +3614,26 @@ arc_adjust_meta_balanced(void)
* simply decrement the amount of data evicted from the MRU.
*/
- if (adjustmnt > 0 && arc_mfu->arcs_lsize[type] > 0) {
- delta = MIN(arc_mfu->arcs_lsize[type], adjustmnt);
+ if (adjustmnt > 0 && refcount_count(&arc_mfu->arcs_esize[type]) > 0) {
+ delta = MIN(refcount_count(&arc_mfu->arcs_esize[type]),
+ adjustmnt);
total_evicted += arc_adjust_impl(arc_mfu, 0, delta, type);
}
adjustmnt = arc_meta_used - arc_meta_limit;
- if (adjustmnt > 0 && arc_mru_ghost->arcs_lsize[type] > 0) {
+ if (adjustmnt > 0 &&
+ refcount_count(&arc_mru_ghost->arcs_esize[type]) > 0) {
delta = MIN(adjustmnt,
- arc_mru_ghost->arcs_lsize[type]);
+ refcount_count(&arc_mru_ghost->arcs_esize[type]));
total_evicted += arc_adjust_impl(arc_mru_ghost, 0, delta, type);
adjustmnt -= delta;
}
- if (adjustmnt > 0 && arc_mfu_ghost->arcs_lsize[type] > 0) {
+ if (adjustmnt > 0 &&
+ refcount_count(&arc_mfu_ghost->arcs_esize[type]) > 0) {
delta = MIN(adjustmnt,
- arc_mfu_ghost->arcs_lsize[type]);
+ refcount_count(&arc_mfu_ghost->arcs_esize[type]));
total_evicted += arc_adjust_impl(arc_mfu_ghost, 0, delta, type);
}
@@ -2882,7 +3690,7 @@ arc_adjust_meta_only(void)
/*
* Similar to the above, we want to evict enough bytes to get us
* below the meta limit, but not so much as to drop us below the
- * space alloted to the MFU (which is defined as arc_c - arc_p).
+ * space allotted to the MFU (which is defined as arc_c - arc_p).
*/
target = MIN((int64_t)(arc_meta_used - arc_meta_limit),
(int64_t)(refcount_count(&arc_mfu->arcs_size) - (arc_c - arc_p)));
@@ -2912,8 +3720,8 @@ arc_adjust_meta(void)
static arc_buf_contents_t
arc_adjust_type(arc_state_t *state)
{
- multilist_t *data_ml = &state->arcs_list[ARC_BUFC_DATA];
- multilist_t *meta_ml = &state->arcs_list[ARC_BUFC_METADATA];
+ multilist_t *data_ml = state->arcs_list[ARC_BUFC_DATA];
+ multilist_t *meta_ml = state->arcs_list[ARC_BUFC_METADATA];
int data_idx = multilist_get_random_index(data_ml);
int meta_idx = multilist_get_random_index(meta_ml);
multilist_sublist_t *data_mls;
@@ -3121,36 +3929,13 @@ arc_adjust(void)
return (total_evicted);
}
-static void
-arc_do_user_evicts(void)
-{
- mutex_enter(&arc_user_evicts_lock);
- while (arc_eviction_list != NULL) {
- arc_buf_t *buf = arc_eviction_list;
- arc_eviction_list = buf->b_next;
- mutex_enter(&buf->b_evict_lock);
- buf->b_hdr = NULL;
- mutex_exit(&buf->b_evict_lock);
- mutex_exit(&arc_user_evicts_lock);
-
- if (buf->b_efunc != NULL)
- VERIFY0(buf->b_efunc(buf->b_private));
-
- buf->b_efunc = NULL;
- buf->b_private = NULL;
- kmem_cache_free(buf_cache, buf);
- mutex_enter(&arc_user_evicts_lock);
- }
- mutex_exit(&arc_user_evicts_lock);
-}
-
void
arc_flush(spa_t *spa, boolean_t retry)
{
uint64_t guid = 0;
/*
- * If retry is TRUE, a spa must not be specified since we have
+ * If retry is B_TRUE, a spa must not be specified since we have
* no good way to determine if all of a spa's buffers have been
* evicted from an arc state.
*/
@@ -3170,9 +3955,6 @@ arc_flush(spa_t *spa, boolean_t retry)
(void) arc_flush_state(arc_mfu_ghost, guid, ARC_BUFC_DATA, retry);
(void) arc_flush_state(arc_mfu_ghost, guid, ARC_BUFC_METADATA, retry);
-
- arc_do_user_evicts();
- ASSERT(spa || arc_eviction_list == NULL);
}
void
@@ -3197,6 +3979,55 @@ arc_shrink(int64_t to_free)
(void) arc_adjust();
}
+/*
+ * Return the maximum amount of memory that we could possibly use. Reduced
+ * to half of all memory in user space, which is primarily used for testing.
+ */
+static uint64_t
+arc_all_memory(void)
+{
+#ifdef _KERNEL
+#ifdef CONFIG_HIGHMEM
+ return (ptob(totalram_pages - totalhigh_pages));
+#else
+ return (ptob(totalram_pages));
+#endif /* CONFIG_HIGHMEM */
+#else
+ return (ptob(physmem) / 2);
+#endif /* _KERNEL */
+}
+
+/*
+ * Return the amount of memory that is considered free. In user space,
+ * which is primarily used for testing, we pretend that free memory
+ * ranges from 0 to 20% of all memory.
+ */
+static uint64_t
+arc_free_memory(void)
+{
+#ifdef _KERNEL
+#ifdef CONFIG_HIGHMEM
+ struct sysinfo si;
+ si_meminfo(&si);
+ return (ptob(si.freeram - si.freehigh));
+#else
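+	/*
+	 * Inactive file/anon pages and reclaimable slab can be reclaimed
+	 * quickly under pressure, so count them as free.
+	 */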
+#ifdef ZFS_GLOBAL_NODE_PAGE_STATE
+ return (ptob(nr_free_pages() +
+ global_node_page_state(NR_INACTIVE_FILE) +
+ global_node_page_state(NR_INACTIVE_ANON) +
+ global_node_page_state(NR_SLAB_RECLAIMABLE)));
+#else
+ return (ptob(nr_free_pages() +
+ global_page_state(NR_INACTIVE_FILE) +
+ global_page_state(NR_INACTIVE_ANON) +
+ global_page_state(NR_SLAB_RECLAIMABLE)));
+#endif /* ZFS_GLOBAL_NODE_PAGE_STATE */
+#endif /* CONFIG_HIGHMEM */
+#else
+ return (spa_get_random(arc_all_memory() * 20 / 100));
+#endif /* _KERNEL */
+}
+
typedef enum free_memory_reason_t {
FMR_UNKNOWN,
FMR_NEEDFREE,
@@ -3235,9 +4066,13 @@ arc_available_memory(void)
#ifdef _KERNEL
int64_t n;
#ifdef __linux__
+#ifdef freemem
+#undef freemem
+#endif
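+	/* The SPL may define freemem as a macro; use the local count below. */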
pgcnt_t needfree = btop(arc_need_free);
pgcnt_t lotsfree = btop(arc_sys_free);
pgcnt_t desfree = 0;
+ pgcnt_t freemem = btop(arc_free_memory());
#endif
if (needfree > 0) {
@@ -3276,7 +4111,6 @@ arc_available_memory(void)
r = FMR_SWAPFS_MINFREE;
}
-
/*
* Check that we have enough availrmem that memory locking (e.g., via
* mlock(3C) or memcntl(2)) can still succeed. (pages_pp_maximum
@@ -3292,9 +4126,9 @@ arc_available_memory(void)
}
#endif
-#if defined(__i386)
+#if defined(_ILP32)
/*
- * If we're on an i386 platform, it's possible that we'll exhaust the
+ * If we're on a 32-bit platform, it's possible that we'll exhaust the
* kernel heap space before we ever run out of available physical
* memory. Most checks of the size of the heap_area compare against
* tune.t_minarmem, which is the minimum available real memory that we
@@ -3315,15 +4149,16 @@ arc_available_memory(void)
/*
* If zio data pages are being allocated out of a separate heap segment,
* then enforce that the size of available vmem for this arena remains
- * above about 1/16th free.
+ * above about 1/4th (1/(2^arc_zio_arena_free_shift)) free.
*
- * Note: The 1/16th arena free requirement was put in place
- * to aggressively evict memory from the arc in order to avoid
- * memory fragmentation issues.
+ * Note that reducing the arc_zio_arena_free_shift keeps more virtual
+ * memory (in the zio_arena) free, which can avoid memory
+ * fragmentation issues.
*/
if (zio_arena != NULL) {
- n = vmem_size(zio_arena, VMEM_FREE) -
- (vmem_size(zio_arena, VMEM_ALLOC) >> 4);
+ n = (int64_t)vmem_size(zio_arena, VMEM_FREE) -
+ (vmem_size(zio_arena, VMEM_ALLOC) >>
+ arc_zio_arena_free_shift);
if (n < lowest) {
lowest = n;
r = FMR_ZIO_ARENA;
@@ -3343,7 +4178,7 @@ arc_available_memory(void)
/*
* Determine if the system is under memory pressure and is asking
- * to reclaim memory. A return value of TRUE indicates that the system
+ * to reclaim memory. A return value of B_TRUE indicates that the system
* is under memory pressure and that the arc should adjust accordingly.
*/
static boolean_t
@@ -3362,6 +4197,7 @@ arc_kmem_reap_now(void)
extern kmem_cache_t *zio_data_buf_cache[];
extern kmem_cache_t *range_seg_cache;
+#ifdef _KERNEL
if ((arc_meta_used >= arc_meta_limit) && zfs_arc_meta_prune) {
/*
* We are exceeding our meta-data cache limit.
@@ -3369,9 +4205,16 @@ arc_kmem_reap_now(void)
*/
arc_prune_async(zfs_arc_meta_prune);
}
+#if defined(_ILP32)
+ /*
+ * Reclaim unused memory from all kmem caches.
+ */
+ kmem_reap();
+#endif
+#endif
for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) {
-#ifdef _ILP32
+#if defined(_ILP32)
/* reach upper limit of cache size on 32-bit */
if (zio_buf_cache[i] == NULL)
break;
@@ -3400,13 +4243,13 @@ arc_kmem_reap_now(void)
}
/*
- * Threads can block in arc_get_data_buf() waiting for this thread to evict
+ * Threads can block in arc_get_data_impl() waiting for this thread to evict
* enough data and signal them to proceed. When this happens, the threads in
- * arc_get_data_buf() are sleeping while holding the hash lock for their
+ * arc_get_data_impl() are sleeping while holding the hash lock for their
* particular arc header. Thus, we must be careful to never sleep on a
* hash lock in this thread. This is to prevent the following deadlock:
*
- * - Thread A sleeps on CV in arc_get_data_buf() holding hash lock "L",
+ * - Thread A sleeps on CV in arc_get_data_impl() holding hash lock "L",
* waiting for the reclaim thread to signal it.
*
* - arc_reclaim_thread() tries to acquire hash lock "L" using mutex_enter,
@@ -3419,7 +4262,7 @@ static void
arc_reclaim_thread(void)
{
fstrans_cookie_t cookie = spl_fstrans_mark();
- clock_t growtime = 0;
+ hrtime_t growtime = 0;
callb_cpr_t cpr;
CALLB_CPR_INIT(&cpr, &arc_reclaim_lock, callb_generic_cpr, FTAG);
@@ -3427,13 +4270,35 @@ arc_reclaim_thread(void)
mutex_enter(&arc_reclaim_lock);
while (!arc_reclaim_thread_exit) {
int64_t to_free;
- int64_t free_memory = arc_available_memory();
uint64_t evicted = 0;
-
+ uint64_t need_free = arc_need_free;
arc_tuning_update();
+ /*
+ * This is necessary in order for the mdb ::arc dcmd to
+		 * show up-to-date information. Since the ::arc command
+ * does not call the kstat's update function, without
+ * this call, the command may show stale stats for the
+ * anon, mru, mru_ghost, mfu, and mfu_ghost lists. Even
+ * with this change, the data might be up to 1 second
+ * out of date; but that should suffice. The arc_state_t
+ * structures can be queried directly if more accurate
+ * information is needed.
+ */
+#ifndef __linux__
+ if (arc_ksp != NULL)
+ arc_ksp->ks_update(arc_ksp, KSTAT_READ);
+#endif
mutex_exit(&arc_reclaim_lock);
+ /*
+ * We call arc_adjust() before (possibly) calling
+ * arc_kmem_reap_now(), so that we can wake up
+		 * arc_get_data_impl() sooner.
+ */
+ evicted = arc_adjust();
+
+ int64_t free_memory = arc_available_memory();
if (free_memory < 0) {
arc_no_grow = B_TRUE;
@@ -3443,7 +4308,7 @@ arc_reclaim_thread(void)
* Wait at least zfs_grow_retry (default 5) seconds
* before considering growing.
*/
- growtime = ddi_get_lbolt() + (arc_grow_retry * hz);
+ growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
arc_kmem_reap_now();
@@ -3456,18 +4321,16 @@ arc_reclaim_thread(void)
to_free = (arc_c >> arc_shrink_shift) - free_memory;
if (to_free > 0) {
#ifdef _KERNEL
- to_free = MAX(to_free, arc_need_free);
+ to_free = MAX(to_free, need_free);
#endif
arc_shrink(to_free);
}
} else if (free_memory < arc_c >> arc_no_grow_shift) {
arc_no_grow = B_TRUE;
- } else if (ddi_get_lbolt() >= growtime) {
+ } else if (gethrtime() >= growtime) {
arc_no_grow = B_FALSE;
}
- evicted = arc_adjust();
-
mutex_enter(&arc_reclaim_lock);
/*
@@ -3483,11 +4346,12 @@ arc_reclaim_thread(void)
/*
* We're either no longer overflowing, or we
* can't evict anything more, so we should wake
- * up any threads before we go to sleep and clear
- * arc_need_free since nothing more can be done.
+ * up any threads before we go to sleep and remove
+ * the bytes we were working on from arc_need_free
+ * since nothing more will be done here.
*/
cv_broadcast(&arc_reclaim_waiters_cv);
- arc_need_free = 0;
+ ARCSTAT_INCR(arcstat_need_free, -need_free);
/*
* Block until signaled, or after one second (we
@@ -3495,66 +4359,19 @@ arc_reclaim_thread(void)
* even if we aren't being signalled)
*/
CALLB_CPR_SAFE_BEGIN(&cpr);
- (void) cv_timedwait_sig(&arc_reclaim_thread_cv,
- &arc_reclaim_lock, ddi_get_lbolt() + hz);
+ (void) cv_timedwait_sig_hires(&arc_reclaim_thread_cv,
+ &arc_reclaim_lock, SEC2NSEC(1), MSEC2NSEC(1), 0);
CALLB_CPR_SAFE_END(&cpr, &arc_reclaim_lock);
}
}
- arc_reclaim_thread_exit = FALSE;
+ arc_reclaim_thread_exit = B_FALSE;
cv_broadcast(&arc_reclaim_thread_cv);
CALLB_CPR_EXIT(&cpr); /* drops arc_reclaim_lock */
spl_fstrans_unmark(cookie);
thread_exit();
}
-static void
-arc_user_evicts_thread(void)
-{
- fstrans_cookie_t cookie = spl_fstrans_mark();
- callb_cpr_t cpr;
-
- CALLB_CPR_INIT(&cpr, &arc_user_evicts_lock, callb_generic_cpr, FTAG);
-
- mutex_enter(&arc_user_evicts_lock);
- while (!arc_user_evicts_thread_exit) {
- mutex_exit(&arc_user_evicts_lock);
-
- arc_do_user_evicts();
-
- /*
- * This is necessary in order for the mdb ::arc dcmd to
- * show up to date information. Since the ::arc command
- * does not call the kstat's update function, without
- * this call, the command may show stale stats for the
- * anon, mru, mru_ghost, mfu, and mfu_ghost lists. Even
- * with this change, the data might be up to 1 second
- * out of date; but that should suffice. The arc_state_t
- * structures can be queried directly if more accurate
- * information is needed.
- */
- if (arc_ksp != NULL)
- arc_ksp->ks_update(arc_ksp, KSTAT_READ);
-
- mutex_enter(&arc_user_evicts_lock);
-
- /*
- * Block until signaled, or after one second (we need to
- * call the arc's kstat update function regularly).
- */
- CALLB_CPR_SAFE_BEGIN(&cpr);
- (void) cv_timedwait_sig(&arc_user_evicts_cv,
- &arc_user_evicts_lock, ddi_get_lbolt() + hz);
- CALLB_CPR_SAFE_END(&cpr, &arc_user_evicts_lock);
- }
-
- arc_user_evicts_thread_exit = FALSE;
- cv_broadcast(&arc_user_evicts_cv);
- CALLB_CPR_EXIT(&cpr); /* drops arc_user_evicts_lock */
- spl_fstrans_unmark(cookie);
- thread_exit();
-}
-
#ifdef _KERNEL
/*
* Determine the amount of memory eligible for eviction contained in the
@@ -3602,23 +4419,32 @@ arc_user_evicts_thread(void)
* increase this negative difference.
*/
static uint64_t
-arc_evictable_memory(void) {
+arc_evictable_memory(void)
+{
uint64_t arc_clean =
- arc_mru->arcs_lsize[ARC_BUFC_DATA] +
- arc_mru->arcs_lsize[ARC_BUFC_METADATA] +
- arc_mfu->arcs_lsize[ARC_BUFC_DATA] +
- arc_mfu->arcs_lsize[ARC_BUFC_METADATA];
- uint64_t ghost_clean =
- arc_mru_ghost->arcs_lsize[ARC_BUFC_DATA] +
- arc_mru_ghost->arcs_lsize[ARC_BUFC_METADATA] +
- arc_mfu_ghost->arcs_lsize[ARC_BUFC_DATA] +
- arc_mfu_ghost->arcs_lsize[ARC_BUFC_METADATA];
+ refcount_count(&arc_mru->arcs_esize[ARC_BUFC_DATA]) +
+ refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA]) +
+ refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_DATA]) +
+ refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_METADATA]);
uint64_t arc_dirty = MAX((int64_t)arc_size - (int64_t)arc_clean, 0);
- if (arc_dirty >= arc_c_min)
- return (ghost_clean + arc_clean);
+ /*
+ * Scale reported evictable memory in proportion to page cache, cap
+ * at specified min/max.
+ */
+#ifdef ZFS_GLOBAL_NODE_PAGE_STATE
+ uint64_t min = (ptob(global_node_page_state(NR_FILE_PAGES)) / 100) *
+ zfs_arc_pc_percent;
+#else
+ uint64_t min = (ptob(global_page_state(NR_FILE_PAGES)) / 100) *
+ zfs_arc_pc_percent;
+#endif
+ min = MAX(arc_c_min, MIN(arc_c_max, min));
- return (ghost_clean + MAX((int64_t)arc_size - (int64_t)arc_c_min, 0));
+ if (arc_dirty >= min)
+ return (arc_clean);
+
+ return (MAX((int64_t)arc_size - (int64_t)min, 0));
}
/*
@@ -3652,33 +4478,33 @@ __arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc)
return (SHRINK_STOP);
/* Reclaim in progress */
- if (mutex_tryenter(&arc_reclaim_lock) == 0)
- return (SHRINK_STOP);
+ if (mutex_tryenter(&arc_reclaim_lock) == 0) {
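+		/* Record the deferred request so the reclaim thread honors it. */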
+ ARCSTAT_INCR(arcstat_need_free, ptob(sc->nr_to_scan));
+ return (0);
+ }
mutex_exit(&arc_reclaim_lock);
/*
* Evict the requested number of pages by shrinking arc_c the
- * requested amount. If there is nothing left to evict just
- * reap whatever we can from the various arc slabs.
+ * requested amount.
*/
if (pages > 0) {
arc_shrink(ptob(sc->nr_to_scan));
- arc_kmem_reap_now();
+ if (current_is_kswapd())
+ arc_kmem_reap_now();
#ifdef HAVE_SPLIT_SHRINKER_CALLBACK
- pages = MAX(pages - btop(arc_evictable_memory()), 0);
+ pages = MAX((int64_t)pages -
+ (int64_t)btop(arc_evictable_memory()), 0);
#else
pages = btop(arc_evictable_memory());
#endif
- } else {
- arc_kmem_reap_now();
+ /*
+ * We've shrunk what we can, wake up threads.
+ */
+ cv_broadcast(&arc_reclaim_waiters_cv);
+ } else
pages = SHRINK_STOP;
- }
-
- /*
- * We've reaped what we can, wake up threads.
- */
- cv_broadcast(&arc_reclaim_waiters_cv);
/*
* When direct reclaim is observed it usually indicates a rapid
@@ -3691,7 +4517,7 @@ __arc_shrinker_func(struct shrinker *shrink, struct shrink_control *sc)
ARCSTAT_BUMP(arcstat_memory_indirect_count);
} else {
arc_no_grow = B_TRUE;
- arc_need_free = ptob(sc->nr_to_scan);
+ arc_kmem_reap_now();
ARCSTAT_BUMP(arcstat_memory_direct_count);
}
@@ -3704,7 +4530,7 @@ SPL_SHRINKER_DECLARE(arc_shrinker, arc_shrinker_func, DEFAULT_SEEKS);
/*
* Adapt arc info given the number of bytes we are trying to add and
- * the state that we are comming from. This function is only called
+ * the state that we are coming from. This function is only called
* when we are adding new content to the cache.
*/
static void
@@ -3787,19 +4613,45 @@ arc_is_overflowing(void)
return (arc_size >= arc_c + overflow);
}
+static abd_t *
+arc_get_data_abd(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
+{
+ arc_buf_contents_t type = arc_buf_type(hdr);
+
+ arc_get_data_impl(hdr, size, tag);
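+	/* The second abd_alloc() argument selects a metadata allocation. */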
+ if (type == ARC_BUFC_METADATA) {
+ return (abd_alloc(size, B_TRUE));
+ } else {
+ ASSERT(type == ARC_BUFC_DATA);
+ return (abd_alloc(size, B_FALSE));
+ }
+}
+
+static void *
+arc_get_data_buf(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
+{
+ arc_buf_contents_t type = arc_buf_type(hdr);
+
+ arc_get_data_impl(hdr, size, tag);
+ if (type == ARC_BUFC_METADATA) {
+ return (zio_buf_alloc(size));
+ } else {
+ ASSERT(type == ARC_BUFC_DATA);
+ return (zio_data_buf_alloc(size));
+ }
+}
+
/*
- * The buffer, supplied as the first argument, needs a data block. If we
- * are hitting the hard limit for the cache size, we must sleep, waiting
- * for the eviction thread to catch up. If we're past the target size
- * but below the hard limit, we'll only signal the reclaim thread and
- * continue on.
+ * Allocate a block and return it to the caller. If we are hitting the
+ * hard limit for the cache size, we must sleep, waiting for the eviction
+ * thread to catch up. If we're past the target size but below the hard
+ * limit, we'll only signal the reclaim thread and continue on.
*/
static void
-arc_get_data_buf(arc_buf_t *buf)
+arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
{
- arc_state_t *state = buf->b_hdr->b_l1hdr.b_state;
- uint64_t size = buf->b_hdr->b_size;
- arc_buf_contents_t type = arc_buf_type(buf->b_hdr);
+ arc_state_t *state = hdr->b_l1hdr.b_state;
+ arc_buf_contents_t type = arc_buf_type(hdr);
arc_adapt(size, state);
@@ -3839,12 +4691,10 @@ arc_get_data_buf(arc_buf_t *buf)
mutex_exit(&arc_reclaim_lock);
}
+ VERIFY3U(hdr->b_type, ==, type);
if (type == ARC_BUFC_METADATA) {
- buf->b_data = zio_buf_alloc(size);
arc_space_consume(size, ARC_SPACE_META);
} else {
- ASSERT(type == ARC_BUFC_DATA);
- buf->b_data = zio_data_buf_alloc(size);
arc_space_consume(size, ARC_SPACE_DATA);
}
@@ -3852,11 +4702,9 @@ arc_get_data_buf(arc_buf_t *buf)
* Update the state size. Note that ghost states have a
* "ghost size" and so don't need to be updated.
*/
- if (!GHOST_STATE(buf->b_hdr->b_l1hdr.b_state)) {
- arc_buf_hdr_t *hdr = buf->b_hdr;
- arc_state_t *state = hdr->b_l1hdr.b_state;
+ if (!GHOST_STATE(state)) {
- (void) refcount_add_many(&state->arcs_size, size, buf);
+ (void) refcount_add_many(&state->arcs_size, size, tag);
/*
* If this is reached via arc_read, the link is
@@ -3869,9 +4717,10 @@ arc_get_data_buf(arc_buf_t *buf)
*/
if (multilist_link_active(&hdr->b_l1hdr.b_arc_node)) {
ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
- atomic_add_64(&hdr->b_l1hdr.b_state->arcs_lsize[type],
- size);
+ (void) refcount_add_many(&state->arcs_esize[type],
+ size, tag);
}
+
/*
* If we are growing the cache, and we are adding anonymous
* data, and we have outgrown arc_p, update arc_p
@@ -3883,6 +4732,55 @@ arc_get_data_buf(arc_buf_t *buf)
}
}
+static void
+arc_free_data_abd(arc_buf_hdr_t *hdr, abd_t *abd, uint64_t size, void *tag)
+{
+ arc_free_data_impl(hdr, size, tag);
+ abd_free(abd);
+}
+
+static void
+arc_free_data_buf(arc_buf_hdr_t *hdr, void *buf, uint64_t size, void *tag)
+{
+ arc_buf_contents_t type = arc_buf_type(hdr);
+
+ arc_free_data_impl(hdr, size, tag);
+ if (type == ARC_BUFC_METADATA) {
+ zio_buf_free(buf, size);
+ } else {
+ ASSERT(type == ARC_BUFC_DATA);
+ zio_data_buf_free(buf, size);
+ }
+}
+
+/*
+ * Free the arc data buffer.
+ */
+static void
+arc_free_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
+{
+ arc_state_t *state = hdr->b_l1hdr.b_state;
+ arc_buf_contents_t type = arc_buf_type(hdr);
+
+ /* protected by hash lock, if in the hash table */
+ if (multilist_link_active(&hdr->b_l1hdr.b_arc_node)) {
+ ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
+ ASSERT(state != arc_anon && state != arc_l2c_only);
+
+ (void) refcount_remove_many(&state->arcs_esize[type],
+ size, tag);
+ }
+ (void) refcount_remove_many(&state->arcs_size, size, tag);
+
+ VERIFY3U(hdr->b_type, ==, type);
+ if (type == ARC_BUFC_METADATA) {
+ arc_space_return(size, ARC_SPACE_META);
+ } else {
+ ASSERT(type == ARC_BUFC_DATA);
+ arc_space_return(size, ARC_SPACE_DATA);
+ }
+}
+
/*
* This routine is called whenever a buffer is accessed.
* NOTE: the hash lock is dropped in this function.
@@ -3924,7 +4822,7 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
ASSERT(multilist_link_active(
&hdr->b_l1hdr.b_arc_node));
} else {
- hdr->b_flags &= ~ARC_FLAG_PREFETCH;
+ arc_hdr_clear_flags(hdr, ARC_FLAG_PREFETCH);
atomic_inc_32(&hdr->b_l1hdr.b_mru_hits);
ARCSTAT_BUMP(arcstat_mru_hits);
}
@@ -3961,7 +4859,7 @@ arc_access(arc_buf_hdr_t *hdr, kmutex_t *hash_lock)
if (HDR_PREFETCH(hdr)) {
new_state = arc_mru;
if (refcount_count(&hdr->b_l1hdr.b_refcnt) > 0)
- hdr->b_flags &= ~ARC_FLAG_PREFETCH;
+ arc_hdr_clear_flags(hdr, ARC_FLAG_PREFETCH);
DTRACE_PROBE1(new_state__mru, arc_buf_hdr_t *, hdr);
} else {
new_state = arc_mfu;
@@ -4034,8 +4932,8 @@ void
arc_bcopy_func(zio_t *zio, arc_buf_t *buf, void *arg)
{
if (zio == NULL || zio->io_error == 0)
- bcopy(buf->b_data, arg, buf->b_hdr->b_size);
- VERIFY(arc_buf_remove_ref(buf, arg));
+ bcopy(buf->b_data, arg, arc_buf_size(buf));
+ arc_buf_destroy(buf, arg);
}
/* a generic arc_done_func_t */
@@ -4044,7 +4942,7 @@ arc_getbuf_func(zio_t *zio, arc_buf_t *buf, void *arg)
{
arc_buf_t **bufp = arg;
if (zio && zio->io_error) {
- VERIFY(arc_buf_remove_ref(buf, arg));
+ arc_buf_destroy(buf, arg);
*bufp = NULL;
} else {
*bufp = buf;
@@ -4052,18 +4950,31 @@ arc_getbuf_func(zio_t *zio, arc_buf_t *buf, void *arg)
}
}
+static void
+arc_hdr_verify(arc_buf_hdr_t *hdr, blkptr_t *bp)
+{
+ if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp)) {
+ ASSERT3U(HDR_GET_PSIZE(hdr), ==, 0);
+ ASSERT3U(HDR_GET_COMPRESS(hdr), ==, ZIO_COMPRESS_OFF);
+ } else {
+ if (HDR_COMPRESSION_ENABLED(hdr)) {
+ ASSERT3U(HDR_GET_COMPRESS(hdr), ==,
+ BP_GET_COMPRESS(bp));
+ }
+ ASSERT3U(HDR_GET_LSIZE(hdr), ==, BP_GET_LSIZE(bp));
+ ASSERT3U(HDR_GET_PSIZE(hdr), ==, BP_GET_PSIZE(bp));
+ }
+}
+
static void
arc_read_done(zio_t *zio)
{
- arc_buf_hdr_t *hdr;
- arc_buf_t *buf;
- arc_buf_t *abuf; /* buffer we're assigning to callback */
+ arc_buf_hdr_t *hdr = zio->io_private;
kmutex_t *hash_lock = NULL;
- arc_callback_t *callback_list, *acb;
- int freeable = FALSE;
-
- buf = zio->io_private;
- hdr = buf->b_hdr;
+ arc_callback_t *callback_list;
+ arc_callback_t *acb;
+ boolean_t freeable = B_FALSE;
+ boolean_t no_zio_error = (zio->io_error == 0);
/*
* The hdr was inserted into hash-table and removed from lists
@@ -4082,37 +4993,36 @@ arc_read_done(zio_t *zio)
ASSERT3U(hdr->b_dva.dva_word[1], ==,
BP_IDENTITY(zio->io_bp)->dva_word[1]);
- found = buf_hash_find(hdr->b_spa, zio->io_bp,
- &hash_lock);
+ found = buf_hash_find(hdr->b_spa, zio->io_bp, &hash_lock);
- ASSERT((found == NULL && HDR_FREED_IN_READ(hdr) &&
- hash_lock == NULL) ||
- (found == hdr &&
+ ASSERT((found == hdr &&
DVA_EQUAL(&hdr->b_dva, BP_IDENTITY(zio->io_bp))) ||
(found == hdr && HDR_L2_READING(hdr)));
+ ASSERT3P(hash_lock, !=, NULL);
+ }
+
+ if (no_zio_error) {
+ /* byteswap if necessary */
+ if (BP_SHOULD_BYTESWAP(zio->io_bp)) {
+ if (BP_GET_LEVEL(zio->io_bp) > 0) {
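+				/*
+				 * Indirect blocks hold block pointers and
+				 * are byteswapped as uint64 arrays.
+				 */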
+ hdr->b_l1hdr.b_byteswap = DMU_BSWAP_UINT64;
+ } else {
+ hdr->b_l1hdr.b_byteswap =
+ DMU_OT_BYTESWAP(BP_GET_TYPE(zio->io_bp));
+ }
+ } else {
+ hdr->b_l1hdr.b_byteswap = DMU_BSWAP_NUMFUNCS;
+ }
}
- hdr->b_flags &= ~ARC_FLAG_L2_EVICTED;
+ arc_hdr_clear_flags(hdr, ARC_FLAG_L2_EVICTED);
if (l2arc_noprefetch && HDR_PREFETCH(hdr))
- hdr->b_flags &= ~ARC_FLAG_L2CACHE;
+ arc_hdr_clear_flags(hdr, ARC_FLAG_L2CACHE);
- /* byteswap if necessary */
callback_list = hdr->b_l1hdr.b_acb;
- ASSERT(callback_list != NULL);
- if (BP_SHOULD_BYTESWAP(zio->io_bp) && zio->io_error == 0) {
- dmu_object_byteswap_t bswap =
- DMU_OT_BYTESWAP(BP_GET_TYPE(zio->io_bp));
- if (BP_GET_LEVEL(zio->io_bp) > 0)
- byteswap_uint64_array(buf->b_data, hdr->b_size);
- else
- dmu_ot_byteswap[bswap].ob_func(buf->b_data, hdr->b_size);
- }
-
- arc_cksum_compute(buf, B_FALSE);
- arc_buf_watch(buf);
+ ASSERT3P(callback_list, !=, NULL);
- if (hash_lock && zio->io_error == 0 &&
- hdr->b_l1hdr.b_state == arc_anon) {
+ if (hash_lock && no_zio_error && hdr->b_l1hdr.b_state == arc_anon) {
/*
* Only call arc_access on anonymous buffers. This is because
* if we've issued an I/O for an evicted buffer, we've already
@@ -4122,32 +5032,41 @@ arc_read_done(zio_t *zio)
arc_access(hdr, hash_lock);
}
- /* create copies of the data buffer for the callers */
- abuf = buf;
- for (acb = callback_list; acb; acb = acb->acb_next) {
- if (acb->acb_done) {
- if (abuf == NULL) {
- ARCSTAT_BUMP(arcstat_duplicate_reads);
- abuf = arc_buf_clone(buf);
- }
- acb->acb_buf = abuf;
- abuf = NULL;
+ /*
+ * If a read request has a callback (i.e. acb_done is not NULL), then we
+	 * make a buf containing the data according to the parameters that were
+ * passed in. The implementation of arc_buf_alloc_impl() ensures that we
+ * aren't needlessly decompressing the data multiple times.
+ */
+ int callback_cnt = 0;
+ for (acb = callback_list; acb != NULL; acb = acb->acb_next) {
+ if (!acb->acb_done)
+ continue;
+
+ /* This is a demand read since prefetches don't use callbacks */
+ callback_cnt++;
+
+ int error = arc_buf_alloc_impl(hdr, acb->acb_private,
+ acb->acb_compressed, no_zio_error, &acb->acb_buf);
+ if (no_zio_error) {
+ zio->io_error = error;
}
}
hdr->b_l1hdr.b_acb = NULL;
- hdr->b_flags &= ~ARC_FLAG_IO_IN_PROGRESS;
- ASSERT(!HDR_BUF_AVAILABLE(hdr));
- if (abuf == buf) {
- ASSERT(buf->b_efunc == NULL);
- ASSERT(hdr->b_l1hdr.b_datacnt == 1);
- hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE;
+ arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
+ if (callback_cnt == 0) {
+ ASSERT(HDR_PREFETCH(hdr));
+ ASSERT0(hdr->b_l1hdr.b_bufcnt);
+ ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
}
ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt) ||
callback_list != NULL);
- if (zio->io_error != 0) {
- hdr->b_flags |= ARC_FLAG_IO_ERROR;
+ if (no_zio_error) {
+ arc_hdr_verify(hdr, zio->io_bp);
+ } else {
+ arc_hdr_set_flags(hdr, ARC_FLAG_IO_ERROR);
if (hdr->b_l1hdr.b_state != arc_anon)
arc_change_state(arc_anon, hdr, hash_lock);
if (HDR_IN_HASH_TABLE(hdr))
@@ -4217,10 +5136,10 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
arc_flags_t *arc_flags, const zbookmark_phys_t *zb)
{
arc_buf_hdr_t *hdr = NULL;
- arc_buf_t *buf = NULL;
kmutex_t *hash_lock = NULL;
zio_t *rzio;
uint64_t guid = spa_load_guid(spa);
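+	/* ZIO_FLAG_RAW requests the raw, possibly compressed, on-disk bytes. */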
+ boolean_t compressed_read = (zio_flags & ZIO_FLAG_RAW) != 0;
int rc = 0;
ASSERT(!BP_IS_EMBEDDED(bp) ||
@@ -4235,12 +5154,43 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
hdr = buf_hash_find(guid, bp, &hash_lock);
}
- if (hdr != NULL && HDR_HAS_L1HDR(hdr) && hdr->b_l1hdr.b_datacnt > 0) {
-
+ if (hdr != NULL && HDR_HAS_L1HDR(hdr) && hdr->b_l1hdr.b_pabd != NULL) {
+ arc_buf_t *buf = NULL;
*arc_flags |= ARC_FLAG_CACHED;
if (HDR_IO_IN_PROGRESS(hdr)) {
+ if ((hdr->b_flags & ARC_FLAG_PRIO_ASYNC_READ) &&
+ priority == ZIO_PRIORITY_SYNC_READ) {
+ /*
+ * This sync read must wait for an
+ * in-progress async read (e.g. a predictive
+ * prefetch). Async reads are queued
+ * separately at the vdev_queue layer, so
+ * this is a form of priority inversion.
+ * Ideally, we would "inherit" the demand
+ * i/o's priority by moving the i/o from
+ * the async queue to the synchronous queue,
+ * but there is currently no mechanism to do
+ * so. Track this so that we can evaluate
+ * the magnitude of this potential performance
+ * problem.
+ *
+ * Note that if the prefetch i/o is already
+ * active (has been issued to the device),
+ * the prefetch improved performance, because
+ * we issued it sooner than we would have
+ * without the prefetch.
+ */
+ DTRACE_PROBE1(arc__sync__wait__for__async,
+ arc_buf_hdr_t *, hdr);
+ ARCSTAT_BUMP(arcstat_sync_wait_for_async);
+ }
+ if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
+ arc_hdr_clear_flags(hdr,
+ ARC_FLAG_PREDICTIVE_PREFETCH);
+ }
+
if (*arc_flags & ARC_FLAG_WAIT) {
cv_wait(&hdr->b_l1hdr.b_cv, hash_lock);
mutex_exit(hash_lock);
@@ -4249,20 +5199,20 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
ASSERT(*arc_flags & ARC_FLAG_NOWAIT);
if (done) {
- arc_callback_t *acb = NULL;
+ arc_callback_t *acb = NULL;
acb = kmem_zalloc(sizeof (arc_callback_t),
KM_SLEEP);
acb->acb_done = done;
acb->acb_private = private;
+ acb->acb_compressed = compressed_read;
if (pio != NULL)
acb->acb_zio_dummy = zio_null(pio,
spa, NULL, NULL, NULL, zio_flags);
- ASSERT(acb->acb_done != NULL);
+ ASSERT3P(acb->acb_done, !=, NULL);
acb->acb_next = hdr->b_l1hdr.b_acb;
hdr->b_l1hdr.b_acb = acb;
- add_reference(hdr, hash_lock, private);
mutex_exit(hash_lock);
goto out;
}
@@ -4274,32 +5224,33 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
hdr->b_l1hdr.b_state == arc_mfu);
if (done) {
- add_reference(hdr, hash_lock, private);
- /*
- * If this block is already in use, create a new
- * copy of the data so that we will be guaranteed
- * that arc_release() will always succeed.
- */
- buf = hdr->b_l1hdr.b_buf;
- ASSERT(buf);
- ASSERT(buf->b_data);
- if (HDR_BUF_AVAILABLE(hdr)) {
- ASSERT(buf->b_efunc == NULL);
- hdr->b_flags &= ~ARC_FLAG_BUF_AVAILABLE;
- } else {
- buf = arc_buf_clone(buf);
+ if (hdr->b_flags & ARC_FLAG_PREDICTIVE_PREFETCH) {
+ /*
+ * This is a demand read which does not have to
+ * wait for i/o because we did a predictive
+ * prefetch i/o for it, which has completed.
+ */
+ DTRACE_PROBE1(
+ arc__demand__hit__predictive__prefetch,
+ arc_buf_hdr_t *, hdr);
+ ARCSTAT_BUMP(
+ arcstat_demand_hit_predictive_prefetch);
+ arc_hdr_clear_flags(hdr,
+ ARC_FLAG_PREDICTIVE_PREFETCH);
}
+ ASSERT(!BP_IS_EMBEDDED(bp) || !BP_IS_HOLE(bp));
+ /* Get a buf with the desired data in it. */
+ VERIFY0(arc_buf_alloc_impl(hdr, private,
+ compressed_read, B_TRUE, &buf));
} else if (*arc_flags & ARC_FLAG_PREFETCH &&
refcount_count(&hdr->b_l1hdr.b_refcnt) == 0) {
- hdr->b_flags |= ARC_FLAG_PREFETCH;
+ arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
}
DTRACE_PROBE1(arc__hit, arc_buf_hdr_t *, hdr);
arc_access(hdr, hash_lock);
if (*arc_flags & ARC_FLAG_L2CACHE)
- hdr->b_flags |= ARC_FLAG_L2CACHE;
- if (*arc_flags & ARC_FLAG_L2COMPRESS)
- hdr->b_flags |= ARC_FLAG_L2COMPRESS;
+ arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE);
mutex_exit(hash_lock);
ARCSTAT_BUMP(arcstat_hits);
ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr),
@@ -4309,20 +5260,19 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
if (done)
done(NULL, buf, private);
} else {
- uint64_t size = BP_GET_LSIZE(bp);
+ uint64_t lsize = BP_GET_LSIZE(bp);
+ uint64_t psize = BP_GET_PSIZE(bp);
arc_callback_t *acb;
vdev_t *vd = NULL;
uint64_t addr = 0;
boolean_t devw = B_FALSE;
- enum zio_compress b_compress = ZIO_COMPRESS_OFF;
- int32_t b_asize = 0;
+ uint64_t size;
/*
* Gracefully handle a damaged logical block size as a
* checksum error.
*/
- if (size > spa_maxblocksize(spa)) {
- ASSERT3P(buf, ==, NULL);
+ if (lsize > spa_maxblocksize(spa)) {
rc = SET_ERROR(ECKSUM);
goto out;
}
@@ -4331,8 +5281,9 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
/* this block is not in the cache */
arc_buf_hdr_t *exists = NULL;
arc_buf_contents_t type = BP_GET_BUFC_TYPE(bp);
- buf = arc_buf_alloc(spa, size, private, type);
- hdr = buf->b_hdr;
+ hdr = arc_hdr_alloc(spa_load_guid(spa), psize, lsize,
+ BP_GET_COMPRESS(bp), type);
+
if (!BP_IS_EMBEDDED(bp)) {
hdr->b_dva = *BP_IDENTITY(bp);
hdr->b_birth = BP_PHYSICAL_BIRTH(bp);
@@ -4342,22 +5293,9 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
/* somebody beat us to the hash insert */
mutex_exit(hash_lock);
buf_discard_identity(hdr);
- (void) arc_buf_remove_ref(buf, private);
+ arc_hdr_destroy(hdr);
goto top; /* restart the IO request */
}
-
- /* if this is a prefetch, we don't have a reference */
- if (*arc_flags & ARC_FLAG_PREFETCH) {
- (void) remove_reference(hdr, hash_lock,
- private);
- hdr->b_flags |= ARC_FLAG_PREFETCH;
- }
- if (*arc_flags & ARC_FLAG_L2CACHE)
- hdr->b_flags |= ARC_FLAG_L2CACHE;
- if (*arc_flags & ARC_FLAG_L2COMPRESS)
- hdr->b_flags |= ARC_FLAG_L2COMPRESS;
- if (BP_GET_LEVEL(bp) > 0)
- hdr->b_flags |= ARC_FLAG_INDIRECT;
} else {
/*
* This block is in the ghost cache. If it was L2-only
@@ -4369,49 +5307,62 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
hdr_full_cache);
}
+ ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
ASSERT(GHOST_STATE(hdr->b_l1hdr.b_state));
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
ASSERT(refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
ASSERT3P(hdr->b_l1hdr.b_buf, ==, NULL);
+ ASSERT3P(hdr->b_l1hdr.b_freeze_cksum, ==, NULL);
- /* if this is a prefetch, we don't have a reference */
- if (*arc_flags & ARC_FLAG_PREFETCH)
- hdr->b_flags |= ARC_FLAG_PREFETCH;
- else
- add_reference(hdr, hash_lock, private);
- if (*arc_flags & ARC_FLAG_L2CACHE)
- hdr->b_flags |= ARC_FLAG_L2CACHE;
- if (*arc_flags & ARC_FLAG_L2COMPRESS)
- hdr->b_flags |= ARC_FLAG_L2COMPRESS;
- buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
- buf->b_hdr = hdr;
- buf->b_data = NULL;
- buf->b_efunc = NULL;
- buf->b_private = NULL;
- buf->b_next = NULL;
- hdr->b_l1hdr.b_buf = buf;
- ASSERT0(hdr->b_l1hdr.b_datacnt);
- hdr->b_l1hdr.b_datacnt = 1;
- arc_get_data_buf(buf);
+ /*
+ * This is a delicate dance that we play here.
+ * This hdr is in the ghost list so we access it
+ * to move it out of the ghost list before we
+ * initiate the read. If it's a prefetch then
+ * it won't have a callback so we'll remove the
+ * reference that arc_buf_alloc_impl() created. We
+ * do this after we've called arc_access() to
+ * avoid hitting an assert in remove_reference().
+ */
arc_access(hdr, hash_lock);
+ arc_hdr_alloc_pabd(hdr);
}
+ ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+ size = arc_hdr_size(hdr);
+ /*
+		 * If compression is enabled on the hdr, then we will do
+		 * RAW I/O and store the compressed data in the hdr's
+ * data block. Otherwise, the hdr's data block will contain
+ * the uncompressed data.
+ */
+ if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) {
+ zio_flags |= ZIO_FLAG_RAW;
+ }
+
+ if (*arc_flags & ARC_FLAG_PREFETCH)
+ arc_hdr_set_flags(hdr, ARC_FLAG_PREFETCH);
+ if (*arc_flags & ARC_FLAG_L2CACHE)
+ arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE);
+ if (BP_GET_LEVEL(bp) > 0)
+ arc_hdr_set_flags(hdr, ARC_FLAG_INDIRECT);
+ if (*arc_flags & ARC_FLAG_PREDICTIVE_PREFETCH)
+ arc_hdr_set_flags(hdr, ARC_FLAG_PREDICTIVE_PREFETCH);
ASSERT(!GHOST_STATE(hdr->b_l1hdr.b_state));
acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP);
acb->acb_done = done;
acb->acb_private = private;
+ acb->acb_compressed = compressed_read;
- ASSERT(hdr->b_l1hdr.b_acb == NULL);
+ ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
hdr->b_l1hdr.b_acb = acb;
- hdr->b_flags |= ARC_FLAG_IO_IN_PROGRESS;
+ arc_hdr_set_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
if (HDR_HAS_L2HDR(hdr) &&
(vd = hdr->b_l2hdr.b_dev->l2ad_vdev) != NULL) {
devw = hdr->b_l2hdr.b_dev->l2ad_writing;
addr = hdr->b_l2hdr.b_daddr;
- b_compress = hdr->b_l2hdr.b_compress;
- b_asize = hdr->b_l2hdr.b_asize;
/*
* Lock out device removal.
*/
@@ -4420,6 +5371,11 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
vd = NULL;
}
+ if (priority == ZIO_PRIORITY_ASYNC_READ)
+ arc_hdr_set_flags(hdr, ARC_FLAG_PRIO_ASYNC_READ);
+ else
+ arc_hdr_clear_flags(hdr, ARC_FLAG_PRIO_ASYNC_READ);
+
if (hash_lock != NULL)
mutex_exit(hash_lock);
@@ -4427,9 +5383,10 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
* At this point, we have a level 1 cache miss. Try again in
* L2ARC if possible.
*/
- ASSERT3U(hdr->b_size, ==, size);
+ ASSERT3U(HDR_GET_LSIZE(hdr), ==, lsize);
+
DTRACE_PROBE4(arc__miss, arc_buf_hdr_t *, hdr, blkptr_t *, bp,
- uint64_t, size, zbookmark_phys_t *, zb);
+ uint64_t, lsize, zbookmark_phys_t *, zb);
ARCSTAT_BUMP(arcstat_misses);
ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr),
demand, prefetch, !HDR_ISTYPE_METADATA(hdr),
@@ -4449,6 +5406,8 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
!HDR_L2_WRITING(hdr) && !HDR_L2_EVICTED(hdr) &&
!(l2arc_noprefetch && HDR_PREFETCH(hdr))) {
l2arc_read_callback_t *cb;
+ abd_t *abd;
+ uint64_t asize;
DTRACE_PROBE1(l2arc__hit, arc_buf_hdr_t *, hdr);
ARCSTAT_BUMP(arcstat_l2_hits);
@@ -4456,15 +5415,22 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
cb = kmem_zalloc(sizeof (l2arc_read_callback_t),
KM_SLEEP);
- cb->l2rcb_buf = buf;
- cb->l2rcb_spa = spa;
+ cb->l2rcb_hdr = hdr;
cb->l2rcb_bp = *bp;
cb->l2rcb_zb = *zb;
cb->l2rcb_flags = zio_flags;
- cb->l2rcb_compress = b_compress;
+
+ asize = vdev_psize_to_asize(vd, size);
+ if (asize != size) {
+ abd = abd_alloc_for_io(asize,
+ HDR_ISTYPE_METADATA(hdr));
+ cb->l2rcb_abd = abd;
+ } else {
+ abd = hdr->b_l1hdr.b_pabd;
+ }
ASSERT(addr >= VDEV_LABEL_START_SIZE &&
- addr + size < vd->vdev_psize -
+ addr + asize <= vd->vdev_psize -
VDEV_LABEL_END_SIZE);
/*
@@ -4473,26 +5439,20 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
* Issue a null zio if the underlying buffer
* was squashed to zero size by compression.
*/
- if (b_compress == ZIO_COMPRESS_EMPTY) {
- rzio = zio_null(pio, spa, vd,
- l2arc_read_done, cb,
- zio_flags | ZIO_FLAG_DONT_CACHE |
- ZIO_FLAG_CANFAIL |
- ZIO_FLAG_DONT_PROPAGATE |
- ZIO_FLAG_DONT_RETRY);
- } else {
- rzio = zio_read_phys(pio, vd, addr,
- b_asize, buf->b_data,
- ZIO_CHECKSUM_OFF,
- l2arc_read_done, cb, priority,
- zio_flags | ZIO_FLAG_DONT_CACHE |
- ZIO_FLAG_CANFAIL |
- ZIO_FLAG_DONT_PROPAGATE |
- ZIO_FLAG_DONT_RETRY, B_FALSE);
- }
+ ASSERT3U(HDR_GET_COMPRESS(hdr), !=,
+ ZIO_COMPRESS_EMPTY);
+ rzio = zio_read_phys(pio, vd, addr,
+ asize, abd,
+ ZIO_CHECKSUM_OFF,
+ l2arc_read_done, cb, priority,
+ zio_flags | ZIO_FLAG_DONT_CACHE |
+ ZIO_FLAG_CANFAIL |
+ ZIO_FLAG_DONT_PROPAGATE |
+ ZIO_FLAG_DONT_RETRY, B_FALSE);
+
DTRACE_PROBE2(l2arc__read, vdev_t *, vd,
zio_t *, rzio);
- ARCSTAT_INCR(arcstat_l2_read_bytes, b_asize);
+ ARCSTAT_INCR(arcstat_l2_read_bytes, size);
if (*arc_flags & ARC_FLAG_NOWAIT) {
zio_nowait(rzio);
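
For concreteness, here is why the hunk above sometimes reads through a temporary ABD: the allocated size on the cache device is the header's physical size rounded up to the device sector size, and when the two differ the read cannot land directly in b_pabd. A self-contained sketch, assuming 4 KiB sectors (ashift = 12); the rounding macro is re-derived here in the same form as ZFS's P2ROUNDUP:

    #include <stdint.h>
    #include <stdio.h>

    /* Round x up to a power-of-two alignment, as P2ROUNDUP does. */
    #define P2ROUNDUP(x, align)  (-(-(x) & -(align)))

    int
    main(void)
    {
        uint64_t ashift = 12;    /* assumed: 4 KiB sectors */
        uint64_t psize = 5120;   /* e.g. a 5 KiB lz4-compressed block */
        uint64_t asize = P2ROUNDUP(psize, 1ULL << ashift);

        /* asize == 8192 != psize, so a scratch ABD would be needed. */
        printf("psize=%llu asize=%llu need_scratch=%d\n",
            (unsigned long long)psize, (unsigned long long)asize,
            asize != psize);
        return (0);
    }
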
@@ -4522,8 +5482,8 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, arc_done_func_t *done,
}
}
- rzio = zio_read(pio, spa, bp, buf->b_data, size,
- arc_read_done, buf, priority, zio_flags, zb);
+ rzio = zio_read(pio, spa, bp, hdr->b_l1hdr.b_pabd, size,
+ arc_read_done, hdr, priority, zio_flags, zb);
if (*arc_flags & ARC_FLAG_WAIT) {
rc = zio_wait(rzio);
@@ -4576,20 +5536,6 @@ arc_remove_prune_callback(arc_prune_t *p)
kmem_free(p, sizeof (*p));
}
-void
-arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private)
-{
- ASSERT(buf->b_hdr != NULL);
- ASSERT(buf->b_hdr->b_l1hdr.b_state != arc_anon);
- ASSERT(!refcount_is_zero(&buf->b_hdr->b_l1hdr.b_refcnt) ||
- func == NULL);
- ASSERT(buf->b_efunc == NULL);
- ASSERT(!HDR_BUF_AVAILABLE(buf->b_hdr));
-
- buf->b_efunc = func;
- buf->b_private = private;
-}
-
/*
* Notify the arc that a block was freed, and thus will never be used again.
*/
@@ -4605,85 +5551,38 @@ arc_freed(spa_t *spa, const blkptr_t *bp)
hdr = buf_hash_find(guid, bp, &hash_lock);
if (hdr == NULL)
return;
- if (HDR_BUF_AVAILABLE(hdr)) {
- arc_buf_t *buf = hdr->b_l1hdr.b_buf;
- add_reference(hdr, hash_lock, FTAG);
- hdr->b_flags &= ~ARC_FLAG_BUF_AVAILABLE;
- mutex_exit(hash_lock);
- arc_release(buf, FTAG);
- (void) arc_buf_remove_ref(buf, FTAG);
- } else {
+ /*
+ * We might be trying to free a block that is still doing I/O
+ * (i.e. prefetch) or has a reference (i.e. a dedup-ed,
+ * dmu_sync-ed block). If this block is being prefetched, then it
+ * would still have the ARC_FLAG_IO_IN_PROGRESS flag set on the hdr
+ * until the I/O completes. A block may also have a reference if it is
+	 * part of a dedup-ed, dmu_sync-ed write. The dmu_sync() function would
+ * have written the new block to its final resting place on disk but
+ * without the dedup flag set. This would have left the hdr in the MRU
+ * state and discoverable. When the txg finally syncs it detects that
+ * the block was overridden in open context and issues an override I/O.
+ * Since this is a dedup block, the override I/O will determine if the
+ * block is already in the DDT. If so, then it will replace the io_bp
+ * with the bp from the DDT and allow the I/O to finish. When the I/O
+ * reaches the done callback, dbuf_write_override_done, it will
+ * check to see if the io_bp and io_bp_override are identical.
+ * If they are not, then it indicates that the bp was replaced with
+ * the bp in the DDT and the override bp is freed. This allows
+ * us to arrive here with a reference on a block that is being
+ * freed. So if we have an I/O in progress, or a reference to
+ * this hdr, then we don't destroy the hdr.
+ */
+ if (!HDR_HAS_L1HDR(hdr) || (!HDR_IO_IN_PROGRESS(hdr) &&
+ refcount_is_zero(&hdr->b_l1hdr.b_refcnt))) {
+ arc_change_state(arc_anon, hdr, hash_lock);
+ arc_hdr_destroy(hdr);
mutex_exit(hash_lock);
- }
-
-}
-
-/*
- * Clear the user eviction callback set by arc_set_callback(), first calling
- * it if it exists. Because the presence of a callback keeps an arc_buf cached
- * clearing the callback may result in the arc_buf being destroyed. However,
- * it will not result in the *last* arc_buf being destroyed, hence the data
- * will remain cached in the ARC. We make a copy of the arc buffer here so
- * that we can process the callback without holding any locks.
- *
- * It's possible that the callback is already in the process of being cleared
- * by another thread. In this case we can not clear the callback.
- *
- * Returns B_TRUE if the callback was successfully called and cleared.
- */
-boolean_t
-arc_clear_callback(arc_buf_t *buf)
-{
- arc_buf_hdr_t *hdr;
- kmutex_t *hash_lock;
- arc_evict_func_t *efunc = buf->b_efunc;
- void *private = buf->b_private;
-
- mutex_enter(&buf->b_evict_lock);
- hdr = buf->b_hdr;
- if (hdr == NULL) {
- /*
- * We are in arc_do_user_evicts().
- */
- ASSERT(buf->b_data == NULL);
- mutex_exit(&buf->b_evict_lock);
- return (B_FALSE);
- } else if (buf->b_data == NULL) {
- /*
- * We are on the eviction list; process this buffer now
- * but let arc_do_user_evicts() do the reaping.
- */
- buf->b_efunc = NULL;
- mutex_exit(&buf->b_evict_lock);
- VERIFY0(efunc(private));
- return (B_TRUE);
- }
- hash_lock = HDR_LOCK(hdr);
- mutex_enter(hash_lock);
- hdr = buf->b_hdr;
- ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
-
- ASSERT3U(refcount_count(&hdr->b_l1hdr.b_refcnt), <,
- hdr->b_l1hdr.b_datacnt);
- ASSERT(hdr->b_l1hdr.b_state == arc_mru ||
- hdr->b_l1hdr.b_state == arc_mfu);
-
- buf->b_efunc = NULL;
- buf->b_private = NULL;
-
- if (hdr->b_l1hdr.b_datacnt > 1) {
- mutex_exit(&buf->b_evict_lock);
- arc_buf_destroy(buf, TRUE);
} else {
- ASSERT(buf == hdr->b_l1hdr.b_buf);
- hdr->b_flags |= ARC_FLAG_BUF_AVAILABLE;
- mutex_exit(&buf->b_evict_lock);
+ mutex_exit(hash_lock);
}
- mutex_exit(hash_lock);
- VERIFY0(efunc(private));
- return (B_TRUE);
}
/*
@@ -4719,16 +5618,19 @@ arc_release(arc_buf_t *buf, void *tag)
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
ASSERT(!HDR_IN_HASH_TABLE(hdr));
ASSERT(!HDR_HAS_L2HDR(hdr));
- ASSERT(BUF_EMPTY(hdr));
+ ASSERT(HDR_EMPTY(hdr));
- ASSERT3U(hdr->b_l1hdr.b_datacnt, ==, 1);
+ ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1);
ASSERT3S(refcount_count(&hdr->b_l1hdr.b_refcnt), ==, 1);
ASSERT(!list_link_active(&hdr->b_l1hdr.b_arc_node));
- ASSERT3P(buf->b_efunc, ==, NULL);
- ASSERT3P(buf->b_private, ==, NULL);
-
hdr->b_l1hdr.b_arc_access = 0;
+
+ /*
+ * If the buf is being overridden then it may already
+ * have a hdr that is not empty.
+ */
+ buf_discard_identity(hdr);
arc_buf_thaw(buf);
return;
@@ -4747,7 +5649,7 @@ arc_release(arc_buf_t *buf, void *tag)
ASSERT3P(state, !=, arc_anon);
/* this buffer is not on any list */
- ASSERT(refcount_count(&hdr->b_l1hdr.b_refcnt) > 0);
+ ASSERT3S(refcount_count(&hdr->b_l1hdr.b_refcnt), >, 0);
if (HDR_HAS_L2HDR(hdr)) {
mutex_enter(&hdr->b_l2hdr.b_dev->l2ad_mtx);
@@ -4769,79 +5671,118 @@ arc_release(arc_buf_t *buf, void *tag)
/*
* Do we have more than one buf?
*/
- if (hdr->b_l1hdr.b_datacnt > 1) {
+ if (hdr->b_l1hdr.b_bufcnt > 1) {
arc_buf_hdr_t *nhdr;
- arc_buf_t **bufp;
- uint64_t blksz = hdr->b_size;
uint64_t spa = hdr->b_spa;
+ uint64_t psize = HDR_GET_PSIZE(hdr);
+ uint64_t lsize = HDR_GET_LSIZE(hdr);
+ enum zio_compress compress = HDR_GET_COMPRESS(hdr);
arc_buf_contents_t type = arc_buf_type(hdr);
- uint32_t flags = hdr->b_flags;
+ VERIFY3U(hdr->b_type, ==, type);
ASSERT(hdr->b_l1hdr.b_buf != buf || buf->b_next != NULL);
+ (void) remove_reference(hdr, hash_lock, tag);
+
+ if (arc_buf_is_shared(buf) && !ARC_BUF_COMPRESSED(buf)) {
+ ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf);
+ ASSERT(ARC_BUF_LAST(buf));
+ }
+
/*
* Pull the data off of this hdr and attach it to
- * a new anonymous hdr.
+ * a new anonymous hdr. Also find the last buffer
+ * in the hdr's buffer list.
*/
- (void) remove_reference(hdr, hash_lock, tag);
- bufp = &hdr->b_l1hdr.b_buf;
- while (*bufp != buf)
- bufp = &(*bufp)->b_next;
- *bufp = buf->b_next;
- buf->b_next = NULL;
+ arc_buf_t *lastbuf = arc_buf_remove(hdr, buf);
+ ASSERT3P(lastbuf, !=, NULL);
+
+ /*
+ * If the current arc_buf_t and the hdr are sharing their data
+ * buffer, then we must stop sharing that block.
+ */
+ if (arc_buf_is_shared(buf)) {
+ ASSERT3P(hdr->b_l1hdr.b_buf, !=, buf);
+ VERIFY(!arc_buf_is_shared(lastbuf));
+
+ /*
+ * First, sever the block sharing relationship between
+ * buf and the arc_buf_hdr_t.
+ */
+ arc_unshare_buf(hdr, buf);
+ /*
+ * Now we need to recreate the hdr's b_pabd. Since we
+ * have lastbuf handy, we try to share with it, but if
+ * we can't then we allocate a new b_pabd and copy the
+ * data from buf into it.
+ */
+ if (arc_can_share(hdr, lastbuf)) {
+ arc_share_buf(hdr, lastbuf);
+ } else {
+ arc_hdr_alloc_pabd(hdr);
+ abd_copy_from_buf(hdr->b_l1hdr.b_pabd,
+ buf->b_data, psize);
+ }
+ VERIFY3P(lastbuf->b_data, !=, NULL);
+ } else if (HDR_SHARED_DATA(hdr)) {
+ /*
+ * Uncompressed shared buffers are always at the end
+ * of the list. Compressed buffers don't have the
+ * same requirements. This makes it hard to
+		 * simply assert that the lastbuf is shared, so
+ * we rely on the hdr's compression flags to determine
+ * if we have a compressed, shared buffer.
+ */
+ ASSERT(arc_buf_is_shared(lastbuf) ||
+ HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF);
+ ASSERT(!ARC_BUF_SHARED(buf));
+ }
+ ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
ASSERT3P(state, !=, arc_l2c_only);
- (void) refcount_remove_many(
- &state->arcs_size, hdr->b_size, buf);
+ (void) refcount_remove_many(&state->arcs_size,
+ arc_buf_size(buf), buf);
if (refcount_is_zero(&hdr->b_l1hdr.b_refcnt)) {
- uint64_t *size;
-
ASSERT3P(state, !=, arc_l2c_only);
- size = &state->arcs_lsize[type];
- ASSERT3U(*size, >=, hdr->b_size);
- atomic_add_64(size, -hdr->b_size);
+ (void) refcount_remove_many(&state->arcs_esize[type],
+ arc_buf_size(buf), buf);
}
- /*
- * We're releasing a duplicate user data buffer, update
- * our statistics accordingly.
- */
- if (HDR_ISTYPE_DATA(hdr)) {
- ARCSTAT_BUMPDOWN(arcstat_duplicate_buffers);
- ARCSTAT_INCR(arcstat_duplicate_buffers_size,
- -hdr->b_size);
- }
- hdr->b_l1hdr.b_datacnt -= 1;
+ hdr->b_l1hdr.b_bufcnt -= 1;
arc_cksum_verify(buf);
arc_buf_unwatch(buf);
+ /* if this is the last uncompressed buf free the checksum */
+ if (!arc_hdr_has_uncompressed_buf(hdr))
+ arc_cksum_free(hdr);
+
mutex_exit(hash_lock);
- nhdr = kmem_cache_alloc(hdr_full_cache, KM_PUSHPAGE);
- nhdr->b_size = blksz;
- nhdr->b_spa = spa;
+ /*
+ * Allocate a new hdr. The new hdr will contain a b_pabd
+ * buffer which will be freed in arc_write().
+ */
+ nhdr = arc_hdr_alloc(spa, psize, lsize, compress, type);
+ ASSERT3P(nhdr->b_l1hdr.b_buf, ==, NULL);
+ ASSERT0(nhdr->b_l1hdr.b_bufcnt);
+ ASSERT0(refcount_count(&nhdr->b_l1hdr.b_refcnt));
+ VERIFY3U(nhdr->b_type, ==, type);
+ ASSERT(!HDR_SHARED_DATA(nhdr));
+ nhdr->b_l1hdr.b_buf = buf;
+ nhdr->b_l1hdr.b_bufcnt = 1;
nhdr->b_l1hdr.b_mru_hits = 0;
nhdr->b_l1hdr.b_mru_ghost_hits = 0;
nhdr->b_l1hdr.b_mfu_hits = 0;
nhdr->b_l1hdr.b_mfu_ghost_hits = 0;
nhdr->b_l1hdr.b_l2_hits = 0;
- nhdr->b_flags = flags & ARC_FLAG_L2_WRITING;
- nhdr->b_flags |= arc_bufc_to_flags(type);
- nhdr->b_flags |= ARC_FLAG_HAS_L1HDR;
-
- nhdr->b_l1hdr.b_buf = buf;
- nhdr->b_l1hdr.b_datacnt = 1;
- nhdr->b_l1hdr.b_state = arc_anon;
- nhdr->b_l1hdr.b_arc_access = 0;
- nhdr->b_l1hdr.b_tmp_cdata = NULL;
- nhdr->b_freeze_cksum = NULL;
-
(void) refcount_add(&nhdr->b_l1hdr.b_refcnt, tag);
buf->b_hdr = nhdr;
+
mutex_exit(&buf->b_evict_lock);
- (void) refcount_add_many(&arc_anon->arcs_size, blksz, buf);
+ (void) refcount_add_many(&arc_anon->arcs_size,
+ HDR_GET_LSIZE(nhdr), buf);
} else {
mutex_exit(&buf->b_evict_lock);
ASSERT(refcount_count(&hdr->b_l1hdr.b_refcnt) == 1);
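
The share/unshare choreography in the bufcnt > 1 path above is easier to see in a toy model. In the sketch below all names are invented and bear no relation to the real arc_buf_hdr_t layout; it only preserves the patch's invariant that the hdr owns one data block (b_pabd) and at most one buf may alias it:

    #include <stdlib.h>
    #include <string.h>
    #include <assert.h>

    struct buf { char *data; int shared; };
    struct hdr { char *pabd; size_t size; };

    /* Sever the alias: the buf keeps the bytes, the hdr loses its pabd. */
    static void
    unshare(struct hdr *h, struct buf *b)
    {
        assert(b->shared && b->data == h->pabd);
        b->shared = 0;
        h->pabd = NULL;
    }

    /*
     * Rebuild the hdr's pabd: alias another buf when permitted (the real
     * test, arc_can_share(), checks size/compression compatibility), else
     * allocate and copy, like arc_hdr_alloc_pabd() + abd_copy_from_buf().
     */
    static void
    rebuild_pabd(struct hdr *h, struct buf *lastbuf, int can_share)
    {
        if (can_share) {
            h->pabd = lastbuf->data;
            lastbuf->shared = 1;
        } else {
            h->pabd = malloc(h->size);
            memcpy(h->pabd, lastbuf->data, h->size);
        }
    }

    int
    main(void)
    {
        struct hdr h = { NULL, 4 };
        struct buf a = { malloc(4), 0 }, b = { malloc(4), 0 };

        memcpy(a.data, "abc", 4);
        memcpy(b.data, "abc", 4);   /* every buf holds the same bytes */
        h.pabd = a.data;
        a.shared = 1;               /* hdr currently shares with a */

        unshare(&h, &a);            /* a is being released */
        rebuild_pabd(&h, &b, 1);    /* hdr now shares with b instead */
        assert(h.pabd == b.data && b.shared);
        return (0);
    }
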
@@ -4860,8 +5801,6 @@ arc_release(arc_buf_t *buf, void *tag)
buf_discard_identity(hdr);
arc_buf_thaw(buf);
}
- buf->b_efunc = NULL;
- buf->b_private = NULL;
}
int
@@ -4895,28 +5834,102 @@ arc_write_ready(zio_t *zio)
arc_write_callback_t *callback = zio->io_private;
arc_buf_t *buf = callback->awcb_buf;
arc_buf_hdr_t *hdr = buf->b_hdr;
+ uint64_t psize = BP_IS_HOLE(zio->io_bp) ? 0 : BP_GET_PSIZE(zio->io_bp);
+ enum zio_compress compress;
+ fstrans_cookie_t cookie = spl_fstrans_mark();
ASSERT(HDR_HAS_L1HDR(hdr));
ASSERT(!refcount_is_zero(&buf->b_hdr->b_l1hdr.b_refcnt));
- ASSERT(hdr->b_l1hdr.b_datacnt > 0);
+ ASSERT(hdr->b_l1hdr.b_bufcnt > 0);
+
+ /*
+ * If we're reexecuting this zio because the pool suspended, then
+ * cleanup any state that was previously set the first time the
+ * callback was invoked.
+ */
+ if (zio->io_flags & ZIO_FLAG_REEXECUTED) {
+ arc_cksum_free(hdr);
+ arc_buf_unwatch(buf);
+ if (hdr->b_l1hdr.b_pabd != NULL) {
+ if (arc_buf_is_shared(buf)) {
+ arc_unshare_buf(hdr, buf);
+ } else {
+ arc_hdr_free_pabd(hdr);
+ }
+ }
+ }
+ ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+ ASSERT(!HDR_SHARED_DATA(hdr));
+ ASSERT(!arc_buf_is_shared(buf));
+
callback->awcb_ready(zio, buf, callback->awcb_private);
+ if (HDR_IO_IN_PROGRESS(hdr))
+ ASSERT(zio->io_flags & ZIO_FLAG_REEXECUTED);
+
+ arc_cksum_compute(buf);
+ arc_hdr_set_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
+
+ if (BP_IS_HOLE(zio->io_bp) || BP_IS_EMBEDDED(zio->io_bp)) {
+ compress = ZIO_COMPRESS_OFF;
+ } else {
+ ASSERT3U(HDR_GET_LSIZE(hdr), ==, BP_GET_LSIZE(zio->io_bp));
+ compress = BP_GET_COMPRESS(zio->io_bp);
+ }
+ HDR_SET_PSIZE(hdr, psize);
+ arc_hdr_set_compress(hdr, compress);
+
/*
- * If the IO is already in progress, then this is a re-write
- * attempt, so we need to thaw and re-compute the cksum.
- * It is the responsibility of the callback to handle the
- * accounting for any re-write attempt.
+ * Fill the hdr with data. If the hdr is compressed, the data we want
+	 * is available from the zio; otherwise we can take it from the buf.
+ *
+ * We might be able to share the buf's data with the hdr here. However,
+ * doing so would cause the ARC to be full of linear ABDs if we write a
+ * lot of shareable data. As a compromise, we check whether scattered
+ * ABDs are allowed, and assume that if they are then the user wants
+ * the ARC to be primarily filled with them regardless of the data being
+ * written. Therefore, if they're allowed then we allocate one and copy
+ * the data into it; otherwise, we share the data directly if we can.
*/
- if (HDR_IO_IN_PROGRESS(hdr)) {
- mutex_enter(&hdr->b_l1hdr.b_freeze_lock);
- if (hdr->b_freeze_cksum != NULL) {
- kmem_free(hdr->b_freeze_cksum, sizeof (zio_cksum_t));
- hdr->b_freeze_cksum = NULL;
+ if (zfs_abd_scatter_enabled || !arc_can_share(hdr, buf)) {
+ arc_hdr_alloc_pabd(hdr);
+
+ /*
+ * Ideally, we would always copy the io_abd into b_pabd, but the
+		 * user may have disabled compressed ARC; thus we must check the
+ * hdr's compression setting rather than the io_bp's.
+ */
+ if (HDR_GET_COMPRESS(hdr) != ZIO_COMPRESS_OFF) {
+ ASSERT3U(BP_GET_COMPRESS(zio->io_bp), !=,
+ ZIO_COMPRESS_OFF);
+ ASSERT3U(psize, >, 0);
+
+ abd_copy(hdr->b_l1hdr.b_pabd, zio->io_abd, psize);
+ } else {
+ ASSERT3U(zio->io_orig_size, ==, arc_hdr_size(hdr));
+
+ abd_copy_from_buf(hdr->b_l1hdr.b_pabd, buf->b_data,
+ arc_buf_size(buf));
}
- mutex_exit(&hdr->b_l1hdr.b_freeze_lock);
+ } else {
+ ASSERT3P(buf->b_data, ==, abd_to_buf(zio->io_orig_abd));
+ ASSERT3U(zio->io_orig_size, ==, arc_buf_size(buf));
+ ASSERT3U(hdr->b_l1hdr.b_bufcnt, ==, 1);
+
+ arc_share_buf(hdr, buf);
}
- arc_cksum_compute(buf, B_FALSE);
- hdr->b_flags |= ARC_FLAG_IO_IN_PROGRESS;
+
+ arc_hdr_verify(hdr, zio->io_bp);
+ spl_fstrans_unmark(cookie);
+}
+
+static void
+arc_write_children_ready(zio_t *zio)
+{
+ arc_write_callback_t *callback = zio->io_private;
+ arc_buf_t *buf = callback->awcb_buf;
+
+ callback->awcb_children_ready(zio, buf, callback->awcb_private);
}
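
A compile-able sketch of the fill rule documented above arc_write_ready(). Only the meaning of the zfs_abd_scatter_enabled tunable is borrowed from the patch (it defaults to on in this release, to the best of my knowledge); everything else is invented:

    #include <stdio.h>

    enum fill { FILL_COPY_FROM_ZIO, FILL_COPY_FROM_BUF, FILL_SHARE };

    /* Mirrors the decision in arc_write_ready() (sketch only). */
    static enum fill
    choose_fill(int scatter_enabled, int can_share, int hdr_compressed)
    {
        if (scatter_enabled || !can_share)
            return (hdr_compressed ? FILL_COPY_FROM_ZIO
                : FILL_COPY_FROM_BUF);
        return (FILL_SHARE);
    }

    int
    main(void)
    {
        /* With scattered ABDs enabled, written data is always copied. */
        printf("%d\n", choose_fill(1, 1, 0)); /* FILL_COPY_FROM_BUF */
        printf("%d\n", choose_fill(0, 1, 0)); /* FILL_SHARE */
        return (0);
    }
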
/*
@@ -4938,9 +5951,11 @@ arc_write_done(zio_t *zio)
arc_buf_t *buf = callback->awcb_buf;
arc_buf_hdr_t *hdr = buf->b_hdr;
- ASSERT(hdr->b_l1hdr.b_acb == NULL);
+ ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
if (zio->io_error == 0) {
+ arc_hdr_verify(hdr, zio->io_bp);
+
if (BP_IS_HOLE(zio->io_bp) || BP_IS_EMBEDDED(zio->io_bp)) {
buf_discard_identity(hdr);
} else {
@@ -4948,7 +5963,7 @@ arc_write_done(zio_t *zio)
hdr->b_birth = BP_PHYSICAL_BIRTH(zio->io_bp);
}
} else {
- ASSERT(BUF_EMPTY(hdr));
+ ASSERT(HDR_EMPTY(hdr));
}
/*
@@ -4957,11 +5972,11 @@ arc_write_done(zio_t *zio)
* dva/birth/checksum. The buffer must therefore remain anonymous
* (and uncached).
*/
- if (!BUF_EMPTY(hdr)) {
+ if (!HDR_EMPTY(hdr)) {
arc_buf_hdr_t *exists;
kmutex_t *hash_lock;
- ASSERT(zio->io_error == 0);
+ ASSERT3U(zio->io_error, ==, 0);
arc_cksum_verify(buf);
@@ -4991,57 +6006,95 @@ arc_write_done(zio_t *zio)
(void *)hdr, (void *)exists);
} else {
/* Dedup */
- ASSERT(hdr->b_l1hdr.b_datacnt == 1);
+ ASSERT(hdr->b_l1hdr.b_bufcnt == 1);
ASSERT(hdr->b_l1hdr.b_state == arc_anon);
ASSERT(BP_GET_DEDUP(zio->io_bp));
ASSERT(BP_GET_LEVEL(zio->io_bp) == 0);
}
}
- hdr->b_flags &= ~ARC_FLAG_IO_IN_PROGRESS;
+ arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
/* if it's not anon, we are doing a scrub */
if (exists == NULL && hdr->b_l1hdr.b_state == arc_anon)
arc_access(hdr, hash_lock);
mutex_exit(hash_lock);
} else {
- hdr->b_flags &= ~ARC_FLAG_IO_IN_PROGRESS;
+ arc_hdr_clear_flags(hdr, ARC_FLAG_IO_IN_PROGRESS);
}
ASSERT(!refcount_is_zero(&hdr->b_l1hdr.b_refcnt));
callback->awcb_done(zio, buf, callback->awcb_private);
+ abd_put(zio->io_abd);
kmem_free(callback, sizeof (arc_write_callback_t));
}
zio_t *
arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
- blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, boolean_t l2arc_compress,
- const zio_prop_t *zp, arc_done_func_t *ready, arc_done_func_t *physdone,
+ blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc,
+ const zio_prop_t *zp, arc_done_func_t *ready,
+ arc_done_func_t *children_ready, arc_done_func_t *physdone,
arc_done_func_t *done, void *private, zio_priority_t priority,
int zio_flags, const zbookmark_phys_t *zb)
{
arc_buf_hdr_t *hdr = buf->b_hdr;
arc_write_callback_t *callback;
zio_t *zio;
+ zio_prop_t localprop = *zp;
- ASSERT(ready != NULL);
- ASSERT(done != NULL);
+ ASSERT3P(ready, !=, NULL);
+ ASSERT3P(done, !=, NULL);
ASSERT(!HDR_IO_ERROR(hdr));
ASSERT(!HDR_IO_IN_PROGRESS(hdr));
- ASSERT(hdr->b_l1hdr.b_acb == NULL);
- ASSERT(hdr->b_l1hdr.b_datacnt > 0);
+ ASSERT3P(hdr->b_l1hdr.b_acb, ==, NULL);
+ ASSERT3U(hdr->b_l1hdr.b_bufcnt, >, 0);
if (l2arc)
- hdr->b_flags |= ARC_FLAG_L2CACHE;
- if (l2arc_compress)
- hdr->b_flags |= ARC_FLAG_L2COMPRESS;
+ arc_hdr_set_flags(hdr, ARC_FLAG_L2CACHE);
+ if (ARC_BUF_COMPRESSED(buf)) {
+ /*
+ * We're writing a pre-compressed buffer. Make the
+ * compression algorithm requested by the zio_prop_t match
+ * the pre-compressed buffer's compression algorithm.
+ */
+ localprop.zp_compress = HDR_GET_COMPRESS(hdr);
+
+ ASSERT3U(HDR_GET_LSIZE(hdr), !=, arc_buf_size(buf));
+ zio_flags |= ZIO_FLAG_RAW;
+ }
callback = kmem_zalloc(sizeof (arc_write_callback_t), KM_SLEEP);
callback->awcb_ready = ready;
+ callback->awcb_children_ready = children_ready;
callback->awcb_physdone = physdone;
callback->awcb_done = done;
callback->awcb_private = private;
callback->awcb_buf = buf;
- zio = zio_write(pio, spa, txg, bp, buf->b_data, hdr->b_size, zp,
- arc_write_ready, arc_write_physdone, arc_write_done, callback,
+ /*
+	 * The hdr's b_pabd is now stale; free it now. A new data block
+ * will be allocated when the zio pipeline calls arc_write_ready().
+ */
+ if (hdr->b_l1hdr.b_pabd != NULL) {
+ /*
+ * If the buf is currently sharing the data block with
+ * the hdr then we need to break that relationship here.
+ * The hdr will remain with a NULL data pointer and the
+ * buf will take sole ownership of the block.
+ */
+ if (arc_buf_is_shared(buf)) {
+ arc_unshare_buf(hdr, buf);
+ } else {
+ arc_hdr_free_pabd(hdr);
+ }
+ VERIFY3P(buf->b_data, !=, NULL);
+ arc_hdr_set_compress(hdr, ZIO_COMPRESS_OFF);
+ }
+ ASSERT(!arc_buf_is_shared(buf));
+ ASSERT3P(hdr->b_l1hdr.b_pabd, ==, NULL);
+
+ zio = zio_write(pio, spa, txg, bp,
+ abd_get_from_buf(buf->b_data, HDR_GET_LSIZE(hdr)),
+ HDR_GET_LSIZE(hdr), arc_buf_size(buf), &localprop, arc_write_ready,
+ (children_ready != NULL) ? arc_write_children_ready : NULL,
+ arc_write_physdone, arc_write_done, callback,
priority, zio_flags, zb);
return (zio);
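
A worked size example (hypothetical numbers) for the pre-compressed-write branch above: for a compressed arc_buf_t, arc_buf_size() returns the physical (compressed) size while HDR_GET_LSIZE() is the logical size, which is why arc_write() passes both to zio_write() and asserts that they differ:

    #include <stdint.h>
    #include <assert.h>

    int
    main(void)
    {
        /* Hypothetical block: 128 KiB logical, lz4-compressed to 17 KiB. */
        uint64_t lsize = 131072;  /* what HDR_GET_LSIZE(hdr) would report */
        uint64_t psize = 17408;   /* what arc_buf_size(buf) would report */

        /* Matches ASSERT3U(HDR_GET_LSIZE(hdr), !=, arc_buf_size(buf)). */
        assert(lsize != psize);
        return (0);
    }
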
@@ -5051,28 +6104,29 @@ static int
arc_memory_throttle(uint64_t reserve, uint64_t txg)
{
#ifdef _KERNEL
- uint64_t available_memory = ptob(freemem);
+ uint64_t available_memory = arc_free_memory();
static uint64_t page_load = 0;
static uint64_t last_txg = 0;
-#ifdef __linux__
- pgcnt_t minfree = btop(arc_sys_free / 4);
+
+#if defined(_ILP32)
+ available_memory =
+ MIN(available_memory, vmem_size(heap_arena, VMEM_FREE));
#endif
- if (freemem > physmem * arc_lotsfree_percent / 100)
+ if (available_memory > arc_all_memory() * arc_lotsfree_percent / 100)
return (0);
if (txg > last_txg) {
last_txg = txg;
page_load = 0;
}
-
/*
* If we are in pageout, we know that memory is already tight,
* the arc is already going to be evicting, so we just want to
* continue to let page writes occur as quickly as possible.
*/
if (current_is_kswapd()) {
- if (page_load > MAX(ptob(minfree), available_memory) / 4) {
+ if (page_load > MAX(arc_sys_free / 4, available_memory) / 4) {
DMU_TX_STAT_BUMP(dmu_tx_memory_reclaim);
return (SET_ERROR(ERESTART));
}
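
Plugging hypothetical numbers into the kswapd throttle above: with arc_sys_free at 256 MiB and 64 MiB of free memory, page writes are pushed back with ERESTART once page_load exceeds 16 MiB. A self-contained check of that arithmetic:

    #include <stdint.h>
    #include <stdio.h>

    #define MAX(a, b)  ((a) > (b) ? (a) : (b))

    int
    main(void)
    {
        uint64_t mib = 1024 * 1024;
        uint64_t arc_sys_free = 256 * mib;      /* assumed tunable value */
        uint64_t available_memory = 64 * mib;   /* assumed free memory */

        /* Same expression as the + line in the hunk above. */
        uint64_t threshold = MAX(arc_sys_free / 4, available_memory) / 4;
        printf("throttle after page_load > %llu MiB\n",
            (unsigned long long)(threshold / mib)); /* 16 MiB */
        return (0);
    }
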
@@ -5122,6 +6176,10 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
* network delays from blocking transactions that are ready to be
* assigned to a txg.
*/
+
+ /* assert that it has not wrapped around */
+ ASSERT3S(atomic_add_64_nv(&arc_loaned_bytes, 0), >=, 0);
+
anon_size = MAX((int64_t)(refcount_count(&arc_anon->arcs_size) -
arc_loaned_bytes), 0);
@@ -5144,12 +6202,14 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
if (reserve + arc_tempreserve + anon_size > arc_c / 2 &&
anon_size > arc_c / 4) {
+ uint64_t meta_esize =
+ refcount_count(&arc_anon->arcs_esize[ARC_BUFC_METADATA]);
+ uint64_t data_esize =
+ refcount_count(&arc_anon->arcs_esize[ARC_BUFC_DATA]);
dprintf("failing, arc_tempreserve=%lluK anon_meta=%lluK "
"anon_data=%lluK tempreserve=%lluK arc_c=%lluK\n",
- arc_tempreserve>>10,
- arc_anon->arcs_lsize[ARC_BUFC_METADATA]>>10,
- arc_anon->arcs_lsize[ARC_BUFC_DATA]>>10,
- reserve>>10, arc_c>>10);
+ arc_tempreserve >> 10, meta_esize >> 10,
+ data_esize >> 10, reserve >> 10, arc_c >> 10);
DMU_TX_STAT_BUMP(dmu_tx_dirty_throttle);
return (SET_ERROR(ERESTART));
}
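
A numeric illustration of the anonymous-data throttle above, with hypothetical values (arc_c = 4 GiB): the reservation fails once reserve + arc_tempreserve + anon_size exceeds arc_c / 2 = 2 GiB while anon_size alone exceeds arc_c / 4 = 1 GiB:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t gib = 1024ULL * 1024 * 1024;
        uint64_t arc_c = 4 * gib;               /* assumed target size */
        uint64_t reserve = 256 * 1024 * 1024;   /* hypothetical */
        uint64_t tempreserve = 0, anon_size = 2 * gib;

        /* Same compound test as the hunk above. */
        int throttled = (reserve + tempreserve + anon_size > arc_c / 2 &&
            anon_size > arc_c / 4);
        printf("throttled = %d\n", throttled);  /* 1: would be ERESTART */
        return (0);
    }
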
@@ -5162,8 +6222,10 @@ arc_kstat_update_state(arc_state_t *state, kstat_named_t *size,
kstat_named_t *evict_data, kstat_named_t *evict_metadata)
{
size->value.ui64 = refcount_count(&state->arcs_size);
- evict_data->value.ui64 = state->arcs_lsize[ARC_BUFC_DATA];
- evict_metadata->value.ui64 = state->arcs_lsize[ARC_BUFC_METADATA];
+ evict_data->value.ui64 =
+ refcount_count(&state->arcs_esize[ARC_BUFC_DATA]);
+ evict_metadata->value.ui64 =
+ refcount_count(&state->arcs_esize[ARC_BUFC_METADATA]);
}
static int
@@ -5194,6 +6256,13 @@ arc_kstat_update(kstat_t *ksp, int rw)
&as->arcstat_mfu_ghost_size,
&as->arcstat_mfu_ghost_evictable_data,
&as->arcstat_mfu_ghost_evictable_metadata);
+
+ as->arcstat_memory_all_bytes.value.ui64 =
+ arc_all_memory();
+ as->arcstat_memory_free_bytes.value.ui64 =
+ arc_free_memory();
+ as->arcstat_memory_available_bytes.value.i64 =
+ arc_available_memory();
}
return (0);
@@ -5216,7 +6285,7 @@ arc_state_multilist_index_func(multilist_t *ml, void *obj)
* numbers using buf_hash below. So, as an added precaution,
* let's make sure we never add empty buffers to the arc lists.
*/
- ASSERT(!BUF_EMPTY(hdr));
+ ASSERT(!HDR_EMPTY(hdr));
/*
* The assumption here, is the hash value for a given
@@ -5242,14 +6311,20 @@ arc_state_multilist_index_func(multilist_t *ml, void *obj)
static void
arc_tuning_update(void)
{
+ uint64_t allmem = arc_all_memory();
+ unsigned long limit;
+
/* Valid range: 64M - <all physical memory> */
if ((zfs_arc_max) && (zfs_arc_max != arc_c_max) &&
- (zfs_arc_max > 64 << 20) && (zfs_arc_max < ptob(physmem)) &&
+ (zfs_arc_max > 64 << 20) && (zfs_arc_max < allmem) &&
(zfs_arc_max > arc_c_min)) {
arc_c_max = zfs_arc_max;
arc_c = arc_c_max;
arc_p = (arc_c >> 1);
- arc_meta_limit = MIN(arc_meta_limit, (3 * arc_c_max) / 4);
+ if (arc_meta_limit > arc_c_max)
+ arc_meta_limit = arc_c_max;
+ if (arc_dnode_limit > arc_meta_limit)
+ arc_dnode_limit = arc_meta_limit;
}
/* Valid range: 32M - <arc_c_max> */
@@ -5265,77 +6340,190 @@ arc_tuning_update(void)
(zfs_arc_meta_min >= 1ULL << SPA_MAXBLOCKSHIFT) &&
(zfs_arc_meta_min <= arc_c_max)) {
arc_meta_min = zfs_arc_meta_min;
- arc_meta_limit = MAX(arc_meta_limit, arc_meta_min);
+ if (arc_meta_limit < arc_meta_min)
+ arc_meta_limit = arc_meta_min;
+ if (arc_dnode_limit < arc_meta_min)
+ arc_dnode_limit = arc_meta_min;
}
/* Valid range: <arc_meta_min> - <arc_c_max> */
- if ((zfs_arc_meta_limit) && (zfs_arc_meta_limit != arc_meta_limit) &&
- (zfs_arc_meta_limit >= zfs_arc_meta_min) &&
- (zfs_arc_meta_limit <= arc_c_max))
- arc_meta_limit = zfs_arc_meta_limit;
+ limit = zfs_arc_meta_limit ? zfs_arc_meta_limit :
+ MIN(zfs_arc_meta_limit_percent, 100) * arc_c_max / 100;
+ if ((limit != arc_meta_limit) &&
+ (limit >= arc_meta_min) &&
+ (limit <= arc_c_max))
+ arc_meta_limit = limit;
+
+ /* Valid range: <arc_meta_min> - <arc_meta_limit> */
+ limit = zfs_arc_dnode_limit ? zfs_arc_dnode_limit :
+ MIN(zfs_arc_dnode_limit_percent, 100) * arc_meta_limit / 100;
+ if ((limit != arc_dnode_limit) &&
+ (limit >= arc_meta_min) &&
+ (limit <= arc_meta_limit))
+ arc_dnode_limit = limit;
/* Valid range: 1 - N */
if (zfs_arc_grow_retry)
arc_grow_retry = zfs_arc_grow_retry;
- /* Valid range: 1 - N */
- if (zfs_arc_shrink_shift) {
- arc_shrink_shift = zfs_arc_shrink_shift;
- arc_no_grow_shift = MIN(arc_no_grow_shift, arc_shrink_shift -1);
- }
+ /* Valid range: 1 - N */
+ if (zfs_arc_shrink_shift) {
+ arc_shrink_shift = zfs_arc_shrink_shift;
+		arc_no_grow_shift = MIN(arc_no_grow_shift, arc_shrink_shift - 1);
+ }
+
+ /* Valid range: 1 - N */
+ if (zfs_arc_p_min_shift)
+ arc_p_min_shift = zfs_arc_p_min_shift;
+
+ /* Valid range: 1 - N ticks */
+ if (zfs_arc_min_prefetch_lifespan)
+ arc_min_prefetch_lifespan = zfs_arc_min_prefetch_lifespan;
+
+ /* Valid range: 0 - 100 */
+ if ((zfs_arc_lotsfree_percent >= 0) &&
+ (zfs_arc_lotsfree_percent <= 100))
+ arc_lotsfree_percent = zfs_arc_lotsfree_percent;
+
+ /* Valid range: 0 - <all physical memory> */
+ if ((zfs_arc_sys_free) && (zfs_arc_sys_free != arc_sys_free))
+ arc_sys_free = MIN(MAX(zfs_arc_sys_free, 0), allmem);
+
+}
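
The percentage fallbacks above compose as follows. A sketch with an assumed arc_c_max of 16 GiB and assumed defaults for zfs_arc_meta_limit_percent (75) and zfs_arc_dnode_limit_percent (10); both percentages are treated as assumptions here:

    #include <stdint.h>
    #include <stdio.h>

    #define MIN(a, b)  ((a) < (b) ? (a) : (b))

    int
    main(void)
    {
        uint64_t gib = 1024ULL * 1024 * 1024;
        uint64_t arc_c_max = 16 * gib;                 /* assumed */
        unsigned long meta_pct = 75, dnode_pct = 10;   /* assumed defaults */

        /*
         * Same arithmetic as arc_tuning_update() when the byte tunables
         * (zfs_arc_meta_limit / zfs_arc_dnode_limit) are left at 0.
         */
        uint64_t meta_limit = MIN(meta_pct, 100) * arc_c_max / 100;
        uint64_t dnode_limit = MIN(dnode_pct, 100) * meta_limit / 100;

        printf("meta_limit=%llu GiB dnode_limit=%llu MiB\n",
            (unsigned long long)(meta_limit / gib),
            (unsigned long long)(dnode_limit / (1024 * 1024)));
        return (0);
    }
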
+
+static void
+arc_state_init(void)
+{
+ arc_anon = &ARC_anon;
+ arc_mru = &ARC_mru;
+ arc_mru_ghost = &ARC_mru_ghost;
+ arc_mfu = &ARC_mfu;
+ arc_mfu_ghost = &ARC_mfu_ghost;
+ arc_l2c_only = &ARC_l2c_only;
+
+ arc_mru->arcs_list[ARC_BUFC_METADATA] =
+ multilist_create(sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ arc_state_multilist_index_func);
+ arc_mru->arcs_list[ARC_BUFC_DATA] =
+ multilist_create(sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ arc_state_multilist_index_func);
+ arc_mru_ghost->arcs_list[ARC_BUFC_METADATA] =
+ multilist_create(sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ arc_state_multilist_index_func);
+ arc_mru_ghost->arcs_list[ARC_BUFC_DATA] =
+ multilist_create(sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ arc_state_multilist_index_func);
+ arc_mfu->arcs_list[ARC_BUFC_METADATA] =
+ multilist_create(sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ arc_state_multilist_index_func);
+ arc_mfu->arcs_list[ARC_BUFC_DATA] =
+ multilist_create(sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ arc_state_multilist_index_func);
+ arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA] =
+ multilist_create(sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ arc_state_multilist_index_func);
+ arc_mfu_ghost->arcs_list[ARC_BUFC_DATA] =
+ multilist_create(sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ arc_state_multilist_index_func);
+ arc_l2c_only->arcs_list[ARC_BUFC_METADATA] =
+ multilist_create(sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ arc_state_multilist_index_func);
+ arc_l2c_only->arcs_list[ARC_BUFC_DATA] =
+ multilist_create(sizeof (arc_buf_hdr_t),
+ offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
+ arc_state_multilist_index_func);
+
+ refcount_create(&arc_anon->arcs_esize[ARC_BUFC_METADATA]);
+ refcount_create(&arc_anon->arcs_esize[ARC_BUFC_DATA]);
+ refcount_create(&arc_mru->arcs_esize[ARC_BUFC_METADATA]);
+ refcount_create(&arc_mru->arcs_esize[ARC_BUFC_DATA]);
+ refcount_create(&arc_mru_ghost->arcs_esize[ARC_BUFC_METADATA]);
+ refcount_create(&arc_mru_ghost->arcs_esize[ARC_BUFC_DATA]);
+ refcount_create(&arc_mfu->arcs_esize[ARC_BUFC_METADATA]);
+ refcount_create(&arc_mfu->arcs_esize[ARC_BUFC_DATA]);
+ refcount_create(&arc_mfu_ghost->arcs_esize[ARC_BUFC_METADATA]);
+ refcount_create(&arc_mfu_ghost->arcs_esize[ARC_BUFC_DATA]);
+ refcount_create(&arc_l2c_only->arcs_esize[ARC_BUFC_METADATA]);
+ refcount_create(&arc_l2c_only->arcs_esize[ARC_BUFC_DATA]);
+
+ refcount_create(&arc_anon->arcs_size);
+ refcount_create(&arc_mru->arcs_size);
+ refcount_create(&arc_mru_ghost->arcs_size);
+ refcount_create(&arc_mfu->arcs_size);
+ refcount_create(&arc_mfu_ghost->arcs_size);
+ refcount_create(&arc_l2c_only->arcs_size);
- /* Valid range: 1 - N */
- if (zfs_arc_p_min_shift)
- arc_p_min_shift = zfs_arc_p_min_shift;
+ arc_anon->arcs_state = ARC_STATE_ANON;
+ arc_mru->arcs_state = ARC_STATE_MRU;
+ arc_mru_ghost->arcs_state = ARC_STATE_MRU_GHOST;
+ arc_mfu->arcs_state = ARC_STATE_MFU;
+ arc_mfu_ghost->arcs_state = ARC_STATE_MFU_GHOST;
+ arc_l2c_only->arcs_state = ARC_STATE_L2C_ONLY;
+}
- /* Valid range: 1 - N ticks */
- if (zfs_arc_min_prefetch_lifespan)
- arc_min_prefetch_lifespan = zfs_arc_min_prefetch_lifespan;
+static void
+arc_state_fini(void)
+{
+ refcount_destroy(&arc_anon->arcs_esize[ARC_BUFC_METADATA]);
+ refcount_destroy(&arc_anon->arcs_esize[ARC_BUFC_DATA]);
+ refcount_destroy(&arc_mru->arcs_esize[ARC_BUFC_METADATA]);
+ refcount_destroy(&arc_mru->arcs_esize[ARC_BUFC_DATA]);
+ refcount_destroy(&arc_mru_ghost->arcs_esize[ARC_BUFC_METADATA]);
+ refcount_destroy(&arc_mru_ghost->arcs_esize[ARC_BUFC_DATA]);
+ refcount_destroy(&arc_mfu->arcs_esize[ARC_BUFC_METADATA]);
+ refcount_destroy(&arc_mfu->arcs_esize[ARC_BUFC_DATA]);
+ refcount_destroy(&arc_mfu_ghost->arcs_esize[ARC_BUFC_METADATA]);
+ refcount_destroy(&arc_mfu_ghost->arcs_esize[ARC_BUFC_DATA]);
+ refcount_destroy(&arc_l2c_only->arcs_esize[ARC_BUFC_METADATA]);
+ refcount_destroy(&arc_l2c_only->arcs_esize[ARC_BUFC_DATA]);
- /* Valid range: 0 - 100 */
- if ((zfs_arc_lotsfree_percent >= 0) &&
- (zfs_arc_lotsfree_percent <= 100))
- arc_lotsfree_percent = zfs_arc_lotsfree_percent;
+ refcount_destroy(&arc_anon->arcs_size);
+ refcount_destroy(&arc_mru->arcs_size);
+ refcount_destroy(&arc_mru_ghost->arcs_size);
+ refcount_destroy(&arc_mfu->arcs_size);
+ refcount_destroy(&arc_mfu_ghost->arcs_size);
+ refcount_destroy(&arc_l2c_only->arcs_size);
- /* Valid range: 0 - <all physical memory> */
- if ((zfs_arc_sys_free) && (zfs_arc_sys_free != arc_sys_free))
- arc_sys_free = MIN(MAX(zfs_arc_sys_free, 0), ptob(physmem));
+ multilist_destroy(arc_mru->arcs_list[ARC_BUFC_METADATA]);
+ multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
+ multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_METADATA]);
+ multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]);
+ multilist_destroy(arc_mru->arcs_list[ARC_BUFC_DATA]);
+ multilist_destroy(arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
+ multilist_destroy(arc_mfu->arcs_list[ARC_BUFC_DATA]);
+ multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
+ multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]);
+ multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_DATA]);
+}
+uint64_t
+arc_target_bytes(void)
+{
+ return (arc_c);
}
void
arc_init(void)
{
- /*
- * allmem is "all memory that we could possibly use".
- */
-#ifdef _KERNEL
- uint64_t allmem = ptob(physmem);
-#else
- uint64_t allmem = (physmem * PAGESIZE) / 2;
-#endif
+ uint64_t percent, allmem = arc_all_memory();
mutex_init(&arc_reclaim_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&arc_reclaim_thread_cv, NULL, CV_DEFAULT, NULL);
cv_init(&arc_reclaim_waiters_cv, NULL, CV_DEFAULT, NULL);
- mutex_init(&arc_user_evicts_lock, NULL, MUTEX_DEFAULT, NULL);
- cv_init(&arc_user_evicts_cv, NULL, CV_DEFAULT, NULL);
-
/* Convert seconds to clock ticks */
arc_min_prefetch_lifespan = 1 * hz;
- /* Start out with 1/8 of all memory */
- arc_c = allmem / 8;
-
#ifdef _KERNEL
- /*
- * On architectures where the physical memory can be larger
- * than the addressable space (intel in 32-bit mode), we may
- * need to limit the cache to 1/8 of VM size.
- */
- arc_c = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8);
-
/*
* Register a shrinker to support synchronous (direct) memory
* reclaim from the arc. This is done to prevent kswapd from
@@ -5344,113 +6532,64 @@ arc_init(void)
spl_register_shrinker(&arc_shrinker);
/* Set to 1/64 of all memory or a minimum of 512K */
- arc_sys_free = MAX(ptob(physmem / 64), (512 * 1024));
+ arc_sys_free = MAX(allmem / 64, (512 * 1024));
arc_need_free = 0;
#endif
- /* Set min cache to allow safe operation of arc_adapt() */
- arc_c_min = 2ULL << SPA_MAXBLOCKSHIFT;
/* Set max to 1/2 of all memory */
arc_c_max = allmem / 2;
+#ifdef _KERNEL
+ /* Set min cache to 1/32 of all memory, or 32MB, whichever is more */
+ arc_c_min = MAX(allmem / 32, 2ULL << SPA_MAXBLOCKSHIFT);
+#else
+ /*
+ * In userland, there's only the memory pressure that we artificially
+ * create (see arc_available_memory()). Don't let arc_c get too
+ * small, because it can cause transactions to be larger than
+ * arc_c, causing arc_tempreserve_space() to fail.
+ */
+ arc_c_min = MAX(arc_c_max / 2, 2ULL << SPA_MAXBLOCKSHIFT);
+#endif
+
arc_c = arc_c_max;
arc_p = (arc_c >> 1);
+ arc_size = 0;
/* Set min to 1/2 of arc_c_min */
arc_meta_min = 1ULL << SPA_MAXBLOCKSHIFT;
/* Initialize maximum observed usage to zero */
arc_meta_max = 0;
- /* Set limit to 3/4 of arc_c_max with a floor of arc_meta_min */
- arc_meta_limit = MAX((3 * arc_c_max) / 4, arc_meta_min);
+ /*
+ * Set arc_meta_limit to a percent of arc_c_max with a floor of
+ * arc_meta_min, and a ceiling of arc_c_max.
+ */
+ percent = MIN(zfs_arc_meta_limit_percent, 100);
+ arc_meta_limit = MAX(arc_meta_min, (percent * arc_c_max) / 100);
+ percent = MIN(zfs_arc_dnode_limit_percent, 100);
+ arc_dnode_limit = (percent * arc_meta_limit) / 100;
/* Apply user specified tunings */
arc_tuning_update();
- if (zfs_arc_num_sublists_per_state < 1)
- zfs_arc_num_sublists_per_state = MAX(boot_ncpus, 1);
-
/* if kmem_flags are set, lets try to use less memory */
if (kmem_debugging())
arc_c = arc_c / 2;
if (arc_c < arc_c_min)
arc_c = arc_c_min;
- arc_anon = &ARC_anon;
- arc_mru = &ARC_mru;
- arc_mru_ghost = &ARC_mru_ghost;
- arc_mfu = &ARC_mfu;
- arc_mfu_ghost = &ARC_mfu_ghost;
- arc_l2c_only = &ARC_l2c_only;
- arc_size = 0;
-
- multilist_create(&arc_mru->arcs_list[ARC_BUFC_METADATA],
- sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
- multilist_create(&arc_mru->arcs_list[ARC_BUFC_DATA],
- sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
- multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA],
- sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
- multilist_create(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA],
- sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
- multilist_create(&arc_mfu->arcs_list[ARC_BUFC_METADATA],
- sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
- multilist_create(&arc_mfu->arcs_list[ARC_BUFC_DATA],
- sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
- multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA],
- sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
- multilist_create(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA],
- sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
- multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA],
- sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
- multilist_create(&arc_l2c_only->arcs_list[ARC_BUFC_DATA],
- sizeof (arc_buf_hdr_t),
- offsetof(arc_buf_hdr_t, b_l1hdr.b_arc_node),
- zfs_arc_num_sublists_per_state, arc_state_multilist_index_func);
-
- arc_anon->arcs_state = ARC_STATE_ANON;
- arc_mru->arcs_state = ARC_STATE_MRU;
- arc_mru_ghost->arcs_state = ARC_STATE_MRU_GHOST;
- arc_mfu->arcs_state = ARC_STATE_MFU;
- arc_mfu_ghost->arcs_state = ARC_STATE_MFU_GHOST;
- arc_l2c_only->arcs_state = ARC_STATE_L2C_ONLY;
-
- refcount_create(&arc_anon->arcs_size);
- refcount_create(&arc_mru->arcs_size);
- refcount_create(&arc_mru_ghost->arcs_size);
- refcount_create(&arc_mfu->arcs_size);
- refcount_create(&arc_mfu_ghost->arcs_size);
- refcount_create(&arc_l2c_only->arcs_size);
-
+ arc_state_init();
buf_init();
- arc_reclaim_thread_exit = FALSE;
- arc_user_evicts_thread_exit = FALSE;
list_create(&arc_prune_list, sizeof (arc_prune_t),
offsetof(arc_prune_t, p_node));
- arc_eviction_list = NULL;
mutex_init(&arc_prune_mtx, NULL, MUTEX_DEFAULT, NULL);
- bzero(&arc_eviction_hdr, sizeof (arc_buf_hdr_t));
arc_prune_taskq = taskq_create("arc_prune", max_ncpus, defclsyspri,
max_ncpus, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+ arc_reclaim_thread_exit = B_FALSE;
+
arc_ksp = kstat_create("zfs", 0, "arcstats", "misc", KSTAT_TYPE_NAMED,
sizeof (arc_stats) / sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL);
@@ -5463,10 +6602,7 @@ arc_init(void)
(void) thread_create(NULL, 0, arc_reclaim_thread, NULL, 0, &p0,
TS_RUN, defclsyspri);
- (void) thread_create(NULL, 0, arc_user_evicts_thread, NULL, 0, &p0,
- TS_RUN, defclsyspri);
-
- arc_dead = FALSE;
+ arc_dead = B_FALSE;
arc_warm = B_FALSE;
/*
@@ -5479,11 +6615,10 @@ arc_init(void)
*/
if (zfs_dirty_data_max_max == 0)
zfs_dirty_data_max_max = MIN(4ULL * 1024 * 1024 * 1024,
- (uint64_t)physmem * PAGESIZE *
- zfs_dirty_data_max_max_percent / 100);
+ allmem * zfs_dirty_data_max_max_percent / 100);
if (zfs_dirty_data_max == 0) {
- zfs_dirty_data_max = (uint64_t)physmem * PAGESIZE *
+ zfs_dirty_data_max = allmem *
zfs_dirty_data_max_percent / 100;
zfs_dirty_data_max = MIN(zfs_dirty_data_max,
zfs_dirty_data_max_max);
@@ -5500,10 +6635,10 @@ arc_fini(void)
#endif /* _KERNEL */
mutex_enter(&arc_reclaim_lock);
- arc_reclaim_thread_exit = TRUE;
+ arc_reclaim_thread_exit = B_TRUE;
/*
* The reclaim thread will set arc_reclaim_thread_exit back to
- * FALSE when it is finished exiting; we're waiting for that.
+ * B_FALSE when it is finished exiting; we're waiting for that.
*/
while (arc_reclaim_thread_exit) {
cv_signal(&arc_reclaim_thread_cv);
@@ -5511,22 +6646,10 @@ arc_fini(void)
}
mutex_exit(&arc_reclaim_lock);
- mutex_enter(&arc_user_evicts_lock);
- arc_user_evicts_thread_exit = TRUE;
- /*
- * The user evicts thread will set arc_user_evicts_thread_exit
- * to FALSE when it is finished exiting; we're waiting for that.
- */
- while (arc_user_evicts_thread_exit) {
- cv_signal(&arc_user_evicts_cv);
- cv_wait(&arc_user_evicts_cv, &arc_user_evicts_lock);
- }
- mutex_exit(&arc_user_evicts_lock);
-
- /* Use TRUE to ensure *all* buffers are evicted */
- arc_flush(NULL, TRUE);
+ /* Use B_TRUE to ensure *all* buffers are evicted */
+ arc_flush(NULL, B_TRUE);
- arc_dead = TRUE;
+ arc_dead = B_TRUE;
if (arc_ksp != NULL) {
kstat_delete(arc_ksp);
@@ -5551,27 +6674,7 @@ arc_fini(void)
cv_destroy(&arc_reclaim_thread_cv);
cv_destroy(&arc_reclaim_waiters_cv);
- mutex_destroy(&arc_user_evicts_lock);
- cv_destroy(&arc_user_evicts_cv);
-
- refcount_destroy(&arc_anon->arcs_size);
- refcount_destroy(&arc_mru->arcs_size);
- refcount_destroy(&arc_mru_ghost->arcs_size);
- refcount_destroy(&arc_mfu->arcs_size);
- refcount_destroy(&arc_mfu_ghost->arcs_size);
- refcount_destroy(&arc_l2c_only->arcs_size);
-
- multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_METADATA]);
- multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_METADATA]);
- multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_METADATA]);
- multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_METADATA]);
- multilist_destroy(&arc_mru->arcs_list[ARC_BUFC_DATA]);
- multilist_destroy(&arc_mru_ghost->arcs_list[ARC_BUFC_DATA]);
- multilist_destroy(&arc_mfu->arcs_list[ARC_BUFC_DATA]);
- multilist_destroy(&arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
- multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_METADATA]);
- multilist_destroy(&arc_l2c_only->arcs_list[ARC_BUFC_DATA]);
-
+ arc_state_fini();
buf_fini();
ASSERT0(arc_loaned_bytes);
@@ -5701,7 +6804,6 @@ arc_fini(void)
* l2arc_write_max max write bytes per interval
* l2arc_write_boost extra write bytes during device warmup
* l2arc_noprefetch skip caching prefetched buffers
- * l2arc_nocompress skip compressing buffers
* l2arc_headroom number of max device writes to precache
* l2arc_headroom_boost when we find compressed buffers during ARC
* scanning, we multiply headroom by this
@@ -5861,9 +6963,8 @@ l2arc_do_free_on_write(void)
for (df = list_tail(buflist); df; df = df_prev) {
df_prev = list_prev(buflist, df);
- ASSERT(df->l2df_data != NULL);
- ASSERT(df->l2df_func != NULL);
- df->l2df_func(df->l2df_data, df->l2df_size);
+ ASSERT3P(df->l2df_abd, !=, NULL);
+ abd_free(df->l2df_abd);
list_remove(buflist, df);
kmem_free(df, sizeof (l2arc_data_free_t));
}
@@ -5886,13 +6987,13 @@ l2arc_write_done(zio_t *zio)
int64_t bytes_dropped = 0;
cb = zio->io_private;
- ASSERT(cb != NULL);
+ ASSERT3P(cb, !=, NULL);
dev = cb->l2wcb_dev;
- ASSERT(dev != NULL);
+ ASSERT3P(dev, !=, NULL);
head = cb->l2wcb_head;
- ASSERT(head != NULL);
+ ASSERT3P(head, !=, NULL);
buflist = &dev->l2ad_buflist;
- ASSERT(buflist != NULL);
+ ASSERT3P(buflist, !=, NULL);
DTRACE_PROBE2(l2arc__iodone, zio_t *, zio,
l2arc_write_callback_t *, cb);
@@ -5951,31 +7052,29 @@ l2arc_write_done(zio_t *zio)
ASSERT(HDR_HAS_L1HDR(hdr));
/*
- * We may have allocated a buffer for L2ARC compression,
- * we must release it to avoid leaking this data.
+ * Skipped - drop L2ARC entry and mark the header as no
+		 * longer L2 eligible.
*/
- l2arc_release_cdata_buf(hdr);
-
if (zio->io_error != 0) {
/*
* Error - drop L2ARC entry.
*/
list_remove(buflist, hdr);
- hdr->b_flags &= ~ARC_FLAG_HAS_L2HDR;
+ arc_hdr_clear_flags(hdr, ARC_FLAG_HAS_L2HDR);
- ARCSTAT_INCR(arcstat_l2_asize, -hdr->b_l2hdr.b_asize);
- ARCSTAT_INCR(arcstat_l2_size, -hdr->b_size);
+ ARCSTAT_INCR(arcstat_l2_psize, -arc_hdr_size(hdr));
+ ARCSTAT_INCR(arcstat_l2_lsize, -HDR_GET_LSIZE(hdr));
- bytes_dropped += hdr->b_l2hdr.b_asize;
+ bytes_dropped += arc_hdr_size(hdr);
(void) refcount_remove_many(&dev->l2ad_alloc,
- hdr->b_l2hdr.b_asize, hdr);
+ arc_hdr_size(hdr), hdr);
}
/*
* Allow ARC to begin reads and ghost list evictions to
* this L2ARC entry.
*/
- hdr->b_flags &= ~ARC_FLAG_L2_WRITING;
+ arc_hdr_clear_flags(hdr, ARC_FLAG_L2_WRITING);
mutex_exit(hash_lock);
}
@@ -6002,43 +7101,63 @@ l2arc_read_done(zio_t *zio)
{
l2arc_read_callback_t *cb;
arc_buf_hdr_t *hdr;
- arc_buf_t *buf;
kmutex_t *hash_lock;
- int equal;
+ boolean_t valid_cksum;
- ASSERT(zio->io_vd != NULL);
+ ASSERT3P(zio->io_vd, !=, NULL);
ASSERT(zio->io_flags & ZIO_FLAG_DONT_PROPAGATE);
spa_config_exit(zio->io_spa, SCL_L2ARC, zio->io_vd);
cb = zio->io_private;
- ASSERT(cb != NULL);
- buf = cb->l2rcb_buf;
- ASSERT(buf != NULL);
+ ASSERT3P(cb, !=, NULL);
+ hdr = cb->l2rcb_hdr;
+ ASSERT3P(hdr, !=, NULL);
- hash_lock = HDR_LOCK(buf->b_hdr);
+ hash_lock = HDR_LOCK(hdr);
mutex_enter(hash_lock);
- hdr = buf->b_hdr;
ASSERT3P(hash_lock, ==, HDR_LOCK(hdr));
/*
- * If the buffer was compressed, decompress it first.
+ * If the data was read into a temporary buffer,
+ * move it and free the buffer.
*/
- if (cb->l2rcb_compress != ZIO_COMPRESS_OFF)
- l2arc_decompress_zio(zio, hdr, cb->l2rcb_compress);
- ASSERT(zio->io_data != NULL);
- ASSERT3U(zio->io_size, ==, hdr->b_size);
- ASSERT3U(BP_GET_LSIZE(&cb->l2rcb_bp), ==, hdr->b_size);
+ if (cb->l2rcb_abd != NULL) {
+ ASSERT3U(arc_hdr_size(hdr), <, zio->io_size);
+ if (zio->io_error == 0) {
+ abd_copy(hdr->b_l1hdr.b_pabd, cb->l2rcb_abd,
+ arc_hdr_size(hdr));
+ }
+
+ /*
+ * The following must be done regardless of whether
+ * there was an error:
+ * - free the temporary buffer
+ * - point zio to the real ARC buffer
+ * - set zio size accordingly
+ * These are required because zio is either re-used for
+		 * an I/O of the block in the error case, or the zio
+		 * is passed to arc_read_done() and it
+ * needs real data.
+ */
+ abd_free(cb->l2rcb_abd);
+ zio->io_size = zio->io_orig_size = arc_hdr_size(hdr);
+ zio->io_abd = zio->io_orig_abd = hdr->b_l1hdr.b_pabd;
+ }
+
+ ASSERT3P(zio->io_abd, !=, NULL);
/*
* Check this survived the L2ARC journey.
*/
- equal = arc_cksum_equal(buf);
- if (equal && zio->io_error == 0 && !HDR_L2_EVICTED(hdr)) {
+ ASSERT3P(zio->io_abd, ==, hdr->b_l1hdr.b_pabd);
+ zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */
+ zio->io_bp = &zio->io_bp_copy; /* XXX fix in L2ARC 2.0 */
+
+ valid_cksum = arc_cksum_is_equal(hdr, zio);
+ if (valid_cksum && zio->io_error == 0 && !HDR_L2_EVICTED(hdr)) {
mutex_exit(hash_lock);
- zio->io_private = buf;
- zio->io_bp_copy = cb->l2rcb_bp; /* XXX fix in L2ARC 2.0 */
- zio->io_bp = &zio->io_bp_copy; /* XXX fix in L2ARC 2.0 */
+ zio->io_private = hdr;
arc_read_done(zio);
} else {
mutex_exit(hash_lock);
@@ -6051,7 +7170,7 @@ l2arc_read_done(zio_t *zio)
} else {
zio->io_error = SET_ERROR(EIO);
}
- if (!equal)
+ if (!valid_cksum)
ARCSTAT_BUMP(arcstat_l2_cksum_bad);
/*
@@ -6064,9 +7183,10 @@ l2arc_read_done(zio_t *zio)
ASSERT(!pio || pio->io_child_type == ZIO_CHILD_LOGICAL);
- zio_nowait(zio_read(pio, cb->l2rcb_spa, &cb->l2rcb_bp,
- buf->b_data, hdr->b_size, arc_read_done, buf,
- zio->io_priority, cb->l2rcb_flags, &cb->l2rcb_zb));
+ zio_nowait(zio_read(pio, zio->io_spa, zio->io_bp,
+ hdr->b_l1hdr.b_pabd, zio->io_size, arc_read_done,
+ hdr, zio->io_priority, cb->l2rcb_flags,
+ &cb->l2rcb_zb));
}
}
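
A self-contained sketch, with plain buffers standing in for ABDs, of the scratch-buffer copy-back that l2arc_read_done() performs above: the device read fills an asize-sized scratch, the first arc_hdr_size() bytes are copied into the header's block on success, and the scratch is freed in either outcome:

    #include <stdlib.h>
    #include <string.h>
    #include <assert.h>

    int
    main(void)
    {
        size_t psize = 5120, asize = 8192;  /* assumed, as in the read path */
        char *pabd = malloc(psize);         /* stands in for b_pabd */
        char *scratch = malloc(asize);      /* stands in for l2rcb_abd */
        int io_error = 0;                   /* pretend the read succeeded */

        memset(scratch, 0xab, asize);       /* "device" fills the scratch */
        if (io_error == 0)
            memcpy(pabd, scratch, psize);   /* like abd_copy(..., psize) */
        free(scratch);                      /* freed in both outcomes */

        assert((unsigned char)pabd[0] == 0xab);
        free(pabd);
        return (0);
    }
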
@@ -6089,21 +7209,23 @@ l2arc_sublist_lock(int list_num)
multilist_t *ml = NULL;
unsigned int idx;
- ASSERT(list_num >= 0 && list_num <= 3);
+ ASSERT(list_num >= 0 && list_num < L2ARC_FEED_TYPES);
switch (list_num) {
case 0:
- ml = &arc_mfu->arcs_list[ARC_BUFC_METADATA];
+ ml = arc_mfu->arcs_list[ARC_BUFC_METADATA];
break;
case 1:
- ml = &arc_mru->arcs_list[ARC_BUFC_METADATA];
+ ml = arc_mru->arcs_list[ARC_BUFC_METADATA];
break;
case 2:
- ml = &arc_mfu->arcs_list[ARC_BUFC_DATA];
+ ml = arc_mfu->arcs_list[ARC_BUFC_DATA];
break;
case 3:
- ml = &arc_mru->arcs_list[ARC_BUFC_DATA];
+ ml = arc_mru->arcs_list[ARC_BUFC_DATA];
break;
+ default:
+ return (NULL);
}
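
For reference, the round-robin feed order implied by the switch above (L2ARC_FEED_TYPES is 4, matching the four cases; the strings below are purely illustrative):

    #include <stdio.h>

    int
    main(void)
    {
        /* Feed order: metadata before data, MFU before MRU. */
        static const char *const order[] = {
            "mfu metadata", "mru metadata", "mfu data", "mru data",
        };
        for (int i = 0; i < 4; i++)
            printf("list %d: %s\n", i, order[i]);
        return (0);
    }
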
/*
@@ -6202,7 +7324,7 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
/*
* This doesn't exist in the ARC. Destroy.
* arc_hdr_destroy() will call list_remove()
- * and decrement arcstat_l2_size.
+ * and decrement arcstat_l2_lsize.
*/
arc_change_state(arc_anon, hdr, hash_lock);
arc_hdr_destroy(hdr);
@@ -6216,12 +7338,11 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
*/
if (HDR_L2_READING(hdr)) {
ARCSTAT_BUMP(arcstat_l2_evict_reading);
- hdr->b_flags |= ARC_FLAG_L2_EVICTED;
+ arc_hdr_set_flags(hdr, ARC_FLAG_L2_EVICTED);
}
/* Ensure this header has finished being written */
ASSERT(!HDR_L2_WRITING(hdr));
- ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
arc_hdr_l2hdr_destroy(hdr);
}
@@ -6242,45 +7363,33 @@ l2arc_evict(l2arc_dev_t *dev, uint64_t distance, boolean_t all)
* the delta by which the device hand has changed due to alignment).
*/
static uint64_t
-l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
- boolean_t *headroom_boost)
+l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
{
arc_buf_hdr_t *hdr, *hdr_prev, *head;
- uint64_t write_asize, write_sz, headroom, buf_compress_minsz,
- stats_size;
- void *buf_data;
+ uint64_t write_asize, write_psize, write_lsize, headroom;
boolean_t full;
l2arc_write_callback_t *cb;
zio_t *pio, *wzio;
uint64_t guid = spa_load_guid(spa);
int try;
- const boolean_t do_headroom_boost = *headroom_boost;
-
- ASSERT(dev->l2ad_vdev != NULL);
- /* Lower the flag now, we might want to raise it again later. */
- *headroom_boost = B_FALSE;
+ ASSERT3P(dev->l2ad_vdev, !=, NULL);
pio = NULL;
- write_sz = write_asize = 0;
+ write_lsize = write_asize = write_psize = 0;
full = B_FALSE;
head = kmem_cache_alloc(hdr_l2only_cache, KM_PUSHPAGE);
- head->b_flags |= ARC_FLAG_L2_WRITE_HEAD;
- head->b_flags |= ARC_FLAG_HAS_L2HDR;
-
- /*
- * We will want to try to compress buffers that are at least 2x the
- * device sector size.
- */
- buf_compress_minsz = 2 << dev->l2ad_vdev->vdev_ashift;
+ arc_hdr_set_flags(head, ARC_FLAG_L2_WRITE_HEAD | ARC_FLAG_HAS_L2HDR);
/*
* Copy buffers for L2ARC writing.
*/
- for (try = 0; try <= 3; try++) {
+ for (try = 0; try < L2ARC_FEED_TYPES; try++) {
multilist_sublist_t *mls = l2arc_sublist_lock(try);
uint64_t passed_sz = 0;
+ VERIFY3P(mls, !=, NULL);
+
/*
* L2ARC fast warmup.
*
@@ -6293,13 +7402,11 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
hdr = multilist_sublist_tail(mls);
headroom = target_sz * l2arc_headroom;
- if (do_headroom_boost)
+ if (zfs_compressed_arc_enabled)
headroom = (headroom * l2arc_headroom_boost) / 100;
for (; hdr; hdr = hdr_prev) {
kmutex_t *hash_lock;
- uint64_t buf_sz;
- uint64_t buf_a_sz;
if (arc_warm == B_FALSE)
hdr_prev = multilist_sublist_next(mls, hdr);
@@ -6314,7 +7421,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
continue;
}
- passed_sz += hdr->b_size;
+ passed_sz += HDR_GET_LSIZE(hdr);
if (passed_sz > headroom) {
/*
* Searched too far.
@@ -6329,14 +7436,21 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
}
/*
- * Assume that the buffer is not going to be compressed
- * and could take more space on disk because of a larger
- * disk block size.
+ * We rely on the L1 portion of the header below, so
+ * it's invalid for this header to have been evicted out
+			 * of the ghost cache prior to being written out. The
+ * ARC_FLAG_L2_WRITING bit ensures this won't happen.
*/
- buf_sz = hdr->b_size;
- buf_a_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz);
+ ASSERT(HDR_HAS_L1HDR(hdr));
+
+ ASSERT3U(HDR_GET_PSIZE(hdr), >, 0);
+ ASSERT3P(hdr->b_l1hdr.b_pabd, !=, NULL);
+ ASSERT3U(arc_hdr_size(hdr), >, 0);
+ uint64_t psize = arc_hdr_size(hdr);
+ uint64_t asize = vdev_psize_to_asize(dev->l2ad_vdev,
+ psize);
- if ((write_asize + buf_a_sz) > target_sz) {
+ if ((write_asize + asize) > target_sz) {
full = B_TRUE;
mutex_exit(hash_lock);
break;
@@ -6360,63 +7474,64 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
ZIO_FLAG_CANFAIL);
}
- /*
- * Create and add a new L2ARC header.
- */
hdr->b_l2hdr.b_dev = dev;
- hdr->b_flags |= ARC_FLAG_L2_WRITING;
- /*
- * Temporarily stash the data buffer in b_tmp_cdata.
- * The subsequent write step will pick it up from
- * there. This is because can't access b_l1hdr.b_buf
- * without holding the hash_lock, which we in turn
- * can't access without holding the ARC list locks
- * (which we want to avoid during compression/writing)
- */
- hdr->b_l2hdr.b_compress = ZIO_COMPRESS_OFF;
- hdr->b_l2hdr.b_asize = hdr->b_size;
hdr->b_l2hdr.b_hits = 0;
- hdr->b_l1hdr.b_tmp_cdata = hdr->b_l1hdr.b_buf->b_data;
- /*
- * Explicitly set the b_daddr field to a known
- * value which means "invalid address". This
- * enables us to differentiate which stage of
- * l2arc_write_buffers() the particular header
- * is in (e.g. this loop, or the one below).
- * ARC_FLAG_L2_WRITING is not enough to make
- * this distinction, and we need to know in
- * order to do proper l2arc vdev accounting in
- * arc_release() and arc_hdr_destroy().
- *
- * Note, we can't use a new flag to distinguish
- * the two stages because we don't hold the
- * header's hash_lock below, in the second stage
- * of this function. Thus, we can't simply
- * change the b_flags field to denote that the
- * IO has been sent. We can change the b_daddr
- * field of the L2 portion, though, since we'll
- * be holding the l2ad_mtx; which is why we're
- * using it to denote the header's state change.
- */
- hdr->b_l2hdr.b_daddr = L2ARC_ADDR_UNSET;
- hdr->b_flags |= ARC_FLAG_HAS_L2HDR;
+ hdr->b_l2hdr.b_daddr = dev->l2ad_hand;
+ arc_hdr_set_flags(hdr,
+ ARC_FLAG_L2_WRITING | ARC_FLAG_HAS_L2HDR);
mutex_enter(&dev->l2ad_mtx);
list_insert_head(&dev->l2ad_buflist, hdr);
mutex_exit(&dev->l2ad_mtx);
+ (void) refcount_add_many(&dev->l2ad_alloc, psize, hdr);
+
/*
- * Compute and store the buffer cksum before
- * writing. On debug the cksum is verified first.
+ * Normally the L2ARC can use the hdr's data, but if
+ * we're sharing data between the hdr and one of its
+ * bufs, L2ARC needs its own copy of the data so that
+ * the ZIO below can't race with the buf consumer.
+ * Another case where we need to create a copy of the
+ * data is when the buffer size is not device-aligned
+ * and we need to pad the block to make it such.
+ * That also keeps the clock hand suitably aligned.
+ *
+ * To ensure that the copy will be available for the
+ * lifetime of the ZIO and be cleaned up afterwards, we
+ * add it to the l2arc_free_on_write queue.
*/
- arc_cksum_verify(hdr->b_l1hdr.b_buf);
- arc_cksum_compute(hdr->b_l1hdr.b_buf, B_TRUE);
+ abd_t *to_write;
+ if (!HDR_SHARED_DATA(hdr) && psize == asize) {
+ to_write = hdr->b_l1hdr.b_pabd;
+ } else {
+ to_write = abd_alloc_for_io(asize,
+ HDR_ISTYPE_METADATA(hdr));
+ abd_copy(to_write, hdr->b_l1hdr.b_pabd, psize);
+ if (asize != psize) {
+ abd_zero_off(to_write, psize,
+ asize - psize);
+ }
+ l2arc_free_abd_on_write(to_write, asize,
+ arc_buf_type(hdr));
+ }
+ wzio = zio_write_phys(pio, dev->l2ad_vdev,
+ hdr->b_l2hdr.b_daddr, asize, to_write,
+ ZIO_CHECKSUM_OFF, NULL, hdr,
+ ZIO_PRIORITY_ASYNC_WRITE,
+ ZIO_FLAG_CANFAIL, B_FALSE);
+
+ write_lsize += HDR_GET_LSIZE(hdr);
+ DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev,
+ zio_t *, wzio);
+
+ write_psize += psize;
+ write_asize += asize;
+ dev->l2ad_hand += asize;
mutex_exit(hash_lock);
- write_sz += buf_sz;
- write_asize += buf_a_sz;
+ (void) zio_nowait(wzio);
}
multilist_sublist_unlock(mls);
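
To make the three write counters above concrete, a hedged numeric example: one header with a 16 KiB logical size, lz4-compressed to 5 KiB, written to an ashift = 12 cache device:

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t write_lsize = 0, write_psize = 0, write_asize = 0;

        /* One hypothetical header: HDR_GET_LSIZE 16K, arc_hdr_size 5K. */
        uint64_t lsize = 16384, psize = 5120;
        uint64_t asize = 8192;  /* vdev_psize_to_asize() at ashift = 12 */

        write_lsize += lsize;   /* feeds arcstat_l2_lsize */
        write_psize += psize;   /* feeds arcstat_l2_psize and write_bytes */
        write_asize += asize;   /* advances l2ad_hand, checked vs target_sz */

        printf("lsize=%llu psize=%llu asize=%llu\n",
            (unsigned long long)write_lsize,
            (unsigned long long)write_psize,
            (unsigned long long)write_asize);
        return (0);
    }
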
@@ -6427,110 +7542,18 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
/* No buffers selected for writing? */
if (pio == NULL) {
- ASSERT0(write_sz);
+ ASSERT0(write_lsize);
ASSERT(!HDR_HAS_L1HDR(head));
kmem_cache_free(hdr_l2only_cache, head);
return (0);
}
- mutex_enter(&dev->l2ad_mtx);
-
- /*
- * Note that elsewhere in this file arcstat_l2_asize
- * and the used space on l2ad_vdev are updated using b_asize,
- * which is not necessarily rounded up to the device block size.
- * Too keep accounting consistent we do the same here as well:
- * stats_size accumulates the sum of b_asize of the written buffers,
- * while write_asize accumulates the sum of b_asize rounded up
- * to the device block size.
- * The latter sum is used only to validate the corectness of the code.
- */
- stats_size = 0;
- write_asize = 0;
-
- /*
- * Now start writing the buffers. We're starting at the write head
- * and work backwards, retracing the course of the buffer selector
- * loop above.
- */
- for (hdr = list_prev(&dev->l2ad_buflist, head); hdr;
- hdr = list_prev(&dev->l2ad_buflist, hdr)) {
- uint64_t buf_sz;
-
- /*
- * We rely on the L1 portion of the header below, so
- * it's invalid for this header to have been evicted out
- * of the ghost cache, prior to being written out. The
- * ARC_FLAG_L2_WRITING bit ensures this won't happen.
- */
- ASSERT(HDR_HAS_L1HDR(hdr));
-
- /*
- * We shouldn't need to lock the buffer here, since we flagged
- * it as ARC_FLAG_L2_WRITING in the previous step, but we must
- * take care to only access its L2 cache parameters. In
- * particular, hdr->l1hdr.b_buf may be invalid by now due to
- * ARC eviction.
- */
- hdr->b_l2hdr.b_daddr = dev->l2ad_hand;
-
- if ((!l2arc_nocompress && HDR_L2COMPRESS(hdr)) &&
- hdr->b_l2hdr.b_asize >= buf_compress_minsz) {
- if (l2arc_compress_buf(hdr)) {
- /*
- * If compression succeeded, enable headroom
- * boost on the next scan cycle.
- */
- *headroom_boost = B_TRUE;
- }
- }
-
- /*
- * Pick up the buffer data we had previously stashed away
- * (and now potentially also compressed).
- */
- buf_data = hdr->b_l1hdr.b_tmp_cdata;
- buf_sz = hdr->b_l2hdr.b_asize;
-
- /*
- * We need to do this regardless if buf_sz is zero or
- * not, otherwise, when this l2hdr is evicted we'll
- * remove a reference that was never added.
- */
- (void) refcount_add_many(&dev->l2ad_alloc, buf_sz, hdr);
-
- /* Compression may have squashed the buffer to zero length. */
- if (buf_sz != 0) {
- uint64_t buf_a_sz;
-
- wzio = zio_write_phys(pio, dev->l2ad_vdev,
- dev->l2ad_hand, buf_sz, buf_data, ZIO_CHECKSUM_OFF,
- NULL, NULL, ZIO_PRIORITY_ASYNC_WRITE,
- ZIO_FLAG_CANFAIL, B_FALSE);
-
- DTRACE_PROBE2(l2arc__write, vdev_t *, dev->l2ad_vdev,
- zio_t *, wzio);
- (void) zio_nowait(wzio);
-
- stats_size += buf_sz;
-
- /*
- * Keep the clock hand suitably device-aligned.
- */
- buf_a_sz = vdev_psize_to_asize(dev->l2ad_vdev, buf_sz);
- write_asize += buf_a_sz;
- dev->l2ad_hand += buf_a_sz;
- }
- }
-
- mutex_exit(&dev->l2ad_mtx);
-
ASSERT3U(write_asize, <=, target_sz);
ARCSTAT_BUMP(arcstat_l2_writes_sent);
- ARCSTAT_INCR(arcstat_l2_write_bytes, write_asize);
- ARCSTAT_INCR(arcstat_l2_size, write_sz);
- ARCSTAT_INCR(arcstat_l2_asize, stats_size);
- vdev_space_update(dev->l2ad_vdev, stats_size, 0, 0);
+ ARCSTAT_INCR(arcstat_l2_write_bytes, write_psize);
+ ARCSTAT_INCR(arcstat_l2_lsize, write_lsize);
+ ARCSTAT_INCR(arcstat_l2_psize, write_psize);
+ vdev_space_update(dev->l2ad_vdev, write_psize, 0, 0);
/*
* Bump device hand to the device start if it is approaching the end.
@@ -6548,186 +7571,6 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz,
return (write_asize);
}
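/*
 * Worked example for the size accounting above (numbers assumed, not
 * part of this patch): a 128 KB buffer that compresses to 42 KB on an
 * L2ARC device with 4 KB sectors is accounted as
 *
 *	lsize = 131072	logical size      -> arcstat_l2_lsize
 *	psize =  43008	compressed size   -> arcstat_l2_psize, vdev space
 *	asize =  45056	psize rounded up to the 4 KB sector size
 *
 * write_lsize/write_psize feed the arcstats, while the device clock
 * hand (l2ad_hand) advances by asize.
 */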
-/*
- * Compresses an L2ARC buffer.
- * The data to be compressed must be prefilled in l1hdr.b_tmp_cdata and its
- * size in l2hdr->b_asize. This routine tries to compress the data and
- * depending on the compression result there are three possible outcomes:
- * *) The buffer was incompressible. The original l2hdr contents were left
- * untouched and are ready for writing to an L2 device.
- * *) The buffer was all-zeros, so there is no need to write it to an L2
- * device. To indicate this situation b_tmp_cdata is NULL'ed, b_asize is
- * set to zero and b_compress is set to ZIO_COMPRESS_EMPTY.
- * *) Compression succeeded and b_tmp_cdata was replaced with a temporary
- * data buffer which holds the compressed data to be written, and b_asize
- * tells us how much data there is. b_compress is set to the appropriate
- * compression algorithm. Once writing is done, invoke
- * l2arc_release_cdata_buf on this l2hdr to free this temporary buffer.
- *
- * Returns B_TRUE if compression succeeded, or B_FALSE if it didn't (the
- * buffer was incompressible).
- */
-static boolean_t
-l2arc_compress_buf(arc_buf_hdr_t *hdr)
-{
- void *cdata;
- size_t csize, len, rounded;
- l2arc_buf_hdr_t *l2hdr;
-
- ASSERT(HDR_HAS_L2HDR(hdr));
-
- l2hdr = &hdr->b_l2hdr;
-
- ASSERT(HDR_HAS_L1HDR(hdr));
- ASSERT3U(l2hdr->b_compress, ==, ZIO_COMPRESS_OFF);
- ASSERT(hdr->b_l1hdr.b_tmp_cdata != NULL);
-
- len = l2hdr->b_asize;
- cdata = zio_data_buf_alloc(len);
- ASSERT3P(cdata, !=, NULL);
- csize = zio_compress_data(ZIO_COMPRESS_LZ4, hdr->b_l1hdr.b_tmp_cdata,
- cdata, l2hdr->b_asize);
-
- rounded = P2ROUNDUP(csize, (size_t)SPA_MINBLOCKSIZE);
- if (rounded > csize) {
- bzero((char *)cdata + csize, rounded - csize);
- csize = rounded;
- }
-
- if (csize == 0) {
- /* zero block, indicate that there's nothing to write */
- zio_data_buf_free(cdata, len);
- l2hdr->b_compress = ZIO_COMPRESS_EMPTY;
- l2hdr->b_asize = 0;
- hdr->b_l1hdr.b_tmp_cdata = NULL;
- ARCSTAT_BUMP(arcstat_l2_compress_zeros);
- return (B_TRUE);
- } else if (csize > 0 && csize < len) {
- /*
- * Compression succeeded, we'll keep the cdata around for
- * writing and release it afterwards.
- */
- l2hdr->b_compress = ZIO_COMPRESS_LZ4;
- l2hdr->b_asize = csize;
- hdr->b_l1hdr.b_tmp_cdata = cdata;
- ARCSTAT_BUMP(arcstat_l2_compress_successes);
- return (B_TRUE);
- } else {
- /*
- * Compression failed, release the compressed buffer.
- * l2hdr will be left unmodified.
- */
- zio_data_buf_free(cdata, len);
- ARCSTAT_BUMP(arcstat_l2_compress_failures);
- return (B_FALSE);
- }
-}
-
-/*
- * Decompresses a zio read back from an l2arc device. On success, the
- * underlying zio's io_data buffer is overwritten by the uncompressed
- * version. On decompression error (corrupt compressed stream), the
- * zio->io_error value is set to signal an I/O error.
- *
- * Please note that the compressed data stream is not checksummed, so
- * if the underlying device is experiencing data corruption, we may feed
- * corrupt data to the decompressor, so the decompressor needs to be
- * able to handle this situation (LZ4 does).
- */
-static void
-l2arc_decompress_zio(zio_t *zio, arc_buf_hdr_t *hdr, enum zio_compress c)
-{
- uint64_t csize;
- void *cdata;
-
- ASSERT(L2ARC_IS_VALID_COMPRESS(c));
-
- if (zio->io_error != 0) {
- /*
- * An io error has occurred, just restore the original io
- * size in preparation for a main pool read.
- */
- zio->io_orig_size = zio->io_size = hdr->b_size;
- return;
- }
-
- if (c == ZIO_COMPRESS_EMPTY) {
- /*
- * An empty buffer results in a null zio, which means we
- * need to fill its io_data after we're done restoring the
- * buffer's contents.
- */
- ASSERT(hdr->b_l1hdr.b_buf != NULL);
- bzero(hdr->b_l1hdr.b_buf->b_data, hdr->b_size);
- zio->io_data = zio->io_orig_data = hdr->b_l1hdr.b_buf->b_data;
- } else {
- ASSERT(zio->io_data != NULL);
- /*
- * We copy the compressed data from the start of the arc buffer
- * (the zio_read will have pulled in only what we need, the
- * rest is garbage which we will overwrite at decompression)
- * and then decompress back to the ARC data buffer. This way we
- * can minimize copying by simply decompressing back over the
- * original compressed data (rather than decompressing to an
- * aux buffer and then copying back the uncompressed buffer,
- * which is likely to be much larger).
- */
- csize = zio->io_size;
- cdata = zio_data_buf_alloc(csize);
- bcopy(zio->io_data, cdata, csize);
- if (zio_decompress_data(c, cdata, zio->io_data, csize,
- hdr->b_size) != 0)
- zio->io_error = EIO;
- zio_data_buf_free(cdata, csize);
- }
-
- /* Restore the expected uncompressed IO size. */
- zio->io_orig_size = zio->io_size = hdr->b_size;
-}
-
-/*
- * Releases the temporary b_tmp_cdata buffer in an l2arc header structure.
- * This buffer serves as a temporary holder of compressed data while
- * the buffer entry is being written to an l2arc device. Once that is
- * done, we can dispose of it.
- */
-static void
-l2arc_release_cdata_buf(arc_buf_hdr_t *hdr)
-{
- enum zio_compress comp;
-
- ASSERT(HDR_HAS_L1HDR(hdr));
- ASSERT(HDR_HAS_L2HDR(hdr));
- comp = hdr->b_l2hdr.b_compress;
- ASSERT(comp == ZIO_COMPRESS_OFF || L2ARC_IS_VALID_COMPRESS(comp));
-
- if (comp == ZIO_COMPRESS_OFF) {
- /*
- * In this case, b_tmp_cdata points to the same buffer
- * as the arc_buf_t's b_data field. We don't want to
- * free it, since the arc_buf_t will handle that.
- */
- hdr->b_l1hdr.b_tmp_cdata = NULL;
- } else if (comp == ZIO_COMPRESS_EMPTY) {
- /*
- * In this case, b_tmp_cdata was compressed to an empty
- * buffer, thus there's nothing to free and b_tmp_cdata
- * should have been set to NULL in l2arc_write_buffers().
- */
- ASSERT3P(hdr->b_l1hdr.b_tmp_cdata, ==, NULL);
- } else {
- /*
- * If the data was compressed, then we've allocated a
- * temporary buffer for it, so now we need to release it.
- */
- ASSERT(hdr->b_l1hdr.b_tmp_cdata != NULL);
- zio_data_buf_free(hdr->b_l1hdr.b_tmp_cdata,
- hdr->b_size);
- hdr->b_l1hdr.b_tmp_cdata = NULL;
- }
-
-}
-
/*
* This thread feeds the L2ARC at regular intervals. This is the beating
* heart of the L2ARC.
@@ -6740,7 +7583,6 @@ l2arc_feed_thread(void)
spa_t *spa;
uint64_t size, wrote;
clock_t begin, next = ddi_get_lbolt();
- boolean_t headroom_boost = B_FALSE;
fstrans_cookie_t cookie;
CALLB_CPR_INIT(&cpr, &l2arc_feed_thr_lock, callb_generic_cpr, FTAG);
@@ -6780,7 +7622,7 @@ l2arc_feed_thread(void)
continue;
spa = dev->l2ad_spa;
- ASSERT(spa != NULL);
+ ASSERT3P(spa, !=, NULL);
/*
* If the pool is read-only then force the feed thread to
@@ -6813,7 +7655,7 @@ l2arc_feed_thread(void)
/*
* Write ARC buffers.
*/
- wrote = l2arc_write_buffers(spa, dev, size, &headroom_boost);
+ wrote = l2arc_write_buffers(spa, dev, size);
/*
* Calculate interval between writes.
@@ -6908,7 +7750,7 @@ l2arc_remove_vdev(vdev_t *vd)
break;
}
}
- ASSERT(remdev != NULL);
+ ASSERT3P(remdev, !=, NULL);
/*
* Remove device from global list
@@ -6997,12 +7839,12 @@ l2arc_stop(void)
EXPORT_SYMBOL(arc_buf_size);
EXPORT_SYMBOL(arc_write);
EXPORT_SYMBOL(arc_read);
-EXPORT_SYMBOL(arc_buf_remove_ref);
EXPORT_SYMBOL(arc_buf_info);
EXPORT_SYMBOL(arc_getbuf_func);
EXPORT_SYMBOL(arc_add_prune_callback);
EXPORT_SYMBOL(arc_remove_prune_callback);
+/* BEGIN CSTYLED */
module_param(zfs_arc_min, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_min, "Min arc size");
@@ -7012,6 +7854,10 @@ MODULE_PARM_DESC(zfs_arc_max, "Max arc size");
module_param(zfs_arc_meta_limit, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_meta_limit, "Meta limit for arc size");
+module_param(zfs_arc_meta_limit_percent, ulong, 0644);
+MODULE_PARM_DESC(zfs_arc_meta_limit_percent,
+ "Percent of arc size for arc meta limit");
+
module_param(zfs_arc_meta_min, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_meta_min, "Min arc metadata");
@@ -7037,22 +7883,22 @@ MODULE_PARM_DESC(zfs_arc_p_dampener_disable, "disable arc_p adapt dampener");
module_param(zfs_arc_shrink_shift, int, 0644);
MODULE_PARM_DESC(zfs_arc_shrink_shift, "log2(fraction of arc to reclaim)");
+module_param(zfs_arc_pc_percent, uint, 0644);
+MODULE_PARM_DESC(zfs_arc_pc_percent,
+ "Percent of pagecache to reclaim arc to");
+
module_param(zfs_arc_p_min_shift, int, 0644);
MODULE_PARM_DESC(zfs_arc_p_min_shift, "arc_c shift to calc min/max arc_p");
-module_param(zfs_disable_dup_eviction, int, 0644);
-MODULE_PARM_DESC(zfs_disable_dup_eviction, "disable duplicate buffer eviction");
-
module_param(zfs_arc_average_blocksize, int, 0444);
MODULE_PARM_DESC(zfs_arc_average_blocksize, "Target average block size");
+module_param(zfs_compressed_arc_enabled, int, 0644);
+MODULE_PARM_DESC(zfs_compressed_arc_enabled, "Disable compressed arc buffers");
+
module_param(zfs_arc_min_prefetch_lifespan, int, 0644);
MODULE_PARM_DESC(zfs_arc_min_prefetch_lifespan, "Min life of prefetch block");
-module_param(zfs_arc_num_sublists_per_state, int, 0644);
-MODULE_PARM_DESC(zfs_arc_num_sublists_per_state,
- "Number of sublists used in each of the ARC state lists");
-
module_param(l2arc_write_max, ulong, 0644);
MODULE_PARM_DESC(l2arc_write_max, "Max write bytes per interval");
@@ -7074,9 +7920,6 @@ MODULE_PARM_DESC(l2arc_feed_min_ms, "Min feed interval in milliseconds");
module_param(l2arc_noprefetch, int, 0644);
MODULE_PARM_DESC(l2arc_noprefetch, "Skip caching prefetched buffers");
-module_param(l2arc_nocompress, int, 0644);
-MODULE_PARM_DESC(l2arc_nocompress, "Skip compressing L2ARC buffers");
-
module_param(l2arc_feed_again, int, 0644);
MODULE_PARM_DESC(l2arc_feed_again, "Turbo L2ARC warmup");
@@ -7090,4 +7933,15 @@ MODULE_PARM_DESC(zfs_arc_lotsfree_percent,
module_param(zfs_arc_sys_free, ulong, 0644);
MODULE_PARM_DESC(zfs_arc_sys_free, "System free memory target size in bytes");
+module_param(zfs_arc_dnode_limit, ulong, 0644);
+MODULE_PARM_DESC(zfs_arc_dnode_limit, "Minimum bytes of dnodes in arc");
+
+module_param(zfs_arc_dnode_limit_percent, ulong, 0644);
+MODULE_PARM_DESC(zfs_arc_dnode_limit_percent,
+ "Percent of ARC meta buffers for dnodes");
+
+module_param(zfs_arc_dnode_reduce_percent, ulong, 0644);
+MODULE_PARM_DESC(zfs_arc_dnode_reduce_percent,
+ "Percentage of excess dnodes to try to unpin");
+/* END CSTYLED */
#endif
diff --git a/zfs/module/zfs/blkptr.c b/zfs/module/zfs/blkptr.c
index d56e19996d8d..bb407af03fec 100644
--- a/zfs/module/zfs/blkptr.c
+++ b/zfs/module/zfs/blkptr.c
@@ -14,7 +14,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -119,3 +119,36 @@ decode_embedded_bp_compressed(const blkptr_t *bp, void *buf)
buf8[i] = BF64_GET(w, (i % sizeof (w)) * NBBY, NBBY);
}
}
+
+/*
+ * Fill in the buffer with the (decompressed) payload of the embedded
+ * blkptr_t. Takes into account compression and byteorder (the payload is
+ * treated as a stream of bytes).
+ * Return 0 on success, or ENOSPC if it won't fit in the buffer.
+ */
+int
+decode_embedded_bp(const blkptr_t *bp, void *buf, int buflen)
+{
+ int lsize, psize;
+
+ ASSERT(BP_IS_EMBEDDED(bp));
+
+ lsize = BPE_GET_LSIZE(bp);
+ psize = BPE_GET_PSIZE(bp);
+
+ if (lsize > buflen)
+ return (ENOSPC);
+ ASSERT3U(lsize, ==, buflen);
+
+ if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) {
+ uint8_t dstbuf[BPE_PAYLOAD_SIZE];
+ decode_embedded_bp_compressed(bp, dstbuf);
+ VERIFY0(zio_decompress_data_buf(BP_GET_COMPRESS(bp),
+ dstbuf, buf, psize, buflen));
+ } else {
+ ASSERT3U(lsize, ==, psize);
+ decode_embedded_bp_compressed(bp, buf);
+ }
+
+ return (0);
+}
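A minimal usage sketch for the new decode_embedded_bp() (illustrative only;
the wrapper name and error handling are assumptions, not part of this patch):

/*
 * Read the payload of an embedded block pointer into buf. buflen must
 * equal BPE_GET_LSIZE(bp); larger payloads fail with ENOSPC before any
 * decompression is attempted.
 */
static int
read_embedded_payload(const blkptr_t *bp, void *buf, int buflen)
{
	if (!BP_IS_EMBEDDED(bp))
		return (SET_ERROR(EINVAL));
	return (decode_embedded_bp(bp, buf, buflen));
}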
diff --git a/zfs/module/zfs/bpobj.c b/zfs/module/zfs/bpobj.c
index 17d98c36e134..82ca94e1d11b 100644
--- a/zfs/module/zfs/bpobj.c
+++ b/zfs/module/zfs/bpobj.c
@@ -20,7 +20,8 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
+ * Copyright (c) 2017 Datto Inc.
*/
#include <sys/bpobj.h>
@@ -211,6 +212,9 @@ bpobj_iterate_impl(bpobj_t *bpo, bpobj_itor_t func, void *arg, dmu_tx_t *tx,
mutex_enter(&bpo->bpo_lock);
+ if (!bpobj_hasentries(bpo))
+ goto out;
+
if (free)
dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
@@ -395,6 +399,7 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
return;
}
+ mutex_enter(&bpo->bpo_lock);
dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
if (bpo->bpo_phys->bpo_subobjs == 0) {
bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os,
@@ -405,7 +410,6 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi));
ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ);
- mutex_enter(&bpo->bpo_lock);
dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
sizeof (subobj), &subobj, tx);
diff --git a/zfs/module/zfs/bptree.c b/zfs/module/zfs/bptree.c
index 9f62d7b911f3..6cd2b019f742 100644
--- a/zfs/module/zfs/bptree.c
+++ b/zfs/module/zfs/bptree.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
*/
#include <sys/arc.h>
@@ -156,7 +156,7 @@ bptree_visit_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
int err;
struct bptree_args *ba = arg;
- if (BP_IS_HOLE(bp))
+ if (bp == NULL || BP_IS_HOLE(bp))
return (0);
err = ba->ba_func(ba->ba_arg, bp, ba->ba_tx);
@@ -223,7 +223,8 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
flags |= TRAVERSE_HARD;
zfs_dbgmsg("bptree index %lld: traversing from min_txg=%lld "
"bookmark %lld/%lld/%lld/%lld",
- i, (longlong_t)bte.be_birth_txg,
+ (longlong_t)i,
+ (longlong_t)bte.be_birth_txg,
(longlong_t)bte.be_zb.zb_objset,
(longlong_t)bte.be_zb.zb_object,
(longlong_t)bte.be_zb.zb_level,
diff --git a/zfs/module/zfs/bqueue.c b/zfs/module/zfs/bqueue.c
new file mode 100644
index 000000000000..f30253d24bfb
--- /dev/null
+++ b/zfs/module/zfs/bqueue.c
@@ -0,0 +1,112 @@
+/*
+ * CDDL HEADER START
+ *
+ * This file and its contents are supplied under the terms of the
+ * Common Development and Distribution License ("CDDL"), version 1.0.
+ * You may only use this file in accordance with the terms of version
+ * 1.0 of the CDDL.
+ *
+ * A full copy of the text of the CDDL should have accompanied this
+ * source. A copy of the CDDL is also available via the Internet at
+ * http://www.illumos.org/license/CDDL.
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2014 by Delphix. All rights reserved.
+ */
+
+#include <sys/bqueue.h>
+#include <sys/zfs_context.h>
+
+static inline bqueue_node_t *
+obj2node(bqueue_t *q, void *data)
+{
+ return ((bqueue_node_t *)((char *)data + q->bq_node_offset));
+}
+
+/*
+ * Initialize a blocking queue. The maximum capacity of the queue is set to
+ * size. Types that want to be stored in a bqueue must contain a bqueue_node_t,
+ * and offset should give its offset from the start of the struct. Return 0 on
+ * success, or -1 on failure.
+ */
+int
+bqueue_init(bqueue_t *q, uint64_t size, size_t node_offset)
+{
+ list_create(&q->bq_list, node_offset + sizeof (bqueue_node_t),
+ node_offset + offsetof(bqueue_node_t, bqn_node));
+ cv_init(&q->bq_add_cv, NULL, CV_DEFAULT, NULL);
+ cv_init(&q->bq_pop_cv, NULL, CV_DEFAULT, NULL);
+ mutex_init(&q->bq_lock, NULL, MUTEX_DEFAULT, NULL);
+ q->bq_node_offset = node_offset;
+ q->bq_size = 0;
+ q->bq_maxsize = size;
+ return (0);
+}
+
+/*
+ * Destroy a blocking queue. This function asserts that there are no
+ * elements in the queue, and no one is blocked on the condition
+ * variables.
+ */
+void
+bqueue_destroy(bqueue_t *q)
+{
+ ASSERT0(q->bq_size);
+ cv_destroy(&q->bq_add_cv);
+ cv_destroy(&q->bq_pop_cv);
+ mutex_destroy(&q->bq_lock);
+ list_destroy(&q->bq_list);
+}
+
+/*
+ * Add data to q, consuming size units of capacity. If there is insufficient
+ * capacity to consume size units, block until capacity exists. Asserts size is
+ * > 0.
+ */
+void
+bqueue_enqueue(bqueue_t *q, void *data, uint64_t item_size)
+{
+ ASSERT3U(item_size, >, 0);
+ ASSERT3U(item_size, <=, q->bq_maxsize);
+ mutex_enter(&q->bq_lock);
+ obj2node(q, data)->bqn_size = item_size;
+ while (q->bq_size + item_size > q->bq_maxsize) {
+ cv_wait(&q->bq_add_cv, &q->bq_lock);
+ }
+ q->bq_size += item_size;
+ list_insert_tail(&q->bq_list, data);
+ cv_signal(&q->bq_pop_cv);
+ mutex_exit(&q->bq_lock);
+}
+/*
+ * Take the first element off of q. If there are no elements on the queue, wait
+ * until one is put there. Return the removed element.
+ */
+void *
+bqueue_dequeue(bqueue_t *q)
+{
+ void *ret = NULL;
+ uint64_t item_size;
+ mutex_enter(&q->bq_lock);
+ while (q->bq_size == 0) {
+ cv_wait(&q->bq_pop_cv, &q->bq_lock);
+ }
+ ret = list_remove_head(&q->bq_list);
+ ASSERT3P(ret, !=, NULL);
+ item_size = obj2node(q, ret)->bqn_size;
+ q->bq_size -= item_size;
+ cv_signal(&q->bq_add_cv);
+ mutex_exit(&q->bq_lock);
+ return (ret);
+}
+
+/*
+ * Returns true if the space used is 0.
+ */
+boolean_t
+bqueue_empty(bqueue_t *q)
+{
+ return (q->bq_size == 0);
+}
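A hedged usage sketch for the new bqueue API (illustrative only; work_item_t
and all wi_-prefixed names are hypothetical, not part of this patch):

/*
 * A type stored in a bqueue embeds a bqueue_node_t; the queue is
 * initialized with that member's offset so it can locate the link
 * inside each element.
 */
typedef struct work_item {
	uint64_t wi_size;
	bqueue_node_t wi_node;		/* embedded queue link */
} work_item_t;

static void
bqueue_example(void)
{
	bqueue_t q;
	work_item_t *wi;

	VERIFY0(bqueue_init(&q, 16 * 1024 * 1024,
	    offsetof(work_item_t, wi_node)));

	/* Producer: blocks once 16 MB worth of items are outstanding. */
	wi = kmem_zalloc(sizeof (*wi), KM_SLEEP);
	wi->wi_size = sizeof (*wi);
	bqueue_enqueue(&q, wi, wi->wi_size);

	/* Consumer: blocks until an element is available. */
	wi = bqueue_dequeue(&q);
	kmem_free(wi, sizeof (*wi));

	ASSERT(bqueue_empty(&q));
	bqueue_destroy(&q);
}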
diff --git a/zfs/module/zfs/dbuf.c b/zfs/module/zfs/dbuf.c
index 483067cc7ba4..62340469d357 100644
--- a/zfs/module/zfs/dbuf.c
+++ b/zfs/module/zfs/dbuf.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -45,13 +45,16 @@
#include <sys/blkptr.h>
#include <sys/range_tree.h>
#include <sys/trace_dbuf.h>
+#include <sys/callb.h>
+#include <sys/abd.h>
struct dbuf_hold_impl_data {
/* Function arguments */
dnode_t *dh_dn;
uint8_t dh_level;
uint64_t dh_blkid;
- int dh_fail_sparse;
+ boolean_t dh_fail_sparse;
+ boolean_t dh_fail_uncached;
void *dh_tag;
dmu_buf_impl_t **dh_dbp;
/* Local variables */
@@ -65,31 +68,94 @@ struct dbuf_hold_impl_data {
};
static void __dbuf_hold_impl_init(struct dbuf_hold_impl_data *dh,
- dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
- void *tag, dmu_buf_impl_t **dbp, int depth);
+ dnode_t *dn, uint8_t level, uint64_t blkid, boolean_t fail_sparse,
+ boolean_t fail_uncached,
+ void *tag, dmu_buf_impl_t **dbp, int depth);
static int __dbuf_hold_impl(struct dbuf_hold_impl_data *dh);
-/*
- * Number of times that zfs_free_range() took the slow path while doing
- * a zfs receive. A nonzero value indicates a potential performance problem.
- */
-uint64_t zfs_free_range_recv_miss;
+uint_t zfs_dbuf_evict_key;
-static void dbuf_destroy(dmu_buf_impl_t *db);
static boolean_t dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
static void dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx);
-#ifndef __lint
extern inline void dmu_buf_init_user(dmu_buf_user_t *dbu,
- dmu_buf_evict_func_t *evict_func, dmu_buf_t **clear_on_evict_dbufp);
-#endif /* ! __lint */
+ dmu_buf_evict_func_t *evict_func_sync,
+ dmu_buf_evict_func_t *evict_func_async,
+ dmu_buf_t **clear_on_evict_dbufp);
/*
* Global data structures and functions for the dbuf cache.
*/
-static kmem_cache_t *dbuf_cache;
+static kmem_cache_t *dbuf_kmem_cache;
static taskq_t *dbu_evict_taskq;
+static kthread_t *dbuf_cache_evict_thread;
+static kmutex_t dbuf_evict_lock;
+static kcondvar_t dbuf_evict_cv;
+static boolean_t dbuf_evict_thread_exit;
+
+/*
+ * LRU cache of dbufs. The dbuf cache maintains a list of dbufs that
+ * are not currently held but have been recently released. These dbufs
+ * are not eligible for arc eviction until they are aged out of the cache.
+ * Dbufs are added to the dbuf cache once the last hold is released. If a
+ * dbuf is later accessed and still exists in the dbuf cache, then it will
+ * be removed from the cache and later re-added to the head of the cache.
+ * Dbufs that are aged out of the cache will be immediately destroyed and
+ * become eligible for arc eviction.
+ */
+static multilist_t *dbuf_cache;
+static refcount_t dbuf_cache_size;
+unsigned long dbuf_cache_max_bytes = 100 * 1024 * 1024;
+
+/* Cap the size of the dbuf cache to log2 fraction of arc size. */
+int dbuf_cache_max_shift = 5;
+
+/*
+ * The dbuf cache uses a three-stage eviction policy:
+ * - A low water marker designates when the dbuf eviction thread
+ * should stop evicting from the dbuf cache.
+ * - When we reach the maximum size (aka mid water mark), we
+ * signal the eviction thread to run.
+ * - The high water mark indicates when the eviction thread
+ * is unable to keep up with the incoming load and eviction must
+ * happen in the context of the calling thread.
+ *
+ * The dbuf cache:
+ * (max size)
+ * low water mid water hi water
+ * +----------------------------------------+----------+----------+
+ * | | | |
+ * | | | |
+ * | | | |
+ * | | | |
+ * +----------------------------------------+----------+----------+
+ * stop signal evict
+ * evicting eviction directly
+ * thread
+ *
+ * The high and low water marks indicate the operating range for the eviction
+ * thread. The low water mark is, by default, 90% of the total size of the
+ * cache and the high water mark is at 110% (both of these percentages can be
+ * changed by setting dbuf_cache_lowater_pct and dbuf_cache_hiwater_pct,
+ * respectively). The eviction thread will try to ensure that the cache remains
+ * within this range by waking up every second and checking if the cache is
+ * above the low water mark. The thread can also be woken up by callers adding
+ * elements into the cache if the cache is larger than the mid water (i.e. max
+ * cache size). Once the eviction thread is woken up and eviction is required,
+ * it will continue evicting buffers until it's able to reduce the cache size
+ * to the low water mark. If the cache size continues to grow and hits the high
+ * water mark, then callers adding elements to the cache will begin to evict
+ * directly from the cache until the cache is no longer above the high water
+ * mark.
+ */
+
+/*
+ * The percentage above and below the maximum cache size.
+ */
+uint_t dbuf_cache_hiwater_pct = 10;
+uint_t dbuf_cache_lowater_pct = 10;
+
/* ARGSUSED */
static int
dbuf_cons(void *vdb, void *unused, int kmflag)
@@ -99,7 +165,8 @@ dbuf_cons(void *vdb, void *unused, int kmflag)
mutex_init(&db->db_mtx, NULL, MUTEX_DEFAULT, NULL);
cv_init(&db->db_changed, NULL, CV_DEFAULT, NULL);
+ multilist_link_init(&db->db_cache_link);
refcount_create(&db->db_holds);
return (0);
}
@@ -111,6 +179,7 @@ dbuf_dest(void *vdb, void *unused)
dmu_buf_impl_t *db = vdb;
mutex_destroy(&db->db_mtx);
cv_destroy(&db->db_changed);
+ ASSERT(!multilist_link_active(&db->db_cache_link));
refcount_destroy(&db->db_holds);
}
@@ -140,8 +209,6 @@ dbuf_hash(void *os, uint64_t obj, uint8_t lvl, uint64_t blkid)
return (crc);
}
-#define DBUF_HASH(os, obj, level, blkid) dbuf_hash(os, obj, level, blkid);
-
#define DBUF_EQUAL(dbuf, os, obj, level, blkid) \
((dbuf)->db.db_object == (obj) && \
(dbuf)->db_objset == (os) && \
@@ -156,7 +223,7 @@ dbuf_find(objset_t *os, uint64_t obj, uint8_t level, uint64_t blkid)
uint64_t idx;
dmu_buf_impl_t *db;
- hv = DBUF_HASH(os, obj, level, blkid);
+ hv = dbuf_hash(os, obj, level, blkid);
idx = hv & h->hash_table_mask;
mutex_enter(DBUF_HASH_MUTEX(h, idx));
@@ -209,7 +276,7 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
dmu_buf_impl_t *dbf;
blkid = db->db_blkid;
- hv = DBUF_HASH(os, obj, level, blkid);
+ hv = dbuf_hash(os, obj, level, blkid);
idx = hv & h->hash_table_mask;
mutex_enter(DBUF_HASH_MUTEX(h, idx));
@@ -228,7 +295,7 @@ dbuf_hash_insert(dmu_buf_impl_t *db)
db->db_hash_next = h->hash_table[idx];
h->hash_table[idx] = db;
mutex_exit(DBUF_HASH_MUTEX(h, idx));
- atomic_add_64(&dbuf_hash_count, 1);
+ atomic_inc_64(&dbuf_hash_count);
return (NULL);
}
@@ -243,12 +310,12 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
uint64_t hv, idx;
dmu_buf_impl_t *dbf, **dbp;
- hv = DBUF_HASH(db->db_objset, db->db.db_object,
+ hv = dbuf_hash(db->db_objset, db->db.db_object,
db->db_level, db->db_blkid);
idx = hv & h->hash_table_mask;
/*
- * We musn't hold db_mtx to maintain lock ordering:
+ * We mustn't hold db_mtx to maintain lock ordering:
* DBUF_HASH_MUTEX > db_mtx.
*/
ASSERT(refcount_is_zero(&db->db_holds));
@@ -264,11 +331,9 @@ dbuf_hash_remove(dmu_buf_impl_t *db)
*dbp = db->db_hash_next;
db->db_hash_next = NULL;
mutex_exit(DBUF_HASH_MUTEX(h, idx));
- atomic_add_64(&dbuf_hash_count, -1);
+ atomic_dec_64(&dbuf_hash_count);
}
-static arc_evict_func_t dbuf_do_evict;
-
typedef enum {
DBVU_EVICTING,
DBVU_NOT_EVICTING
@@ -330,11 +395,24 @@ dbuf_evict_user(dmu_buf_impl_t *db)
#endif
/*
- * Invoke the callback from a taskq to avoid lock order reversals
- * and limit stack depth.
+ * There are two eviction callbacks - one that we call synchronously
+ * and one that we invoke via a taskq. The async one is useful for
+ * avoiding lock order reversals and limiting stack depth.
+ *
+ * Note that if we have a sync callback but no async callback,
+ * it's likely that the sync callback will free the structure
+ * containing the dbu. In that case we need to take care to not
+ * dereference dbu after calling the sync evict func.
*/
- taskq_dispatch_ent(dbu_evict_taskq, dbu->dbu_evict_func, dbu, 0,
- &dbu->dbu_tqent);
+ boolean_t has_async = (dbu->dbu_evict_func_async != NULL);
+
+ if (dbu->dbu_evict_func_sync != NULL)
+ dbu->dbu_evict_func_sync(dbu);
+
+ if (has_async) {
+ taskq_dispatch_ent(dbu_evict_taskq, dbu->dbu_evict_func_async,
+ dbu, 0, &dbu->dbu_tqent);
+ }
}
boolean_t
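The sync/async split above can be used as follows; a hedged sketch (all
my_-prefixed names are hypothetical, not part of this patch):

/*
 * Register both eviction callbacks for a dbuf user. The sync callback
 * runs in the releasing thread and must not free the user structure,
 * because the async callback (dispatched to the dbu_evict taskq) still
 * dereferences it; the async callback frees it last.
 */
typedef struct my_user {
	dmu_buf_user_t mu_dbu;
	dmu_buf_t *mu_db;
} my_user_t;

static void
my_evict_sync(void *arg)
{
	/* fast, lock-safe teardown only; no kmem_free(arg) here */
}

static void
my_evict_async(void *arg)
{
	my_user_t *mu = arg;

	kmem_free(mu, sizeof (*mu));	/* last reference */
}

static void
my_user_init(my_user_t *mu)
{
	dmu_buf_init_user(&mu->mu_dbu, my_evict_sync, my_evict_async,
	    &mu->mu_db);
}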
@@ -356,17 +434,193 @@ dbuf_is_metadata(dmu_buf_impl_t *db)
}
}
-void
-dbuf_evict(dmu_buf_impl_t *db)
+
+/*
+ * This function *must* return indices evenly distributed between all
+ * sublists of the multilist. This is needed due to how the dbuf eviction
+ * code is laid out; dbuf_evict_thread() assumes dbufs are evenly
+ * distributed between all sublists and uses this assumption when
+ * deciding which sublist to evict from and how much to evict from it.
+ */
+unsigned int
+dbuf_cache_multilist_index_func(multilist_t *ml, void *obj)
{
- ASSERT(MUTEX_HELD(&db->db_mtx));
- ASSERT(db->db_buf == NULL);
- ASSERT(db->db_data_pending == NULL);
+ dmu_buf_impl_t *db = obj;
+
+ /*
+ * The assumption here is that the hash value for a given
+ * dmu_buf_impl_t will remain constant throughout its lifetime
+ * (i.e. its objset, object, level and blkid fields don't change).
+ * Thus, we don't need to store the dbuf's sublist index
+ * on insertion, as this index can be recalculated on removal.
+ *
+ * Also, the low order bits of the hash value are thought to be
+ * distributed evenly. Otherwise, in the case that the multilist
+ * has a power of two number of sublists, each sublists' usage
+ * would not be evenly distributed.
+ */
+ return (dbuf_hash(db->db_objset, db->db.db_object,
+ db->db_level, db->db_blkid) %
+ multilist_get_num_sublists(ml));
+}
- dbuf_clear(db);
- dbuf_destroy(db);
+static inline unsigned long
+dbuf_cache_target_bytes(void)
+{
+ return (MIN(dbuf_cache_max_bytes,
+ arc_target_bytes() >> dbuf_cache_max_shift));
}
+static inline boolean_t
+dbuf_cache_above_hiwater(void)
+{
+ uint64_t dbuf_cache_target = dbuf_cache_target_bytes();
+
+ uint64_t dbuf_cache_hiwater_bytes =
+ (dbuf_cache_target * dbuf_cache_hiwater_pct) / 100;
+
+ return (refcount_count(&dbuf_cache_size) >
+ dbuf_cache_target + dbuf_cache_hiwater_bytes);
+}
+
+static inline boolean_t
+dbuf_cache_above_lowater(void)
+{
+ uint64_t dbuf_cache_target = dbuf_cache_target_bytes();
+
+ uint64_t dbuf_cache_lowater_bytes =
+ (dbuf_cache_target * dbuf_cache_lowater_pct) / 100;
+
+ return (refcount_count(&dbuf_cache_size) >
+ dbuf_cache_target - dbuf_cache_lowater_bytes);
+}
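/*
 * Worked example for the watermarks above (target size assumed, not
 * part of this patch): with dbuf_cache_target_bytes() == 100 MB and
 * the default 10% watermarks,
 *
 *	lowater = 100 MB - 10% =  90 MB  eviction thread stops here
 *	hiwater = 100 MB + 10% = 110 MB  callers evict directly above this
 *
 * Between 100 MB and 110 MB the eviction thread is signalled but
 * callers are not throttled.
 */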
+
+/*
+ * Evict the oldest eligible dbuf from the dbuf cache.
+ */
+static void
+dbuf_evict_one(void)
+{
+ int idx = multilist_get_random_index(dbuf_cache);
+ multilist_sublist_t *mls = multilist_sublist_lock(dbuf_cache, idx);
+ dmu_buf_impl_t *db;
+ ASSERT(!MUTEX_HELD(&dbuf_evict_lock));
+
+ /*
+ * Set the thread's tsd to indicate that it's processing evictions.
+ * Once a thread stops evicting from the dbuf cache it will
+ * reset its tsd to NULL.
+ */
+ ASSERT3P(tsd_get(zfs_dbuf_evict_key), ==, NULL);
+ (void) tsd_set(zfs_dbuf_evict_key, (void *)B_TRUE);
+
+ db = multilist_sublist_tail(mls);
+ while (db != NULL && mutex_tryenter(&db->db_mtx) == 0) {
+ db = multilist_sublist_prev(mls, db);
+ }
+
+ DTRACE_PROBE2(dbuf__evict__one, dmu_buf_impl_t *, db,
+ multilist_sublist_t *, mls);
+
+ if (db != NULL) {
+ multilist_sublist_remove(mls, db);
+ multilist_sublist_unlock(mls);
+ (void) refcount_remove_many(&dbuf_cache_size,
+ db->db.db_size, db);
+ dbuf_destroy(db);
+ } else {
+ multilist_sublist_unlock(mls);
+ }
+ (void) tsd_set(zfs_dbuf_evict_key, NULL);
+}
+
+/*
+ * The dbuf evict thread is responsible for aging out dbufs from the
+ * cache. Once the cache has reached its maximum size, dbufs are removed
+ * and destroyed. The eviction thread will continue running until the size
+ * of the dbuf cache is at or below the maximum size. Once the dbuf is aged
+ * out of the cache it is destroyed and becomes eligible for arc eviction.
+ */
+static void
+dbuf_evict_thread(void)
+{
+ callb_cpr_t cpr;
+
+ CALLB_CPR_INIT(&cpr, &dbuf_evict_lock, callb_generic_cpr, FTAG);
+
+ mutex_enter(&dbuf_evict_lock);
+ while (!dbuf_evict_thread_exit) {
+ while (!dbuf_cache_above_lowater() && !dbuf_evict_thread_exit) {
+ CALLB_CPR_SAFE_BEGIN(&cpr);
+ (void) cv_timedwait_sig_hires(&dbuf_evict_cv,
+ &dbuf_evict_lock, SEC2NSEC(1), MSEC2NSEC(1), 0);
+ CALLB_CPR_SAFE_END(&cpr, &dbuf_evict_lock);
+ }
+ mutex_exit(&dbuf_evict_lock);
+
+ /*
+ * Keep evicting as long as we're above the low water mark
+ * for the cache. We do this without holding the locks to
+ * minimize lock contention.
+ */
+ while (dbuf_cache_above_lowater() && !dbuf_evict_thread_exit) {
+ dbuf_evict_one();
+ }
+
+ mutex_enter(&dbuf_evict_lock);
+ }
+
+ dbuf_evict_thread_exit = B_FALSE;
+ cv_broadcast(&dbuf_evict_cv);
+ CALLB_CPR_EXIT(&cpr); /* drops dbuf_evict_lock */
+ thread_exit();
+}
+
+/*
+ * Wake up the dbuf eviction thread if the dbuf cache is at its max size.
+ * If the dbuf cache is at its high water mark, then evict a dbuf from the
+ * dbuf cache using the callers context.
+ */
+static void
+dbuf_evict_notify(void)
+{
+
+ /*
+ * We use thread specific data to track when a thread has
+ * started processing evictions. This allows us to avoid deeply
+ * nested stacks that would have a call flow similar to this:
+ *
+ * dbuf_rele()-->dbuf_rele_and_unlock()-->dbuf_evict_notify()
+ * ^ |
+ * | |
+ * +-----dbuf_destroy()<--dbuf_evict_one()<--------+
+ *
+ * The dbuf_eviction_thread will always have its tsd set until
+ * that thread exits. All other threads will only set their tsd
+ * if they are participating in the eviction process. This only
+ * happens if the eviction thread is unable to process evictions
+ * fast enough. To keep the dbuf cache size in check, other threads
+ * can evict from the dbuf cache directly. Those threads will set
+ * their tsd values so that we ensure that they only evict one dbuf
+ * from the dbuf cache.
+ */
+ if (tsd_get(zfs_dbuf_evict_key) != NULL)
+ return;
+
+ /*
+ * We check if we should evict without holding the dbuf_evict_lock,
+ * because it's OK to occasionally make the wrong decision here,
+ * and grabbing the lock results in massive lock contention.
+ */
+ if (refcount_count(&dbuf_cache_size) > dbuf_cache_target_bytes()) {
+ if (dbuf_cache_above_hiwater())
+ dbuf_evict_one();
+ cv_signal(&dbuf_evict_cv);
+ }
+}
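/*
 * Hedged sketch of the tsd re-entrancy guard described above; the
 * my_-prefixed names are hypothetical and not part of this patch.
 */
static uint_t my_guard_key;		/* created with tsd_create() */

static void
my_guarded_work(void)
{
	if (tsd_get(my_guard_key) != NULL)
		return;			/* already inside on this thread */

	(void) tsd_set(my_guard_key, (void *)B_TRUE);
	/* ... work that may re-enter my_guarded_work() ... */
	(void) tsd_set(my_guard_key, NULL);
}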
+
+
+
void
dbuf_init(void)
{
@@ -401,7 +655,7 @@ dbuf_init(void)
goto retry;
}
- dbuf_cache = kmem_cache_create("dmu_buf_impl_t",
+ dbuf_kmem_cache = kmem_cache_create("dmu_buf_impl_t",
sizeof (dmu_buf_impl_t),
0, dbuf_cons, dbuf_dest, NULL, NULL, NULL, 0);
@@ -410,11 +664,30 @@ dbuf_init(void)
dbuf_stats_init(h);
+ /*
+ * Setup the parameters for the dbuf cache. We cap the size of the
+ * dbuf cache to 1/32nd (default) of the size of the ARC.
+ */
+ dbuf_cache_max_bytes = MIN(dbuf_cache_max_bytes,
+ arc_target_bytes() >> dbuf_cache_max_shift);
+
/*
* All entries are queued via taskq_dispatch_ent(), so min/maxalloc
* configuration is not required.
*/
dbu_evict_taskq = taskq_create("dbu_evict", 1, defclsyspri, 0, 0, 0);
+
+ dbuf_cache = multilist_create(sizeof (dmu_buf_impl_t),
+ offsetof(dmu_buf_impl_t, db_cache_link),
+ dbuf_cache_multilist_index_func);
+ refcount_create(&dbuf_cache_size);
+
+ tsd_create(&zfs_dbuf_evict_key, NULL);
+ dbuf_evict_thread_exit = B_FALSE;
+ mutex_init(&dbuf_evict_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&dbuf_evict_cv, NULL, CV_DEFAULT, NULL);
+ dbuf_cache_evict_thread = thread_create(NULL, 0, dbuf_evict_thread,
+ NULL, 0, &p0, TS_RUN, minclsyspri);
}
void
@@ -436,8 +709,23 @@ dbuf_fini(void)
#else
kmem_free(h->hash_table, (h->hash_table_mask + 1) * sizeof (void *));
#endif
- kmem_cache_destroy(dbuf_cache);
+ kmem_cache_destroy(dbuf_kmem_cache);
taskq_destroy(dbu_evict_taskq);
+
+ mutex_enter(&dbuf_evict_lock);
+ dbuf_evict_thread_exit = B_TRUE;
+ while (dbuf_evict_thread_exit) {
+ cv_signal(&dbuf_evict_cv);
+ cv_wait(&dbuf_evict_cv, &dbuf_evict_lock);
+ }
+ mutex_exit(&dbuf_evict_lock);
+ tsd_destroy(&zfs_dbuf_evict_key);
+
+ mutex_destroy(&dbuf_evict_lock);
+ cv_destroy(&dbuf_evict_cv);
+
+ refcount_destroy(&dbuf_cache_size);
+ multilist_destroy(dbuf_cache);
}
/*
@@ -476,7 +764,6 @@ dbuf_verify(dmu_buf_impl_t *db)
ASSERT3U(db->db.db_offset, ==, DMU_BONUS_BLKID);
} else if (db->db_blkid == DMU_SPILL_BLKID) {
ASSERT(dn != NULL);
- ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
ASSERT0(db->db.db_offset);
} else {
ASSERT3U(db->db.db_offset, ==, db->db_blkid * db->db.db_size);
@@ -517,7 +804,7 @@ dbuf_verify(dmu_buf_impl_t *db)
} else {
/* db is pointed to by an indirect block */
ASSERTV(int epb = db->db_parent->db.db_size >>
- SPA_BLKPTRSHIFT);
+ SPA_BLKPTRSHIFT);
ASSERT3U(db->db_parent->db_level, ==, db->db_level+1);
ASSERT3U(db->db_parent->db.db_object, ==,
db->db.db_object);
@@ -541,13 +828,50 @@ dbuf_verify(dmu_buf_impl_t *db)
* If the blkptr isn't set but they have nonzero data,
* it had better be dirty, otherwise we'll lose that
* data when we evict this buffer.
+ *
+ * There is an exception to this rule for indirect blocks; in
+ * this case, if the indirect block is a hole, we fill in a few
+ * fields on each of the child blocks (importantly, birth time)
+ * to prevent hole birth times from being lost when you
+ * partially fill in a hole.
*/
if (db->db_dirtycnt == 0) {
- ASSERTV(uint64_t *buf = db->db.db_data);
- int i;
+ if (db->db_level == 0) {
+ uint64_t *buf = db->db.db_data;
+ int i;
- for (i = 0; i < db->db.db_size >> 3; i++) {
- ASSERT(buf[i] == 0);
+ for (i = 0; i < db->db.db_size >> 3; i++) {
+ ASSERT(buf[i] == 0);
+ }
+ } else {
+ int i;
+ blkptr_t *bps = db->db.db_data;
+ ASSERT3U(1 << DB_DNODE(db)->dn_indblkshift, ==,
+ db->db.db_size);
+ /*
+ * We want to verify that all the blkptrs in the
+ * indirect block are holes, but we may have
+ * automatically set up a few fields for them.
+ * We iterate through each blkptr and verify
+ * they only have those fields set.
+ */
+ for (i = 0;
+ i < db->db.db_size / sizeof (blkptr_t);
+ i++) {
+ blkptr_t *bp = &bps[i];
+ ASSERT(ZIO_CHECKSUM_IS_ZERO(
+ &bp->blk_cksum));
+ ASSERT(
+ DVA_IS_EMPTY(&bp->blk_dva[0]) &&
+ DVA_IS_EMPTY(&bp->blk_dva[1]) &&
+ DVA_IS_EMPTY(&bp->blk_dva[2]));
+ ASSERT0(bp->blk_fill);
+ ASSERT0(bp->blk_pad[0]);
+ ASSERT0(bp->blk_pad[1]);
+ ASSERT(!BP_IS_EMBEDDED(bp));
+ ASSERT(BP_IS_HOLE(bp));
+ ASSERT0(bp->blk_phys_birth);
+ }
}
}
}
@@ -560,7 +884,7 @@ dbuf_clear_data(dmu_buf_impl_t *db)
{
ASSERT(MUTEX_HELD(&db->db_mtx));
dbuf_evict_user(db);
- db->db_buf = NULL;
+ ASSERT3P(db->db_buf, ==, NULL);
db->db.db_data = NULL;
if (db->db_state != DB_NOFILL)
db->db_state = DB_UNCACHED;
@@ -575,8 +899,6 @@ dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf)
db->db_buf = buf;
ASSERT(buf->b_data != NULL);
db->db.db_data = buf->b_data;
- if (!arc_released(buf))
- arc_set_callback(buf, dbuf_do_evict, db);
}
/*
@@ -587,28 +909,65 @@ dbuf_loan_arcbuf(dmu_buf_impl_t *db)
{
arc_buf_t *abuf;
+ ASSERT(db->db_blkid != DMU_BONUS_BLKID);
mutex_enter(&db->db_mtx);
if (arc_released(db->db_buf) || refcount_count(&db->db_holds) > 1) {
int blksz = db->db.db_size;
spa_t *spa = db->db_objset->os_spa;
mutex_exit(&db->db_mtx);
- abuf = arc_loan_buf(spa, blksz);
+ abuf = arc_loan_buf(spa, B_FALSE, blksz);
bcopy(db->db.db_data, abuf->b_data, blksz);
} else {
abuf = db->db_buf;
arc_loan_inuse_buf(abuf, db);
+ db->db_buf = NULL;
dbuf_clear_data(db);
mutex_exit(&db->db_mtx);
}
return (abuf);
}
+/*
+ * Calculate which level n block references the data at the level 0 offset
+ * provided.
+ */
uint64_t
-dbuf_whichblock(dnode_t *dn, uint64_t offset)
+dbuf_whichblock(const dnode_t *dn, const int64_t level, const uint64_t offset)
{
- if (dn->dn_datablkshift) {
- return (offset >> dn->dn_datablkshift);
+ if (dn->dn_datablkshift != 0 && dn->dn_indblkshift != 0) {
+ /*
+ * The level n blkid is equal to the level 0 blkid divided by
+ * the number of level 0s in a level n block.
+ *
+ * The level 0 blkid is offset >> datablkshift =
+ * offset / 2^datablkshift.
+ *
+ * The number of level 0s in a level n is the number of block
+ * pointers in an indirect block, raised to the power of level.
+ * This is 2^(indblkshift - SPA_BLKPTRSHIFT)^level =
+ * 2^(level*(indblkshift - SPA_BLKPTRSHIFT)).
+ *
+ * Thus, the level n blkid is: offset /
+ * ((2^datablkshift)*(2^(level*(indblkshift - SPA_BLKPTRSHIFT)))
+ * = offset / 2^(datablkshift + level *
+ * (indblkshift - SPA_BLKPTRSHIFT))
+ * = offset >> (datablkshift + level *
+ * (indblkshift - SPA_BLKPTRSHIFT))
+ */
+
+ const unsigned exp = dn->dn_datablkshift +
+ level * (dn->dn_indblkshift - SPA_BLKPTRSHIFT);
+
+ if (exp >= 8 * sizeof (offset)) {
+ /* This only happens on the highest indirection level */
+ ASSERT3U(level, ==, dn->dn_nlevels - 1);
+ return (0);
+ }
+
+ ASSERT3U(exp, <, 8 * sizeof (offset));
+
+ return (offset >> exp);
} else {
ASSERT3U(offset, <, dn->dn_datablksz);
return (0);
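A worked instance of the formula above (block sizes assumed, not part of
this patch):

/*
 * With 128 KB data blocks (datablkshift = 17) and 16 KB indirect
 * blocks holding 128 block pointers each (indblkshift = 14,
 * SPA_BLKPTRSHIFT = 7), the level-1 block covering byte offset 1 GB is
 *
 *	blkid = offset >> (17 + 1 * (14 - 7))
 *	      = 0x40000000 >> 24
 *	      = 64
 *
 * i.e. each level-1 block spans 16 MB and offset 1 GB falls in the
 * 65th one (blkid 64).
 */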
@@ -642,7 +1001,7 @@ dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb)
} else {
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
ASSERT3P(db->db_buf, ==, NULL);
- VERIFY(arc_buf_remove_ref(buf, db));
+ arc_buf_destroy(buf, db);
db->db_state = DB_UNCACHED;
}
cv_broadcast(&db->db_changed);
@@ -650,7 +1009,7 @@ dbuf_read_done(zio_t *zio, arc_buf_t *buf, void *vdb)
}
static int
-dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
+dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
{
dnode_t *dn;
zbookmark_phys_t zb;
@@ -667,13 +1026,18 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
ASSERT(db->db_buf == NULL);
if (db->db_blkid == DMU_BONUS_BLKID) {
+ /*
+ * The bonus length stored in the dnode may be less than
+ * the maximum available space in the bonus buffer.
+ */
int bonuslen = MIN(dn->dn_bonuslen, dn->dn_phys->dn_bonuslen);
+ int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
ASSERT3U(bonuslen, <=, db->db.db_size);
- db->db.db_data = zio_buf_alloc(DN_MAX_BONUSLEN);
- arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
- if (bonuslen < DN_MAX_BONUSLEN)
- bzero(db->db.db_data, DN_MAX_BONUSLEN);
+ db->db.db_data = kmem_alloc(max_bonuslen, KM_SLEEP);
+ arc_space_consume(max_bonuslen, ARC_SPACE_BONUS);
+ if (bonuslen < max_bonuslen)
+ bzero(db->db.db_data, max_bonuslen);
if (bonuslen)
bcopy(DN_BONUS(dn->dn_phys), db->db.db_data, bonuslen);
DB_DNODE_EXIT(db);
@@ -692,12 +1056,33 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
BP_IS_HOLE(db->db_blkptr)))) {
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
- DB_DNODE_EXIT(db);
- dbuf_set_data(db, arc_buf_alloc(db->db_objset->os_spa,
- db->db.db_size, db, type));
+ dbuf_set_data(db, arc_alloc_buf(db->db_objset->os_spa, db, type,
+ db->db.db_size));
bzero(db->db.db_data, db->db.db_size);
+
+ if (db->db_blkptr != NULL && db->db_level > 0 &&
+ BP_IS_HOLE(db->db_blkptr) &&
+ db->db_blkptr->blk_birth != 0) {
+ blkptr_t *bps = db->db.db_data;
+ int i;
+ for (i = 0; i < ((1 <<
+ DB_DNODE(db)->dn_indblkshift) / sizeof (blkptr_t));
+ i++) {
+ blkptr_t *bp = &bps[i];
+ ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
+ 1 << dn->dn_indblkshift);
+ BP_SET_LSIZE(bp,
+ BP_GET_LEVEL(db->db_blkptr) == 1 ?
+ dn->dn_datablksz :
+ BP_GET_LSIZE(db->db_blkptr));
+ BP_SET_TYPE(bp, BP_GET_TYPE(db->db_blkptr));
+ BP_SET_LEVEL(bp,
+ BP_GET_LEVEL(db->db_blkptr) - 1);
+ BP_SET_BIRTH(bp, db->db_blkptr->blk_birth, 0);
+ }
+ }
+ DB_DNODE_EXIT(db);
db->db_state = DB_CACHED;
- *flags |= DB_RF_CACHED;
mutex_exit(&db->db_mtx);
return (0);
}
@@ -709,8 +1094,6 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
if (DBUF_IS_L2CACHEABLE(db))
aflags |= ARC_FLAG_L2CACHE;
- if (DBUF_IS_L2COMPRESSIBLE(db))
- aflags |= ARC_FLAG_L2COMPRESS;
SET_BOOKMARK(&zb, db->db_objset->os_dsl_dataset ?
db->db_objset->os_dsl_dataset->ds_object : DMU_META_OBJSET,
@@ -720,19 +1103,79 @@ dbuf_read_impl(dmu_buf_impl_t *db, zio_t *zio, uint32_t *flags)
err = arc_read(zio, db->db_objset->os_spa, db->db_blkptr,
dbuf_read_done, db, ZIO_PRIORITY_SYNC_READ,
- (*flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
+ (flags & DB_RF_CANFAIL) ? ZIO_FLAG_CANFAIL : ZIO_FLAG_MUSTSUCCEED,
&aflags, &zb);
- if (aflags & ARC_FLAG_CACHED)
- *flags |= DB_RF_CACHED;
- return (SET_ERROR(err));
+ return (err);
+}
+
+/*
+ * This is our just-in-time copy function. It makes a copy of buffers that
+ * have been modified in a previous transaction group before we access them in
+ * the current active group.
+ *
+ * This function is used in three places: when we are dirtying a buffer for the
+ * first time in a txg, when we are freeing a range in a dnode that includes
+ * this buffer, and when we are accessing a buffer which was received compressed
+ * and later referenced in a WRITE_BYREF record.
+ *
+ * Note that when we are called from dbuf_free_range() we do not put a hold on
+ * the buffer, we just traverse the active dbuf list for the dnode.
+ */
+static void
+dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
+{
+ dbuf_dirty_record_t *dr = db->db_last_dirty;
+
+ ASSERT(MUTEX_HELD(&db->db_mtx));
+ ASSERT(db->db.db_data != NULL);
+ ASSERT(db->db_level == 0);
+ ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT);
+
+ if (dr == NULL ||
+ (dr->dt.dl.dr_data !=
+ ((db->db_blkid == DMU_BONUS_BLKID) ? db->db.db_data : db->db_buf)))
+ return;
+
+ /*
+ * If the last dirty record for this dbuf has not yet synced
+ * and its referencing the dbuf data, either:
+ * reset the reference to point to a new copy,
+ * or (if there a no active holders)
+ * just null out the current db_data pointer.
+ */
+ ASSERT(dr->dr_txg >= txg - 2);
+ if (db->db_blkid == DMU_BONUS_BLKID) {
+ dnode_t *dn = DB_DNODE(db);
+ int bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
+ dr->dt.dl.dr_data = kmem_alloc(bonuslen, KM_SLEEP);
+ arc_space_consume(bonuslen, ARC_SPACE_BONUS);
+ bcopy(db->db.db_data, dr->dt.dl.dr_data, bonuslen);
+ } else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
+ int size = arc_buf_size(db->db_buf);
+ arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
+ spa_t *spa = db->db_objset->os_spa;
+ enum zio_compress compress_type =
+ arc_get_compression(db->db_buf);
+
+ if (compress_type == ZIO_COMPRESS_OFF) {
+ dr->dt.dl.dr_data = arc_alloc_buf(spa, db, type, size);
+ } else {
+ ASSERT3U(type, ==, ARC_BUFC_DATA);
+ dr->dt.dl.dr_data = arc_alloc_compressed_buf(spa, db,
+ size, arc_buf_lsize(db->db_buf), compress_type);
+ }
+ bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
+ } else {
+ db->db_buf = NULL;
+ dbuf_clear_data(db);
+ }
}
int
dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
{
int err = 0;
- boolean_t havepzio = (zio != NULL);
boolean_t prefetch;
dnode_t *dn;
@@ -756,32 +1199,45 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
mutex_enter(&db->db_mtx);
if (db->db_state == DB_CACHED) {
+ /*
+ * If the arc buf is compressed, we need to decompress it to
+ * read the data. This could happen during the "zfs receive" of
+ * a stream which is compressed and deduplicated.
+ */
+ if (db->db_buf != NULL &&
+ arc_get_compression(db->db_buf) != ZIO_COMPRESS_OFF) {
+ dbuf_fix_old_data(db,
+ spa_syncing_txg(dmu_objset_spa(db->db_objset)));
+ err = arc_decompress(db->db_buf);
+ dbuf_set_data(db, db->db_buf);
+ }
mutex_exit(&db->db_mtx);
if (prefetch)
- dmu_zfetch(&dn->dn_zfetch, db->db.db_offset,
- db->db.db_size, TRUE);
+ dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE);
if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db);
} else if (db->db_state == DB_UNCACHED) {
spa_t *spa = dn->dn_objset->os_spa;
+ boolean_t need_wait = B_FALSE;
- if (zio == NULL)
+ if (zio == NULL &&
+ db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)) {
zio = zio_root(spa, NULL, NULL, ZIO_FLAG_CANFAIL);
-
- err = dbuf_read_impl(db, zio, &flags);
+ need_wait = B_TRUE;
+ }
+ err = dbuf_read_impl(db, zio, flags);
/* dbuf_read_impl has dropped db_mtx for us */
if (!err && prefetch)
- dmu_zfetch(&dn->dn_zfetch, db->db.db_offset,
- db->db.db_size, flags & DB_RF_CACHED);
+ dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE);
if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db);
- if (!err && !havepzio)
+ if (!err && need_wait)
err = zio_wait(zio);
} else {
/*
@@ -794,8 +1250,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
*/
mutex_exit(&db->db_mtx);
if (prefetch)
- dmu_zfetch(&dn->dn_zfetch, db->db.db_offset,
- db->db.db_size, TRUE);
+ dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE);
if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(db);
@@ -817,7 +1272,6 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
mutex_exit(&db->db_mtx);
}
- ASSERT(err || havepzio || db->db_state == DB_CACHED);
return (err);
}
@@ -835,7 +1289,7 @@ dbuf_noread(dmu_buf_impl_t *db)
ASSERT(db->db_buf == NULL);
ASSERT(db->db.db_data == NULL);
- dbuf_set_data(db, arc_buf_alloc(spa, db->db.db_size, db, type));
+ dbuf_set_data(db, arc_alloc_buf(spa, db, type, db->db.db_size));
db->db_state = DB_FILL;
} else if (db->db_state == DB_NOFILL) {
dbuf_clear_data(db);
@@ -845,59 +1299,6 @@ dbuf_noread(dmu_buf_impl_t *db)
mutex_exit(&db->db_mtx);
}
-/*
- * This is our just-in-time copy function. It makes a copy of
- * buffers, that have been modified in a previous transaction
- * group, before we modify them in the current active group.
- *
- * This function is used in two places: when we are dirtying a
- * buffer for the first time in a txg, and when we are freeing
- * a range in a dnode that includes this buffer.
- *
- * Note that when we are called from dbuf_free_range() we do
- * not put a hold on the buffer, we just traverse the active
- * dbuf list for the dnode.
- */
-static void
-dbuf_fix_old_data(dmu_buf_impl_t *db, uint64_t txg)
-{
- dbuf_dirty_record_t *dr = db->db_last_dirty;
-
- ASSERT(MUTEX_HELD(&db->db_mtx));
- ASSERT(db->db.db_data != NULL);
- ASSERT(db->db_level == 0);
- ASSERT(db->db.db_object != DMU_META_DNODE_OBJECT);
-
- if (dr == NULL ||
- (dr->dt.dl.dr_data !=
- ((db->db_blkid == DMU_BONUS_BLKID) ? db->db.db_data : db->db_buf)))
- return;
-
- /*
- * If the last dirty record for this dbuf has not yet synced
- * and it's referencing the dbuf data, either:
- * reset the reference to point to a new copy,
- * or (if there are no active holders)
- * just null out the current db_data pointer.
- */
- ASSERT(dr->dr_txg >= txg - 2);
- if (db->db_blkid == DMU_BONUS_BLKID) {
- /* Note that the data bufs here are zio_bufs */
- dr->dt.dl.dr_data = zio_buf_alloc(DN_MAX_BONUSLEN);
- arc_space_consume(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
- bcopy(db->db.db_data, dr->dt.dl.dr_data, DN_MAX_BONUSLEN);
- } else if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
- int size = db->db.db_size;
- arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
- spa_t *spa = db->db_objset->os_spa;
-
- dr->dt.dl.dr_data = arc_buf_alloc(spa, size, db, type);
- bcopy(db->db.db_data, dr->dt.dl.dr_data->b_data, size);
- } else {
- dbuf_clear_data(db);
- }
-}
-
void
dbuf_unoverride(dbuf_dirty_record_t *dr)
{
@@ -906,6 +1307,11 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
uint64_t txg = dr->dr_txg;
ASSERT(MUTEX_HELD(&db->db_mtx));
+ /*
+ * This assert is valid because dmu_sync() expects to be called by
+ * a zilog's get_data while holding a range lock. This call only
+ * comes from dbuf_dirty() callers who must also hold a range lock.
+ */
ASSERT(dr->dt.dl.dr_override_state != DR_IN_DMU_SYNC);
ASSERT(db->db_level == 0);
@@ -937,9 +1343,6 @@ dbuf_unoverride(dbuf_dirty_record_t *dr)
* Evict (if its unreferenced) or clear (if its referenced) any level-0
* data blocks in the free range, so that any future readers will find
* empty blocks.
- *
- * This is a no-op if the dataset is in the middle of an incremental
- * receive; see comment below for details.
*/
void
dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
@@ -949,10 +1352,9 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
dmu_buf_impl_t *db, *db_next;
uint64_t txg = tx->tx_txg;
avl_index_t where;
- boolean_t freespill =
- (start_blkid == DMU_SPILL_BLKID || end_blkid == DMU_SPILL_BLKID);
- if (end_blkid > dn->dn_maxblkid && !freespill)
+ if (end_blkid > dn->dn_maxblkid &&
+ !(start_blkid == DMU_SPILL_BLKID || end_blkid == DMU_SPILL_BLKID))
end_blkid = dn->dn_maxblkid;
dprintf_dnode(dn, "start=%llu end=%llu\n", start_blkid, end_blkid);
@@ -962,28 +1364,9 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
db_search->db_state = DB_SEARCH;
mutex_enter(&dn->dn_dbufs_mtx);
- if (start_blkid >= dn->dn_unlisted_l0_blkid && !freespill) {
- /* There can't be any dbufs in this range; no need to search. */
-#ifdef DEBUG
- db = avl_find(&dn->dn_dbufs, db_search, &where);
- ASSERT3P(db, ==, NULL);
- db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER);
- ASSERT(db == NULL || db->db_level > 0);
-#endif
- goto out;
- } else if (dmu_objset_is_receiving(dn->dn_objset)) {
- /*
- * If we are receiving, we expect there to be no dbufs in
- * the range to be freed, because receive modifies each
- * block at most once, and in offset order. If this is
- * not the case, it can lead to performance problems,
- * so note that we unexpectedly took the slow path.
- */
- atomic_inc_64(&zfs_free_range_recv_miss);
- }
-
db = avl_find(&dn->dn_dbufs, db_search, &where);
ASSERT3P(db, ==, NULL);
+
db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER);
for (; db != NULL; db = db_next) {
@@ -1017,7 +1400,7 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
}
if (refcount_count(&db->db_holds) == 0) {
ASSERT(db->db_buf);
- dbuf_clear(db);
+ dbuf_destroy(db);
continue;
}
/* The dbuf is referenced */
@@ -1056,46 +1439,10 @@ dbuf_free_range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid,
mutex_exit(&db->db_mtx);
}
-out:
kmem_free(db_search, sizeof (dmu_buf_impl_t));
mutex_exit(&dn->dn_dbufs_mtx);
}
-static int
-dbuf_block_freeable(dmu_buf_impl_t *db)
-{
- dsl_dataset_t *ds = db->db_objset->os_dsl_dataset;
- uint64_t birth_txg = 0;
-
- /*
- * We don't need any locking to protect db_blkptr:
- * If it's syncing, then db_last_dirty will be set
- * so we'll ignore db_blkptr.
- *
- * This logic ensures that only block births for
- * filled blocks are considered.
- */
- ASSERT(MUTEX_HELD(&db->db_mtx));
- if (db->db_last_dirty && (db->db_blkptr == NULL ||
- !BP_IS_HOLE(db->db_blkptr))) {
- birth_txg = db->db_last_dirty->dr_txg;
- } else if (db->db_blkptr != NULL && !BP_IS_HOLE(db->db_blkptr)) {
- birth_txg = db->db_blkptr->blk_birth;
- }
-
- /*
- * If this block don't exist or is in a snapshot, it can't be freed.
- * Don't pass the bp to dsl_dataset_block_freeable() since we
- * are holding the db_mtx lock and might deadlock if we are
- * prefetching a dedup-ed block.
- */
- if (birth_txg != 0)
- return (ds == NULL ||
- dsl_dataset_block_freeable(ds, NULL, birth_txg));
- else
- return (B_FALSE);
-}
-
void
dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
{
@@ -1125,7 +1472,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
dmu_buf_will_dirty(&db->db, tx);
/* create the data buffer for the new block */
- buf = arc_buf_alloc(dn->dn_objset->os_spa, size, db, type);
+ buf = arc_alloc_buf(dn->dn_objset->os_spa, db, type, size);
/* copy old block data to the new block */
obuf = db->db_buf;
@@ -1136,7 +1483,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
mutex_enter(&db->db_mtx);
dbuf_set_data(db, buf);
- VERIFY(arc_buf_remove_ref(obuf, db));
+ arc_buf_destroy(obuf, db);
db->db.db_size = size;
if (db->db_level == 0) {
@@ -1145,7 +1492,7 @@ dbuf_new_size(dmu_buf_impl_t *db, int size, dmu_tx_t *tx)
}
mutex_exit(&db->db_mtx);
- dnode_willuse_space(dn, size-osize, tx);
+ dmu_objset_willuse_space(dn->dn_objset, size - osize, tx);
DB_DNODE_EXIT(db);
}
@@ -1162,6 +1509,32 @@ dbuf_release_bp(dmu_buf_impl_t *db)
(void) arc_release(db->db_buf, db);
}
+/*
+ * We already have a dirty record for this TXG, and we are being
+ * dirtied again.
+ */
+static void
+dbuf_redirty(dbuf_dirty_record_t *dr)
+{
+ dmu_buf_impl_t *db = dr->dr_dbuf;
+
+ ASSERT(MUTEX_HELD(&db->db_mtx));
+
+ if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID) {
+ /*
+ * If this buffer has already been written out,
+ * we now need to reset its state.
+ */
+ dbuf_unoverride(dr);
+ if (db->db.db_object != DMU_META_DNODE_OBJECT &&
+ db->db_state != DB_NOFILL) {
+ /* Already released on initial dirty, so just thaw. */
+ ASSERT(arc_released(db->db_buf));
+ arc_buf_thaw(db->db_buf);
+ }
+ }
+}
+
dbuf_dirty_record_t *
dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
{
@@ -1169,7 +1542,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
objset_t *os;
dbuf_dirty_record_t **drp, *dr;
int drop_struct_lock = FALSE;
- boolean_t do_free_accounting = B_FALSE;
int txgoff = tx->tx_txg & TXG_MASK;
ASSERT(tx->tx_txg != 0);
@@ -1183,10 +1555,18 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
* objects may be dirtied in syncing context, but only if they
* were already pre-dirtied in open context.
*/
+#ifdef DEBUG
+ if (dn->dn_objset->os_dsl_dataset != NULL) {
+ rrw_enter(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock,
+ RW_READER, FTAG);
+ }
ASSERT(!dmu_tx_is_syncing(tx) ||
BP_IS_HOLE(dn->dn_objset->os_rootbp) ||
DMU_OBJECT_IS_SPECIAL(dn->dn_object) ||
dn->dn_objset->os_dsl_dataset == NULL);
+ if (dn->dn_objset->os_dsl_dataset != NULL)
+ rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock, FTAG);
+#endif
/*
* We make this assert for private objects as well, but after we
* check if we're already dirty. They are allowed to re-dirty
@@ -1211,12 +1591,21 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
* Don't set dirtyctx to SYNC if we're just modifying this as we
* initialize the objset.
*/
- if (dn->dn_dirtyctx == DN_UNDIRTIED &&
- !BP_IS_HOLE(dn->dn_objset->os_rootbp)) {
- dn->dn_dirtyctx =
- (dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN);
- ASSERT(dn->dn_dirtyctx_firstset == NULL);
- dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP);
+ if (dn->dn_dirtyctx == DN_UNDIRTIED) {
+ if (dn->dn_objset->os_dsl_dataset != NULL) {
+ rrw_enter(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock,
+ RW_READER, FTAG);
+ }
+ if (!BP_IS_HOLE(dn->dn_objset->os_rootbp)) {
+ dn->dn_dirtyctx = (dmu_tx_is_syncing(tx) ?
+ DN_DIRTY_SYNC : DN_DIRTY_OPEN);
+ ASSERT(dn->dn_dirtyctx_firstset == NULL);
+ dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP);
+ }
+ if (dn->dn_objset->os_dsl_dataset != NULL) {
+ rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock,
+ FTAG);
+ }
}
mutex_exit(&dn->dn_mtx);
@@ -1234,16 +1623,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
if (dr && dr->dr_txg == tx->tx_txg) {
DB_DNODE_EXIT(db);
- if (db->db_level == 0 && db->db_blkid != DMU_BONUS_BLKID) {
- /*
- * If this buffer has already been written out,
- * we now need to reset its state.
- */
- dbuf_unoverride(dr);
- if (db->db.db_object != DMU_META_DNODE_OBJECT &&
- db->db_state != DB_NOFILL)
- arc_buf_thaw(db->db_buf);
- }
+ dbuf_redirty(dr);
mutex_exit(&db->db_mtx);
return (dr);
}
@@ -1256,11 +1636,6 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
(dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN));
ASSERT3U(dn->dn_nlevels, >, db->db_level);
- ASSERT((dn->dn_phys->dn_nlevels == 0 && db->db_level == 0) ||
- dn->dn_phys->dn_nlevels > db->db_level ||
- dn->dn_next_nlevels[txgoff] > db->db_level ||
- dn->dn_next_nlevels[(tx->tx_txg-1) & TXG_MASK] > db->db_level ||
- dn->dn_next_nlevels[(tx->tx_txg-2) & TXG_MASK] > db->db_level);
/*
* We should only be dirtying in syncing context if it's the
@@ -1270,22 +1645,21 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
* this assertion only if we're not already dirty.
*/
os = dn->dn_objset;
+ VERIFY3U(tx->tx_txg, <=, spa_final_dirty_txg(os->os_spa));
+#ifdef DEBUG
+ if (dn->dn_objset->os_dsl_dataset != NULL)
+ rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_READER, FTAG);
ASSERT(!dmu_tx_is_syncing(tx) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) ||
os->os_dsl_dataset == NULL || BP_IS_HOLE(os->os_rootbp));
+ if (dn->dn_objset->os_dsl_dataset != NULL)
+ rrw_exit(&os->os_dsl_dataset->ds_bp_rwlock, FTAG);
+#endif
ASSERT(db->db.db_size != 0);
dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
if (db->db_blkid != DMU_BONUS_BLKID) {
- /*
- * Update the accounting.
- * Note: we delay "free accounting" until after we drop
- * the db_mtx. This keeps us from grabbing other locks
- * (and possibly deadlocking) in bp_get_dsize() while
- * also holding the db_mtx.
- */
- dnode_willuse_space(dn, db->db.db_size, tx);
- do_free_accounting = dbuf_block_freeable(db);
+ dmu_objset_willuse_space(os, db->db.db_size, tx);
}
/*
@@ -1320,7 +1694,7 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
}
dr->dt.dl.dr_data = data_old;
} else {
- mutex_init(&dr->dt.di.dr_mtx, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&dr->dt.di.dr_mtx, NULL, MUTEX_NOLOCKDEP, NULL);
list_create(&dr->dt.di.dr_children,
sizeof (dbuf_dirty_record_t),
offsetof(dbuf_dirty_record_t, dr_dirty_node));
@@ -1366,27 +1740,37 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
dnode_setdirty(dn, tx);
DB_DNODE_EXIT(db);
return (dr);
- } else if (do_free_accounting) {
- blkptr_t *bp = db->db_blkptr;
- int64_t willfree = (bp && !BP_IS_HOLE(bp)) ?
- bp_get_dsize(os->os_spa, bp) : db->db.db_size;
- /*
- * This is only a guess -- if the dbuf is dirty
- * in a previous txg, we don't know how much
- * space it will use on disk yet. We should
- * really have the struct_rwlock to access
- * db_blkptr, but since this is just a guess,
- * it's OK if we get an odd answer.
- */
- ddt_prefetch(os->os_spa, bp);
- dnode_willuse_space(dn, -willfree, tx);
}
+ /*
+ * The dn_struct_rwlock prevents db_blkptr from changing
+ * due to a write from syncing context completing
+ * while we are running, so we want to acquire it before
+ * looking at db_blkptr.
+ */
if (!RW_WRITE_HELD(&dn->dn_struct_rwlock)) {
rw_enter(&dn->dn_struct_rwlock, RW_READER);
drop_struct_lock = TRUE;
}
+ /*
+ * We need to hold the dn_struct_rwlock to make this assertion,
+ * because it protects dn_phys / dn_next_nlevels from changing.
+ */
+ ASSERT((dn->dn_phys->dn_nlevels == 0 && db->db_level == 0) ||
+ dn->dn_phys->dn_nlevels > db->db_level ||
+ dn->dn_next_nlevels[txgoff] > db->db_level ||
+ dn->dn_next_nlevels[(tx->tx_txg-1) & TXG_MASK] > db->db_level ||
+ dn->dn_next_nlevels[(tx->tx_txg-2) & TXG_MASK] > db->db_level);
+
+ /*
+ * If we are overwriting a dedup BP, then unless it is snapshotted,
+ * when we get to syncing context we will need to decrement its
+ * refcount in the DDT. Prefetch the relevant DDT block so that
+ * syncing context won't have to wait for the i/o.
+ */
+ ddt_prefetch(os->os_spa, db->db_blkptr);
+
if (db->db_level == 0) {
dnode_new_blkid(dn, db->db_blkid, tx, drop_struct_lock);
ASSERT(dn->dn_maxblkid >= db->db_blkid);
@@ -1518,7 +1902,7 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
ASSERT(db->db_buf != NULL);
ASSERT(dr->dt.dl.dr_data != NULL);
if (dr->dt.dl.dr_data != db->db_buf)
- VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data, db));
+ arc_buf_destroy(dr->dt.dl.dr_data, db);
}
kmem_free(dr, sizeof (dbuf_dirty_record_t));
@@ -1527,12 +1911,8 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
db->db_dirtycnt -= 1;
if (refcount_remove(&db->db_holds, (void *)(uintptr_t)txg) == 0) {
- arc_buf_t *buf = db->db_buf;
-
- ASSERT(db->db_state == DB_NOFILL || arc_released(buf));
- dbuf_clear_data(db);
- VERIFY(arc_buf_remove_ref(buf, db));
- dbuf_evict(db);
+ ASSERT(db->db_state == DB_NOFILL || arc_released(db->db_buf));
+ dbuf_destroy(db);
return (B_TRUE);
}
@@ -1544,10 +1924,35 @@ dmu_buf_will_dirty(dmu_buf_t *db_fake, dmu_tx_t *tx)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
int rf = DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH;
+ dbuf_dirty_record_t *dr;
ASSERT(tx->tx_txg != 0);
ASSERT(!refcount_is_zero(&db->db_holds));
+ /*
+ * Quick check for dirtiness. For already dirty blocks, this
+ * reduces runtime of this function by >90%, and overall performance
+ * by 50% for some workloads (e.g. file deletion with indirect blocks
+ * cached).
+ */
+ mutex_enter(&db->db_mtx);
+
+ for (dr = db->db_last_dirty;
+ dr != NULL && dr->dr_txg >= tx->tx_txg; dr = dr->dr_next) {
+ /*
+ * It's possible that it is already dirty but not cached,
+ * because there are some calls to dbuf_dirty() that don't
+ * go through dmu_buf_will_dirty().
+ */
+ if (dr->dr_txg == tx->tx_txg && db->db_state == DB_CACHED) {
+ /* This dbuf is already dirty and cached. */
+ dbuf_redirty(dr);
+ mutex_exit(&db->db_mtx);
+ return;
+ }
+ }
+ mutex_exit(&db->db_mtx);
+
DB_DNODE_ENTER(db);
if (RW_WRITE_HELD(&DB_DNODE(db)->dn_struct_rwlock))
rf |= DB_RF_HAVESTRUCT;
@@ -1615,6 +2020,11 @@ dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
struct dirty_leaf *dl;
dmu_object_type_t type;
+ if (etype == BP_EMBEDDED_TYPE_DATA) {
+ ASSERT(spa_feature_is_active(dmu_objset_spa(db->db_objset),
+ SPA_FEATURE_EMBEDDED_DATA));
+ }
+
DB_DNODE_ENTER(db);
type = DB_DNODE(db)->dn_type;
DB_DNODE_EXIT(db);
@@ -1647,9 +2057,9 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
ASSERT(!refcount_is_zero(&db->db_holds));
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
ASSERT(db->db_level == 0);
- ASSERT(DBUF_GET_BUFC_TYPE(db) == ARC_BUFC_DATA);
+ ASSERT3U(dbuf_is_metadata(db), ==, arc_is_metadata(buf));
ASSERT(buf != NULL);
- ASSERT(arc_buf_size(buf) == db->db.db_size);
+ ASSERT(arc_buf_lsize(buf) == db->db.db_size);
ASSERT(tx->tx_txg != 0);
arc_return_buf(buf, db);
@@ -1667,7 +2077,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
mutex_exit(&db->db_mtx);
(void) dbuf_dirty(db, tx);
bcopy(buf->b_data, db->db.db_data, db->db.db_size);
- VERIFY(arc_buf_remove_ref(buf, db));
+ arc_buf_destroy(buf, db);
xuio_stat_wbuf_copied();
return;
}
@@ -1685,10 +2095,10 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
arc_release(db->db_buf, db);
}
dr->dt.dl.dr_data = buf;
- VERIFY(arc_buf_remove_ref(db->db_buf, db));
+ arc_buf_destroy(db->db_buf, db);
} else if (dr == NULL || dr->dt.dl.dr_data != db->db_buf) {
arc_release(db->db_buf, db);
- VERIFY(arc_buf_remove_ref(db->db_buf, db));
+ arc_buf_destroy(db->db_buf, db);
}
db->db_buf = NULL;
}
@@ -1700,59 +2110,64 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
dmu_buf_fill_done(&db->db, tx);
}
-/*
- * "Clear" the contents of this dbuf. This will mark the dbuf
- * EVICTING and clear *most* of its references. Unfortunately,
- * when we are not holding the dn_dbufs_mtx, we can't clear the
- * entry in the dn_dbufs list. We have to wait until dbuf_destroy()
- * in this case. For callers from the DMU we will usually see:
- * dbuf_clear()->arc_clear_callback()->dbuf_do_evict()->dbuf_destroy()
- * For the arc callback, we will usually see:
- * dbuf_do_evict()->dbuf_clear();dbuf_destroy()
- * Sometimes, though, we will get a mix of these two:
- * DMU: dbuf_clear()->arc_clear_callback()
- * ARC: dbuf_do_evict()->dbuf_destroy()
- *
- * This routine will dissociate the dbuf from the arc, by calling
- * arc_clear_callback(), but will not evict the data from the ARC.
- */
void
-dbuf_clear(dmu_buf_impl_t *db)
+dbuf_destroy(dmu_buf_impl_t *db)
{
dnode_t *dn;
dmu_buf_impl_t *parent = db->db_parent;
dmu_buf_impl_t *dndb;
- boolean_t dbuf_gone = B_FALSE;
ASSERT(MUTEX_HELD(&db->db_mtx));
ASSERT(refcount_is_zero(&db->db_holds));
- dbuf_evict_user(db);
+ if (db->db_buf != NULL) {
+ arc_buf_destroy(db->db_buf, db);
+ db->db_buf = NULL;
+ }
- if (db->db_state == DB_CACHED) {
+ if (db->db_blkid == DMU_BONUS_BLKID) {
+ int slots = DB_DNODE(db)->dn_num_slots;
+ int bonuslen = DN_SLOTS_TO_BONUSLEN(slots);
ASSERT(db->db.db_data != NULL);
- if (db->db_blkid == DMU_BONUS_BLKID) {
- zio_buf_free(db->db.db_data, DN_MAX_BONUSLEN);
- arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
- }
- db->db.db_data = NULL;
+ kmem_free(db->db.db_data, bonuslen);
+ arc_space_return(bonuslen, ARC_SPACE_BONUS);
db->db_state = DB_UNCACHED;
}
+ dbuf_clear_data(db);
+
+ if (multilist_link_active(&db->db_cache_link)) {
+ multilist_remove(dbuf_cache, db);
+ (void) refcount_remove_many(&dbuf_cache_size,
+ db->db.db_size, db);
+ }
+
ASSERT(db->db_state == DB_UNCACHED || db->db_state == DB_NOFILL);
ASSERT(db->db_data_pending == NULL);
db->db_state = DB_EVICTING;
db->db_blkptr = NULL;
+ /*
+ * Now that db_state is DB_EVICTING, nobody else can find this via
+ * the hash table. We can now drop db_mtx, which allows us to
+ * acquire the dn_dbufs_mtx.
+ */
+ mutex_exit(&db->db_mtx);
+
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
dndb = dn->dn_dbuf;
- if (db->db_blkid != DMU_BONUS_BLKID && MUTEX_HELD(&dn->dn_dbufs_mtx)) {
+ if (db->db_blkid != DMU_BONUS_BLKID) {
+ boolean_t needlock = !MUTEX_HELD(&dn->dn_dbufs_mtx);
+ if (needlock)
+ mutex_enter(&dn->dn_dbufs_mtx);
avl_remove(&dn->dn_dbufs, db);
atomic_dec_32(&dn->dn_dbufs_count);
membar_producer();
DB_DNODE_EXIT(db);
+ if (needlock)
+ mutex_exit(&dn->dn_dbufs_mtx);
/*
* Decrementing the dbuf count means that the hold corresponding
* to the removed dbuf is no longer discounted in dnode_move(),
@@ -1763,15 +2178,25 @@ dbuf_clear(dmu_buf_impl_t *db)
*/
dnode_rele(dn, db);
db->db_dnode_handle = NULL;
+
+ dbuf_hash_remove(db);
} else {
DB_DNODE_EXIT(db);
}
- if (db->db_buf)
- dbuf_gone = arc_clear_callback(db->db_buf);
+ ASSERT(refcount_is_zero(&db->db_holds));
+
+ db->db_parent = NULL;
- if (!dbuf_gone)
- mutex_exit(&db->db_mtx);
+ ASSERT(db->db_buf == NULL);
+ ASSERT(db->db.db_data == NULL);
+ ASSERT(db->db_hash_next == NULL);
+ ASSERT(db->db_blkptr == NULL);
+ ASSERT(db->db_data_pending == NULL);
+ ASSERT(!multilist_link_active(&db->db_cache_link));
+
+ kmem_cache_free(dbuf_kmem_cache, db);
+ arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
/*
* If this dbuf is referenced from an indirect dbuf,
@@ -1781,6 +2206,12 @@ dbuf_clear(dmu_buf_impl_t *db)
dbuf_rele(parent, db);
}
+/*
+ * Note: While bpp will always be updated if the function returns success,
+ * parentp will not be updated if the dnode does not have dn_dbuf filled in;
+ * this happens when the dnode is the meta-dnode, or a userused or groupused
+ * object.
+ */
__attribute__((always_inline))
static inline int
dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
@@ -1797,7 +2228,7 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
mutex_enter(&dn->dn_mtx);
if (dn->dn_have_spill &&
(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR))
- *bpp = &dn->dn_phys->dn_spill;
+ *bpp = DN_SPILL_BLKPTR(dn->dn_phys);
else
*bpp = NULL;
dbuf_add_ref(dn->dn_dbuf, NULL);
@@ -1806,29 +2237,47 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
return (0);
}
- if (dn->dn_phys->dn_nlevels == 0)
- nlevels = 1;
- else
- nlevels = dn->dn_phys->dn_nlevels;
-
+ nlevels =
+ (dn->dn_phys->dn_nlevels == 0) ? 1 : dn->dn_phys->dn_nlevels;
epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
ASSERT3U(level * epbs, <, 64);
ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
+ /*
+ * This assertion shouldn't trip as long as the max indirect block size
+ * is less than 1M. The reason for this is that up to that point,
+ * the number of levels required to address an entire object with blocks
+ * of size SPA_MINBLOCKSIZE satisfies nlevels * epbs + 1 <= 64. In
+ * other words, if N * epbs + 1 > 64, then if (N-1) * epbs + 1 > 55
+ * (i.e. we can address the entire object), objects will all use at most
+ * N-1 levels and the assertion won't overflow. However, once epbs is
+ * 13, 4 * 13 + 1 = 53, but 5 * 13 + 1 = 66. Then, 4 levels will not be
+ * enough to address an entire object, so objects will have 5 levels,
+ * but then this assertion will overflow.
+ *
+ * All this is to say that if we ever increase DN_MAX_INDBLKSHIFT, we
+ * need to redo this logic to handle overflows.
+ */
+ ASSERT(level >= nlevels ||
+ ((nlevels - level - 1) * epbs) +
+ highbit64(dn->dn_phys->dn_nblkptr) <= 64);
if (level >= nlevels ||
- (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) {
+ blkid >= ((uint64_t)dn->dn_phys->dn_nblkptr <<
+ ((nlevels - level - 1) * epbs)) ||
+ (fail_sparse &&
+ blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))) {
/* the buffer has no parent yet */
return (SET_ERROR(ENOENT));
} else if (level < nlevels-1) {
/* this block is referenced from an indirect block */
int err;
if (dh == NULL) {
- err = dbuf_hold_impl(dn, level+1, blkid >> epbs,
- fail_sparse, NULL, parentp);
+ err = dbuf_hold_impl(dn, level+1,
+ blkid >> epbs, fail_sparse, FALSE, NULL, parentp);
} else {
__dbuf_hold_impl_init(dh + 1, dn, dh->dh_level + 1,
- blkid >> epbs, fail_sparse, NULL,
- parentp, dh->dh_depth + 1);
+ blkid >> epbs, fail_sparse, FALSE, NULL,
+ parentp, dh->dh_depth + 1);
err = __dbuf_hold_impl(dh + 1);
}
if (err)
@@ -1842,6 +2291,8 @@ dbuf_findbp(dnode_t *dn, int level, uint64_t blkid, int fail_sparse,
}
*bpp = ((blkptr_t *)(*parentp)->db.db_data) +
(blkid & ((1ULL << epbs) - 1));
+ if (blkid > (dn->dn_phys->dn_maxblkid >> (level * epbs)))
+ ASSERT(BP_IS_HOLE(*bpp));
return (0);
} else {
/* the block is referenced from the dnode */
@@ -1867,7 +2318,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
ASSERT(dn->dn_type != DMU_OT_NONE);
- db = kmem_cache_alloc(dbuf_cache, KM_SLEEP);
+ db = kmem_cache_alloc(dbuf_kmem_cache, KM_SLEEP);
db->db_objset = os;
db->db.db_object = dn->dn_object;
@@ -1886,13 +2337,13 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
if (blkid == DMU_BONUS_BLKID) {
ASSERT3P(parent, ==, dn->dn_dbuf);
- db->db.db_size = DN_MAX_BONUSLEN -
+ db->db.db_size = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots) -
(dn->dn_nblkptr-1) * sizeof (blkptr_t);
ASSERT3U(db->db.db_size, >=, dn->dn_bonuslen);
db->db.db_offset = DMU_BONUS_BLKID;
db->db_state = DB_UNCACHED;
/* the bonus dbuf is not placed in the hash table */
- arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
+ arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
return (db);
} else if (blkid == DMU_SPILL_BLKID) {
db->db.db_size = (blkptr != NULL) ?
@@ -1916,17 +2367,15 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
db->db_state = DB_EVICTING;
if ((odb = dbuf_hash_insert(db)) != NULL) {
/* someone else inserted it first */
- kmem_cache_free(dbuf_cache, db);
+ kmem_cache_free(dbuf_kmem_cache, db);
mutex_exit(&dn->dn_dbufs_mtx);
return (odb);
}
avl_add(&dn->dn_dbufs, db);
- if (db->db_level == 0 && db->db_blkid >=
- dn->dn_unlisted_l0_blkid)
- dn->dn_unlisted_l0_blkid = db->db_blkid + 1;
+
db->db_state = DB_UNCACHED;
mutex_exit(&dn->dn_dbufs_mtx);
- arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
+ arc_space_consume(sizeof (dmu_buf_impl_t), ARC_SPACE_DBUF);
if (parent && parent != dn->dn_dbuf)
dbuf_add_ref(parent, db);
@@ -1941,112 +2390,240 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
return (db);
}
-static int
-dbuf_do_evict(void *private)
-{
- dmu_buf_impl_t *db = private;
+typedef struct dbuf_prefetch_arg {
+ spa_t *dpa_spa; /* The spa to issue the prefetch in. */
+ zbookmark_phys_t dpa_zb; /* The target block to prefetch. */
+ int dpa_epbs; /* Entries (blkptr_t's) Per Block Shift. */
+ int dpa_curlevel; /* The current level that we're reading */
+ dnode_t *dpa_dnode; /* The dnode associated with the prefetch */
+ zio_priority_t dpa_prio; /* The priority I/Os should be issued at. */
+ zio_t *dpa_zio; /* The parent zio_t for all prefetches. */
+ arc_flags_t dpa_aflags; /* Flags to pass to the final prefetch. */
+} dbuf_prefetch_arg_t;
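+
+/*
+ * The struct above threads prefetch state through a chain of arc_read()
+ * callbacks, one per indirect level. Worked example of the blkid
+ * arithmetic under assumed default geometry (not taken from this patch):
+ * 128K indirect blocks give epbs = 17 - SPA_BLKPTRSHIFT = 10, i.e. 1024
+ * blkptrs per block. Prefetching L0 blkid 1500000 in a 3-level tree
+ * visits:
+ *	L2 blkid = 1500000 >> (2 * 10) = 1
+ *	L1 blkid = 1500000 >> 10 = 1464 (slot 1464 % 1024 = 440 in L2)
+ *	L0 slot  = 1500000 % 1024 = 864 in that L1 block
+ */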
- if (!MUTEX_HELD(&db->db_mtx))
- mutex_enter(&db->db_mtx);
+/*
+ * Actually issue the prefetch read for the block given.
+ */
+static void
+dbuf_issue_final_prefetch(dbuf_prefetch_arg_t *dpa, blkptr_t *bp)
+{
+ arc_flags_t aflags;
+ if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
+ return;
- ASSERT(refcount_is_zero(&db->db_holds));
+ aflags = dpa->dpa_aflags | ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
- if (db->db_state != DB_EVICTING) {
- ASSERT(db->db_state == DB_CACHED);
- DBUF_VERIFY(db);
- db->db_buf = NULL;
- dbuf_evict(db);
- } else {
- mutex_exit(&db->db_mtx);
- dbuf_destroy(db);
- }
- return (0);
+ ASSERT3U(dpa->dpa_curlevel, ==, BP_GET_LEVEL(bp));
+ ASSERT3U(dpa->dpa_curlevel, ==, dpa->dpa_zb.zb_level);
+ ASSERT(dpa->dpa_zio != NULL);
+ (void) arc_read(dpa->dpa_zio, dpa->dpa_spa, bp, NULL, NULL,
+ dpa->dpa_prio, ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
+ &aflags, &dpa->dpa_zb);
}
+/*
+ * Called when an indirect block above our prefetch target is read in. This
+ * will either read in the next indirect block down the tree or issue the actual
+ * prefetch if the next block down is our target.
+ */
static void
-dbuf_destroy(dmu_buf_impl_t *db)
+dbuf_prefetch_indirect_done(zio_t *zio, arc_buf_t *abuf, void *private)
{
- ASSERT(refcount_is_zero(&db->db_holds));
+ dbuf_prefetch_arg_t *dpa = private;
+ uint64_t nextblkid;
+ blkptr_t *bp;
- if (db->db_blkid != DMU_BONUS_BLKID) {
- /*
- * If this dbuf is still on the dn_dbufs list,
- * remove it from that list.
- */
- if (db->db_dnode_handle != NULL) {
- dnode_t *dn;
+ ASSERT3S(dpa->dpa_zb.zb_level, <, dpa->dpa_curlevel);
+ ASSERT3S(dpa->dpa_curlevel, >, 0);
- DB_DNODE_ENTER(db);
- dn = DB_DNODE(db);
- mutex_enter(&dn->dn_dbufs_mtx);
- avl_remove(&dn->dn_dbufs, db);
- atomic_dec_32(&dn->dn_dbufs_count);
- mutex_exit(&dn->dn_dbufs_mtx);
- DB_DNODE_EXIT(db);
- /*
- * Decrementing the dbuf count means that the hold
- * corresponding to the removed dbuf is no longer
- * discounted in dnode_move(), so the dnode cannot be
- * moved until after we release the hold.
- */
- dnode_rele(dn, db);
- db->db_dnode_handle = NULL;
+ /*
+ * The dpa_dnode is only valid if we are called with a NULL
+ * zio. This indicates that the arc_read() returned without
+ * first calling zio_read() to issue a physical read. Once
+ * a physical read is made the dpa_dnode must be invalidated
+ * as the locks guarding it may have been dropped. If the
+ * dpa_dnode is still valid, then we want to add it to the dbuf
+ * cache. To do so, we must hold the dbuf associated with the block
+ * we just prefetched, read its contents so that we associate it
+ * with an arc_buf_t, and then release it.
+ */
+ if (zio != NULL) {
+ ASSERT3S(BP_GET_LEVEL(zio->io_bp), ==, dpa->dpa_curlevel);
+ if (zio->io_flags & ZIO_FLAG_RAW) {
+ ASSERT3U(BP_GET_PSIZE(zio->io_bp), ==, zio->io_size);
+ } else {
+ ASSERT3U(BP_GET_LSIZE(zio->io_bp), ==, zio->io_size);
}
- dbuf_hash_remove(db);
- }
- db->db_parent = NULL;
- db->db_buf = NULL;
+ ASSERT3P(zio->io_spa, ==, dpa->dpa_spa);
+
+ dpa->dpa_dnode = NULL;
+ } else if (dpa->dpa_dnode != NULL) {
+ uint64_t curblkid = dpa->dpa_zb.zb_blkid >>
+ (dpa->dpa_epbs * (dpa->dpa_curlevel -
+ dpa->dpa_zb.zb_level));
+ dmu_buf_impl_t *db = dbuf_hold_level(dpa->dpa_dnode,
+ dpa->dpa_curlevel, curblkid, FTAG);
+ (void) dbuf_read(db, NULL,
+ DB_RF_MUST_SUCCEED | DB_RF_NOPREFETCH | DB_RF_HAVESTRUCT);
+ dbuf_rele(db, FTAG);
+ }
+
+ dpa->dpa_curlevel--;
+
+ nextblkid = dpa->dpa_zb.zb_blkid >>
+ (dpa->dpa_epbs * (dpa->dpa_curlevel - dpa->dpa_zb.zb_level));
+ bp = ((blkptr_t *)abuf->b_data) +
+ P2PHASE(nextblkid, 1ULL << dpa->dpa_epbs);
+ if (BP_IS_HOLE(bp) || (zio != NULL && zio->io_error != 0)) {
+ kmem_free(dpa, sizeof (*dpa));
+ } else if (dpa->dpa_curlevel == dpa->dpa_zb.zb_level) {
+ ASSERT3U(nextblkid, ==, dpa->dpa_zb.zb_blkid);
+ dbuf_issue_final_prefetch(dpa, bp);
+ kmem_free(dpa, sizeof (*dpa));
+ } else {
+ arc_flags_t iter_aflags = ARC_FLAG_NOWAIT;
+ zbookmark_phys_t zb;
- ASSERT(db->db.db_data == NULL);
- ASSERT(db->db_hash_next == NULL);
- ASSERT(db->db_blkptr == NULL);
- ASSERT(db->db_data_pending == NULL);
+ ASSERT3U(dpa->dpa_curlevel, ==, BP_GET_LEVEL(bp));
+
+ SET_BOOKMARK(&zb, dpa->dpa_zb.zb_objset,
+ dpa->dpa_zb.zb_object, dpa->dpa_curlevel, nextblkid);
+
+ (void) arc_read(dpa->dpa_zio, dpa->dpa_spa,
+ bp, dbuf_prefetch_indirect_done, dpa, dpa->dpa_prio,
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
+ &iter_aflags, &zb);
+ }
- kmem_cache_free(dbuf_cache, db);
- arc_space_return(sizeof (dmu_buf_impl_t), ARC_SPACE_OTHER);
+ arc_buf_destroy(abuf, private);
}
+/*
+ * Issue prefetch reads for the given block on the given level. If the indirect
+ * blocks above that block are not in memory, we will read them in
+ * asynchronously. As a result, this call never blocks waiting for a read to
+ * complete.
+ */
void
-dbuf_prefetch(dnode_t *dn, uint64_t blkid, zio_priority_t prio)
+dbuf_prefetch(dnode_t *dn, int64_t level, uint64_t blkid, zio_priority_t prio,
+ arc_flags_t aflags)
{
- dmu_buf_impl_t *db = NULL;
- blkptr_t *bp = NULL;
+ blkptr_t bp;
+ int epbs, nlevels, curlevel;
+ uint64_t curblkid;
+ dmu_buf_impl_t *db;
+ zio_t *pio;
+ dbuf_prefetch_arg_t *dpa;
+ dsl_dataset_t *ds;
ASSERT(blkid != DMU_BONUS_BLKID);
ASSERT(RW_LOCK_HELD(&dn->dn_struct_rwlock));
+ if (blkid > dn->dn_maxblkid)
+ return;
+
if (dnode_block_freed(dn, blkid))
return;
- /* dbuf_find() returns with db_mtx held */
- if ((db = dbuf_find(dn->dn_objset, dn->dn_object, 0, blkid))) {
+ /*
+ * This dnode hasn't been written to disk yet, so there's nothing to
+ * prefetch.
+ */
+ nlevels = dn->dn_phys->dn_nlevels;
+ if (level >= nlevels || dn->dn_phys->dn_nblkptr == 0)
+ return;
+
+ epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
+ if (dn->dn_phys->dn_maxblkid < blkid << (epbs * level))
+ return;
+
+ db = dbuf_find(dn->dn_objset, dn->dn_object,
+ level, blkid);
+ if (db != NULL) {
+ mutex_exit(&db->db_mtx);
/*
- * This dbuf is already in the cache. We assume that
- * it is already CACHED, or else about to be either
- * read or filled.
+ * This dbuf already exists. It is either CACHED, or
+ * (we assume) about to be read or filled.
*/
- mutex_exit(&db->db_mtx);
return;
}
- if (dbuf_findbp(dn, 0, blkid, TRUE, &db, &bp, NULL) == 0) {
- if (bp && !BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
- dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
- arc_flags_t aflags =
- ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
- zbookmark_phys_t zb;
+ /*
+ * Find the closest ancestor (indirect block) of the target block
+ * that is present in the cache. In this indirect block, we will
+ * find the bp that is at curlevel, curblkid.
+ */
+ curlevel = level;
+ curblkid = blkid;
+ while (curlevel < nlevels - 1) {
+ int parent_level = curlevel + 1;
+ uint64_t parent_blkid = curblkid >> epbs;
+ dmu_buf_impl_t *db;
+
+ if (dbuf_hold_impl(dn, parent_level, parent_blkid,
+ FALSE, TRUE, FTAG, &db) == 0) {
+ blkptr_t *bpp = db->db_buf->b_data;
+ bp = bpp[P2PHASE(curblkid, 1 << epbs)];
+ dbuf_rele(db, FTAG);
+ break;
+ }
+
+ curlevel = parent_level;
+ curblkid = parent_blkid;
+ }
- SET_BOOKMARK(&zb, ds ? ds->ds_object : DMU_META_OBJSET,
- dn->dn_object, 0, blkid);
+ if (curlevel == nlevels - 1) {
+ /* No cached indirect blocks found. */
+ ASSERT3U(curblkid, <, dn->dn_phys->dn_nblkptr);
+ bp = dn->dn_phys->dn_blkptr[curblkid];
+ }
+ if (BP_IS_HOLE(&bp))
+ return;
- (void) arc_read(NULL, dn->dn_objset->os_spa,
- bp, NULL, NULL, prio,
- ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
- &aflags, &zb);
- }
- if (db)
- dbuf_rele(db, NULL);
+ ASSERT3U(curlevel, ==, BP_GET_LEVEL(&bp));
+
+ pio = zio_root(dmu_objset_spa(dn->dn_objset), NULL, NULL,
+ ZIO_FLAG_CANFAIL);
+
+ dpa = kmem_zalloc(sizeof (*dpa), KM_SLEEP);
+ ds = dn->dn_objset->os_dsl_dataset;
+ SET_BOOKMARK(&dpa->dpa_zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET,
+ dn->dn_object, level, blkid);
+ dpa->dpa_curlevel = curlevel;
+ dpa->dpa_prio = prio;
+ dpa->dpa_aflags = aflags;
+ dpa->dpa_spa = dn->dn_objset->os_spa;
+ dpa->dpa_dnode = dn;
+ dpa->dpa_epbs = epbs;
+ dpa->dpa_zio = pio;
+
+ /*
+ * If we have the indirect just above us, no need to do the asynchronous
+ * prefetch chain; we'll just run the last step ourselves. If we're at
+ * a higher level, though, we want to issue the prefetches for all the
+ * indirect blocks asynchronously, so we can go on with whatever we were
+ * doing.
+ */
+ if (curlevel == level) {
+ ASSERT3U(curblkid, ==, blkid);
+ dbuf_issue_final_prefetch(dpa, &bp);
+ kmem_free(dpa, sizeof (*dpa));
+ } else {
+ arc_flags_t iter_aflags = ARC_FLAG_NOWAIT;
+ zbookmark_phys_t zb;
+
+ SET_BOOKMARK(&zb, ds != NULL ? ds->ds_object : DMU_META_OBJSET,
+ dn->dn_object, curlevel, curblkid);
+ (void) arc_read(dpa->dpa_zio, dpa->dpa_spa,
+ &bp, dbuf_prefetch_indirect_done, dpa, prio,
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE,
+ &iter_aflags, &zb);
}
+ /*
+ * We use pio here instead of dpa_zio since it's possible that
+ * dpa may have already been freed.
+ */
+ zio_nowait(pio);
}
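/*
 * Hedged usage sketch of the widened interface (it mirrors the
 * dmu_prefetch() call sites later in this patch); a plain single-block
 * data prefetch under the struct lock becomes:
 *
 *	rw_enter(&dn->dn_struct_rwlock, RW_READER);
 *	dbuf_prefetch(dn, 0, dbuf_whichblock(dn, 0, offset),
 *	    ZIO_PRIORITY_SYNC_READ, 0);
 *	rw_exit(&dn->dn_struct_rwlock);
 */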
#define DBUF_HOLD_IMPL_MAX_DEPTH 20
@@ -2066,7 +2643,7 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
ASSERT3U(dh->dh_dn->dn_nlevels, >, dh->dh_level);
*(dh->dh_dbp) = NULL;
-top:
+
/* dbuf_find() returns with db_mtx held */
dh->dh_db = dbuf_find(dh->dh_dn->dn_objset, dh->dh_dn->dn_object,
dh->dh_level, dh->dh_blkid);
@@ -2074,10 +2651,12 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
if (dh->dh_db == NULL) {
dh->dh_bp = NULL;
+ if (dh->dh_fail_uncached)
+ return (SET_ERROR(ENOENT));
+
ASSERT3P(dh->dh_parent, ==, NULL);
dh->dh_err = dbuf_findbp(dh->dh_dn, dh->dh_level, dh->dh_blkid,
- dh->dh_fail_sparse, &dh->dh_parent,
- &dh->dh_bp, dh);
+ dh->dh_fail_sparse, &dh->dh_parent, &dh->dh_bp, dh);
if (dh->dh_fail_sparse) {
if (dh->dh_err == 0 &&
dh->dh_bp && BP_IS_HOLE(dh->dh_bp))
@@ -2091,22 +2670,17 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
if (dh->dh_err && dh->dh_err != ENOENT)
return (dh->dh_err);
dh->dh_db = dbuf_create(dh->dh_dn, dh->dh_level, dh->dh_blkid,
- dh->dh_parent, dh->dh_bp);
+ dh->dh_parent, dh->dh_bp);
}
- if (dh->dh_db->db_buf && refcount_is_zero(&dh->dh_db->db_holds)) {
- arc_buf_add_ref(dh->dh_db->db_buf, dh->dh_db);
- if (dh->dh_db->db_buf->b_data == NULL) {
- dbuf_clear(dh->dh_db);
- if (dh->dh_parent) {
- dbuf_rele(dh->dh_parent, NULL);
- dh->dh_parent = NULL;
- }
- goto top;
- }
- ASSERT3P(dh->dh_db->db.db_data, ==, dh->dh_db->db_buf->b_data);
+ if (dh->dh_fail_uncached && dh->dh_db->db_state != DB_CACHED) {
+ mutex_exit(&dh->dh_db->db_mtx);
+ return (SET_ERROR(ENOENT));
}
+ if (dh->dh_db->db_buf != NULL)
+ ASSERT3P(dh->dh_db->db.db_data, ==, dh->dh_db->db_buf->b_data);
+
ASSERT(dh->dh_db->db_buf == NULL || arc_referenced(dh->dh_db->db_buf));
/*
@@ -2124,13 +2698,19 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
dh->dh_type = DBUF_GET_BUFC_TYPE(dh->dh_db);
dbuf_set_data(dh->dh_db,
- arc_buf_alloc(dh->dh_dn->dn_objset->os_spa,
- dh->dh_db->db.db_size, dh->dh_db, dh->dh_type));
+ arc_alloc_buf(dh->dh_dn->dn_objset->os_spa,
+ dh->dh_db, dh->dh_type, dh->dh_db->db.db_size));
bcopy(dh->dh_dr->dt.dl.dr_data->b_data,
dh->dh_db->db.db_data, dh->dh_db->db.db_size);
}
}
+ if (multilist_link_active(&dh->dh_db->db_cache_link)) {
+ ASSERT(refcount_is_zero(&dh->dh_db->db_holds));
+ multilist_remove(dbuf_cache, dh->dh_db);
+ (void) refcount_remove_many(&dbuf_cache_size,
+ dh->dh_db->db.db_size, dh->dh_db);
+ }
(void) refcount_add(&dh->dh_db->db_holds, dh->dh_tag);
DBUF_VERIFY(dh->dh_db);
mutex_exit(&dh->dh_db->db_mtx);
@@ -2154,15 +2734,17 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh)
* on the stack for 20 levels of recursion.
*/
int
-dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
+dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid,
+ boolean_t fail_sparse, boolean_t fail_uncached,
void *tag, dmu_buf_impl_t **dbp)
{
struct dbuf_hold_impl_data *dh;
int error;
- dh = kmem_zalloc(sizeof (struct dbuf_hold_impl_data) *
+ dh = kmem_alloc(sizeof (struct dbuf_hold_impl_data) *
DBUF_HOLD_IMPL_MAX_DEPTH, KM_SLEEP);
- __dbuf_hold_impl_init(dh, dn, level, blkid, fail_sparse, tag, dbp, 0);
+ __dbuf_hold_impl_init(dh, dn, level, blkid, fail_sparse,
+ fail_uncached, tag, dbp, 0);
error = __dbuf_hold_impl(dh);
@@ -2174,31 +2756,41 @@ dbuf_hold_impl(dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
static void
__dbuf_hold_impl_init(struct dbuf_hold_impl_data *dh,
- dnode_t *dn, uint8_t level, uint64_t blkid, int fail_sparse,
+ dnode_t *dn, uint8_t level, uint64_t blkid,
+ boolean_t fail_sparse, boolean_t fail_uncached,
void *tag, dmu_buf_impl_t **dbp, int depth)
{
dh->dh_dn = dn;
dh->dh_level = level;
dh->dh_blkid = blkid;
+
dh->dh_fail_sparse = fail_sparse;
+ dh->dh_fail_uncached = fail_uncached;
+
dh->dh_tag = tag;
dh->dh_dbp = dbp;
+
+ dh->dh_db = NULL;
+ dh->dh_parent = NULL;
+ dh->dh_bp = NULL;
+ dh->dh_err = 0;
+ dh->dh_dr = NULL;
+ dh->dh_type = 0;
+
dh->dh_depth = depth;
}
dmu_buf_impl_t *
dbuf_hold(dnode_t *dn, uint64_t blkid, void *tag)
{
- dmu_buf_impl_t *db;
- int err = dbuf_hold_impl(dn, 0, blkid, FALSE, tag, &db);
- return (err ? NULL : db);
+ return (dbuf_hold_level(dn, 0, blkid, tag));
}
dmu_buf_impl_t *
dbuf_hold_level(dnode_t *dn, int level, uint64_t blkid, void *tag)
{
dmu_buf_impl_t *db;
- int err = dbuf_hold_impl(dn, level, blkid, FALSE, tag, &db);
+ int err = dbuf_hold_impl(dn, level, blkid, FALSE, FALSE, tag, &db);
return (err ? NULL : db);
}
@@ -2244,7 +2836,8 @@ dbuf_rm_spill(dnode_t *dn, dmu_tx_t *tx)
void
dbuf_add_ref(dmu_buf_impl_t *db, void *tag)
{
- VERIFY(refcount_add(&db->db_holds, tag) > 1);
+ int64_t holds = refcount_add(&db->db_holds, tag);
+ VERIFY3S(holds, >, 1);
}
#pragma weak dmu_buf_try_add_ref = dbuf_try_add_ref
@@ -2315,8 +2908,10 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
* We can't freeze indirects if there is a possibility that they
* may be modified in the current syncing context.
*/
- if (db->db_buf && holds == (db->db_level == 0 ? db->db_dirtycnt : 0))
+ if (db->db_buf != NULL &&
+ holds == (db->db_level == 0 ? db->db_dirtycnt : 0)) {
arc_buf_freeze(db->db_buf);
+ }
if (holds == db->db_dirtycnt &&
db->db_level == 0 && db->db_user_immediate_evict)
@@ -2361,55 +2956,44 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag)
*/
ASSERT(db->db_state == DB_UNCACHED ||
db->db_state == DB_NOFILL);
- dbuf_evict(db);
+ dbuf_destroy(db);
} else if (arc_released(db->db_buf)) {
- arc_buf_t *buf = db->db_buf;
/*
* This dbuf has anonymous data associated with it.
*/
- dbuf_clear_data(db);
- VERIFY(arc_buf_remove_ref(buf, db));
- dbuf_evict(db);
+ dbuf_destroy(db);
} else {
- VERIFY(!arc_buf_remove_ref(db->db_buf, db));
+ boolean_t do_arc_evict = B_FALSE;
+ blkptr_t bp;
+ spa_t *spa = dmu_objset_spa(db->db_objset);
+
+ if (!DBUF_IS_CACHEABLE(db) &&
+ db->db_blkptr != NULL &&
+ !BP_IS_HOLE(db->db_blkptr) &&
+ !BP_IS_EMBEDDED(db->db_blkptr)) {
+ do_arc_evict = B_TRUE;
+ bp = *db->db_blkptr;
+ }
- /*
- * A dbuf will be eligible for eviction if either the
- * 'primarycache' property is set or a duplicate
- * copy of this buffer is already cached in the arc.
- *
- * In the case of the 'primarycache' a buffer
- * is considered for eviction if it matches the
- * criteria set in the property.
- *
- * To decide if our buffer is considered a
- * duplicate, we must call into the arc to determine
- * if multiple buffers are referencing the same
- * block on-disk. If so, then we simply evict
- * ourselves.
- */
- if (!DBUF_IS_CACHEABLE(db)) {
- if (db->db_blkptr != NULL &&
- !BP_IS_HOLE(db->db_blkptr) &&
- !BP_IS_EMBEDDED(db->db_blkptr)) {
- spa_t *spa =
- dmu_objset_spa(db->db_objset);
- blkptr_t bp = *db->db_blkptr;
- dbuf_clear(db);
- arc_freed(spa, &bp);
- } else {
- dbuf_clear(db);
- }
- } else if (db->db_pending_evict ||
- arc_buf_eviction_needed(db->db_buf)) {
- dbuf_clear(db);
- } else {
+ if (!DBUF_IS_CACHEABLE(db) ||
+ db->db_pending_evict) {
+ dbuf_destroy(db);
+ } else if (!multilist_link_active(&db->db_cache_link)) {
+ multilist_insert(dbuf_cache, db);
+ (void) refcount_add_many(&dbuf_cache_size,
+ db->db.db_size, db);
mutex_exit(&db->db_mtx);
+
+ dbuf_evict_notify();
}
+
+ if (do_arc_evict)
+ arc_freed(spa, &bp);
}
} else {
mutex_exit(&db->db_mtx);
}
+
}
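/*
 * For context: release no longer hands eviction to ARC callbacks;
 * evictable dbufs are parked on the new multilist-backed LRU. Sketch of
 * the lifecycle as assumed from this hunk and the __dbuf_hold_impl()
 * hunk above:
 *
 *	holds -> 0, cacheable:   multilist_insert(dbuf_cache, db);
 *	                         refcount_add_many(&dbuf_cache_size, ...);
 *	                         dbuf_evict_notify();
 *	holds -> 0, !cacheable:  dbuf_destroy(db);
 *	re-held from the cache:  multilist_remove(dbuf_cache, db)
 *	                         (see __dbuf_hold_impl above).
 */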
#pragma weak dmu_buf_refcount = dbuf_refcount
@@ -2473,24 +3057,33 @@ dmu_buf_user_evict_wait()
taskq_wait(dbu_evict_taskq);
}
-boolean_t
-dmu_buf_freeable(dmu_buf_t *dbuf)
+blkptr_t *
+dmu_buf_get_blkptr(dmu_buf_t *db)
{
- boolean_t res = B_FALSE;
- dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
+ dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
+ return (dbi->db_blkptr);
+}
- if (db->db_blkptr)
- res = dsl_dataset_block_freeable(db->db_objset->os_dsl_dataset,
- db->db_blkptr, db->db_blkptr->blk_birth);
+objset_t *
+dmu_buf_get_objset(dmu_buf_t *db)
+{
+ dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
+ return (dbi->db_objset);
+}
- return (res);
+dnode_t *
+dmu_buf_dnode_enter(dmu_buf_t *db)
+{
+ dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
+ DB_DNODE_ENTER(dbi);
+ return (DB_DNODE(dbi));
}
-blkptr_t *
-dmu_buf_get_blkptr(dmu_buf_t *db)
+void
+dmu_buf_dnode_exit(dmu_buf_t *db)
{
dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
- return (dbi->db_blkptr);
+ DB_DNODE_EXIT(dbi);
}
static void
@@ -2503,7 +3096,7 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
return;
if (db->db_blkid == DMU_SPILL_BLKID) {
- db->db_blkptr = &dn->dn_phys->dn_spill;
+ db->db_blkptr = DN_SPILL_BLKPTR(dn->dn_phys);
BP_ZERO(db->db_blkptr);
return;
}
@@ -2526,8 +3119,8 @@ dbuf_check_blkptr(dnode_t *dn, dmu_buf_impl_t *db)
if (parent == NULL) {
mutex_exit(&db->db_mtx);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
- (void) dbuf_hold_impl(dn, db->db_level+1,
- db->db_blkid >> epbs, FALSE, db, &parent);
+ parent = dbuf_hold_level(dn, db->db_level + 1,
+ db->db_blkid >> epbs, db);
rw_exit(&dn->dn_struct_rwlock);
mutex_enter(&db->db_mtx);
db->db_parent = parent;
@@ -2659,13 +3252,16 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
ASSERT(*datap != NULL);
ASSERT0(db->db_level);
- ASSERT3U(dn->dn_phys->dn_bonuslen, <=, DN_MAX_BONUSLEN);
+ ASSERT3U(dn->dn_phys->dn_bonuslen, <=,
+ DN_SLOTS_TO_BONUSLEN(dn->dn_phys->dn_extra_slots + 1));
bcopy(*datap, DN_BONUS(dn->dn_phys), dn->dn_phys->dn_bonuslen);
DB_DNODE_EXIT(db);
if (*datap != db->db.db_data) {
- zio_buf_free(*datap, DN_MAX_BONUSLEN);
- arc_space_return(DN_MAX_BONUSLEN, ARC_SPACE_OTHER);
+ int slots = DB_DNODE(db)->dn_num_slots;
+ int bonuslen = DN_SLOTS_TO_BONUSLEN(slots);
+ kmem_free(*datap, bonuslen);
+ arc_space_return(bonuslen, ARC_SPACE_BONUS);
}
db->db_data_pending = NULL;
drp = &db->db_last_dirty;
@@ -2721,10 +3317,19 @@ dbuf_sync_leaf(dbuf_dirty_record_t *dr, dmu_tx_t *tx)
* objects only modified in the syncing context (e.g.
* DNONE_DNODE blocks).
*/
- int blksz = arc_buf_size(*datap);
+ int psize = arc_buf_size(*datap);
arc_buf_contents_t type = DBUF_GET_BUFC_TYPE(db);
- *datap = arc_buf_alloc(os->os_spa, blksz, db, type);
- bcopy(db->db.db_data, (*datap)->b_data, blksz);
+ enum zio_compress compress_type = arc_get_compression(*datap);
+
+ if (compress_type == ZIO_COMPRESS_OFF) {
+ *datap = arc_alloc_buf(os->os_spa, db, type, psize);
+ } else {
+ ASSERT3U(type, ==, ARC_BUFC_DATA);
+ int lsize = arc_buf_lsize(*datap);
+ *datap = arc_alloc_compressed_buf(os->os_spa, db,
+ psize, lsize, compress_type);
+ }
+ bcopy(db->db.db_data, (*datap)->b_data, psize);
}
db->db_data_pending = dr;
@@ -2792,7 +3397,8 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
uint64_t fill = 0;
int i;
- ASSERT3P(db->db_blkptr, ==, bp);
+ ASSERT3P(db->db_blkptr, !=, NULL);
+ ASSERT3P(&db->db_data_pending->dr_bp_copy, ==, bp);
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
@@ -2814,8 +3420,8 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
#ifdef ZFS_DEBUG
if (db->db_blkid == DMU_SPILL_BLKID) {
ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR);
- ASSERT(!(BP_IS_HOLE(db->db_blkptr)) &&
- db->db_blkptr == &dn->dn_phys->dn_spill);
+ ASSERT(!(BP_IS_HOLE(bp)) &&
+ db->db_blkptr == DN_SPILL_BLKPTR(dn->dn_phys));
}
#endif
@@ -2827,11 +3433,17 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
mutex_exit(&dn->dn_mtx);
if (dn->dn_type == DMU_OT_DNODE) {
- dnode_phys_t *dnp = db->db.db_data;
- for (i = db->db.db_size >> DNODE_SHIFT; i > 0;
- i--, dnp++) {
- if (dnp->dn_type != DMU_OT_NONE)
+ i = 0;
+ while (i < db->db.db_size) {
+ dnode_phys_t *dnp =
+ (void *)(((char *)db->db.db_data) + i);
+
+ i += DNODE_MIN_SIZE;
+ if (dnp->dn_type != DMU_OT_NONE) {
fill++;
+ i += dnp->dn_extra_slots *
+ DNODE_MIN_SIZE;
+ }
}
} else {
if (BP_IS_HOLE(bp)) {
@@ -2855,6 +3467,55 @@ dbuf_write_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
bp->blk_fill = fill;
mutex_exit(&db->db_mtx);
+
+ rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+ *db->db_blkptr = *bp;
+ rw_exit(&dn->dn_struct_rwlock);
+}
+
+/* ARGSUSED */
+/*
+ * This function gets called just prior to running through the compression
+ * stage of the zio pipeline. If we're an indirect block comprised of only
+ * holes, then we want this indirect to be compressed away to a hole. In
+ * order to do that we must zero out any information about the holes that
+ * this indirect points to before we try to compress it.
+ */
+static void
+dbuf_write_children_ready(zio_t *zio, arc_buf_t *buf, void *vdb)
+{
+ dmu_buf_impl_t *db = vdb;
+ dnode_t *dn;
+ blkptr_t *bp;
+ unsigned int epbs, i;
+
+ ASSERT3U(db->db_level, >, 0);
+ DB_DNODE_ENTER(db);
+ dn = DB_DNODE(db);
+ epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
+ ASSERT3U(epbs, <, 31);
+
+ /* Determine if all our children are holes */
+ for (i = 0, bp = db->db.db_data; i < 1ULL << epbs; i++, bp++) {
+ if (!BP_IS_HOLE(bp))
+ break;
+ }
+
+ /*
+ * If all the children are holes, then zero them all out so that
+ * we may get compressed away.
+ */
+ if (i == 1ULL << epbs) {
+ /*
+ * We only found holes. Grab the rwlock to prevent
+ * anybody from reading the blocks we're about to
+ * zero out.
+ */
+ rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+ bzero(db->db.db_data, db->db.db_size);
+ rw_exit(&dn->dn_struct_rwlock);
+ }
+ DB_DNODE_EXIT(db);
}
/*
@@ -2935,7 +3596,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
dn = DB_DNODE(db);
ASSERT(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR);
ASSERT(!(BP_IS_HOLE(db->db_blkptr)) &&
- db->db_blkptr == &dn->dn_phys->dn_spill);
+ db->db_blkptr == DN_SPILL_BLKPTR(dn->dn_phys));
DB_DNODE_EXIT(db);
}
#endif
@@ -2945,10 +3606,7 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
ASSERT(dr->dt.dl.dr_override_state == DR_NOT_OVERRIDDEN);
if (db->db_state != DB_NOFILL) {
if (dr->dt.dl.dr_data != db->db_buf)
- VERIFY(arc_buf_remove_ref(dr->dt.dl.dr_data,
- db));
- else if (!arc_released(db->db_buf))
- arc_set_callback(db->db_buf, dbuf_do_evict, db);
+ arc_buf_destroy(dr->dt.dl.dr_data, db);
}
} else {
dnode_t *dn;
@@ -2964,8 +3622,6 @@ dbuf_write_done(zio_t *zio, arc_buf_t *buf, void *vdb)
dn->dn_phys->dn_maxblkid >> (db->db_level * epbs));
ASSERT3U(BP_GET_LSIZE(db->db_blkptr), ==,
db->db.db_size);
- if (!arc_released(db->db_buf))
- arc_set_callback(db->db_buf, dbuf_do_evict, db);
}
DB_DNODE_EXIT(db);
mutex_destroy(&dr->dt.di.dr_mtx);
@@ -3017,6 +3673,9 @@ dbuf_write_override_done(zio_t *zio)
mutex_exit(&db->db_mtx);
dbuf_write_done(zio, NULL, db);
+
+ if (zio->io_abd != NULL)
+ abd_put(zio->io_abd);
}
/* Issue I/O to commit a dirty buffer to disk. */
@@ -3033,6 +3692,8 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
zio_t *zio;
int wp_flag = 0;
+ ASSERT(dmu_tx_is_syncing(tx));
+
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
os = dn->dn_objset;
@@ -3091,17 +3752,27 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
dmu_write_policy(os, dn, db->db_level, wp_flag, &zp);
DB_DNODE_EXIT(db);
+ /*
+ * We copy the blkptr now (rather than when we instantiate the dirty
+ * record), because its value can change between open context and
+ * syncing context. We do not need to hold dn_struct_rwlock to read
+ * db_blkptr because we are in syncing context.
+ */
+ dr->dr_bp_copy = *db->db_blkptr;
+
if (db->db_level == 0 &&
dr->dt.dl.dr_override_state == DR_OVERRIDDEN) {
/*
* The BP for this block has been provided by open context
* (by dmu_sync() or dmu_buf_write_embedded()).
*/
- void *contents = (data != NULL) ? data->b_data : NULL;
+ abd_t *contents = (data != NULL) ?
+ abd_get_from_buf(data->b_data, arc_buf_size(data)) : NULL;
dr->dr_zio = zio_write(zio, os->os_spa, txg,
- db->db_blkptr, contents, db->db.db_size, &zp,
- dbuf_write_override_ready, NULL, dbuf_write_override_done,
+ &dr->dr_bp_copy, contents, db->db.db_size, db->db.db_size,
+ &zp, dbuf_write_override_ready, NULL, NULL,
+ dbuf_write_override_done,
dr, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
mutex_enter(&db->db_mtx);
dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN;
@@ -3109,26 +3780,39 @@ dbuf_write(dbuf_dirty_record_t *dr, arc_buf_t *data, dmu_tx_t *tx)
dr->dt.dl.dr_copies, dr->dt.dl.dr_nopwrite);
mutex_exit(&db->db_mtx);
} else if (db->db_state == DB_NOFILL) {
- ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF);
+ ASSERT(zp.zp_checksum == ZIO_CHECKSUM_OFF ||
+ zp.zp_checksum == ZIO_CHECKSUM_NOPARITY);
dr->dr_zio = zio_write(zio, os->os_spa, txg,
- db->db_blkptr, NULL, db->db.db_size, &zp,
- dbuf_write_nofill_ready, NULL, dbuf_write_nofill_done, db,
+ &dr->dr_bp_copy, NULL, db->db.db_size, db->db.db_size, &zp,
+ dbuf_write_nofill_ready, NULL, NULL,
+ dbuf_write_nofill_done, db,
ZIO_PRIORITY_ASYNC_WRITE,
ZIO_FLAG_MUSTSUCCEED | ZIO_FLAG_NODATA, &zb);
} else {
+ arc_done_func_t *children_ready_cb = NULL;
ASSERT(arc_released(data));
+
+ /*
+ * For indirect blocks, we want to setup the children
+ * ready callback so that we can properly handle an indirect
+ * block that only contains holes.
+ */
+ if (db->db_level != 0)
+ children_ready_cb = dbuf_write_children_ready;
+
dr->dr_zio = arc_write(zio, os->os_spa, txg,
- db->db_blkptr, data, DBUF_IS_L2CACHEABLE(db),
- DBUF_IS_L2COMPRESSIBLE(db), &zp, dbuf_write_ready,
- dbuf_write_physdone, dbuf_write_done, db,
- ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
+ &dr->dr_bp_copy, data, DBUF_IS_L2CACHEABLE(db),
+ &zp, dbuf_write_ready,
+ children_ready_cb, dbuf_write_physdone,
+ dbuf_write_done, db, ZIO_PRIORITY_ASYNC_WRITE,
+ ZIO_FLAG_MUSTSUCCEED, &zb);
}
}
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(dbuf_find);
EXPORT_SYMBOL(dbuf_is_metadata);
-EXPORT_SYMBOL(dbuf_evict);
+EXPORT_SYMBOL(dbuf_destroy);
EXPORT_SYMBOL(dbuf_loan_arcbuf);
EXPORT_SYMBOL(dbuf_whichblock);
EXPORT_SYMBOL(dbuf_read);
@@ -3143,7 +3827,6 @@ EXPORT_SYMBOL(dmu_buf_will_fill);
EXPORT_SYMBOL(dmu_buf_fill_done);
EXPORT_SYMBOL(dmu_buf_rele);
EXPORT_SYMBOL(dbuf_assign_arcbuf);
-EXPORT_SYMBOL(dbuf_clear);
EXPORT_SYMBOL(dbuf_prefetch);
EXPORT_SYMBOL(dbuf_hold_impl);
EXPORT_SYMBOL(dbuf_hold);
@@ -3159,6 +3842,25 @@ EXPORT_SYMBOL(dbuf_sync_list);
EXPORT_SYMBOL(dmu_buf_set_user);
EXPORT_SYMBOL(dmu_buf_set_user_ie);
EXPORT_SYMBOL(dmu_buf_get_user);
-EXPORT_SYMBOL(dmu_buf_freeable);
EXPORT_SYMBOL(dmu_buf_get_blkptr);
+
+/* BEGIN CSTYLED */
+module_param(dbuf_cache_max_bytes, ulong, 0644);
+MODULE_PARM_DESC(dbuf_cache_max_bytes,
+ "Maximum size in bytes of the dbuf cache.");
+
+module_param(dbuf_cache_hiwater_pct, uint, 0644);
+MODULE_PARM_DESC(dbuf_cache_hiwater_pct,
+ "Percentage over dbuf_cache_max_bytes when dbufs must be evicted "
+ "directly.");
+
+module_param(dbuf_cache_lowater_pct, uint, 0644);
+MODULE_PARM_DESC(dbuf_cache_lowater_pct,
+ "Percentage below dbuf_cache_max_bytes when the evict thread stops "
+ "evicting dbufs.");
+
+module_param(dbuf_cache_max_shift, int, 0644);
+MODULE_PARM_DESC(dbuf_cache_max_shift,
+ "Cap the size of the dbuf cache to a log2 fraction of arc size.");
+/* END CSTYLED */
#endif
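These tunables surface under /sys/module/zfs/parameters/ in the usual way;
a hedged example of pinning them at module load (values illustrative only,
not defaults from this patch):

	# /etc/modprobe.d/zfs.conf (illustrative)
	options zfs dbuf_cache_max_bytes=104857600
	options zfs dbuf_cache_hiwater_pct=10
	options zfs dbuf_cache_lowater_pct=10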
diff --git a/zfs/module/zfs/dbuf_stats.c b/zfs/module/zfs/dbuf_stats.c
index 6f39f80e563a..1712c9c10ee4 100644
--- a/zfs/module/zfs/dbuf_stats.c
+++ b/zfs/module/zfs/dbuf_stats.c
@@ -72,8 +72,7 @@ __dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
if (db->db_buf)
arc_buf_info(db->db_buf, &abi, zfs_dbuf_state_index);
- if (dn)
- __dmu_object_info_from_dnode(dn, &doi);
+ __dmu_object_info_from_dnode(dn, &doi);
nwritten = snprintf(buf, size,
"%-16s %-8llu %-8lld %-8lld %-8lld %-8llu %-8llu %-5d %-5d %-5lu | "
@@ -95,7 +94,7 @@ __dbuf_stats_hash_table_data(char *buf, size_t size, dmu_buf_impl_t *db)
abi.abi_state_type,
abi.abi_state_contents,
abi.abi_flags,
- (ulong_t)abi.abi_datacnt,
+ (ulong_t)abi.abi_bufcnt,
(u_longlong_t)abi.abi_size,
(u_longlong_t)abi.abi_access,
(ulong_t)abi.abi_mru_hits,
diff --git a/zfs/module/zfs/ddt.c b/zfs/module/zfs/ddt.c
index 12c1b7300a21..75ab7f5b26e1 100644
--- a/zfs/module/zfs/ddt.c
+++ b/zfs/module/zfs/ddt.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -36,6 +36,7 @@
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/dsl_scan.h>
+#include <sys/abd.h>
static kmem_cache_t *ddt_cache;
static kmem_cache_t *ddt_entry_cache;
@@ -62,7 +63,8 @@ ddt_object_create(ddt_t *ddt, enum ddt_type type, enum ddt_class class,
spa_t *spa = ddt->ddt_spa;
objset_t *os = ddt->ddt_os;
uint64_t *objectp = &ddt->ddt_object[type][class];
- boolean_t prehash = zio_checksum_table[ddt->ddt_checksum].ci_dedup;
+ boolean_t prehash = zio_checksum_table[ddt->ddt_checksum].ci_flags &
+ ZCHECKSUM_FLAG_DEDUP;
char name[DDT_NAMELEN];
ddt_object_name(ddt, type, class, name);
@@ -527,10 +529,17 @@ ddt_get_dedup_stats(spa_t *spa, ddt_stat_t *dds_total)
uint64_t
ddt_get_dedup_dspace(spa_t *spa)
{
- ddt_stat_t dds_total = { 0 };
+ ddt_stat_t dds_total;
+
+ if (spa->spa_dedup_dspace != ~0ULL)
+ return (spa->spa_dedup_dspace);
+
+ bzero(&dds_total, sizeof (ddt_stat_t));
+ /* Calculate and cache the stats */
ddt_get_dedup_stats(spa, &dds_total);
- return (dds_total.dds_ref_dsize - dds_total.dds_dsize);
+ spa->spa_dedup_dspace = dds_total.dds_ref_dsize - dds_total.dds_dsize;
+ return (spa->spa_dedup_dspace);
}
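/*
 * Note on the caching contract assumed here: ~0ULL is the "invalid"
 * sentinel for spa_dedup_dspace, reset wherever the histograms change
 * (see the ddt_load() and ddt_sync_table() hunks below).
 *
 *	spa->spa_dedup_dspace == ~0ULL  => recompute from ddt histograms
 *	anything else                   => cached value, returned as-is
 */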
uint64_t
@@ -705,9 +714,8 @@ ddt_free(ddt_entry_t *dde)
for (p = 0; p < DDT_PHYS_TYPES; p++)
ASSERT(dde->dde_lead_zio[p] == NULL);
- if (dde->dde_repair_data != NULL)
- zio_buf_free(dde->dde_repair_data,
- DDK_GET_PSIZE(&dde->dde_key));
+ if (dde->dde_repair_abd != NULL)
+ abd_free(dde->dde_repair_abd);
cv_destroy(&dde->dde_cv);
kmem_cache_free(ddt_entry_cache, dde);
@@ -811,23 +819,32 @@ ddt_prefetch(spa_t *spa, const blkptr_t *bp)
}
}
+/*
+ * Opaque struct used for ddt_key comparison
+ */
+#define DDT_KEY_CMP_LEN (sizeof (ddt_key_t) / sizeof (uint16_t))
+
+typedef struct ddt_key_cmp {
+ uint16_t u16[DDT_KEY_CMP_LEN];
+} ddt_key_cmp_t;
+
int
ddt_entry_compare(const void *x1, const void *x2)
{
const ddt_entry_t *dde1 = x1;
const ddt_entry_t *dde2 = x2;
- const uint64_t *u1 = (const uint64_t *)&dde1->dde_key;
- const uint64_t *u2 = (const uint64_t *)&dde2->dde_key;
+ const ddt_key_cmp_t *k1 = (const ddt_key_cmp_t *)&dde1->dde_key;
+ const ddt_key_cmp_t *k2 = (const ddt_key_cmp_t *)&dde2->dde_key;
+ int32_t cmp = 0;
int i;
- for (i = 0; i < DDT_KEY_WORDS; i++) {
- if (u1[i] < u2[i])
- return (-1);
- if (u1[i] > u2[i])
- return (1);
+ for (i = 0; i < DDT_KEY_CMP_LEN; i++) {
+ cmp = (int32_t)k1->u16[i] - (int32_t)k2->u16[i];
+ if (likely(cmp))
+ break;
}
- return (0);
+ return (AVL_ISIGN(cmp));
}
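/*
 * Reviewer sketch of why the uint16_t comparison is safe: the AVL tree
 * only needs some consistent total order over the opaque key, not the
 * old uint64_t ordering, so the branch-light word-wise subtract is valid
 * even though it sorts differently on little-endian hosts. AVL_ISIGN()
 * collapses the first differing word to -1/0/+1; assuming the 40-byte
 * ddt_key_t layout, at most DDT_KEY_CMP_LEN == 20 words are compared.
 */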
static ddt_t *
@@ -905,6 +922,7 @@ ddt_load(spa_t *spa)
*/
bcopy(ddt->ddt_histogram, &ddt->ddt_histogram_cache,
sizeof (ddt->ddt_histogram));
+ spa->spa_dedup_dspace = ~0ULL;
}
return (0);
@@ -992,7 +1010,7 @@ ddt_repair_done(ddt_t *ddt, ddt_entry_t *dde)
ddt_enter(ddt);
- if (dde->dde_repair_data != NULL && spa_writeable(ddt->ddt_spa) &&
+ if (dde->dde_repair_abd != NULL && spa_writeable(ddt->ddt_spa) &&
avl_find(&ddt->ddt_repair_tree, dde, &where) == NULL)
avl_insert(&ddt->ddt_repair_tree, dde, where);
else
@@ -1030,7 +1048,7 @@ ddt_repair_entry(ddt_t *ddt, ddt_entry_t *dde, ddt_entry_t *rdde, zio_t *rio)
continue;
ddt_bp_create(ddt->ddt_checksum, ddk, ddp, &blk);
zio_nowait(zio_rewrite(zio, zio->io_spa, 0, &blk,
- rdde->dde_repair_data, DDK_GET_PSIZE(rddk), NULL, NULL,
+ rdde->dde_repair_abd, DDK_GET_PSIZE(rddk), NULL, NULL,
ZIO_PRIORITY_SYNC_WRITE, ZIO_DDT_CHILD_FLAGS(zio), NULL));
}
@@ -1172,6 +1190,7 @@ ddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg)
bcopy(ddt->ddt_histogram, &ddt->ddt_histogram_cache,
sizeof (ddt->ddt_histogram));
+ spa->spa_dedup_dspace = ~0ULL;
}
void
diff --git a/zfs/module/zfs/dmu.c b/zfs/module/zfs/dmu.c
index 9baeb01fd1dc..44705006eb97 100644
--- a/zfs/module/zfs/dmu.c
+++ b/zfs/module/zfs/dmu.c
@@ -20,9 +20,10 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
- * Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2016, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
*/
@@ -46,10 +47,12 @@
#include <sys/zio_compress.h>
#include <sys/sa.h>
#include <sys/zfeature.h>
+#include <sys/abd.h>
+#include <sys/trace_dmu.h>
+#include <sys/zfs_rlock.h>
#ifdef _KERNEL
#include <sys/vmsystm.h>
#include <sys/zfs_znode.h>
-#include <linux/kmap_compat.h>
#endif
/*
@@ -57,6 +60,19 @@
*/
int zfs_nopwrite_enabled = 1;
+/*
+ * Tunable to control percentage of dirtied blocks from frees in one TXG.
+ * After this threshold is crossed, additional dirty blocks from frees
+ * wait until the next TXG.
+ * A value of zero will disable this throttle.
+ */
+unsigned long zfs_per_txg_dirty_frees_percent = 30;
+
+/*
+ * Enable/disable forcing txg sync when dirty in dmu_offset_next.
+ */
+int zfs_dmu_offset_next_sync = 0;
+
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
{ DMU_BSWAP_UINT8, TRUE, "unallocated" },
{ DMU_BSWAP_ZAP, TRUE, "object directory" },
@@ -127,6 +143,26 @@ const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
{ zfs_acl_byteswap, "acl" }
};
+int
+dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset,
+ void *tag, dmu_buf_t **dbp)
+{
+ uint64_t blkid;
+ dmu_buf_impl_t *db;
+
+ blkid = dbuf_whichblock(dn, 0, offset);
+ rw_enter(&dn->dn_struct_rwlock, RW_READER);
+ db = dbuf_hold(dn, blkid, tag);
+ rw_exit(&dn->dn_struct_rwlock);
+
+ if (db == NULL) {
+ *dbp = NULL;
+ return (SET_ERROR(EIO));
+ }
+
+ *dbp = &db->db;
+ return (0);
+}
int
dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
void *tag, dmu_buf_t **dbp)
@@ -139,7 +175,7 @@ dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
err = dnode_hold(os, object, FTAG, &dn);
if (err)
return (err);
- blkid = dbuf_whichblock(dn, offset);
+ blkid = dbuf_whichblock(dn, 0, offset);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
db = dbuf_hold(dn, blkid, tag);
rw_exit(&dn->dn_struct_rwlock);
@@ -154,6 +190,29 @@ dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
return (err);
}
+int
+dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
+ void *tag, dmu_buf_t **dbp, int flags)
+{
+ int err;
+ int db_flags = DB_RF_CANFAIL;
+
+ if (flags & DMU_READ_NO_PREFETCH)
+ db_flags |= DB_RF_NOPREFETCH;
+
+ err = dmu_buf_hold_noread_by_dnode(dn, offset, tag, dbp);
+ if (err == 0) {
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)(*dbp);
+ err = dbuf_read(db, NULL, db_flags);
+ if (err != 0) {
+ dbuf_rele(db, tag);
+ *dbp = NULL;
+ }
+ }
+
+ return (err);
+}
+
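+/*
+ * Hedged usage sketch for the new by-dnode variant, which skips the
+ * object-number lookup when the caller already holds a dnode:
+ *
+ *	dmu_buf_t *dbp;
+ *	int err = dmu_buf_hold_by_dnode(dn, offset, FTAG, &dbp,
+ *	    DMU_READ_NO_PREFETCH);
+ *	if (err == 0)
+ *		dmu_buf_rele(dbp, FTAG);
+ */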
int
dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
void *tag, dmu_buf_t **dbp, int flags)
@@ -180,7 +239,7 @@ dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
int
dmu_bonus_max(void)
{
- return (DN_MAX_BONUSLEN);
+ return (DN_OLD_MAX_BONUSLEN);
}
int
@@ -326,12 +385,17 @@ dmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags, void *tag, dmu_buf_t **dbp)
if ((flags & DB_RF_HAVESTRUCT) == 0)
rw_exit(&dn->dn_struct_rwlock);
- ASSERT(db != NULL);
+ if (db == NULL) {
+ *dbp = NULL;
+ return (SET_ERROR(EIO));
+ }
err = dbuf_read(db, NULL, flags);
if (err == 0)
*dbp = &db->db;
- else
+ else {
dbuf_rele(db, tag);
+ *dbp = NULL;
+ }
return (err);
}
@@ -387,7 +451,7 @@ dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp)
*/
static int
dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
- int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
+ boolean_t read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
{
dmu_buf_t **dbp;
uint64_t blkid, nblks, i;
@@ -397,15 +461,19 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
ASSERT(length <= DMU_MAX_ACCESS);
- dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT;
- if (flags & DMU_READ_NO_PREFETCH || length > zfetch_array_rd_sz)
- dbuf_flags |= DB_RF_NOPREFETCH;
+ /*
+ * Note: We directly notify the prefetch code of this read, so that
+ * we can tell it about the multi-block read. dbuf_read() only knows
+ * about the one block it is accessing.
+ */
+ dbuf_flags = DB_RF_CANFAIL | DB_RF_NEVERWAIT | DB_RF_HAVESTRUCT |
+ DB_RF_NOPREFETCH;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
if (dn->dn_datablkshift) {
int blkshift = dn->dn_datablkshift;
- nblks = (P2ROUNDUP(offset+length, 1ULL<<blkshift) -
- P2ALIGN(offset, 1ULL<<blkshift)) >> blkshift;
+ nblks = (P2ROUNDUP(offset + length, 1ULL << blkshift) -
+ P2ALIGN(offset, 1ULL << blkshift)) >> blkshift;
} else {
if (offset + length > dn->dn_datablksz) {
zfs_panic_recover("zfs: accessing past end of object "
@@ -422,21 +490,27 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
dbp = kmem_zalloc(sizeof (dmu_buf_t *) * nblks, KM_SLEEP);
zio = zio_root(dn->dn_objset->os_spa, NULL, NULL, ZIO_FLAG_CANFAIL);
- blkid = dbuf_whichblock(dn, offset);
+ blkid = dbuf_whichblock(dn, 0, offset);
for (i = 0; i < nblks; i++) {
- dmu_buf_impl_t *db = dbuf_hold(dn, blkid+i, tag);
+ dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
if (db == NULL) {
rw_exit(&dn->dn_struct_rwlock);
dmu_buf_rele_array(dbp, nblks, tag);
zio_nowait(zio);
return (SET_ERROR(EIO));
}
+
/* initiate async i/o */
- if (read) {
+ if (read)
(void) dbuf_read(db, zio, dbuf_flags);
- }
dbp[i] = &db->db;
}
+
+ if ((flags & DMU_READ_NO_PREFETCH) == 0 &&
+ DNODE_META_IS_CACHEABLE(dn) && length <= zfetch_array_rd_sz) {
+ dmu_zfetch(&dn->dn_zfetch, blkid, nblks,
+ read && DNODE_IS_CACHEABLE(dn));
+ }
rw_exit(&dn->dn_struct_rwlock);
/* wait for async i/o */
@@ -490,7 +564,8 @@ dmu_buf_hold_array(objset_t *os, uint64_t object, uint64_t offset,
int
dmu_buf_hold_array_by_bonus(dmu_buf_t *db_fake, uint64_t offset,
- uint64_t length, int read, void *tag, int *numbufsp, dmu_buf_t ***dbpp)
+ uint64_t length, boolean_t read, void *tag, int *numbufsp,
+ dmu_buf_t ***dbpp)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
dnode_t *dn;
@@ -523,25 +598,21 @@ dmu_buf_rele_array(dmu_buf_t **dbp_fake, int numbufs, void *tag)
}
/*
- * Issue prefetch i/os for the given blocks.
- *
- * Note: The assumption is that we *know* these blocks will be needed
- * almost immediately. Therefore, the prefetch i/os will be issued at
- * ZIO_PRIORITY_SYNC_READ
+ * Issue prefetch i/os for the given blocks. If level is greater than 0, the
+ * indirect blocks prefetched will be those that point to the blocks containing
+ * the data starting at offset, and continuing to offset + len.
*
- * Note: indirect blocks and other metadata will be read synchronously,
- * causing this function to block if they are not already cached.
+ * Note that if the indirect blocks above the blocks being prefetched are not in
+ * cache, they will be asynchronously read in.
*/
void
-dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len)
+dmu_prefetch(objset_t *os, uint64_t object, int64_t level, uint64_t offset,
+ uint64_t len, zio_priority_t pri)
{
dnode_t *dn;
uint64_t blkid;
int nblks, err;
- if (zfs_prefetch_disable)
- return;
-
if (len == 0) { /* they're interested in the bonus buffer */
dn = DMU_META_DNODE(os);
@@ -549,8 +620,9 @@ dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len)
return;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
- blkid = dbuf_whichblock(dn, object * sizeof (dnode_phys_t));
- dbuf_prefetch(dn, blkid, ZIO_PRIORITY_SYNC_READ);
+ blkid = dbuf_whichblock(dn, level,
+ object * sizeof (dnode_phys_t));
+ dbuf_prefetch(dn, level, blkid, pri, 0);
rw_exit(&dn->dn_struct_rwlock);
return;
}
@@ -565,10 +637,16 @@ dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len)
return;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
- if (dn->dn_datablkshift) {
- int blkshift = dn->dn_datablkshift;
- nblks = (P2ROUNDUP(offset + len, 1 << blkshift) -
- P2ALIGN(offset, 1 << blkshift)) >> blkshift;
+ /*
+ * offset + len - 1 is the last byte we want to prefetch for, and offset
+ * is the first. Then dbuf_whichblock(dn, level, offset + len - 1) is the
+ * last block we want to prefetch, and dbuf_whichblock(dn, level,
+ * offset) is the first. Then the number we need to prefetch is the
+ * last - first + 1.
+ */
+ if (level > 0 || dn->dn_datablkshift != 0) {
+ nblks = dbuf_whichblock(dn, level, offset + len - 1) -
+ dbuf_whichblock(dn, level, offset) + 1;
} else {
nblks = (offset < dn->dn_datablksz);
}
@@ -576,9 +654,9 @@ dmu_prefetch(objset_t *os, uint64_t object, uint64_t offset, uint64_t len)
if (nblks != 0) {
int i;
- blkid = dbuf_whichblock(dn, offset);
+ blkid = dbuf_whichblock(dn, level, offset);
for (i = 0; i < nblks; i++)
- dbuf_prefetch(dn, blkid + i, ZIO_PRIORITY_SYNC_READ);
+ dbuf_prefetch(dn, level, blkid + i, pri, 0);
}
rw_exit(&dn->dn_struct_rwlock);
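
As a worked example of the computation described in the comment above (the
constants here are illustrative, not from this patch): at level 0 with a
128K data block size, dbuf_whichblock() reduces to a right shift by
dn_datablkshift, so prefetching 300K starting at offset 100K covers four
blocks:

    /* Sketch only: assumes level == 0 and dn_datablkshift == 17 (128K). */
    uint64_t first = (100ULL * 1024) >> 17;                  /* block 0 */
    uint64_t last = (100ULL * 1024 + 300 * 1024 - 1) >> 17;  /* block 3 */
    uint64_t nblks = last - first + 1;                       /* 4 blocks */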
@@ -642,12 +720,31 @@ get_next_chunk(dnode_t *dn, uint64_t *start, uint64_t minimum)
return (0);
}
+/*
+ * If this objset is of type DMU_OST_ZFS, return true if the vfs's unmounted
+ * flag is set; otherwise return false.
+ * Used below in dmu_free_long_range_impl() to enable aborting when unmounting.
+ */
+/*ARGSUSED*/
+static boolean_t
+dmu_objset_zfs_unmounting(objset_t *os)
+{
+#ifdef _KERNEL
+ if (dmu_objset_type(os) == DMU_OST_ZFS)
+ return (zfs_get_vfs_flag_unmounted(os));
+#endif
+ return (B_FALSE);
+}
+
static int
dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
uint64_t length)
{
uint64_t object_size;
int err;
+ uint64_t dirty_frees_threshold;
+ dsl_pool_t *dp = dmu_objset_pool(os);
+ int t;
if (dn == NULL)
return (SET_ERROR(EINVAL));
@@ -656,13 +753,23 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
if (offset >= object_size)
return (0);
+ if (zfs_per_txg_dirty_frees_percent <= 100)
+ dirty_frees_threshold =
+ zfs_per_txg_dirty_frees_percent * zfs_dirty_data_max / 100;
+ else
+ dirty_frees_threshold = zfs_dirty_data_max / 4;
+
if (length == DMU_OBJECT_END || offset + length > object_size)
length = object_size - offset;
while (length != 0) {
- uint64_t chunk_end, chunk_begin;
+ uint64_t chunk_end, chunk_begin, chunk_len;
+ uint64_t long_free_dirty_all_txgs = 0;
dmu_tx_t *tx;
+ if (dmu_objset_zfs_unmounting(dn->dn_objset))
+ return (SET_ERROR(EINTR));
+
chunk_end = chunk_begin = offset + length;
/* move chunk_begin backwards to the beginning of this chunk */
@@ -672,18 +779,51 @@ dmu_free_long_range_impl(objset_t *os, dnode_t *dn, uint64_t offset,
ASSERT3U(chunk_begin, >=, offset);
ASSERT3U(chunk_begin, <=, chunk_end);
+ chunk_len = chunk_end - chunk_begin;
+
+ mutex_enter(&dp->dp_lock);
+ for (t = 0; t < TXG_SIZE; t++) {
+ long_free_dirty_all_txgs +=
+ dp->dp_long_free_dirty_pertxg[t];
+ }
+ mutex_exit(&dp->dp_lock);
+
+ /*
+ * To avoid filling up a TXG with just frees, wait for
+ * the next TXG to open before freeing more chunks if
+ * we have reached the threshold of frees.
+ */
+ if (dirty_frees_threshold != 0 &&
+ long_free_dirty_all_txgs >= dirty_frees_threshold) {
+ txg_wait_open(dp, 0);
+ continue;
+ }
+
tx = dmu_tx_create(os);
- dmu_tx_hold_free(tx, dn->dn_object,
- chunk_begin, chunk_end - chunk_begin);
+ dmu_tx_hold_free(tx, dn->dn_object, chunk_begin, chunk_len);
+
+ /*
+ * Mark this transaction as typically resulting in a net
+ * reduction in space used.
+ */
+ dmu_tx_mark_netfree(tx);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err) {
dmu_tx_abort(tx);
return (err);
}
- dnode_free_range(dn, chunk_begin, chunk_end - chunk_begin, tx);
+
+ mutex_enter(&dp->dp_lock);
+ dp->dp_long_free_dirty_pertxg[dmu_tx_get_txg(tx) & TXG_MASK] +=
+ chunk_len;
+ mutex_exit(&dp->dp_lock);
+ DTRACE_PROBE3(free__long__range,
+ uint64_t, long_free_dirty_all_txgs, uint64_t, chunk_len,
+ uint64_t, dmu_tx_get_txg(tx));
+ dnode_free_range(dn, chunk_begin, chunk_len, tx);
dmu_tx_commit(tx);
- length -= chunk_end - chunk_begin;
+ length -= chunk_len;
}
return (0);
}
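
A minimal sketch of the throttle arithmetic introduced above, assuming the
tunable is at 30 percent and using an illustrative zfs_dirty_data_max of
4 GiB (both values are examples, not taken from this patch):

    uint64_t zfs_dirty_data_max = 4ULL << 30;   /* 4 GiB (example) */
    uint64_t pct = 30;                          /* zfs_per_txg_dirty_frees_percent */
    uint64_t dirty_frees_threshold = (pct <= 100) ?
        pct * zfs_dirty_data_max / 100 :        /* ~1.2 GiB here */
        zfs_dirty_data_max / 4;                 /* out of range: fall back to 25% */

Once the free-dirtied bytes accumulated across all open TXGs reach this
threshold, the loop above blocks in txg_wait_open() rather than dirtying
further frees.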
@@ -726,6 +866,7 @@ dmu_free_long_object(objset_t *os, uint64_t object)
tx = dmu_tx_create(os);
dmu_tx_hold_bonus(tx, object);
dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
+ dmu_tx_mark_netfree(tx);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err == 0) {
err = dmu_object_free(os, object, tx);
@@ -752,17 +893,12 @@ dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
return (0);
}
-int
-dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
+static int
+dmu_read_impl(dnode_t *dn, uint64_t offset, uint64_t size,
void *buf, uint32_t flags)
{
- dnode_t *dn;
dmu_buf_t **dbp;
- int numbufs, err;
-
- err = dnode_hold(os, object, FTAG, &dn);
- if (err)
- return (err);
+ int numbufs, err = 0;
/*
* Deal with odd block sizes, where there can't be data past the first
@@ -807,22 +943,37 @@ dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
}
dmu_buf_rele_array(dbp, numbufs, FTAG);
}
- dnode_rele(dn, FTAG);
return (err);
}
-void
-dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
- const void *buf, dmu_tx_t *tx)
+int
+dmu_read(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
+ void *buf, uint32_t flags)
{
- dmu_buf_t **dbp;
- int numbufs, i;
+ dnode_t *dn;
+ int err;
- if (size == 0)
- return;
+ err = dnode_hold(os, object, FTAG, &dn);
+ if (err != 0)
+ return (err);
- VERIFY0(dmu_buf_hold_array(os, object, offset, size,
- FALSE, FTAG, &numbufs, &dbp));
+ err = dmu_read_impl(dn, offset, size, buf, flags);
+ dnode_rele(dn, FTAG);
+ return (err);
+}
+
+int
+dmu_read_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size, void *buf,
+ uint32_t flags)
+{
+ return (dmu_read_impl(dn, offset, size, buf, flags));
+}
+
+static void
+dmu_write_impl(dmu_buf_t **dbp, int numbufs, uint64_t offset, uint64_t size,
+ const void *buf, dmu_tx_t *tx)
+{
+ int i;
for (i = 0; i < numbufs; i++) {
uint64_t tocpy;
@@ -850,6 +1001,37 @@ dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
size -= tocpy;
buf = (char *)buf + tocpy;
}
+}
+
+void
+dmu_write(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
+ const void *buf, dmu_tx_t *tx)
+{
+ dmu_buf_t **dbp;
+ int numbufs;
+
+ if (size == 0)
+ return;
+
+ VERIFY0(dmu_buf_hold_array(os, object, offset, size,
+ FALSE, FTAG, &numbufs, &dbp));
+ dmu_write_impl(dbp, numbufs, offset, size, buf, tx);
+ dmu_buf_rele_array(dbp, numbufs, FTAG);
+}
+
+void
+dmu_write_by_dnode(dnode_t *dn, uint64_t offset, uint64_t size,
+ const void *buf, dmu_tx_t *tx)
+{
+ dmu_buf_t **dbp;
+ int numbufs;
+
+ if (size == 0)
+ return;
+
+ VERIFY0(dmu_buf_hold_array_by_dnode(dn, offset, size,
+ FALSE, FTAG, &numbufs, &dbp, DMU_READ_PREFETCH));
+ dmu_write_impl(dbp, numbufs, offset, size, buf, tx);
dmu_buf_rele_array(dbp, numbufs, FTAG);
}
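
The new *_by_dnode variants let a caller that already holds a dnode skip
the per-call dnode_hold()/dnode_rele() round trip through the object
number. A minimal usage sketch (the object number, buffer, and I/O sizes
are illustrative):

    dnode_t *dn;
    int err = dnode_hold(os, object, FTAG, &dn);    /* hold once */
    if (err == 0) {
            for (int i = 0; i < 16; i++)            /* many I/Os, one hold */
                    dmu_write_by_dnode(dn, (uint64_t)i * 4096, 4096, buf, tx);
            dnode_rele(dn, FTAG);
    }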
@@ -923,6 +1105,7 @@ static xuio_stats_t xuio_stats = {
atomic_add_64(&xuio_stats.stat.value.ui64, (val))
#define XUIOSTAT_BUMP(stat) XUIOSTAT_INCR(stat, 1)
+#ifdef HAVE_UIO_ZEROCOPY
int
dmu_xuio_init(xuio_t *xuio, int nblk)
{
@@ -975,7 +1158,7 @@ dmu_xuio_add(xuio_t *xuio, arc_buf_t *abuf, offset_t off, size_t n)
int i = priv->next++;
ASSERT(i < priv->cnt);
- ASSERT(off + n <= arc_buf_size(abuf));
+ ASSERT(off + n <= arc_buf_lsize(abuf));
iov = (iovec_t *)uio->uio_iov + i;
iov->iov_base = (char *)abuf->b_data + off;
iov->iov_len = n;
@@ -1007,6 +1190,7 @@ dmu_xuio_clear(xuio_t *xuio, int i)
ASSERT(i < priv->cnt);
priv->bufs[i] = NULL;
}
+#endif /* HAVE_UIO_ZEROCOPY */
static void
xuio_stat_init(void)
@@ -1030,190 +1214,26 @@ xuio_stat_fini(void)
}
void
-xuio_stat_wbuf_copied()
+xuio_stat_wbuf_copied(void)
{
XUIOSTAT_BUMP(xuiostat_wbuf_copied);
}
void
-xuio_stat_wbuf_nocopy()
+xuio_stat_wbuf_nocopy(void)
{
XUIOSTAT_BUMP(xuiostat_wbuf_nocopy);
}
#ifdef _KERNEL
-
-/*
- * Copy up to size bytes between arg_buf and req based on the data direction
- * described by the req. If an entire req's data cannot be transfered in one
- * pass, you should pass in @req_offset to indicate where to continue. The
- * return value is the number of bytes successfully copied to arg_buf.
- */
-static int
-dmu_bio_copy(void *arg_buf, int size, struct bio *bio, size_t bio_offset)
-{
- struct bio_vec bv, *bvp = &bv;
- bvec_iterator_t iter;
- char *bv_buf;
- int tocpy, bv_len, bv_offset;
- int offset = 0;
- void *paddr;
-
- bio_for_each_segment4(bv, bvp, bio, iter) {
-
- /*
- * Fully consumed the passed arg_buf. We use goto here because
- * rq_for_each_segment is a double loop
- */
- ASSERT3S(offset, <=, size);
- if (size == offset)
- goto out;
-
- /* Skip already copied bvp */
- if (bio_offset >= bvp->bv_len) {
- bio_offset -= bvp->bv_len;
- continue;
- }
-
- bv_len = bvp->bv_len - bio_offset;
- bv_offset = bvp->bv_offset + bio_offset;
- bio_offset = 0;
-
- tocpy = MIN(bv_len, size - offset);
- ASSERT3S(tocpy, >=, 0);
-
- paddr = zfs_kmap_atomic(bvp->bv_page, KM_USER0);
- bv_buf = paddr + bv_offset;
- ASSERT3P(paddr, !=, NULL);
-
- if (bio_data_dir(bio) == WRITE)
- memcpy(arg_buf + offset, bv_buf, tocpy);
- else
- memcpy(bv_buf, arg_buf + offset, tocpy);
- zfs_kunmap_atomic(paddr, KM_USER0);
- offset += tocpy;
- }
-out:
- return (offset);
-}
-
int
-dmu_read_bio(objset_t *os, uint64_t object, struct bio *bio)
-{
- uint64_t offset = BIO_BI_SECTOR(bio) << 9;
- uint64_t size = BIO_BI_SIZE(bio);
- dmu_buf_t **dbp;
- int numbufs, i, err;
- size_t bio_offset;
-
- /*
- * NB: we could do this block-at-a-time, but it's nice
- * to be reading in parallel.
- */
- err = dmu_buf_hold_array(os, object, offset, size, TRUE, FTAG,
- &numbufs, &dbp);
- if (err)
- return (err);
-
- bio_offset = 0;
- for (i = 0; i < numbufs; i++) {
- uint64_t tocpy;
- int64_t bufoff;
- int didcpy;
- dmu_buf_t *db = dbp[i];
-
- bufoff = offset - db->db_offset;
- ASSERT3S(bufoff, >=, 0);
-
- tocpy = MIN(db->db_size - bufoff, size);
- if (tocpy == 0)
- break;
-
- didcpy = dmu_bio_copy(db->db_data + bufoff, tocpy, bio,
- bio_offset);
-
- if (didcpy < tocpy)
- err = EIO;
-
- if (err)
- break;
-
- size -= tocpy;
- offset += didcpy;
- bio_offset += didcpy;
- err = 0;
- }
- dmu_buf_rele_array(dbp, numbufs, FTAG);
-
- return (err);
-}
-
-int
-dmu_write_bio(objset_t *os, uint64_t object, struct bio *bio, dmu_tx_t *tx)
-{
- uint64_t offset = BIO_BI_SECTOR(bio) << 9;
- uint64_t size = BIO_BI_SIZE(bio);
- dmu_buf_t **dbp;
- int numbufs, i, err;
- size_t bio_offset;
-
- if (size == 0)
- return (0);
-
- err = dmu_buf_hold_array(os, object, offset, size, FALSE, FTAG,
- &numbufs, &dbp);
- if (err)
- return (err);
-
- bio_offset = 0;
- for (i = 0; i < numbufs; i++) {
- uint64_t tocpy;
- int64_t bufoff;
- int didcpy;
- dmu_buf_t *db = dbp[i];
-
- bufoff = offset - db->db_offset;
- ASSERT3S(bufoff, >=, 0);
-
- tocpy = MIN(db->db_size - bufoff, size);
- if (tocpy == 0)
- break;
-
- ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);
-
- if (tocpy == db->db_size)
- dmu_buf_will_fill(db, tx);
- else
- dmu_buf_will_dirty(db, tx);
-
- didcpy = dmu_bio_copy(db->db_data + bufoff, tocpy, bio,
- bio_offset);
-
- if (tocpy == db->db_size)
- dmu_buf_fill_done(db, tx);
-
- if (didcpy < tocpy)
- err = EIO;
-
- if (err)
- break;
-
- size -= tocpy;
- offset += didcpy;
- bio_offset += didcpy;
- err = 0;
- }
-
- dmu_buf_rele_array(dbp, numbufs, FTAG);
- return (err);
-}
-
-static int
dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
{
dmu_buf_t **dbp;
int numbufs, i, err;
+#ifdef HAVE_UIO_ZEROCOPY
xuio_t *xuio = NULL;
+#endif
/*
* NB: we could do this block-at-a-time, but it's nice
@@ -1234,6 +1254,7 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
bufoff = uio->uio_loffset - db->db_offset;
tocpy = MIN(db->db_size - bufoff, size);
+#ifdef HAVE_UIO_ZEROCOPY
if (xuio) {
dmu_buf_impl_t *dbi = (dmu_buf_impl_t *)db;
arc_buf_t *dbuf_abuf = dbi->db_buf;
@@ -1248,10 +1269,10 @@ dmu_read_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size)
XUIOSTAT_BUMP(xuiostat_rbuf_nocopy);
else
XUIOSTAT_BUMP(xuiostat_rbuf_copied);
- } else {
+ } else
+#endif
err = uiomove((char *)db->db_data + bufoff, tocpy,
UIO_READ, uio);
- }
if (err)
break;
@@ -1314,7 +1335,7 @@ dmu_read_uio(objset_t *os, uint64_t object, uio_t *uio, uint64_t size)
return (err);
}
-static int
+int
dmu_write_uio_dnode(dnode_t *dn, uio_t *uio, uint64_t size, dmu_tx_t *tx)
{
dmu_buf_t **dbp;
@@ -1429,7 +1450,7 @@ dmu_request_arcbuf(dmu_buf_t *handle, int size)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)handle;
- return (arc_loan_buf(db->db_objset->os_spa, size));
+ return (arc_loan_buf(db->db_objset->os_spa, B_FALSE, size));
}
/*
@@ -1439,7 +1460,7 @@ void
dmu_return_arcbuf(arc_buf_t *buf)
{
arc_return_buf(buf, FTAG);
- VERIFY(arc_buf_remove_ref(buf, FTAG));
+ arc_buf_destroy(buf, FTAG);
}
/*
@@ -1454,31 +1475,32 @@ dmu_assign_arcbuf(dmu_buf_t *handle, uint64_t offset, arc_buf_t *buf,
dmu_buf_impl_t *dbuf = (dmu_buf_impl_t *)handle;
dnode_t *dn;
dmu_buf_impl_t *db;
- uint32_t blksz = (uint32_t)arc_buf_size(buf);
+ uint32_t blksz = (uint32_t)arc_buf_lsize(buf);
uint64_t blkid;
DB_DNODE_ENTER(dbuf);
dn = DB_DNODE(dbuf);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
- blkid = dbuf_whichblock(dn, offset);
+ blkid = dbuf_whichblock(dn, 0, offset);
VERIFY((db = dbuf_hold(dn, blkid, FTAG)) != NULL);
rw_exit(&dn->dn_struct_rwlock);
DB_DNODE_EXIT(dbuf);
/*
* We can only assign if the offset is aligned, the arc buf is the
- * same size as the dbuf, and the dbuf is not metadata. It
- * can't be metadata because the loaned arc buf comes from the
- * user-data kmem area.
+ * same size as the dbuf, and the dbuf is not metadata.
*/
- if (offset == db->db.db_offset && blksz == db->db.db_size &&
- DBUF_GET_BUFC_TYPE(db) == ARC_BUFC_DATA) {
+ if (offset == db->db.db_offset && blksz == db->db.db_size) {
dbuf_assign_arcbuf(db, buf, tx);
dbuf_rele(db, FTAG);
} else {
objset_t *os;
uint64_t object;
+ /* compressed bufs must always be assignable to their dbuf */
+ ASSERT3U(arc_get_compression(buf), ==, ZIO_COMPRESS_OFF);
+ ASSERT(!(buf->b_flags & ARC_BUF_FLAG_COMPRESSED));
+
DB_DNODE_ENTER(dbuf);
dn = DB_DNODE(dbuf);
os = dn->dn_objset;
@@ -1540,13 +1562,15 @@ dmu_sync_done(zio_t *zio, arc_buf_t *buf, void *varg)
if (zio->io_error == 0) {
dr->dt.dl.dr_nopwrite = !!(zio->io_flags & ZIO_FLAG_NOPWRITE);
if (dr->dt.dl.dr_nopwrite) {
- ASSERTV(blkptr_t *bp = zio->io_bp);
- ASSERTV(blkptr_t *bp_orig = &zio->io_bp_orig);
- ASSERTV(uint8_t chksum = BP_GET_CHECKSUM(bp_orig));
+ blkptr_t *bp = zio->io_bp;
+ blkptr_t *bp_orig = &zio->io_bp_orig;
+ uint8_t chksum = BP_GET_CHECKSUM(bp_orig);
ASSERT(BP_EQUAL(bp, bp_orig));
+ VERIFY(BP_EQUAL(bp, db->db_blkptr));
ASSERT(zio->io_prop.zp_compress != ZIO_COMPRESS_OFF);
- ASSERT(zio_checksum_table[chksum].ci_dedup);
+ VERIFY(zio_checksum_table[chksum].ci_flags &
+ ZCHECKSUM_FLAG_NOPWRITE);
}
dr->dt.dl.dr_overridden_by = *zio->io_bp;
dr->dt.dl.dr_override_state = DR_OVERRIDDEN;
@@ -1584,25 +1608,18 @@ dmu_sync_late_arrival_done(zio_t *zio)
ASSERTV(blkptr_t *bp_orig = &zio->io_bp_orig);
if (zio->io_error == 0 && !BP_IS_HOLE(bp)) {
- /*
- * If we didn't allocate a new block (i.e. ZIO_FLAG_NOPWRITE)
- * then there is nothing to do here. Otherwise, free the
- * newly allocated block in this txg.
- */
- if (zio->io_flags & ZIO_FLAG_NOPWRITE) {
- ASSERT(BP_EQUAL(bp, bp_orig));
- } else {
- ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig));
- ASSERT(zio->io_bp->blk_birth == zio->io_txg);
- ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa));
- zio_free(zio->io_spa, zio->io_txg, zio->io_bp);
- }
+ ASSERT(!(zio->io_flags & ZIO_FLAG_NOPWRITE));
+ ASSERT(BP_IS_HOLE(bp_orig) || !BP_EQUAL(bp, bp_orig));
+ ASSERT(zio->io_bp->blk_birth == zio->io_txg);
+ ASSERT(zio->io_txg > spa_syncing_txg(zio->io_spa));
+ zio_free(zio->io_spa, zio->io_txg, zio->io_bp);
}
dmu_tx_commit(dsa->dsa_tx);
dsa->dsa_done(dsa->dsa_zgd, zio->io_error);
+ abd_put(zio->io_abd);
kmem_free(dsa, sizeof (*dsa));
}
@@ -1627,10 +1644,34 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
dsa->dsa_zgd = zgd;
dsa->dsa_tx = tx;
+ /*
+ * Since we are currently syncing this txg, it's nontrivial to
+ * determine what BP to nopwrite against, so we disable nopwrite.
+ *
+ * When syncing, the db_blkptr is initially the BP of the previous
+ * txg. We can not nopwrite against it because it will be changed
+ * (this is similar to the non-late-arrival case where the dbuf is
+ * dirty in a future txg).
+ *
+ * Then dbuf_write_ready() sets db_blkptr to the location we will write.
+ * We can not nopwrite against it because although the BP will not
+ * (typically) be changed, the data has not yet been persisted to this
+ * location.
+ *
+ * Finally, when dbuf_write_done() is called, it is theoretically
+ * possible to always nopwrite, because the data that was written in
+ * this txg is the same data that we are trying to write. However, we
+ * would need to check that this dbuf is not dirty in any future
+ * txgs (as we do in the normal dmu_sync() path). For simplicity, we
+ * don't nopwrite in this case.
+ */
+ zp->zp_nopwrite = B_FALSE;
+
zio_nowait(zio_write(pio, os->os_spa, dmu_tx_get_txg(tx), zgd->zgd_bp,
- zgd->zgd_db->db_data, zgd->zgd_db->db_size, zp,
- dmu_sync_late_arrival_ready, NULL, dmu_sync_late_arrival_done, dsa,
- ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL|ZIO_FLAG_FASTWRITE, zb));
+ abd_get_from_buf(zgd->zgd_db->db_data, zgd->zgd_db->db_size),
+ zgd->zgd_db->db_size, zgd->zgd_db->db_size, zp,
+ dmu_sync_late_arrival_ready, NULL, NULL, dmu_sync_late_arrival_done,
+ dsa, ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, zb));
return (0);
}
@@ -1663,7 +1704,6 @@ dmu_sync_late_arrival(zio_t *pio, objset_t *os, dmu_sync_cb_t *done, zgd_t *zgd,
int
dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
{
- blkptr_t *bp = zgd->zgd_bp;
dmu_buf_impl_t *db = (dmu_buf_impl_t *)zgd->zgd_db;
objset_t *os = db->db_objset;
dsl_dataset_t *ds = os->os_dsl_dataset;
@@ -1676,6 +1716,11 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
ASSERT(pio != NULL);
ASSERT(txg != 0);
+ /* dbuf is within the locked range */
+ ASSERT3U(db->db.db_offset, >=, zgd->zgd_rl->r_off);
+ ASSERT3U(db->db.db_offset + db->db.db_size, <=,
+ zgd->zgd_rl->r_off + zgd->zgd_rl->r_len);
+
SET_BOOKMARK(&zb, ds->ds_object,
db->db.db_object, db->db_level, db->db_blkid);
@@ -1730,6 +1775,21 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
ASSERT(dr->dr_next == NULL || dr->dr_next->dr_txg < txg);
+ if (db->db_blkptr != NULL) {
+ /*
+ * We need to fill in zgd_bp with the current blkptr so that
+ * the nopwrite code can check if we're writing the same
+ * data that's already on disk. We can only nopwrite if we
+ * are sure that after making the copy, db_blkptr will not
+ * change until our i/o completes. We ensure this by
+ * holding the db_mtx, and only allowing nopwrite if the
+ * block is not already dirty (see below). This is verified
+ * by dmu_sync_done(), which VERIFYs that the db_blkptr has
+ * not changed.
+ */
+ *zgd->zgd_bp = *db->db_blkptr;
+ }
+
/*
* Assume the on-disk data is X, the current syncing data (in
* txg - 1) is Y, and the current in-memory data is Z (currently
@@ -1781,17 +1841,16 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)
dsa->dsa_tx = NULL;
zio_nowait(arc_write(pio, os->os_spa, txg,
- bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
- DBUF_IS_L2COMPRESSIBLE(db), &zp, dmu_sync_ready,
- NULL, dmu_sync_done, dsa, ZIO_PRIORITY_SYNC_WRITE,
- ZIO_FLAG_CANFAIL, &zb));
+ zgd->zgd_bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
+ &zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
+ ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));
return (0);
}
int
dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs,
- dmu_tx_t *tx)
+ dmu_tx_t *tx)
{
dnode_t *dn;
int err;
@@ -1806,7 +1865,7 @@ dmu_object_set_blocksize(objset_t *os, uint64_t object, uint64_t size, int ibs,
void
dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
- dmu_tx_t *tx)
+ dmu_tx_t *tx)
{
dnode_t *dn;
@@ -1826,7 +1885,7 @@ dmu_object_set_checksum(objset_t *os, uint64_t object, uint8_t checksum,
void
dmu_object_set_compress(objset_t *os, uint64_t object, uint8_t compress,
- dmu_tx_t *tx)
+ dmu_tx_t *tx)
{
dnode_t *dn;
@@ -1891,8 +1950,10 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
* as well. Otherwise, the metadata checksum defaults
* to fletcher4.
*/
- if (zio_checksum_table[checksum].ci_correctable < 1 ||
- zio_checksum_table[checksum].ci_eck)
+ if (!(zio_checksum_table[checksum].ci_flags &
+ ZCHECKSUM_FLAG_METADATA) ||
+ (zio_checksum_table[checksum].ci_flags &
+ ZCHECKSUM_FLAG_EMBEDDED))
checksum = ZIO_CHECKSUM_FLETCHER_4;
if (os->os_redundant_metadata == ZFS_REDUNDANT_METADATA_ALL ||
@@ -1931,22 +1992,27 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
*/
if (dedup_checksum != ZIO_CHECKSUM_OFF) {
dedup = (wp & WP_DMU_SYNC) ? B_FALSE : B_TRUE;
- if (!zio_checksum_table[checksum].ci_dedup)
+ if (!(zio_checksum_table[checksum].ci_flags &
+ ZCHECKSUM_FLAG_DEDUP))
dedup_verify = B_TRUE;
}
/*
- * Enable nopwrite if we have a cryptographically secure
- * checksum that has no known collisions (i.e. SHA-256)
- * and compression is enabled. We don't enable nopwrite if
- * dedup is enabled as the two features are mutually exclusive.
+ * Enable nopwrite if we have secure enough checksum
+ * algorithm (see comment in zio_nop_write) and
+ * compression is enabled. We don't enable nopwrite if
+ * dedup is enabled as the two features are mutually
+ * exclusive.
*/
- nopwrite = (!dedup && zio_checksum_table[checksum].ci_dedup &&
+ nopwrite = (!dedup && (zio_checksum_table[checksum].ci_flags &
+ ZCHECKSUM_FLAG_NOPWRITE) &&
compress != ZIO_COMPRESS_OFF && zfs_nopwrite_enabled);
}
zp->zp_checksum = checksum;
zp->zp_compress = compress;
+ ASSERT3U(zp->zp_compress, !=, ZIO_COMPRESS_INHERIT);
+
zp->zp_type = (wp & WP_SPILL) ? dn->dn_bonustype : type;
zp->zp_level = level;
zp->zp_copies = MIN(copies, spa_max_replication(os->os_spa));
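
These hunks replace the old per-checksum booleans (ci_correctable, ci_eck,
ci_dedup) with a single ci_flags bitmask, so each suitability question
becomes a flag test. A sketch of the pattern, using the flag names that
appear above:

    const zio_checksum_info_t *ci = &zio_checksum_table[checksum];

    boolean_t md_ok = !!(ci->ci_flags & ZCHECKSUM_FLAG_METADATA);
    boolean_t dedup_ok = !!(ci->ci_flags & ZCHECKSUM_FLAG_DEDUP);
    boolean_t nopwrite_ok = !!(ci->ci_flags & ZCHECKSUM_FLAG_NOPWRITE);
    boolean_t embedded = !!(ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED);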
@@ -1955,24 +2021,43 @@ dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
zp->zp_nopwrite = nopwrite;
}
+/*
+ * This function is only called from zfs_holey_common() for zpl_llseek()
+ * in order to determine the location of holes. In order to accurately
+ * report holes, all dirty data must be synced to disk. This causes extremely
+ * poor performance when seeking for holes in a dirty file. As a compromise,
+ * only provide hole data when the dnode is clean. When a dnode is dirty,
+ * report the dnode as having no holes, which is always a safe thing to do.
+ */
int
dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
{
dnode_t *dn;
int i, err;
+ boolean_t clean = B_TRUE;
err = dnode_hold(os, object, FTAG, &dn);
if (err)
return (err);
+
/*
- * Sync any current changes before
- * we go trundling through the block pointers.
+ * Check if the dnode is dirty.
*/
- for (i = 0; i < TXG_SIZE; i++) {
- if (list_link_active(&dn->dn_dirty_link[i]))
- break;
+ if (dn->dn_dirtyctx != DN_UNDIRTIED) {
+ for (i = 0; i < TXG_SIZE; i++) {
+ if (!list_is_empty(&dn->dn_dirty_records[i])) {
+ clean = B_FALSE;
+ break;
+ }
+ }
}
- if (i != TXG_SIZE) {
+
+ /*
+ * If the compatibility option is on, sync any current changes before
+ * we go trundling through the block pointers.
+ */
+ if (!clean && zfs_dmu_offset_next_sync) {
+ clean = B_TRUE;
dnode_rele(dn, FTAG);
txg_wait_synced(dmu_objset_pool(os), 0);
err = dnode_hold(os, object, FTAG, &dn);
@@ -1980,7 +2065,12 @@ dmu_offset_next(objset_t *os, uint64_t object, boolean_t hole, uint64_t *off)
return (err);
}
- err = dnode_next_offset(dn, (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0);
+ if (clean)
+ err = dnode_next_offset(dn,
+ (hole ? DNODE_FIND_HOLE : 0), off, 1, 1, 0);
+ else
+ err = SET_ERROR(EBUSY);
+
dnode_rele(dn, FTAG);
return (err);
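
From a caller's perspective the new behavior is: a clean dnode reports
holes as before; a dirty dnode either forces a txg sync (when
zfs_dmu_offset_next_sync is set) or returns EBUSY. A hedged sketch of
handling the new error, where reporting "no holes" is the safe fallback:

    uint64_t off = 0;
    int err = dmu_offset_next(os, object, B_TRUE /* find hole */, &off);
    if (err == EBUSY) {
            /*
             * The dnode is dirty and the sync fallback is disabled; it
             * is always safe to treat the object as having no holes.
             */
    }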
@@ -1998,6 +2088,7 @@ __dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi)
doi->doi_type = dn->dn_type;
doi->doi_bonus_type = dn->dn_bonustype;
doi->doi_bonus_size = dn->dn_bonuslen;
+ doi->doi_dnodesize = dn->dn_num_slots << DNODE_SHIFT;
doi->doi_indirection = dn->dn_nlevels;
doi->doi_checksum = dn->dn_checksum;
doi->doi_compress = dn->dn_compress;
@@ -2069,9 +2160,21 @@ dmu_object_size_from_db(dmu_buf_t *db_fake, uint32_t *blksize,
dn = DB_DNODE(db);
*blksize = dn->dn_datablksz;
- /* add 1 for dnode space */
+ /* add in number of slots used for the dnode itself */
*nblk512 = ((DN_USED_BYTES(dn->dn_phys) + SPA_MINBLOCKSIZE/2) >>
- SPA_MINBLOCKSHIFT) + 1;
+ SPA_MINBLOCKSHIFT) + dn->dn_num_slots;
+ DB_DNODE_EXIT(db);
+}
+
+void
+dmu_object_dnsize_from_db(dmu_buf_t *db_fake, int *dnsize)
+{
+ dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;
+ dnode_t *dn;
+
+ DB_DNODE_ENTER(db);
+ dn = DB_DNODE(db);
+ *dnsize = dn->dn_num_slots << DNODE_SHIFT;
DB_DNODE_EXIT(db);
}
@@ -2123,16 +2226,17 @@ byteswap_uint8_array(void *vbuf, size_t size)
void
dmu_init(void)
{
+ abd_init();
zfs_dbgmsg_init();
sa_cache_init();
xuio_stat_init();
dmu_objset_init();
dnode_init();
- dbuf_init();
zfetch_init();
dmu_tx_init();
l2arc_init();
arc_init();
+ dbuf_init();
}
void
@@ -2148,6 +2252,7 @@ dmu_fini(void)
xuio_stat_fini();
sa_cache_fini();
zfs_dbgmsg_fini();
+ abd_fini();
}
#if defined(_KERNEL) && defined(HAVE_SPL)
@@ -2159,12 +2264,15 @@ EXPORT_SYMBOL(dmu_free_range);
EXPORT_SYMBOL(dmu_free_long_range);
EXPORT_SYMBOL(dmu_free_long_object);
EXPORT_SYMBOL(dmu_read);
+EXPORT_SYMBOL(dmu_read_by_dnode);
EXPORT_SYMBOL(dmu_write);
+EXPORT_SYMBOL(dmu_write_by_dnode);
EXPORT_SYMBOL(dmu_prealloc);
EXPORT_SYMBOL(dmu_object_info);
EXPORT_SYMBOL(dmu_object_info_from_dnode);
EXPORT_SYMBOL(dmu_object_info_from_db);
EXPORT_SYMBOL(dmu_object_size_from_db);
+EXPORT_SYMBOL(dmu_object_dnsize_from_db);
EXPORT_SYMBOL(dmu_object_set_blocksize);
EXPORT_SYMBOL(dmu_object_set_checksum);
EXPORT_SYMBOL(dmu_object_set_compress);
@@ -2176,10 +2284,21 @@ EXPORT_SYMBOL(dmu_assign_arcbuf);
EXPORT_SYMBOL(dmu_buf_hold);
EXPORT_SYMBOL(dmu_ot);
+/* BEGIN CSTYLED */
module_param(zfs_mdcomp_disable, int, 0644);
MODULE_PARM_DESC(zfs_mdcomp_disable, "Disable meta data compression");
module_param(zfs_nopwrite_enabled, int, 0644);
MODULE_PARM_DESC(zfs_nopwrite_enabled, "Enable NOP writes");
+module_param(zfs_per_txg_dirty_frees_percent, ulong, 0644);
+MODULE_PARM_DESC(zfs_per_txg_dirty_frees_percent,
+ "percentage of dirtied blocks from frees in one TXG");
+
+module_param(zfs_dmu_offset_next_sync, int, 0644);
+MODULE_PARM_DESC(zfs_dmu_offset_next_sync,
+ "Enable forcing txg sync to find holes");
+
+/* END CSTYLED */
+
#endif
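
Both new tunables are exposed at runtime under
/sys/module/zfs/parameters/, so the hole-reporting behavior above can be
toggled by writing 0 or 1 to zfs_dmu_offset_next_sync without reloading
the module.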
diff --git a/zfs/module/zfs/dmu_diff.c b/zfs/module/zfs/dmu_diff.c
index 91415d0d2dcb..982b96132cc8 100644
--- a/zfs/module/zfs/dmu_diff.c
+++ b/zfs/module/zfs/dmu_diff.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@@ -115,7 +115,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (issig(JUSTLOOKING) && issig(FORREAL))
return (SET_ERROR(EINTR));
- if (zb->zb_object != DMU_META_DNODE_OBJECT)
+ if (bp == NULL || zb->zb_object != DMU_META_DNODE_OBJECT)
return (0);
if (BP_IS_HOLE(bp)) {
@@ -146,7 +146,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (err)
break;
}
- (void) arc_buf_remove_ref(abuf, &abuf);
+ arc_buf_destroy(abuf, &abuf);
if (err)
return (err);
/* Don't care about the data blocks */
diff --git a/zfs/module/zfs/dmu_object.c b/zfs/module/zfs/dmu_object.c
index 177162f9365d..e7412b7509f4 100644
--- a/zfs/module/zfs/dmu_object.c
+++ b/zfs/module/zfs/dmu_object.c
@@ -30,43 +30,142 @@
#include <sys/dnode.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
+#include <sys/dsl_dataset.h>
+
+/*
+ * Each of the concurrent object allocators will grab
+ * 2^dmu_object_alloc_chunk_shift dnode slots at a time. The default is to
+ * grab 128 slots, which is 4 blocks worth. This was experimentally
+ * determined to be the lowest value that eliminates the measurable effect
+ * of lock contention from this code path.
+ */
+int dmu_object_alloc_chunk_shift = 7;
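
For concreteness, a sketch of what the default shift means in bytes
(assuming the legacy 512-byte dnode and the 16K metadnode block size):

    int dnodes_per_chunk = 1 << 7;      /* 128 dnode slots per grab */
    /* 128 slots * 512 B/slot = 64 KiB, i.e. 4 metadnode blocks of 16 KiB */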
uint64_t
dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
+{
+ return dmu_object_alloc_dnsize(os, ot, blocksize, bonustype, bonuslen,
+ 0, tx);
+}
+
+uint64_t
+dmu_object_alloc_dnsize(objset_t *os, dmu_object_type_t ot, int blocksize,
+ dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
{
uint64_t object;
- uint64_t L2_dnode_count = DNODES_PER_BLOCK <<
+ uint64_t L1_dnode_count = DNODES_PER_BLOCK <<
(DMU_META_DNODE(os)->dn_indblkshift - SPA_BLKPTRSHIFT);
dnode_t *dn = NULL;
- int restarted = B_FALSE;
+ int dn_slots = dnodesize >> DNODE_SHIFT;
+ boolean_t restarted = B_FALSE;
+ uint64_t *cpuobj = NULL;
+ int dnodes_per_chunk = 1 << dmu_object_alloc_chunk_shift;
+ int error;
+
+ kpreempt_disable();
+ cpuobj = &os->os_obj_next_percpu[CPU_SEQID %
+ os->os_obj_next_percpu_len];
+ kpreempt_enable();
- mutex_enter(&os->os_obj_lock);
+ if (dn_slots == 0) {
+ dn_slots = DNODE_MIN_SLOTS;
+ } else {
+ ASSERT3S(dn_slots, >=, DNODE_MIN_SLOTS);
+ ASSERT3S(dn_slots, <=, DNODE_MAX_SLOTS);
+ }
+
+ /*
+ * The "chunk" of dnodes that is assigned to a CPU-specific
+ * allocator needs to be at least one block's worth, to avoid
+ * lock contention on the dbuf. It can be at most one L1 block's
+ * worth, so that the "rescan after polishing off an L1's worth"
+ * logic below will be sure to kick in.
+ */
+ if (dnodes_per_chunk < DNODES_PER_BLOCK)
+ dnodes_per_chunk = DNODES_PER_BLOCK;
+ if (dnodes_per_chunk > L1_dnode_count)
+ dnodes_per_chunk = L1_dnode_count;
+
+ object = *cpuobj;
for (;;) {
- object = os->os_obj_next;
/*
- * Each time we polish off an L2 bp worth of dnodes
- * (2^13 objects), move to another L2 bp that's still
- * reasonably sparse (at most 1/4 full). Look from the
- * beginning once, but after that keep looking from here.
- * If we can't find one, just keep going from here.
- *
- * Note that dmu_traverse depends on the behavior that we use
- * multiple blocks of the dnode object before going back to
- * reuse objects. Any change to this algorithm should preserve
- * that property or find another solution to the issues
- * described in traverse_visitbp.
+ * If we finished a chunk of dnodes, get a new one from
+ * the global allocator.
*/
- if (P2PHASE(object, L2_dnode_count) == 0) {
- uint64_t offset = restarted ? object << DNODE_SHIFT : 0;
- int error = dnode_next_offset(DMU_META_DNODE(os),
- DNODE_FIND_HOLE,
- &offset, 2, DNODES_PER_BLOCK >> 2, 0);
- restarted = B_TRUE;
- if (error == 0)
- object = offset >> DNODE_SHIFT;
+ if ((P2PHASE(object, dnodes_per_chunk) == 0) ||
+ (P2PHASE(object + dn_slots - 1, dnodes_per_chunk) <
+ dn_slots)) {
+ DNODE_STAT_BUMP(dnode_alloc_next_chunk);
+ mutex_enter(&os->os_obj_lock);
+ ASSERT0(P2PHASE(os->os_obj_next_chunk,
+ dnodes_per_chunk));
+ object = os->os_obj_next_chunk;
+
+ /*
+ * Each time we polish off an L1 bp worth of dnodes
+ * (2^12 objects), move to another L1 bp that's
+ * still reasonably sparse (at most 1/4 full). Look
+ * from the beginning at most once per txg. If we
+ * still can't allocate from that L1 block, search
+ * for an empty L0 block, which will quickly skip
+ * to the end of the metadnode if no nearby L0
+ * blocks are empty. This fallback avoids a
+ * pathology where full dnode blocks containing
+ * large dnodes appear sparse because they have a
+ * low blk_fill, leading to many failed allocation
+ * attempts. In the long term a better mechanism to
+ * search for sparse metadnode regions, such as
+ * spacemaps, could be implemented.
+ *
+ * os_scan_dnodes is set during txg sync if enough
+ * objects have been freed since the previous
+ * rescan to justify backfilling again.
+ *
+ * Note that dmu_traverse depends on the behavior
+ * that we use multiple blocks of the dnode object
+ * before going back to reuse objects. Any change
+ * to this algorithm should preserve that property
+ * or find another solution to the issues described
+ * in traverse_visitbp.
+ */
+ if (P2PHASE(object, L1_dnode_count) == 0) {
+ uint64_t offset;
+ uint64_t blkfill;
+ int minlvl;
+ if (os->os_rescan_dnodes) {
+ offset = 0;
+ os->os_rescan_dnodes = B_FALSE;
+ } else {
+ offset = object << DNODE_SHIFT;
+ }
+ blkfill = restarted ? 1 : DNODES_PER_BLOCK >> 2;
+ minlvl = restarted ? 1 : 2;
+ restarted = B_TRUE;
+ error = dnode_next_offset(DMU_META_DNODE(os),
+ DNODE_FIND_HOLE, &offset, minlvl,
+ blkfill, 0);
+ if (error == 0) {
+ object = offset >> DNODE_SHIFT;
+ }
+ }
+ /*
+ * Note: if "restarted", we may find an L0 that
+ * is not suitably aligned.
+ */
+ os->os_obj_next_chunk =
+ P2ALIGN(object, dnodes_per_chunk) +
+ dnodes_per_chunk;
+ (void) atomic_swap_64(cpuobj, object);
+ mutex_exit(&os->os_obj_lock);
}
- os->os_obj_next = ++object;
+
+ /*
+ * The value of (*cpuobj) before adding dn_slots is the object
+ * ID assigned to us. The value afterwards is the object ID
+ * assigned to whoever wants to do an allocation next.
+ */
+ object = atomic_add_64_nv(cpuobj, dn_slots) - dn_slots;
/*
* XXX We should check for an i/o error here and return
@@ -74,65 +173,109 @@ dmu_object_alloc(objset_t *os, dmu_object_type_t ot, int blocksize,
* dmu_tx_assign(), but there is currently no mechanism
* to do so.
*/
- (void) dnode_hold_impl(os, object, DNODE_MUST_BE_FREE,
- FTAG, &dn);
- if (dn)
- break;
+ error = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE,
+ dn_slots, FTAG, &dn);
+ if (error == 0) {
+ rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
+ /*
+ * Another thread could have allocated it; check
+ * again now that we have the struct lock.
+ */
+ if (dn->dn_type == DMU_OT_NONE) {
+ dnode_allocate(dn, ot, blocksize, 0,
+ bonustype, bonuslen, dn_slots, tx);
+ rw_exit(&dn->dn_struct_rwlock);
+ dmu_tx_add_new_object(tx, dn);
+ dnode_rele(dn, FTAG);
+ return (object);
+ }
+ rw_exit(&dn->dn_struct_rwlock);
+ dnode_rele(dn, FTAG);
+ DNODE_STAT_BUMP(dnode_alloc_race);
+ }
- if (dmu_object_next(os, &object, B_TRUE, 0) == 0)
- os->os_obj_next = object - 1;
+ /*
+ * Skip to next known valid starting point on error. This
+ * is the start of the next block of dnodes.
+ */
+ if (dmu_object_next(os, &object, B_TRUE, 0) != 0) {
+ object = P2ROUNDUP(object + 1, DNODES_PER_BLOCK);
+ DNODE_STAT_BUMP(dnode_alloc_next_block);
+ }
+ (void) atomic_swap_64(cpuobj, object);
}
-
- dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx);
- dnode_rele(dn, FTAG);
-
- mutex_exit(&os->os_obj_lock);
-
- dmu_tx_add_new_object(tx, os, object);
- return (object);
}
int
dmu_object_claim(objset_t *os, uint64_t object, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
+{
+ return (dmu_object_claim_dnsize(os, object, ot, blocksize, bonustype,
+ bonuslen, 0, tx));
+}
+
+int
+dmu_object_claim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
+ int blocksize, dmu_object_type_t bonustype, int bonuslen,
+ int dnodesize, dmu_tx_t *tx)
{
dnode_t *dn;
+ int dn_slots = dnodesize >> DNODE_SHIFT;
int err;
+ if (dn_slots == 0)
+ dn_slots = DNODE_MIN_SLOTS;
+ ASSERT3S(dn_slots, >=, DNODE_MIN_SLOTS);
+ ASSERT3S(dn_slots, <=, DNODE_MAX_SLOTS);
+
if (object == DMU_META_DNODE_OBJECT && !dmu_tx_private_ok(tx))
return (SET_ERROR(EBADF));
- err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, FTAG, &dn);
+ err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, dn_slots,
+ FTAG, &dn);
if (err)
return (err);
- dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, tx);
+
+ dnode_allocate(dn, ot, blocksize, 0, bonustype, bonuslen, dn_slots, tx);
+ dmu_tx_add_new_object(tx, dn);
+
dnode_rele(dn, FTAG);
- dmu_tx_add_new_object(tx, os, object);
return (0);
}
int
dmu_object_reclaim(objset_t *os, uint64_t object, dmu_object_type_t ot,
int blocksize, dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
+{
+ return (dmu_object_reclaim_dnsize(os, object, ot, blocksize, bonustype,
+ bonuslen, 0, tx));
+}
+
+int
+dmu_object_reclaim_dnsize(objset_t *os, uint64_t object, dmu_object_type_t ot,
+ int blocksize, dmu_object_type_t bonustype, int bonuslen, int dnodesize,
+ dmu_tx_t *tx)
{
dnode_t *dn;
+ int dn_slots = dnodesize >> DNODE_SHIFT;
int err;
if (object == DMU_META_DNODE_OBJECT)
return (SET_ERROR(EBADF));
- err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
+ err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
FTAG, &dn);
if (err)
return (err);
- dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, tx);
+ dnode_reallocate(dn, ot, blocksize, bonustype, bonuslen, dn_slots, tx);
dnode_rele(dn, FTAG);
return (err);
}
+
int
dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
{
@@ -141,7 +284,7 @@ dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
ASSERT(object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));
- err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED,
+ err = dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0,
FTAG, &dn);
if (err)
return (err);
@@ -154,12 +297,62 @@ dmu_object_free(objset_t *os, uint64_t object, dmu_tx_t *tx)
return (0);
}
+/*
+ * Return (in *objectp) the next object which is allocated (or a hole)
+ * after *object, taking into account only objects that may have been modified
+ * after the specified txg.
+ */
int
dmu_object_next(objset_t *os, uint64_t *objectp, boolean_t hole, uint64_t txg)
{
- uint64_t offset = (*objectp + 1) << DNODE_SHIFT;
+ uint64_t offset;
+ uint64_t start_obj;
+ struct dsl_dataset *ds = os->os_dsl_dataset;
int error;
+ if (*objectp == 0) {
+ start_obj = 1;
+ } else if (ds && ds->ds_feature_inuse[SPA_FEATURE_LARGE_DNODE]) {
+ uint64_t i = *objectp + 1;
+ uint64_t last_obj = *objectp | (DNODES_PER_BLOCK - 1);
+ dmu_object_info_t doi;
+
+ /*
+ * Scan through the remaining meta dnode block. The contents
+ * of each slot in the block are known so it can be quickly
+ * checked. If the block is exhausted without a match then
+ * hand off to dnode_next_offset() for further scanning.
+ */
+ while (i <= last_obj) {
+ error = dmu_object_info(os, i, &doi);
+ if (error == ENOENT) {
+ if (hole) {
+ *objectp = i;
+ return (0);
+ } else {
+ i++;
+ }
+ } else if (error == EEXIST) {
+ i++;
+ } else if (error == 0) {
+ if (hole) {
+ i += doi.doi_dnodesize >> DNODE_SHIFT;
+ } else {
+ *objectp = i;
+ return (0);
+ }
+ } else {
+ return (error);
+ }
+ }
+
+ start_obj = i;
+ } else {
+ start_obj = *objectp + 1;
+ }
+
+ offset = start_obj << DNODE_SHIFT;
+
error = dnode_next_offset(DMU_META_DNODE(os),
(hole ? DNODE_FIND_HOLE : 0), &offset, 0, DNODES_PER_BLOCK, txg);
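
A usage sketch of the iteration contract, which this patch preserves while
making it large-dnode aware: starting from *objectp == 0 and looping until
an error (ESRCH at the end of the objset) visits every allocated object:

    uint64_t obj = 0;
    while (dmu_object_next(os, &obj, B_FALSE /* allocated */, 0) == 0) {
            /* visit object obj ... */
    }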
@@ -221,10 +414,19 @@ dmu_object_free_zapified(objset_t *mos, uint64_t object, dmu_tx_t *tx)
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(dmu_object_alloc);
+EXPORT_SYMBOL(dmu_object_alloc_dnsize);
EXPORT_SYMBOL(dmu_object_claim);
+EXPORT_SYMBOL(dmu_object_claim_dnsize);
EXPORT_SYMBOL(dmu_object_reclaim);
+EXPORT_SYMBOL(dmu_object_reclaim_dnsize);
EXPORT_SYMBOL(dmu_object_free);
EXPORT_SYMBOL(dmu_object_next);
EXPORT_SYMBOL(dmu_object_zapify);
EXPORT_SYMBOL(dmu_object_free_zapified);
+
+/* BEGIN CSTYLED */
+module_param(dmu_object_alloc_chunk_shift, int, 0644);
+MODULE_PARM_DESC(dmu_object_alloc_chunk_shift,
+ "CPU-specific allocator grabs 2^N objects at once");
+/* END CSTYLED */
#endif
diff --git a/zfs/module/zfs/dmu_objset.c b/zfs/module/zfs/dmu_objset.c
index f2d492ebf0c3..9a7a6968d631 100644
--- a/zfs/module/zfs/dmu_objset.c
+++ b/zfs/module/zfs/dmu_objset.c
@@ -18,19 +18,21 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
- * Copyright (c) 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2015, STRATO AG, Inc. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
*/
/* Portions Copyright 2010 Robert Milkowski */
+#include <sys/zfeature.h>
#include <sys/cred.h>
#include <sys/zfs_context.h>
#include <sys/dmu_objset.h>
@@ -52,6 +54,8 @@
#include <sys/zfs_onexit.h>
#include <sys/dsl_destroy.h>
#include <sys/vdev.h>
+#include <sys/policy.h>
+#include <sys/spa_impl.h>
/*
* Needed to close a window in dnode_move() that allows the objset to be freed
@@ -60,15 +64,25 @@
krwlock_t os_lock;
/*
- * Tunable to overwrite the maximum number of threads for the parallization
+ * Tunable to overwrite the maximum number of threads for the parallelization
* of dmu_objset_find_dp, needed to speed up the import of pools with many
* datasets.
* Default is 4 times the number of leaf vdevs.
*/
int dmu_find_threads = 0;
+/*
+ * Backfill lower metadnode objects after this many have been freed.
+ * Backfilling negatively impacts object creation rates, so only do it
+ * if there are enough holes to fill.
+ */
+int dmu_rescan_dnode_threshold = 1 << DN_MAX_INDBLKSHIFT;
+
static void dmu_objset_find_dp_cb(void *arg);
+static void dmu_objset_upgrade(objset_t *os, dmu_objset_upgrade_cb_t cb);
+static void dmu_objset_upgrade_stop(objset_t *os);
+
void
dmu_objset_init(void)
{
@@ -130,6 +144,12 @@ dmu_objset_id(objset_t *os)
return (ds ? ds->ds_object : 0);
}
+uint64_t
+dmu_objset_dnodesize(objset_t *os)
+{
+ return (os->os_dnodesize);
+}
+
zfs_sync_type_t
dmu_objset_syncprop(objset_t *os)
{
@@ -259,6 +279,34 @@ redundant_metadata_changed_cb(void *arg, uint64_t newval)
os->os_redundant_metadata = newval;
}
+static void
+dnodesize_changed_cb(void *arg, uint64_t newval)
+{
+ objset_t *os = arg;
+
+ switch (newval) {
+ case ZFS_DNSIZE_LEGACY:
+ os->os_dnodesize = DNODE_MIN_SIZE;
+ break;
+ case ZFS_DNSIZE_AUTO:
+ /*
+ * Choose a dnode size that will work well for most
+ * workloads if the user specified "auto". Future code
+ * improvements could dynamically select a dnode size
+ * based on observed workload patterns.
+ */
+ os->os_dnodesize = DNODE_MIN_SIZE * 2;
+ break;
+ case ZFS_DNSIZE_1K:
+ case ZFS_DNSIZE_2K:
+ case ZFS_DNSIZE_4K:
+ case ZFS_DNSIZE_8K:
+ case ZFS_DNSIZE_16K:
+ os->os_dnodesize = newval;
+ break;
+ }
+}
+
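+
This callback runs whenever the dataset's dnodesize property changes, e.g.
after "zfs set dnodesize=auto tank/fs"; note that "auto" currently maps to
DNODE_MIN_SIZE * 2 (1K), leaving room for a smarter heuristic later.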
static void
logbias_changed_cb(void *arg, uint64_t newval)
{
@@ -295,6 +343,38 @@ dmu_objset_byteswap(void *buf, size_t size)
}
}
+/*
+ * The hash is a CRC-based hash of the objset_t pointer and the object number.
+ */
+static uint64_t
+dnode_hash(const objset_t *os, uint64_t obj)
+{
+ uintptr_t osv = (uintptr_t)os;
+ uint64_t crc = -1ULL;
+
+ ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
+ /*
+ * The low 6 bits of the pointer don't have much entropy, because
+ * the objset_t is more than 2^6 bytes long.
+ */
+ crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (osv >> 6)) & 0xFF];
+ crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 0)) & 0xFF];
+ crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 8)) & 0xFF];
+ crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 16)) & 0xFF];
+
+ crc ^= (osv>>14) ^ (obj>>24);
+
+ return (crc);
+}
+
+unsigned int
+dnode_multilist_index_func(multilist_t *ml, void *obj)
+{
+ dnode_t *dn = obj;
+ return (dnode_hash(dn->dn_objset, dn->dn_object) %
+ multilist_get_num_sublists(ml));
+}
+
int
dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
objset_t **osp)
@@ -316,8 +396,6 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
if (DMU_OS_IS_L2CACHEABLE(os))
aflags |= ARC_FLAG_L2CACHE;
- if (DMU_OS_IS_L2COMPRESSIBLE(os))
- aflags |= ARC_FLAG_L2COMPRESS;
dprintf_bp(os->os_rootbp, "reading %s", "");
err = arc_read(NULL, spa, os->os_rootbp,
@@ -334,14 +412,12 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
/* Increase the blocksize if we are permitted. */
if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
- arc_buf_t *buf = arc_buf_alloc(spa,
- sizeof (objset_phys_t), &os->os_phys_buf,
- ARC_BUFC_METADATA);
+ arc_buf_t *buf = arc_alloc_buf(spa, &os->os_phys_buf,
+ ARC_BUFC_METADATA, sizeof (objset_phys_t));
bzero(buf->b_data, sizeof (objset_phys_t));
bcopy(os->os_phys_buf->b_data, buf->b_data,
arc_buf_size(os->os_phys_buf));
- (void) arc_buf_remove_ref(os->os_phys_buf,
- &os->os_phys_buf);
+ arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf);
os->os_phys_buf = buf;
}
@@ -350,8 +426,8 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
} else {
int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
- os->os_phys_buf = arc_buf_alloc(spa, size,
- &os->os_phys_buf, ARC_BUFC_METADATA);
+ os->os_phys_buf = arc_alloc_buf(spa, &os->os_phys_buf,
+ ARC_BUFC_METADATA, size);
os->os_phys = os->os_phys_buf->b_data;
bzero(os->os_phys, size);
}
@@ -363,6 +439,17 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
* checksum/compression/copies.
*/
if (ds != NULL) {
+ boolean_t needlock = B_FALSE;
+
+ /*
+ * Note: it's valid to open the objset if the dataset is
+ * long-held, in which case the pool_config lock will not
+ * be held.
+ */
+ if (!dsl_pool_config_held(dmu_objset_pool(os))) {
+ needlock = B_TRUE;
+ dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
+ }
err = dsl_prop_register(ds,
zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
primary_cache_changed_cb, os);
@@ -413,10 +500,16 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
recordsize_changed_cb, os);
}
+ if (err == 0) {
+ err = dsl_prop_register(ds,
+ zfs_prop_to_name(ZFS_PROP_DNODESIZE),
+ dnodesize_changed_cb, os);
+ }
}
+ if (needlock)
+ dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
if (err != 0) {
- VERIFY(arc_buf_remove_ref(os->os_phys_buf,
- &os->os_phys_buf));
+ arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf);
kmem_free(os, sizeof (objset_t));
return (err);
}
@@ -431,6 +524,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
os->os_sync = ZFS_SYNC_STANDARD;
os->os_primary_cache = ZFS_CACHE_ALL;
os->os_secondary_cache = ZFS_CACHE_ALL;
+ os->os_dnodesize = DNODE_MIN_SIZE;
}
if (ds == NULL || !ds->ds_is_snapshot)
@@ -438,10 +532,9 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
os->os_zil = zil_alloc(os, &os->os_zil_header);
for (i = 0; i < TXG_SIZE; i++) {
- list_create(&os->os_dirty_dnodes[i], sizeof (dnode_t),
- offsetof(dnode_t, dn_dirty_link[i]));
- list_create(&os->os_free_dnodes[i], sizeof (dnode_t),
- offsetof(dnode_t, dn_dirty_link[i]));
+ os->os_dirty_dnodes[i] = multilist_create(sizeof (dnode_t),
+ offsetof(dnode_t, dn_dirty_link[i]),
+ dnode_multilist_index_func);
}
list_create(&os->os_dnodes, sizeof (dnode_t),
offsetof(dnode_t, dn_link));
@@ -451,8 +544,12 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
list_link_init(&os->os_evicting_node);
mutex_init(&os->os_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&os->os_userused_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&os->os_obj_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&os->os_user_ptr_lock, NULL, MUTEX_DEFAULT, NULL);
+ os->os_obj_next_percpu_len = boot_ncpus;
+ os->os_obj_next_percpu = kmem_zalloc(os->os_obj_next_percpu_len *
+ sizeof (os->os_obj_next_percpu[0]), KM_SLEEP);
dnode_special_open(os, &os->os_phys->os_meta_dnode,
DMU_META_DNODE_OBJECT, &os->os_meta_dnode);
@@ -463,6 +560,8 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
DMU_GROUPUSED_OBJECT, &os->os_groupused_dnode);
}
+ mutex_init(&os->os_upgrade_lock, NULL, MUTEX_DEFAULT, NULL);
+
*osp = os;
return (0);
}
@@ -472,11 +571,20 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
{
int err = 0;
+ /*
+ * We shouldn't be doing anything with dsl_dataset_t's unless the
+ * pool_config lock is held, or the dataset is long-held.
+ */
+ ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool) ||
+ dsl_dataset_long_held(ds));
+
mutex_enter(&ds->ds_opening_lock);
if (ds->ds_objset == NULL) {
objset_t *os;
+ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
ds, dsl_dataset_get_blkptr(ds), &os);
+ rrw_exit(&ds->ds_bp_rwlock, FTAG);
if (err == 0) {
mutex_enter(&ds->ds_lock);
@@ -562,6 +670,9 @@ dmu_objset_own(const char *name, dmu_objset_type_t type,
err = dmu_objset_own_impl(ds, type, readonly, tag, osp);
dsl_pool_rele(dp, FTAG);
+ if (err == 0 && dmu_objset_userobjspace_upgradable(*osp))
+ dmu_objset_userobjspace_upgrade(*osp);
+
return (err);
}
@@ -603,7 +714,7 @@ dmu_objset_refresh_ownership(objset_t *os, void *tag)
{
dsl_pool_t *dp;
dsl_dataset_t *ds, *newds;
- char name[MAXNAMELEN];
+ char name[ZFS_MAX_DATASET_NAME_LEN];
ds = os->os_dsl_dataset;
VERIFY3P(ds, !=, NULL);
@@ -622,6 +733,10 @@ dmu_objset_refresh_ownership(objset_t *os, void *tag)
void
dmu_objset_disown(objset_t *os, void *tag)
{
+ /*
+ * Stop the upgrade thread.
+ */
+ dmu_objset_upgrade_stop(os);
dsl_dataset_disown(os->os_dsl_dataset, tag);
}
@@ -689,40 +804,8 @@ dmu_objset_evict(objset_t *os)
for (t = 0; t < TXG_SIZE; t++)
ASSERT(!dmu_objset_is_dirty(os, t));
- if (ds) {
- if (!ds->ds_is_snapshot) {
- VERIFY0(dsl_prop_unregister(ds,
- zfs_prop_to_name(ZFS_PROP_CHECKSUM),
- checksum_changed_cb, os));
- VERIFY0(dsl_prop_unregister(ds,
- zfs_prop_to_name(ZFS_PROP_COMPRESSION),
- compression_changed_cb, os));
- VERIFY0(dsl_prop_unregister(ds,
- zfs_prop_to_name(ZFS_PROP_COPIES),
- copies_changed_cb, os));
- VERIFY0(dsl_prop_unregister(ds,
- zfs_prop_to_name(ZFS_PROP_DEDUP),
- dedup_changed_cb, os));
- VERIFY0(dsl_prop_unregister(ds,
- zfs_prop_to_name(ZFS_PROP_LOGBIAS),
- logbias_changed_cb, os));
- VERIFY0(dsl_prop_unregister(ds,
- zfs_prop_to_name(ZFS_PROP_SYNC),
- sync_changed_cb, os));
- VERIFY0(dsl_prop_unregister(ds,
- zfs_prop_to_name(ZFS_PROP_REDUNDANT_METADATA),
- redundant_metadata_changed_cb, os));
- VERIFY0(dsl_prop_unregister(ds,
- zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
- recordsize_changed_cb, os));
- }
- VERIFY0(dsl_prop_unregister(ds,
- zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
- primary_cache_changed_cb, os));
- VERIFY0(dsl_prop_unregister(ds,
- zfs_prop_to_name(ZFS_PROP_SECONDARYCACHE),
- secondary_cache_changed_cb, os));
- }
+ if (ds)
+ dsl_prop_unregister_all(ds, os);
if (os->os_sa)
sa_tear_down(os);
@@ -751,7 +834,7 @@ dmu_objset_evict_done(objset_t *os)
}
zil_free(os->os_zil);
- VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf));
+ arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf);
/*
* This is a barrier to prevent the objset from going away in
@@ -762,9 +845,17 @@ dmu_objset_evict_done(objset_t *os)
rw_enter(&os_lock, RW_READER);
rw_exit(&os_lock);
+ kmem_free(os->os_obj_next_percpu,
+ os->os_obj_next_percpu_len * sizeof (os->os_obj_next_percpu[0]));
+
mutex_destroy(&os->os_lock);
+ mutex_destroy(&os->os_userused_lock);
mutex_destroy(&os->os_obj_lock);
mutex_destroy(&os->os_user_ptr_lock);
+ mutex_destroy(&os->os_upgrade_lock);
+ for (int i = 0; i < TXG_SIZE; i++) {
+ multilist_destroy(os->os_dirty_dnodes[i]);
+ }
spa_evicting_os_deregister(os->os_spa, os);
kmem_free(os, sizeof (objset_t));
}
@@ -792,8 +883,8 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
mdn = DMU_META_DNODE(os);
- dnode_allocate(mdn, DMU_OT_DNODE, 1 << DNODE_BLOCK_SHIFT,
- DN_MAX_INDBLKSHIFT, DMU_OT_NONE, 0, tx);
+ dnode_allocate(mdn, DMU_OT_DNODE, DNODE_BLOCK_SIZE, DN_MAX_INDBLKSHIFT,
+ DMU_OT_NONE, 0, DNODE_MIN_SLOTS, tx);
/*
* We don't want to have to increase the meta-dnode's nlevels
@@ -811,11 +902,17 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
/*
* Determine the number of levels necessary for the meta-dnode
- * to contain DN_MAX_OBJECT dnodes.
+ * to contain DN_MAX_OBJECT dnodes. Note that in order to
+ * ensure that we do not overflow 64 bits, there has to be
+ * a nlevels that gives us a number of blocks > DN_MAX_OBJECT
+ * but < 2^64. Therefore,
+ * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT) (10) must be
+ * less than (64 - log2(DN_MAX_OBJECT)) (16).
*/
- while ((uint64_t)mdn->dn_nblkptr << (mdn->dn_datablkshift +
+ while ((uint64_t)mdn->dn_nblkptr <<
+ (mdn->dn_datablkshift - DNODE_SHIFT +
(levels - 1) * (mdn->dn_indblkshift - SPA_BLKPTRSHIFT)) <
- DN_MAX_OBJECT * sizeof (dnode_phys_t))
+ DN_MAX_OBJECT)
levels++;
mdn->dn_next_nlevels[tx->tx_txg & TXG_MASK] =
@@ -828,6 +925,12 @@ dmu_objset_create_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
os->os_phys->os_type = type;
if (dmu_objset_userused_enabled(os)) {
os->os_phys->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
+ if (dmu_objset_userobjused_enabled(os)) {
+ ds->ds_feature_activation_needed[
+ SPA_FEATURE_USEROBJ_ACCOUNTING] = B_TRUE;
+ os->os_phys->os_flags |=
+ OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE;
+ }
os->os_flags = os->os_phys->os_flags;
}
@@ -859,6 +962,9 @@ dmu_objset_create_check(void *arg, dmu_tx_t *tx)
if (strchr(doca->doca_name, '@') != NULL)
return (SET_ERROR(EINVAL));
+ if (strlen(doca->doca_name) >= ZFS_MAX_DATASET_NAME_LEN)
+ return (SET_ERROR(ENAMETOOLONG));
+
error = dsl_dir_hold(dp, doca->doca_name, FTAG, &pdd, &tail);
if (error != 0)
return (error);
@@ -891,9 +997,11 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
doca->doca_cred, tx);
VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
+ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
bp = dsl_dataset_get_blkptr(ds);
os = dmu_objset_create_impl(pdd->dd_pool->dp_spa,
ds, bp, doca->doca_type, tx);
+ rrw_exit(&ds->ds_bp_rwlock, FTAG);
if (doca->doca_userfunc != NULL) {
doca->doca_userfunc(os, doca->doca_userarg,
@@ -945,6 +1053,9 @@ dmu_objset_clone_check(void *arg, dmu_tx_t *tx)
if (strchr(doca->doca_clone, '@') != NULL)
return (SET_ERROR(EINVAL));
+ if (strlen(doca->doca_clone) >= ZFS_MAX_DATASET_NAME_LEN)
+ return (SET_ERROR(ENAMETOOLONG));
+
error = dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail);
if (error != 0)
return (error);
@@ -984,7 +1095,7 @@ dmu_objset_clone_sync(void *arg, dmu_tx_t *tx)
const char *tail;
dsl_dataset_t *origin, *ds;
uint64_t obj;
- char namebuf[MAXNAMELEN];
+ char namebuf[ZFS_MAX_DATASET_NAME_LEN];
VERIFY0(dsl_dir_hold(dp, doca->doca_clone, FTAG, &pdd, &tail));
VERIFY0(dsl_dataset_hold(dp, doca->doca_origin, FTAG, &origin));
@@ -1031,11 +1142,65 @@ dmu_objset_snapshot_one(const char *fsname, const char *snapname)
}
static void
-dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
+dmu_objset_upgrade_task_cb(void *data)
+{
+ objset_t *os = data;
+
+ mutex_enter(&os->os_upgrade_lock);
+ os->os_upgrade_status = EINTR;
+ if (!os->os_upgrade_exit) {
+ mutex_exit(&os->os_upgrade_lock);
+
+ os->os_upgrade_status = os->os_upgrade_cb(os);
+ mutex_enter(&os->os_upgrade_lock);
+ }
+ os->os_upgrade_exit = B_TRUE;
+ os->os_upgrade_id = 0;
+ mutex_exit(&os->os_upgrade_lock);
+}
+
+static void
+dmu_objset_upgrade(objset_t *os, dmu_objset_upgrade_cb_t cb)
+{
+ if (os->os_upgrade_id != 0)
+ return;
+
+ mutex_enter(&os->os_upgrade_lock);
+ if (os->os_upgrade_id == 0 && os->os_upgrade_status == 0) {
+ os->os_upgrade_exit = B_FALSE;
+ os->os_upgrade_cb = cb;
+ os->os_upgrade_id = taskq_dispatch(
+ os->os_spa->spa_upgrade_taskq,
+ dmu_objset_upgrade_task_cb, os, TQ_SLEEP);
+ if (os->os_upgrade_id == TASKQID_INVALID)
+ os->os_upgrade_status = ENOMEM;
+ }
+ mutex_exit(&os->os_upgrade_lock);
+}
+
+static void
+dmu_objset_upgrade_stop(objset_t *os)
+{
+ mutex_enter(&os->os_upgrade_lock);
+ os->os_upgrade_exit = B_TRUE;
+ if (os->os_upgrade_id != 0) {
+ taskqid_t id = os->os_upgrade_id;
+
+ os->os_upgrade_id = 0;
+ mutex_exit(&os->os_upgrade_lock);
+
+ taskq_cancel_id(os->os_spa->spa_upgrade_taskq, id);
+ } else {
+ mutex_exit(&os->os_upgrade_lock);
+ }
+}
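
The two helpers above implement a standard taskq dispatch/cancel handshake:
os_upgrade_id is only published or cleared under os_upgrade_lock, and the
stopper clears it before calling taskq_cancel_id() so that exactly one side
tears the task down. A minimal sketch of the same shape, with a hypothetical
worker_state_t (taskq_dispatch() and taskq_cancel_id() are the real SPL
interfaces):

/* Illustrative sketch, not part of the patch. */
typedef struct worker_state {
	kmutex_t	ws_lock;
	taskqid_t	ws_id;		/* nonzero while dispatched */
	boolean_t	ws_exit;	/* tells a running worker to bail */
} worker_state_t;

static void
worker_start(worker_state_t *ws, taskq_t *tq, task_func_t *fn)
{
	mutex_enter(&ws->ws_lock);
	if (ws->ws_id == 0) {			/* not already running */
		ws->ws_exit = B_FALSE;
		ws->ws_id = taskq_dispatch(tq, fn, ws, TQ_SLEEP);
	}
	mutex_exit(&ws->ws_lock);
}

static void
worker_stop(worker_state_t *ws, taskq_t *tq)
{
	taskqid_t id;

	mutex_enter(&ws->ws_lock);
	ws->ws_exit = B_TRUE;	/* observed by a worker already running */
	id = ws->ws_id;
	ws->ws_id = 0;		/* only one caller sees a nonzero id */
	mutex_exit(&ws->ws_lock);

	if (id != 0)
		(void) taskq_cancel_id(tq, id);	/* no-op if already done */
}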
+
+static void
+dmu_objset_sync_dnodes(multilist_sublist_t *list, dmu_tx_t *tx)
{
dnode_t *dn;
- while ((dn = list_head(list))) {
+ while ((dn = multilist_sublist_head(list)) != NULL) {
ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT);
ASSERT(dn->dn_dbuf->db_data_pending);
/*
@@ -1046,11 +1211,12 @@ dmu_objset_sync_dnodes(list_t *list, list_t *newlist, dmu_tx_t *tx)
ASSERT(dn->dn_zio);
ASSERT3U(dn->dn_nlevels, <=, DN_MAX_LEVELS);
- list_remove(list, dn);
+ multilist_sublist_remove(list, dn);
- if (newlist) {
+ multilist_t *newlist = dn->dn_objset->os_synced_dnodes;
+ if (newlist != NULL) {
(void) dnode_add_ref(dn, newlist);
- list_insert_tail(newlist, dn);
+ multilist_insert(newlist, dn);
}
dnode_sync(dn, tx);
@@ -1068,7 +1234,6 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
ASSERT(!BP_IS_EMBEDDED(bp));
- ASSERT3P(bp, ==, os->os_rootbp);
ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
ASSERT0(BP_GET_LEVEL(bp));
@@ -1081,6 +1246,11 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
bp->blk_fill = 0;
for (i = 0; i < dnp->dn_nblkptr; i++)
bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]);
+ if (os->os_dsl_dataset != NULL)
+ rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_WRITER, FTAG);
+ *os->os_rootbp = *bp;
+ if (os->os_dsl_dataset != NULL)
+ rrw_exit(&os->os_dsl_dataset->ds_bp_rwlock, FTAG);
}
/* ARGSUSED */
@@ -1100,8 +1270,32 @@ dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg)
(void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
dsl_dataset_block_born(ds, bp, tx);
}
+ kmem_free(bp, sizeof (*bp));
}
+typedef struct sync_dnodes_arg {
+ multilist_t *sda_list;
+ int sda_sublist_idx;
+ multilist_t *sda_newlist;
+ dmu_tx_t *sda_tx;
+} sync_dnodes_arg_t;
+
+static void
+sync_dnodes_task(void *arg)
+{
+ sync_dnodes_arg_t *sda = arg;
+
+ multilist_sublist_t *ms =
+ multilist_sublist_lock(sda->sda_list, sda->sda_sublist_idx);
+
+ dmu_objset_sync_dnodes(ms, sda->sda_tx);
+
+ multilist_sublist_unlock(ms);
+
+ kmem_free(sda, sizeof (*sda));
+}
+
+
/* called from dsl */
void
dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
@@ -1111,8 +1305,9 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
zio_prop_t zp;
zio_t *zio;
list_t *list;
- list_t *newlist = NULL;
dbuf_dirty_record_t *dr;
+ blkptr_t *blkptr_copy = kmem_alloc(sizeof (*os->os_rootbp), KM_SLEEP);
+ *blkptr_copy = *os->os_rootbp;
dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);
@@ -1140,10 +1335,9 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
dmu_write_policy(os, NULL, 0, 0, &zp);
zio = arc_write(pio, os->os_spa, tx->tx_txg,
- os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
- DMU_OS_IS_L2COMPRESSIBLE(os), &zp, dmu_objset_write_ready,
- NULL, dmu_objset_write_done, os, ZIO_PRIORITY_ASYNC_WRITE,
- ZIO_FLAG_MUSTSUCCEED, &zb);
+ blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
+ &zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
+ os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
/*
* Sync special dnodes - the parent IO for the sync is the root block
@@ -1164,25 +1358,48 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
txgoff = tx->tx_txg & TXG_MASK;
if (dmu_objset_userused_enabled(os)) {
- newlist = &os->os_synced_dnodes;
/*
* We must create the list here because it uses the
- * dn_dirty_link[] of this txg.
+ * dn_dirty_link[] of this txg. But it may already
+ * exist because we call dsl_dataset_sync() twice per txg.
*/
- list_create(newlist, sizeof (dnode_t),
- offsetof(dnode_t, dn_dirty_link[txgoff]));
+ if (os->os_synced_dnodes == NULL) {
+ os->os_synced_dnodes =
+ multilist_create(sizeof (dnode_t),
+ offsetof(dnode_t, dn_dirty_link[txgoff]),
+ dnode_multilist_index_func);
+ } else {
+ ASSERT3U(os->os_synced_dnodes->ml_offset, ==,
+ offsetof(dnode_t, dn_dirty_link[txgoff]));
+ }
}
- dmu_objset_sync_dnodes(&os->os_free_dnodes[txgoff], newlist, tx);
- dmu_objset_sync_dnodes(&os->os_dirty_dnodes[txgoff], newlist, tx);
+ for (int i = 0;
+ i < multilist_get_num_sublists(os->os_dirty_dnodes[txgoff]); i++) {
+ sync_dnodes_arg_t *sda = kmem_alloc(sizeof (*sda), KM_SLEEP);
+ sda->sda_list = os->os_dirty_dnodes[txgoff];
+ sda->sda_sublist_idx = i;
+ sda->sda_tx = tx;
+ (void) taskq_dispatch(dmu_objset_pool(os)->dp_sync_taskq,
+ sync_dnodes_task, sda, 0);
+ /* callback frees sda */
+ }
+ taskq_wait(dmu_objset_pool(os)->dp_sync_taskq);
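
The loop above is a plain fan-out/join: one task per sublist, a heap-allocated
argument per task (the loop returns before the workers run, so stack arguments
would not survive), and a single taskq_wait() as the join point. The shape,
reduced to its essentials with hypothetical names (the multilist_* and taskq_*
calls are the real interfaces):

/* Illustrative sketch, not part of the patch. */
typedef struct work_arg {
	multilist_t	*wa_list;
	int		wa_idx;
} work_arg_t;

static void
process_sublist(void *arg)
{
	work_arg_t *wa = arg;
	multilist_sublist_t *mls =
	    multilist_sublist_lock(wa->wa_list, wa->wa_idx);

	/* ... drain mls, as dmu_objset_sync_dnodes() does above ... */

	multilist_sublist_unlock(mls);
	kmem_free(wa, sizeof (*wa));	/* each task frees its own arg */
}

static void
fan_out(multilist_t *ml, taskq_t *tq)
{
	for (int i = 0; i < multilist_get_num_sublists(ml); i++) {
		work_arg_t *wa = kmem_alloc(sizeof (*wa), KM_SLEEP);
		wa->wa_list = ml;
		wa->wa_idx = i;
		(void) taskq_dispatch(tq, process_sublist, wa, 0);
	}
	taskq_wait(tq);		/* join: every sublist is drained here */
}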
list = &DMU_META_DNODE(os)->dn_dirty_records[txgoff];
- while ((dr = list_head(list))) {
+ while ((dr = list_head(list)) != NULL) {
ASSERT0(dr->dr_dbuf->db_level);
list_remove(list, dr);
if (dr->dr_zio)
zio_nowait(dr->dr_zio);
}
+
+ /* Enable dnode backfill if enough objects have been freed. */
+ if (os->os_freed_dnodes >= dmu_rescan_dnode_threshold) {
+ os->os_rescan_dnodes = B_TRUE;
+ os->os_freed_dnodes = 0;
+ }
+
/*
* Free intent log blocks up to this tx.
*/
@@ -1194,8 +1411,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
boolean_t
dmu_objset_is_dirty(objset_t *os, uint64_t txg)
{
- return (!list_is_empty(&os->os_dirty_dnodes[txg & TXG_MASK]) ||
- !list_is_empty(&os->os_free_dnodes[txg & TXG_MASK]));
+ return (!multilist_is_empty(os->os_dirty_dnodes[txg & TXG_MASK]));
}
static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
@@ -1214,64 +1430,182 @@ dmu_objset_userused_enabled(objset_t *os)
DMU_USERUSED_DNODE(os) != NULL);
}
+boolean_t
+dmu_objset_userobjused_enabled(objset_t *os)
+{
+ return (dmu_objset_userused_enabled(os) &&
+ spa_feature_is_enabled(os->os_spa, SPA_FEATURE_USEROBJ_ACCOUNTING));
+}
+
+typedef struct userquota_node {
+	/* must be the first field, see userquota_update_cache() */
+ char uqn_id[20 + DMU_OBJACCT_PREFIX_LEN];
+ int64_t uqn_delta;
+ avl_node_t uqn_node;
+} userquota_node_t;
+
+typedef struct userquota_cache {
+ avl_tree_t uqc_user_deltas;
+ avl_tree_t uqc_group_deltas;
+} userquota_cache_t;
+
+static int
+userquota_compare(const void *l, const void *r)
+{
+ const userquota_node_t *luqn = l;
+ const userquota_node_t *ruqn = r;
+ int rv;
+
+ /*
+ * NB: can only access uqn_id because userquota_update_cache() doesn't
+ * pass in an entire userquota_node_t.
+ */
+ rv = strcmp(luqn->uqn_id, ruqn->uqn_id);
+
+ return (AVL_ISIGN(rv));
+}
+
static void
-do_userquota_update(objset_t *os, uint64_t used, uint64_t flags,
- uint64_t user, uint64_t group, boolean_t subtract, dmu_tx_t *tx)
+do_userquota_cacheflush(objset_t *os, userquota_cache_t *cache, dmu_tx_t *tx)
+{
+ void *cookie;
+ userquota_node_t *uqn;
+
+ ASSERT(dmu_tx_is_syncing(tx));
+
+ cookie = NULL;
+ while ((uqn = avl_destroy_nodes(&cache->uqc_user_deltas,
+ &cookie)) != NULL) {
+ /*
+ * os_userused_lock protects against concurrent calls to
+ * zap_increment_int(). It's needed because zap_increment_int()
+ * is not thread-safe (i.e. not atomic).
+ */
+ mutex_enter(&os->os_userused_lock);
+ VERIFY0(zap_increment(os, DMU_USERUSED_OBJECT,
+ uqn->uqn_id, uqn->uqn_delta, tx));
+ mutex_exit(&os->os_userused_lock);
+ kmem_free(uqn, sizeof (*uqn));
+ }
+ avl_destroy(&cache->uqc_user_deltas);
+
+ cookie = NULL;
+ while ((uqn = avl_destroy_nodes(&cache->uqc_group_deltas,
+ &cookie)) != NULL) {
+ mutex_enter(&os->os_userused_lock);
+ VERIFY0(zap_increment(os, DMU_GROUPUSED_OBJECT,
+ uqn->uqn_id, uqn->uqn_delta, tx));
+ mutex_exit(&os->os_userused_lock);
+ kmem_free(uqn, sizeof (*uqn));
+ }
+ avl_destroy(&cache->uqc_group_deltas);
+}
+
+static void
+userquota_update_cache(avl_tree_t *avl, const char *id, int64_t delta)
+{
+ userquota_node_t *uqn;
+ avl_index_t idx;
+
+ ASSERT(strlen(id) < sizeof (uqn->uqn_id));
+ /*
+ * Use id directly for searching because uqn_id is the first field of
+ * userquota_node_t and fields after uqn_id won't be accessed in
+ * avl_find().
+ */
+ uqn = avl_find(avl, (const void *)id, &idx);
+ if (uqn == NULL) {
+ uqn = kmem_zalloc(sizeof (*uqn), KM_SLEEP);
+ strlcpy(uqn->uqn_id, id, sizeof (uqn->uqn_id));
+ avl_insert(avl, uqn, idx);
+ }
+ uqn->uqn_delta += delta;
+}
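
The avl_find() call above hands in a bare string where the tree expects a
userquota_node_t; that is legal only because uqn_id is the first member, so
the node address and the key address coincide and the comparator never reads
past the key. A standalone illustration of the same aliasing trick (toy
struct and names are hypothetical):

/* Illustrative sketch, not part of the patch. */
#include <stdio.h>
#include <string.h>

struct node {
	char	key[32];	/* MUST be first: &node aliases &node->key */
	int	value;
};

/* Comparator in the style of userquota_compare(): touches only the key. */
static int
node_cmp(const void *l, const void *r)
{
	return (strcmp(((const struct node *)l)->key,
	    ((const struct node *)r)->key));
}

int
main(void)
{
	struct node n = { "alice", 42 };
	const char *bare_key = "alice";

	/* Safe only while the comparator reads nothing past the first member. */
	printf("match: %d\n", node_cmp(&n, bare_key) == 0);
	return (0);
}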
+
+static void
+do_userquota_update(userquota_cache_t *cache, uint64_t used, uint64_t flags,
+ uint64_t user, uint64_t group, boolean_t subtract)
{
if ((flags & DNODE_FLAG_USERUSED_ACCOUNTED)) {
- int64_t delta = DNODE_SIZE + used;
+ int64_t delta = DNODE_MIN_SIZE + used;
+ char name[20];
+
if (subtract)
delta = -delta;
- VERIFY3U(0, ==, zap_increment_int(os, DMU_USERUSED_OBJECT,
- user, delta, tx));
- VERIFY3U(0, ==, zap_increment_int(os, DMU_GROUPUSED_OBJECT,
- group, delta, tx));
+
+ (void) sprintf(name, "%llx", (longlong_t)user);
+ userquota_update_cache(&cache->uqc_user_deltas, name, delta);
+
+ (void) sprintf(name, "%llx", (longlong_t)group);
+ userquota_update_cache(&cache->uqc_group_deltas, name, delta);
}
}
-void
-dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
+static void
+do_userobjquota_update(userquota_cache_t *cache, uint64_t flags,
+ uint64_t user, uint64_t group, boolean_t subtract)
{
+ if (flags & DNODE_FLAG_USEROBJUSED_ACCOUNTED) {
+ char name[20 + DMU_OBJACCT_PREFIX_LEN];
+ int delta = subtract ? -1 : 1;
+
+ (void) snprintf(name, sizeof (name), DMU_OBJACCT_PREFIX "%llx",
+ (longlong_t)user);
+ userquota_update_cache(&cache->uqc_user_deltas, name, delta);
+
+ (void) snprintf(name, sizeof (name), DMU_OBJACCT_PREFIX "%llx",
+ (longlong_t)group);
+ userquota_update_cache(&cache->uqc_group_deltas, name, delta);
+ }
+}
+
+typedef struct userquota_updates_arg {
+ objset_t *uua_os;
+ int uua_sublist_idx;
+ dmu_tx_t *uua_tx;
+} userquota_updates_arg_t;
+
+static void
+userquota_updates_task(void *arg)
+{
+ userquota_updates_arg_t *uua = arg;
+ objset_t *os = uua->uua_os;
+ dmu_tx_t *tx = uua->uua_tx;
dnode_t *dn;
- list_t *list = &os->os_synced_dnodes;
+ userquota_cache_t cache = { { 0 } };
- ASSERT(list_head(list) == NULL || dmu_objset_userused_enabled(os));
+ multilist_sublist_t *list =
+ multilist_sublist_lock(os->os_synced_dnodes, uua->uua_sublist_idx);
- while ((dn = list_head(list))) {
+ ASSERT(multilist_sublist_head(list) == NULL ||
+ dmu_objset_userused_enabled(os));
+ avl_create(&cache.uqc_user_deltas, userquota_compare,
+ sizeof (userquota_node_t), offsetof(userquota_node_t, uqn_node));
+ avl_create(&cache.uqc_group_deltas, userquota_compare,
+ sizeof (userquota_node_t), offsetof(userquota_node_t, uqn_node));
+
+ while ((dn = multilist_sublist_head(list)) != NULL) {
int flags;
ASSERT(!DMU_OBJECT_IS_SPECIAL(dn->dn_object));
ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE ||
dn->dn_phys->dn_flags &
DNODE_FLAG_USERUSED_ACCOUNTED);
- /* Allocate the user/groupused objects if necessary. */
- if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
- VERIFY(0 == zap_create_claim(os,
- DMU_USERUSED_OBJECT,
- DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
- VERIFY(0 == zap_create_claim(os,
- DMU_GROUPUSED_OBJECT,
- DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
- }
-
- /*
- * We intentionally modify the zap object even if the
- * net delta is zero. Otherwise
- * the block of the zap obj could be shared between
- * datasets but need to be different between them after
- * a bprewrite.
- */
-
flags = dn->dn_id_flags;
ASSERT(flags);
if (flags & DN_ID_OLD_EXIST) {
- do_userquota_update(os, dn->dn_oldused, dn->dn_oldflags,
- dn->dn_olduid, dn->dn_oldgid, B_TRUE, tx);
+ do_userquota_update(&cache,
+ dn->dn_oldused, dn->dn_oldflags,
+ dn->dn_olduid, dn->dn_oldgid, B_TRUE);
+ do_userobjquota_update(&cache, dn->dn_oldflags,
+ dn->dn_olduid, dn->dn_oldgid, B_TRUE);
}
if (flags & DN_ID_NEW_EXIST) {
- do_userquota_update(os, DN_USED_BYTES(dn->dn_phys),
- dn->dn_phys->dn_flags, dn->dn_newuid,
- dn->dn_newgid, B_FALSE, tx);
+ do_userquota_update(&cache,
+ DN_USED_BYTES(dn->dn_phys), dn->dn_phys->dn_flags,
+ dn->dn_newuid, dn->dn_newgid, B_FALSE);
+ do_userobjquota_update(&cache, dn->dn_phys->dn_flags,
+ dn->dn_newuid, dn->dn_newgid, B_FALSE);
}
mutex_enter(&dn->dn_mtx);
@@ -1289,8 +1623,41 @@ dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
dn->dn_id_flags &= ~(DN_ID_NEW_EXIST);
mutex_exit(&dn->dn_mtx);
- list_remove(list, dn);
- dnode_rele(dn, list);
+ multilist_sublist_remove(list, dn);
+ dnode_rele(dn, os->os_synced_dnodes);
+ }
+ do_userquota_cacheflush(os, &cache, tx);
+ multilist_sublist_unlock(list);
+ kmem_free(uua, sizeof (*uua));
+}
+
+void
+dmu_objset_do_userquota_updates(objset_t *os, dmu_tx_t *tx)
+{
+ if (!dmu_objset_userused_enabled(os))
+ return;
+
+ /* Allocate the user/groupused objects if necessary. */
+ if (DMU_USERUSED_DNODE(os)->dn_type == DMU_OT_NONE) {
+ VERIFY0(zap_create_claim(os,
+ DMU_USERUSED_OBJECT,
+ DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
+ VERIFY0(zap_create_claim(os,
+ DMU_GROUPUSED_OBJECT,
+ DMU_OT_USERGROUP_USED, DMU_OT_NONE, 0, tx));
+ }
+
+ for (int i = 0;
+ i < multilist_get_num_sublists(os->os_synced_dnodes); i++) {
+ userquota_updates_arg_t *uua =
+ kmem_alloc(sizeof (*uua), KM_SLEEP);
+ uua->uua_os = os;
+ uua->uua_sublist_idx = i;
+ uua->uua_tx = tx;
+ /* note: caller does taskq_wait() */
+ (void) taskq_dispatch(dmu_objset_pool(os)->dp_sync_taskq,
+ userquota_updates_task, uua, 0);
+ /* callback frees uua */
}
}
@@ -1443,19 +1810,19 @@ dmu_objset_userspace_present(objset_t *os)
OBJSET_FLAG_USERACCOUNTING_COMPLETE);
}
-int
-dmu_objset_userspace_upgrade(objset_t *os)
+boolean_t
+dmu_objset_userobjspace_present(objset_t *os)
+{
+ return (os->os_phys->os_flags &
+ OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE);
+}
+
+static int
+dmu_objset_space_upgrade(objset_t *os)
{
uint64_t obj;
int err = 0;
- if (dmu_objset_userspace_present(os))
- return (0);
- if (!dmu_objset_userused_enabled(os))
- return (SET_ERROR(ENOTSUP));
- if (dmu_objset_is_snapshot(os))
- return (SET_ERROR(EINVAL));
-
/*
* We simply need to mark every object dirty, so that it will be
* synced out and now accounted. If this is called
@@ -1469,6 +1836,13 @@ dmu_objset_userspace_upgrade(objset_t *os)
dmu_buf_t *db;
int objerr;
+ mutex_enter(&os->os_upgrade_lock);
+ if (os->os_upgrade_exit)
+ err = SET_ERROR(EINTR);
+ mutex_exit(&os->os_upgrade_lock);
+ if (err != 0)
+ return (err);
+
if (issig(JUSTLOOKING) && issig(FORREAL))
return (SET_ERROR(EINTR));
@@ -1486,12 +1860,69 @@ dmu_objset_userspace_upgrade(objset_t *os)
dmu_buf_rele(db, FTAG);
dmu_tx_commit(tx);
}
+ return (0);
+}
+
+int
+dmu_objset_userspace_upgrade(objset_t *os)
+{
+ int err = 0;
+
+ if (dmu_objset_userspace_present(os))
+ return (0);
+ if (dmu_objset_is_snapshot(os))
+ return (SET_ERROR(EINVAL));
+ if (!dmu_objset_userused_enabled(os))
+ return (SET_ERROR(ENOTSUP));
+
+ err = dmu_objset_space_upgrade(os);
+ if (err)
+ return (err);
os->os_flags |= OBJSET_FLAG_USERACCOUNTING_COMPLETE;
txg_wait_synced(dmu_objset_pool(os), 0);
return (0);
}
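
Both upgrade paths funnel into dmu_objset_space_upgrade(), whose core is a
dirty-every-object loop: touching each object's bonus buffer inside a
transaction forces it back through sync, where the new accounting code sees
it. A hedged sketch of that loop (error handling and the cooperative-interrupt
checks shown in the hunks above are trimmed; dmu_object_next(),
dmu_bonus_hold() and dmu_buf_will_dirty() are the real DMU calls):

/* Illustrative sketch, not part of the patch. */
static int
space_upgrade_sketch(objset_t *os)
{
	uint64_t obj;

	for (obj = 0; dmu_object_next(os, &obj, B_FALSE, 0) == 0; ) {
		dmu_tx_t *tx = dmu_tx_create(os);
		dmu_buf_t *db;

		dmu_tx_hold_bonus(tx, obj);
		if (dmu_tx_assign(tx, TXG_WAIT) != 0) {
			dmu_tx_abort(tx);
			continue;	/* the real code retries or bails */
		}
		if (dmu_bonus_hold(os, obj, FTAG, &db) == 0) {
			dmu_buf_will_dirty(db, tx);	/* re-account on sync */
			dmu_buf_rele(db, FTAG);
		}
		dmu_tx_commit(tx);
	}
	return (0);
}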
+static int
+dmu_objset_userobjspace_upgrade_cb(objset_t *os)
+{
+ int err = 0;
+
+ if (dmu_objset_userobjspace_present(os))
+ return (0);
+ if (dmu_objset_is_snapshot(os))
+ return (SET_ERROR(EINVAL));
+ if (!dmu_objset_userobjused_enabled(os))
+ return (SET_ERROR(ENOTSUP));
+
+ dmu_objset_ds(os)->ds_feature_activation_needed[
+ SPA_FEATURE_USEROBJ_ACCOUNTING] = B_TRUE;
+
+ err = dmu_objset_space_upgrade(os);
+ if (err)
+ return (err);
+
+ os->os_flags |= OBJSET_FLAG_USEROBJACCOUNTING_COMPLETE;
+ txg_wait_synced(dmu_objset_pool(os), 0);
+ return (0);
+}
+
+void
+dmu_objset_userobjspace_upgrade(objset_t *os)
+{
+ dmu_objset_upgrade(os, dmu_objset_userobjspace_upgrade_cb);
+}
+
+boolean_t
+dmu_objset_userobjspace_upgradable(objset_t *os)
+{
+ return (dmu_objset_type(os) == DMU_OST_ZFS &&
+ !dmu_objset_is_snapshot(os) &&
+ dmu_objset_userobjused_enabled(os) &&
+ !dmu_objset_userobjspace_present(os));
+}
+
void
dmu_objset_space(objset_t *os, uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp)
@@ -1550,7 +1981,7 @@ dmu_snapshot_realname(objset_t *os, char *name, char *real, int maxlen,
return (zap_lookup_norm(ds->ds_dir->dd_pool->dp_meta_objset,
dsl_dataset_phys(ds)->ds_snapnames_zapobj, name, 8, 1, &ignored,
- MT_FIRST, real, maxlen, conflict));
+ MT_NORMALIZE, real, maxlen, conflict));
}
int
@@ -1639,6 +2070,7 @@ typedef struct dmu_objset_find_ctx {
taskq_t *dc_tq;
dsl_pool_t *dc_dp;
uint64_t dc_ddobj;
+ char *dc_ddname; /* last component of ddobj's name */
int (*dc_func)(dsl_pool_t *, dsl_dataset_t *, void *);
void *dc_arg;
int dc_flags;
@@ -1662,7 +2094,12 @@ dmu_objset_find_dp_impl(dmu_objset_find_ctx_t *dcp)
if (*dcp->dc_error != 0)
goto out;
- err = dsl_dir_hold_obj(dp, dcp->dc_ddobj, NULL, FTAG, &dd);
+ /*
+ * Note: passing the name (dc_ddname) here is optional, but it
+ * improves performance because we don't need to call
+ * zap_value_search() to determine the name.
+ */
+ err = dsl_dir_hold_obj(dp, dcp->dc_ddobj, dcp->dc_ddname, FTAG, &dd);
if (err != 0)
goto out;
@@ -1687,9 +2124,11 @@ dmu_objset_find_dp_impl(dmu_objset_find_ctx_t *dcp)
sizeof (uint64_t));
ASSERT3U(attr->za_num_integers, ==, 1);
- child_dcp = kmem_alloc(sizeof (*child_dcp), KM_SLEEP);
+ child_dcp =
+ kmem_alloc(sizeof (*child_dcp), KM_SLEEP);
*child_dcp = *dcp;
child_dcp->dc_ddobj = attr->za_first_integer;
+ child_dcp->dc_ddname = spa_strdup(attr->za_name);
if (dcp->dc_tq != NULL)
(void) taskq_dispatch(dcp->dc_tq,
dmu_objset_find_dp_cb, child_dcp, TQ_SLEEP);
@@ -1732,16 +2171,25 @@ dmu_objset_find_dp_impl(dmu_objset_find_ctx_t *dcp)
}
}
- dsl_dir_rele(dd, FTAG);
kmem_free(attr, sizeof (zap_attribute_t));
- if (err != 0)
+ if (err != 0) {
+ dsl_dir_rele(dd, FTAG);
goto out;
+ }
/*
* Apply to self.
*/
err = dsl_dataset_hold_obj(dp, thisobj, FTAG, &ds);
+
+ /*
+ * Note: we hold the dir while calling dsl_dataset_hold_obj() so
+ * that the dir will remain cached, and we won't have to re-instantiate
+ * it (which could be expensive due to finding its name via
+ * zap_value_search()).
+ */
+ dsl_dir_rele(dd, FTAG);
if (err != 0)
goto out;
err = dcp->dc_func(dp, ds, dcp->dc_arg);
@@ -1756,6 +2204,8 @@ dmu_objset_find_dp_impl(dmu_objset_find_ctx_t *dcp)
mutex_exit(dcp->dc_error_lock);
}
+ if (dcp->dc_ddname != NULL)
+ spa_strfree(dcp->dc_ddname);
kmem_free(dcp, sizeof (*dcp));
}
@@ -1800,6 +2250,7 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
dcp->dc_tq = NULL;
dcp->dc_dp = dp;
dcp->dc_ddobj = ddobj;
+ dcp->dc_ddname = NULL;
dcp->dc_func = func;
dcp->dc_arg = arg;
dcp->dc_flags = flags;
@@ -1821,6 +2272,7 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
* thread suffices. For now, stay single threaded.
*/
dmu_objset_find_dp_impl(dcp);
+ mutex_destroy(&err_lock);
return (error);
}
@@ -1832,6 +2284,8 @@ dmu_objset_find_dp(dsl_pool_t *dp, uint64_t ddobj,
INT_MAX, 0);
if (tq == NULL) {
kmem_free(dcp, sizeof (*dcp));
+ mutex_destroy(&err_lock);
+
return (SET_ERROR(ENOMEM));
}
dcp->dc_tq = tq;
@@ -2001,7 +2455,7 @@ dmu_objset_get_user(objset_t *os)
/*
* Determine name of filesystem, given name of snapshot.
- * buf must be at least MAXNAMELEN bytes
+ * buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes
*/
int
dmu_fsname(const char *snapname, char *buf)
@@ -2009,12 +2463,29 @@ dmu_fsname(const char *snapname, char *buf)
char *atp = strchr(snapname, '@');
if (atp == NULL)
return (SET_ERROR(EINVAL));
- if (atp - snapname >= MAXNAMELEN)
+ if (atp - snapname >= ZFS_MAX_DATASET_NAME_LEN)
return (SET_ERROR(ENAMETOOLONG));
(void) strlcpy(buf, snapname, atp - snapname + 1);
return (0);
}
+/*
+ * Call when we think we're going to write/free space in open context to track
+ * the amount of dirty data in the open txg, which is also the amount
+ * of memory that cannot be evicted until this txg syncs.
+ */
+void
+dmu_objset_willuse_space(objset_t *os, int64_t space, dmu_tx_t *tx)
+{
+ dsl_dataset_t *ds = os->os_dsl_dataset;
+ int64_t aspace = spa_get_worst_case_asize(os->os_spa, space);
+
+ if (ds != NULL) {
+ dsl_dir_willuse_space(ds->ds_dir, aspace, tx);
+ dsl_pool_dirty_space(dmu_tx_pool(tx), space, tx);
+ }
+}
+
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(dmu_objset_zil);
EXPORT_SYMBOL(dmu_objset_pool);
@@ -2037,6 +2508,7 @@ EXPORT_SYMBOL(dmu_objset_find);
EXPORT_SYMBOL(dmu_objset_byteswap);
EXPORT_SYMBOL(dmu_objset_evict_dbufs);
EXPORT_SYMBOL(dmu_objset_snap_cmtime);
+EXPORT_SYMBOL(dmu_objset_dnodesize);
EXPORT_SYMBOL(dmu_objset_sync);
EXPORT_SYMBOL(dmu_objset_is_dirty);
@@ -2049,4 +2521,8 @@ EXPORT_SYMBOL(dmu_objset_userquota_get_ids);
EXPORT_SYMBOL(dmu_objset_userused_enabled);
EXPORT_SYMBOL(dmu_objset_userspace_upgrade);
EXPORT_SYMBOL(dmu_objset_userspace_present);
+EXPORT_SYMBOL(dmu_objset_userobjused_enabled);
+EXPORT_SYMBOL(dmu_objset_userobjspace_upgrade);
+EXPORT_SYMBOL(dmu_objset_userobjspace_upgradable);
+EXPORT_SYMBOL(dmu_objset_userobjspace_present);
#endif
diff --git a/zfs/module/zfs/dmu_send.c b/zfs/module/zfs/dmu_send.c
index 940454977c69..344e42018df1 100644
--- a/zfs/module/zfs/dmu_send.c
+++ b/zfs/module/zfs/dmu_send.c
@@ -20,10 +20,11 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright 2014 HybridCluster. All rights reserved.
+ * Copyright 2016 RackTop Systems.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
*/
@@ -54,13 +55,44 @@
#include <sys/blkptr.h>
#include <sys/dsl_bookmark.h>
#include <sys/zfeature.h>
+#include <sys/bqueue.h>
#include <sys/zvol.h>
+#include <sys/policy.h>
/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
int zfs_send_corrupt_data = B_FALSE;
+int zfs_send_queue_length = 16 * 1024 * 1024;
+int zfs_recv_queue_length = 16 * 1024 * 1024;
+/* Set this tunable to FALSE to disable setting of DRR_FLAG_FREERECORDS */
+int zfs_send_set_freerecords_bit = B_TRUE;
static char *dmu_recv_tag = "dmu_recv_tag";
-static const char *recv_clone_name = "%recv";
+const char *recv_clone_name = "%recv";
+
+#define BP_SPAN(datablkszsec, indblkshift, level) \
+ (((uint64_t)datablkszsec) << (SPA_MINBLOCKSHIFT + \
+ (level) * (indblkshift - SPA_BLKPTRSHIFT)))
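
BP_SPAN() gives the number of bytes of an object covered by one block pointer
at a given indirection level. A quick worked instance under the usual
constants (SPA_MINBLOCKSHIFT = 9, SPA_BLKPTRSHIFT = 7, both assumptions here):
with 128K data blocks, datablkszsec = 256, and a 128K indirect block
(indblkshift = 17) holds 2^10 = 1024 pointers, so each level multiplies the
span by 1024:

/* Illustrative sketch, not part of the patch. */
#include <stdint.h>
#include <stdio.h>

#define SPA_MINBLOCKSHIFT	9
#define SPA_BLKPTRSHIFT		7

#define BP_SPAN(datablkszsec, indblkshift, level) \
	(((uint64_t)(datablkszsec)) << (SPA_MINBLOCKSHIFT + \
	(level) * ((indblkshift) - SPA_BLKPTRSHIFT)))

int
main(void)
{
	/* 128K data blocks = 256 sectors of 512 bytes. */
	printf("L0 span: %llu\n",
	    (unsigned long long)BP_SPAN(256, 17, 0));	/* 131072 */
	printf("L1 span: %llu\n",
	    (unsigned long long)BP_SPAN(256, 17, 1));	/* 134217728 */
	return (0);
}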
+
+static void byteswap_record(dmu_replay_record_t *drr);
+
+struct send_thread_arg {
+ bqueue_t q;
+ dsl_dataset_t *ds; /* Dataset to traverse */
+ uint64_t fromtxg; /* Traverse from this txg */
+ int flags; /* flags to pass to traverse_dataset */
+ int error_code;
+ boolean_t cancel;
+ zbookmark_phys_t resume;
+};
+
+struct send_block_record {
+ boolean_t eos_marker; /* Marks the end of the stream */
+ blkptr_t bp;
+ zbookmark_phys_t zb;
+ uint8_t indblkshift;
+ uint16_t datablkszsec;
+ bqueue_node_t ln;
+};
typedef struct dump_bytes_io {
dmu_sendarg_t *dbi_dsp;
@@ -73,11 +105,23 @@ dump_bytes_cb(void *arg)
{
dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
dmu_sendarg_t *dsp = dbi->dbi_dsp;
- dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
+ dsl_dataset_t *ds = dmu_objset_ds(dsp->dsa_os);
ssize_t resid; /* have to get resid to get detailed errno */
+
+ /*
+ * The code does not rely on this (len being a multiple of 8). We keep
+ * this assertion because of the corresponding assertion in
+ * receive_read(). Keeping this assertion ensures that we do not
+ * inadvertently break backwards compatibility (causing the assertion
+ * in receive_read() to trigger on old software).
+ *
+ * Removing the assertions could be rolled into a new feature that uses
+ * data that isn't 8-byte aligned; if the assertions were removed, a
+ * feature flag would have to be added.
+ */
+
ASSERT0(dbi->dbi_len % 8);
- fletcher_4_incremental_native(dbi->dbi_buf, dbi->dbi_len, &dsp->dsa_zc);
dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
(caddr_t)dbi->dbi_buf, dbi->dbi_len,
0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
@@ -112,6 +156,51 @@ dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
return (dsp->dsa_err);
}
+/*
+ * For all record types except BEGIN, fill in the checksum (overlaid in
+ * drr_u.drr_checksum.drr_checksum). The checksum verifies everything
+ * up to the start of the checksum itself.
+ */
+static int
+dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len)
+{
+ ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+ ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+ (void) fletcher_4_incremental_native(dsp->dsa_drr,
+ offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+ &dsp->dsa_zc);
+ if (dsp->dsa_drr->drr_type == DRR_BEGIN) {
+ dsp->dsa_sent_begin = B_TRUE;
+ } else {
+ ASSERT(ZIO_CHECKSUM_IS_ZERO(&dsp->dsa_drr->drr_u.
+ drr_checksum.drr_checksum));
+ dsp->dsa_drr->drr_u.drr_checksum.drr_checksum = dsp->dsa_zc;
+ }
+ if (dsp->dsa_drr->drr_type == DRR_END) {
+ dsp->dsa_sent_end = B_TRUE;
+ }
+ (void) fletcher_4_incremental_native(&dsp->dsa_drr->
+ drr_u.drr_checksum.drr_checksum,
+ sizeof (zio_cksum_t), &dsp->dsa_zc);
+ if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
+ return (SET_ERROR(EINTR));
+ if (payload_len != 0) {
+ (void) fletcher_4_incremental_native(payload, payload_len,
+ &dsp->dsa_zc);
+ if (dump_bytes(dsp, payload, payload_len) != 0)
+ return (SET_ERROR(EINTR));
+ }
+ return (0);
+}
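
dump_record() depends on the checksum being the trailing member of the
record: the running fletcher-4 covers everything up to it, the still-zero
field is then overlaid with the running value (for everything but BEGIN
records), and finally the field itself is folded into the stream checksum
before the record is written. The framing, reduced to a standalone toy
(record_t and csum_update() are hypothetical stand-ins for
dmu_replay_record_t and fletcher_4_incremental_native()):

/* Illustrative sketch, not part of the patch. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

typedef struct record {
	uint64_t type;
	uint64_t payload;
	uint64_t checksum;	/* trailing member, initially zero */
} record_t;

static void
csum_update(uint64_t *acc, const void *buf, size_t len)
{
	const uint8_t *p = buf;
	while (len--)
		*acc = *acc * 31 + *p++;	/* toy running checksum */
}

int
main(void)
{
	uint64_t acc = 0;
	record_t r = { .type = 1, .payload = 42, .checksum = 0 };

	csum_update(&acc, &r, offsetof(record_t, checksum)); /* 1: header */
	r.checksum = acc;				/* 2: overlay */
	csum_update(&acc, &r.checksum, sizeof (r.checksum)); /* 3: trailer */
	/* 4: all of r now goes out; acc keeps running for the next record */
	printf("record checksum: %016llx\n", (unsigned long long)r.checksum);
	return (0);
}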
+
+/*
+ * Fill in the drr_free struct, or perform aggregation if the previous record is
+ * also a free record, and the two are adjacent.
+ *
+ * Note that we send free records even for a full send, because we want to be
+ * able to receive a full send as a clone, which requires a list of all the free
+ * and freeobject records that were generated on the source.
+ */
static int
dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
uint64_t length)
@@ -123,7 +212,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
* that the receiving system doesn't have any dbufs in the range
* being freed. This is always true because there is a one-record
* constraint: we only send one WRITE record for any given
- * object+offset. We know that the one-record constraint is
+ * object,offset. We know that the one-record constraint is
* true because we always send data in increasing order by
* object,offset.
*
@@ -135,15 +224,6 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
(object == dsp->dsa_last_data_object &&
offset > dsp->dsa_last_data_offset));
- /*
- * If we are doing a non-incremental send, then there can't
- * be any data in the dataset we're receiving into. Therefore
- * a free record would simply be a no-op. Save space by not
- * sending it to begin with.
- */
- if (!dsp->dsa_incremental)
- return (0);
-
if (length != -1ULL && offset + length < offset)
length = -1ULL;
@@ -156,8 +236,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
*/
if (dsp->dsa_pending_op != PENDING_NONE &&
dsp->dsa_pending_op != PENDING_FREE) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (SET_ERROR(EINTR));
dsp->dsa_pending_op = PENDING_NONE;
}
@@ -180,8 +259,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
return (0);
} else {
/* not a continuation. Push out pending record */
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (SET_ERROR(EINTR));
dsp->dsa_pending_op = PENDING_NONE;
}
@@ -194,8 +272,7 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
drrf->drr_length = length;
drrf->drr_toguid = dsp->dsa_toguid;
if (length == -1ULL) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (SET_ERROR(EINTR));
} else {
dsp->dsa_pending_op = PENDING_FREE;
@@ -206,8 +283,10 @@ dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
static int
dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
- uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data)
+ uint64_t object, uint64_t offset, int lsize, int psize, const blkptr_t *bp,
+ void *data)
{
+ uint64_t payload_size;
struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);
/*
@@ -218,7 +297,7 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
(object == dsp->dsa_last_data_object &&
offset > dsp->dsa_last_data_offset));
dsp->dsa_last_data_object = object;
- dsp->dsa_last_data_offset = offset + blksz - 1;
+ dsp->dsa_last_data_offset = offset + lsize - 1;
/*
* If there is any kind of pending aggregation (currently either
@@ -227,19 +306,36 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
* of different types.
*/
if (dsp->dsa_pending_op != PENDING_NONE) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (SET_ERROR(EINTR));
dsp->dsa_pending_op = PENDING_NONE;
}
- /* write a DATA record */
+ /* write a WRITE record */
bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
dsp->dsa_drr->drr_type = DRR_WRITE;
drrw->drr_object = object;
drrw->drr_type = type;
drrw->drr_offset = offset;
- drrw->drr_length = blksz;
drrw->drr_toguid = dsp->dsa_toguid;
+ drrw->drr_logical_size = lsize;
+
+ /* only set the compression fields if the buf is compressed */
+ if (lsize != psize) {
+ ASSERT(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_COMPRESSED);
+ ASSERT(!BP_IS_EMBEDDED(bp));
+ ASSERT(!BP_SHOULD_BYTESWAP(bp));
+ ASSERT(!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)));
+ ASSERT3U(BP_GET_COMPRESS(bp), !=, ZIO_COMPRESS_OFF);
+ ASSERT3S(psize, >, 0);
+ ASSERT3S(lsize, >=, psize);
+
+ drrw->drr_compressiontype = BP_GET_COMPRESS(bp);
+ drrw->drr_compressed_size = psize;
+ payload_size = drrw->drr_compressed_size;
+ } else {
+ payload_size = drrw->drr_logical_size;
+ }
+
if (bp == NULL || BP_IS_EMBEDDED(bp)) {
/*
* There's no pre-computed checksum for partial-block
@@ -250,7 +346,8 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
drrw->drr_checksumtype = ZIO_CHECKSUM_OFF;
} else {
drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
- if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup)
+ if (zio_checksum_table[drrw->drr_checksumtype].ci_flags &
+ ZCHECKSUM_FLAG_DEDUP)
drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
DDK_SET_LSIZE(&drrw->drr_key, BP_GET_LSIZE(bp));
DDK_SET_PSIZE(&drrw->drr_key, BP_GET_PSIZE(bp));
@@ -258,9 +355,7 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
drrw->drr_key.ddk_cksum = bp->blk_cksum;
}
- if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
- return (SET_ERROR(EINTR));
- if (dump_bytes(dsp, data, blksz) != 0)
+ if (dump_record(dsp, data, payload_size) != 0)
return (SET_ERROR(EINTR));
return (0);
}
@@ -274,8 +369,7 @@ dump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
&(dsp->dsa_drr->drr_u.drr_write_embedded);
if (dsp->dsa_pending_op != PENDING_NONE) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (EINTR);
dsp->dsa_pending_op = PENDING_NONE;
}
@@ -295,9 +389,7 @@ dump_write_embedded(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
decode_embedded_bp_compressed(bp, buf);
- if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
- return (EINTR);
- if (dump_bytes(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0)
+ if (dump_record(dsp, buf, P2ROUNDUP(drrw->drr_psize, 8)) != 0)
return (EINTR);
return (0);
}
@@ -308,8 +400,7 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);
if (dsp->dsa_pending_op != PENDING_NONE) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (SET_ERROR(EINTR));
dsp->dsa_pending_op = PENDING_NONE;
}
@@ -321,9 +412,7 @@ dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
drrs->drr_length = blksz;
drrs->drr_toguid = dsp->dsa_toguid;
- if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)))
- return (SET_ERROR(EINTR));
- if (dump_bytes(dsp, data, blksz))
+ if (dump_record(dsp, data, blksz) != 0)
return (SET_ERROR(EINTR));
return (0);
}
@@ -332,10 +421,22 @@ static int
dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
{
struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);
+ uint64_t maxobj = DNODES_PER_BLOCK *
+ (DMU_META_DNODE(dsp->dsa_os)->dn_maxblkid + 1);
- /* See comment in dump_free(). */
- if (!dsp->dsa_incremental)
- return (0);
+ /*
+ * ZoL < 0.7 does not handle large FREEOBJECTS records correctly,
+ * leading to zfs recv never completing. To avoid this issue, don't
+ * send FREEOBJECTS records for object IDs which cannot exist on the
+ * receiving side.
+ */
+ if (maxobj > 0) {
+ if (maxobj < firstobj)
+ return (0);
+
+ if (maxobj < firstobj + numobjs)
+ numobjs = maxobj - firstobj;
+ }
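
Worked numbers for the clamp, assuming 512-byte dnodes in 16K meta-dnode
blocks (DNODES_PER_BLOCK = 32): a meta-dnode with dn_maxblkid = 3 gives
maxobj = 32 * 4 = 128. A FREEOBJECTS record for firstobj = 100,
numobjs = 1000 is then trimmed to numobjs = 128 - 100 = 28, and one with
firstobj = 200 is suppressed entirely, since no such objects can exist on
the receiving side.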
/*
* If there is a pending op, but it's not PENDING_FREEOBJECTS,
@@ -346,8 +447,7 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
*/
if (dsp->dsa_pending_op != PENDING_NONE &&
dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (SET_ERROR(EINTR));
dsp->dsa_pending_op = PENDING_NONE;
}
@@ -361,8 +461,7 @@ dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
return (0);
} else {
/* can't be aggregated. Push out pending record */
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (SET_ERROR(EINTR));
dsp->dsa_pending_op = PENDING_NONE;
}
@@ -385,12 +484,24 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
{
struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object);
+ if (object < dsp->dsa_resume_object) {
+ /*
+ * Note: when resuming, we will visit all the dnodes in
+ * the block of dnodes that we are resuming from. In
+ * this case it's unnecessary to send the dnodes prior to
+ * the one we are resuming from. We should be at most one
+ * block's worth of dnodes behind the resume point.
+ */
+ ASSERT3U(dsp->dsa_resume_object - object, <,
+ 1 << (DNODE_BLOCK_SHIFT - DNODE_SHIFT));
+ return (0);
+ }
+
if (dnp == NULL || dnp->dn_type == DMU_OT_NONE)
return (dump_freeobjects(dsp, object, 1));
if (dsp->dsa_pending_op != PENDING_NONE) {
- if (dump_bytes(dsp, dsp->dsa_drr,
- sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
return (SET_ERROR(EINTR));
dsp->dsa_pending_op = PENDING_NONE;
}
@@ -403,6 +514,7 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
drro->drr_bonustype = dnp->dn_bonustype;
drro->drr_blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
drro->drr_bonuslen = dnp->dn_bonuslen;
+ drro->drr_dn_slots = dnp->dn_extra_slots + 1;
drro->drr_checksumtype = dnp->dn_checksum;
drro->drr_compress = dnp->dn_compress;
drro->drr_toguid = dsp->dsa_toguid;
@@ -411,11 +523,10 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE)
drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE;
- if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
- return (SET_ERROR(EINTR));
-
- if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
+ if (dump_record(dsp, DN_BONUS(dnp),
+ P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0) {
return (SET_ERROR(EINTR));
+ }
/* Free anything past the end of the file. */
if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
@@ -436,7 +547,7 @@ backup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp)
* Compression function must be legacy, or explicitly enabled.
*/
if ((BP_GET_COMPRESS(bp) >= ZIO_COMPRESS_LEGACY_FUNCTIONS &&
- !(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4)))
+ !(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_LZ4)))
return (B_FALSE);
/*
@@ -453,47 +564,119 @@ backup_do_embed(dmu_sendarg_t *dsp, const blkptr_t *bp)
return (B_FALSE);
}
-#define BP_SPAN(dnp, level) \
- (((uint64_t)dnp->dn_datablkszsec) << (SPA_MINBLOCKSHIFT + \
- (level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT)))
+/*
+ * This is the callback function to traverse_dataset that acts as the worker
+ * thread for dmu_send_impl.
+ */
+/*ARGSUSED*/
+static int
+send_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
+ const zbookmark_phys_t *zb, const struct dnode_phys *dnp, void *arg)
+{
+ struct send_thread_arg *sta = arg;
+ struct send_block_record *record;
+ uint64_t record_size;
+ int err = 0;
+
+ ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT ||
+ zb->zb_object >= sta->resume.zb_object);
-/* ARGSUSED */
+ if (sta->cancel)
+ return (SET_ERROR(EINTR));
+
+ if (bp == NULL) {
+ ASSERT3U(zb->zb_level, ==, ZB_DNODE_LEVEL);
+ return (0);
+ } else if (zb->zb_level < 0) {
+ return (0);
+ }
+
+ record = kmem_zalloc(sizeof (struct send_block_record), KM_SLEEP);
+ record->eos_marker = B_FALSE;
+ record->bp = *bp;
+ record->zb = *zb;
+ record->indblkshift = dnp->dn_indblkshift;
+ record->datablkszsec = dnp->dn_datablkszsec;
+ record_size = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
+ bqueue_enqueue(&sta->q, record, record_size);
+
+ return (err);
+}
+
+/*
+ * This function kicks off the traverse_dataset. It also handles setting the
+ * error code of the thread in case something goes wrong, and pushes the End of
+ * Stream record when the traverse_dataset call has finished. If there is no
+ * dataset to traverse, the thread immediately pushes an End of Stream marker.
+ */
+static void
+send_traverse_thread(void *arg)
+{
+ struct send_thread_arg *st_arg = arg;
+ int err;
+ struct send_block_record *data;
+ fstrans_cookie_t cookie = spl_fstrans_mark();
+
+ if (st_arg->ds != NULL) {
+ err = traverse_dataset_resume(st_arg->ds,
+ st_arg->fromtxg, &st_arg->resume,
+ st_arg->flags, send_cb, st_arg);
+
+ if (err != EINTR)
+ st_arg->error_code = err;
+ }
+ data = kmem_zalloc(sizeof (*data), KM_SLEEP);
+ data->eos_marker = B_TRUE;
+ bqueue_enqueue(&st_arg->q, data, 1);
+ spl_fstrans_unmark(cookie);
+ thread_exit();
+}
+
+/*
+ * This function actually handles figuring out what kind of record needs to be
+ * dumped, reading the data (which has hopefully been prefetched), and calling
+ * the appropriate helper function.
+ */
static int
-backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
- const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
+do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
{
- dmu_sendarg_t *dsp = arg;
+ dsl_dataset_t *ds = dmu_objset_ds(dsa->dsa_os);
+ const blkptr_t *bp = &data->bp;
+ const zbookmark_phys_t *zb = &data->zb;
+ uint8_t indblkshift = data->indblkshift;
+ uint16_t dblkszsec = data->datablkszsec;
+ spa_t *spa = ds->ds_dir->dd_pool->dp_spa;
dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
int err = 0;
+ uint64_t dnobj;
- if (issig(JUSTLOOKING) && issig(FORREAL))
- return (SET_ERROR(EINTR));
+ ASSERT3U(zb->zb_level, >=, 0);
+
+ ASSERT(zb->zb_object == DMU_META_DNODE_OBJECT ||
+ zb->zb_object >= dsa->dsa_resume_object);
if (zb->zb_object != DMU_META_DNODE_OBJECT &&
DMU_OBJECT_IS_SPECIAL(zb->zb_object)) {
return (0);
- } else if (zb->zb_level == ZB_ZIL_LEVEL) {
- /*
- * If we are sending a non-snapshot (which is allowed on
- * read-only pools), it may have a ZIL, which must be ignored.
- */
- return (0);
} else if (BP_IS_HOLE(bp) &&
zb->zb_object == DMU_META_DNODE_OBJECT) {
- uint64_t span = BP_SPAN(dnp, zb->zb_level);
+ uint64_t span = BP_SPAN(dblkszsec, indblkshift, zb->zb_level);
uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
- err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT);
+ err = dump_freeobjects(dsa, dnobj, span >> DNODE_SHIFT);
} else if (BP_IS_HOLE(bp)) {
- uint64_t span = BP_SPAN(dnp, zb->zb_level);
- err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span);
+ uint64_t span = BP_SPAN(dblkszsec, indblkshift, zb->zb_level);
+ uint64_t offset = zb->zb_blkid * span;
+ err = dump_free(dsa, zb->zb_object, offset, span);
} else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
return (0);
} else if (type == DMU_OT_DNODE) {
dnode_phys_t *blk;
- int i;
- int blksz = BP_GET_LSIZE(bp);
+ int epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
arc_flags_t aflags = ARC_FLAG_WAIT;
arc_buf_t *abuf;
+ int i;
+
+ ASSERT0(zb->zb_level);
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
@@ -501,14 +684,13 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
return (SET_ERROR(EIO));
blk = abuf->b_data;
- for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
- uint64_t dnobj = (zb->zb_blkid <<
- (DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
- err = dump_dnode(dsp, dnobj, blk+i);
+ dnobj = zb->zb_blkid * epb;
+ for (i = 0; i < epb; i += blk[i].dn_extra_slots + 1) {
+ err = dump_dnode(dsa, dnobj + i, blk + i);
if (err != 0)
break;
}
- (void) arc_buf_remove_ref(abuf, &abuf);
+ arc_buf_destroy(abuf, &abuf);
} else if (type == DMU_OT_SA) {
arc_flags_t aflags = ARC_FLAG_WAIT;
arc_buf_t *abuf;
@@ -519,29 +701,61 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
&aflags, zb) != 0)
return (SET_ERROR(EIO));
- err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
- (void) arc_buf_remove_ref(abuf, &abuf);
- } else if (backup_do_embed(dsp, bp)) {
+ err = dump_spill(dsa, zb->zb_object, blksz, abuf->b_data);
+ arc_buf_destroy(abuf, &abuf);
+ } else if (backup_do_embed(dsa, bp)) {
/* it's an embedded level-0 block of a regular object */
- int blksz = dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT;
- err = dump_write_embedded(dsp, zb->zb_object,
+ int blksz = dblkszsec << SPA_MINBLOCKSHIFT;
+ ASSERT0(zb->zb_level);
+ err = dump_write_embedded(dsa, zb->zb_object,
zb->zb_blkid * blksz, blksz, bp);
- } else { /* it's a level-0 block of a regular object */
- uint64_t offset;
+ } else {
+ /* it's a level-0 block of a regular object */
arc_flags_t aflags = ARC_FLAG_WAIT;
arc_buf_t *abuf;
- int blksz = BP_GET_LSIZE(bp);
+ int blksz = dblkszsec << SPA_MINBLOCKSHIFT;
+ uint64_t offset;
+
+ /*
+ * If we have large blocks stored on disk but the send flags
+ * don't allow us to send large blocks, we split the data from
+ * the arc buf into chunks.
+ */
+ boolean_t split_large_blocks = blksz > SPA_OLD_MAXBLOCKSIZE &&
+ !(dsa->dsa_featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS);
+ /*
+ * We should only request compressed data from the ARC if all
+ * the following are true:
+ * - stream compression was requested
+ * - we aren't splitting large blocks into smaller chunks
+ * - the data won't need to be byteswapped before sending
+ * - this isn't an embedded block
+ * - this isn't metadata (if receiving on a different endian
+ * system it can be byteswapped more easily)
+ */
+ boolean_t request_compressed =
+ (dsa->dsa_featureflags & DMU_BACKUP_FEATURE_COMPRESSED) &&
+ !split_large_blocks && !BP_SHOULD_BYTESWAP(bp) &&
+ !BP_IS_EMBEDDED(bp) && !DMU_OT_IS_METADATA(BP_GET_TYPE(bp));
- ASSERT3U(blksz, ==, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
ASSERT0(zb->zb_level);
+ ASSERT(zb->zb_object > dsa->dsa_resume_object ||
+ (zb->zb_object == dsa->dsa_resume_object &&
+ zb->zb_blkid * blksz >= dsa->dsa_resume_offset));
+
+ ASSERT3U(blksz, ==, BP_GET_LSIZE(bp));
+
+ enum zio_flag zioflags = ZIO_FLAG_CANFAIL;
+ if (request_compressed)
+ zioflags |= ZIO_FLAG_RAW;
+
if (arc_read(NULL, spa, bp, arc_getbuf_func, &abuf,
- ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL,
- &aflags, zb) != 0) {
+ ZIO_PRIORITY_ASYNC_READ, zioflags, &aflags, zb) != 0) {
if (zfs_send_corrupt_data) {
- uint64_t *ptr;
/* Send a block filled with 0x"zfs badd bloc" */
- abuf = arc_buf_alloc(spa, blksz, &abuf,
- ARC_BUFC_DATA);
+ abuf = arc_alloc_buf(spa, &abuf, ARC_BUFC_DATA,
+ blksz);
+ uint64_t *ptr;
for (ptr = abuf->b_data;
(char *)ptr < (char *)abuf->b_data + blksz;
ptr++)
@@ -553,23 +767,24 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
offset = zb->zb_blkid * blksz;
- if (!(dsp->dsa_featureflags &
- DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
- blksz > SPA_OLD_MAXBLOCKSIZE) {
+ if (split_large_blocks) {
+ ASSERT3U(arc_get_compression(abuf), ==,
+ ZIO_COMPRESS_OFF);
char *buf = abuf->b_data;
while (blksz > 0 && err == 0) {
int n = MIN(blksz, SPA_OLD_MAXBLOCKSIZE);
- err = dump_write(dsp, type, zb->zb_object,
- offset, n, NULL, buf);
+ err = dump_write(dsa, type, zb->zb_object,
+ offset, n, n, NULL, buf);
offset += n;
buf += n;
blksz -= n;
}
} else {
- err = dump_write(dsp, type, zb->zb_object,
- offset, blksz, bp, abuf->b_data);
+ err = dump_write(dsa, type, zb->zb_object, offset,
+ blksz, arc_buf_size(abuf), bp,
+ abuf->b_data);
}
- (void) arc_buf_remove_ref(abuf, &abuf);
+ arc_buf_destroy(abuf, &abuf);
}
ASSERT(err == 0 || err == EINTR);
@@ -577,12 +792,27 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
}
/*
- * Releases dp using the specified tag.
+ * Pop the new data off the queue, and free the old data.
+ */
+static struct send_block_record *
+get_next_record(bqueue_t *bq, struct send_block_record *data)
+{
+ struct send_block_record *tmp = bqueue_dequeue(bq);
+ kmem_free(data, sizeof (*data));
+ return (tmp);
+}
+
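
The send path is now a two-thread pipeline over a bqueue_t: the traversal
thread enqueues send_block_record entries (the queue blocks the producer once
zfs_send_queue_length bytes are in flight) and the original thread consumes
them until it sees the eos_marker sentinel. On error, the real loop sets
to_arg.cancel so send_cb stops enqueueing, then drains to the sentinel so the
producer can exit. The consumer discipline, reduced to a sketch (item_t is a
stand-in; the bqueue_* calls are the real bqueue.h interfaces):

/* Illustrative sketch, not part of the patch. */
typedef struct item {
	boolean_t	eos_marker;	/* sentinel that ends the stream */
	bqueue_node_t	ln;
	/* ... payload ... */
} item_t;

static int
consume(bqueue_t *q)
{
	int err = 0;
	item_t *it = bqueue_dequeue(q);

	while (!it->eos_marker && err == 0) {
		err = 0;	/* process(it) would go here */
		item_t *next = bqueue_dequeue(q);	/* pop the new ... */
		kmem_free(it, sizeof (*it));		/* ... free the old */
		it = next;
	}
	while (!it->eos_marker) {	/* on error: drain to the sentinel */
		item_t *next = bqueue_dequeue(q);
		kmem_free(it, sizeof (*it));
		it = next;
	}
	kmem_free(it, sizeof (*it));
	return (err);
}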
+/*
+ * Actually do the bulk of the work in a zfs send.
+ *
+ * Note: Releases dp using the specified tag.
*/
static int
-dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
- zfs_bookmark_phys_t *fromzb, boolean_t is_clone, boolean_t embedok,
- boolean_t large_block_ok, int outfd, vnode_t *vp, offset_t *off)
+dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
+ zfs_bookmark_phys_t *ancestor_zb, boolean_t is_clone,
+ boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
+ int outfd, uint64_t resumeobj, uint64_t resumeoff,
+ vnode_t *vp, offset_t *off)
{
objset_t *os;
dmu_replay_record_t *drr;
@@ -590,8 +820,12 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
int err;
uint64_t fromtxg = 0;
uint64_t featureflags = 0;
+ struct send_thread_arg to_arg;
+ void *payload = NULL;
+ size_t payload_len = 0;
+ struct send_block_record *to_data;
- err = dmu_objset_from_ds(ds, &os);
+ err = dmu_objset_from_ds(to_ds, &os);
if (err != 0) {
dsl_pool_rele(dp, tag);
return (err);
@@ -603,6 +837,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
DMU_SET_STREAM_HDRTYPE(drr->drr_u.drr_begin.drr_versioninfo,
DMU_SUBSTREAM);
+ bzero(&to_arg, sizeof (to_arg));
+
#ifdef _KERNEL
if (dmu_objset_type(os) == DMU_OST_ZFS) {
uint64_t version;
@@ -617,35 +853,48 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
}
#endif
- if (large_block_ok && ds->ds_large_blocks)
+ if (large_block_ok && to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_BLOCKS])
featureflags |= DMU_BACKUP_FEATURE_LARGE_BLOCKS;
+ if (to_ds->ds_feature_inuse[SPA_FEATURE_LARGE_DNODE])
+ featureflags |= DMU_BACKUP_FEATURE_LARGE_DNODE;
if (embedok &&
spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) {
featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA;
- if (spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
- featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA_LZ4;
- } else {
- embedok = B_FALSE;
+ }
+ if (compressok) {
+ featureflags |= DMU_BACKUP_FEATURE_COMPRESSED;
+ }
+ if ((featureflags &
+ (DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_COMPRESSED)) !=
+ 0 && spa_feature_is_active(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS)) {
+ featureflags |= DMU_BACKUP_FEATURE_LZ4;
+ }
+
+ if (resumeobj != 0 || resumeoff != 0) {
+ featureflags |= DMU_BACKUP_FEATURE_RESUMING;
}
DMU_SET_FEATUREFLAGS(drr->drr_u.drr_begin.drr_versioninfo,
featureflags);
drr->drr_u.drr_begin.drr_creation_time =
- dsl_dataset_phys(ds)->ds_creation_time;
+ dsl_dataset_phys(to_ds)->ds_creation_time;
drr->drr_u.drr_begin.drr_type = dmu_objset_type(os);
if (is_clone)
drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CLONE;
- drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(ds)->ds_guid;
- if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
+ drr->drr_u.drr_begin.drr_toguid = dsl_dataset_phys(to_ds)->ds_guid;
+ if (dsl_dataset_phys(to_ds)->ds_flags & DS_FLAG_CI_DATASET)
drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_CI_DATA;
+ if (zfs_send_set_freerecords_bit)
+ drr->drr_u.drr_begin.drr_flags |= DRR_FLAG_FREERECORDS;
- if (fromzb != NULL) {
- drr->drr_u.drr_begin.drr_fromguid = fromzb->zbm_guid;
- fromtxg = fromzb->zbm_creation_txg;
+ if (ancestor_zb != NULL) {
+ drr->drr_u.drr_begin.drr_fromguid =
+ ancestor_zb->zbm_guid;
+ fromtxg = ancestor_zb->zbm_creation_txg;
}
- dsl_dataset_name(ds, drr->drr_u.drr_begin.drr_toname);
- if (!ds->ds_is_snapshot) {
+ dsl_dataset_name(to_ds, drr->drr_u.drr_begin.drr_toname);
+ if (!to_ds->ds_is_snapshot) {
(void) strlcat(drr->drr_u.drr_begin.drr_toname, "@--head--",
sizeof (drr->drr_u.drr_begin.drr_toname));
}
@@ -658,29 +907,80 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
dsp->dsa_proc = curproc;
dsp->dsa_os = os;
dsp->dsa_off = off;
- dsp->dsa_toguid = dsl_dataset_phys(ds)->ds_guid;
- ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
+ dsp->dsa_toguid = dsl_dataset_phys(to_ds)->ds_guid;
dsp->dsa_pending_op = PENDING_NONE;
- dsp->dsa_incremental = (fromzb != NULL);
dsp->dsa_featureflags = featureflags;
+ dsp->dsa_resume_object = resumeobj;
+ dsp->dsa_resume_offset = resumeoff;
- mutex_enter(&ds->ds_sendstream_lock);
- list_insert_head(&ds->ds_sendstreams, dsp);
- mutex_exit(&ds->ds_sendstream_lock);
+ mutex_enter(&to_ds->ds_sendstream_lock);
+ list_insert_head(&to_ds->ds_sendstreams, dsp);
+ mutex_exit(&to_ds->ds_sendstream_lock);
- dsl_dataset_long_hold(ds, FTAG);
+ dsl_dataset_long_hold(to_ds, FTAG);
dsl_pool_rele(dp, tag);
- if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
+ if (resumeobj != 0 || resumeoff != 0) {
+ dmu_object_info_t to_doi;
+ nvlist_t *nvl;
+ err = dmu_object_info(os, resumeobj, &to_doi);
+ if (err != 0)
+ goto out;
+ SET_BOOKMARK(&to_arg.resume, to_ds->ds_object, resumeobj, 0,
+ resumeoff / to_doi.doi_data_block_size);
+
+ nvl = fnvlist_alloc();
+ fnvlist_add_uint64(nvl, "resume_object", resumeobj);
+ fnvlist_add_uint64(nvl, "resume_offset", resumeoff);
+ payload = fnvlist_pack(nvl, &payload_len);
+ drr->drr_payloadlen = payload_len;
+ fnvlist_free(nvl);
+ }
+
+ err = dump_record(dsp, payload, payload_len);
+ fnvlist_pack_free(payload, payload_len);
+ if (err != 0) {
err = dsp->dsa_err;
goto out;
}
- err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
- backup_cb, dsp);
+ err = bqueue_init(&to_arg.q, zfs_send_queue_length,
+ offsetof(struct send_block_record, ln));
+ to_arg.error_code = 0;
+ to_arg.cancel = B_FALSE;
+ to_arg.ds = to_ds;
+ to_arg.fromtxg = fromtxg;
+ to_arg.flags = TRAVERSE_PRE | TRAVERSE_PREFETCH;
+ (void) thread_create(NULL, 0, send_traverse_thread, &to_arg, 0, curproc,
+ TS_RUN, minclsyspri);
+
+ to_data = bqueue_dequeue(&to_arg.q);
+
+ while (!to_data->eos_marker && err == 0) {
+ err = do_dump(dsp, to_data);
+ to_data = get_next_record(&to_arg.q, to_data);
+ if (issig(JUSTLOOKING) && issig(FORREAL))
+ err = EINTR;
+ }
+
+ if (err != 0) {
+ to_arg.cancel = B_TRUE;
+ while (!to_data->eos_marker) {
+ to_data = get_next_record(&to_arg.q, to_data);
+ }
+ }
+ kmem_free(to_data, sizeof (*to_data));
+
+ bqueue_destroy(&to_arg.q);
+
+ if (err == 0 && to_arg.error_code != 0)
+ err = to_arg.error_code;
+
+ if (err != 0)
+ goto out;
if (dsp->dsa_pending_op != PENDING_NONE)
- if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
+ if (dump_record(dsp, NULL, 0) != 0)
err = SET_ERROR(EINTR);
if (err != 0) {
@@ -694,27 +994,27 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
- if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
+ if (dump_record(dsp, NULL, 0) != 0)
err = dsp->dsa_err;
- goto out;
- }
out:
- mutex_enter(&ds->ds_sendstream_lock);
- list_remove(&ds->ds_sendstreams, dsp);
- mutex_exit(&ds->ds_sendstream_lock);
+ mutex_enter(&to_ds->ds_sendstream_lock);
+ list_remove(&to_ds->ds_sendstreams, dsp);
+ mutex_exit(&to_ds->ds_sendstream_lock);
+
+ VERIFY(err != 0 || (dsp->dsa_sent_begin && dsp->dsa_sent_end));
kmem_free(drr, sizeof (dmu_replay_record_t));
kmem_free(dsp, sizeof (dmu_sendarg_t));
- dsl_dataset_long_rele(ds, FTAG);
+ dsl_dataset_long_rele(to_ds, FTAG);
return (err);
}
int
dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
- boolean_t embedok, boolean_t large_block_ok,
+ boolean_t embedok, boolean_t large_block_ok, boolean_t compressok,
int outfd, vnode_t *vp, offset_t *off)
{
dsl_pool_t *dp;
@@ -751,19 +1051,20 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
is_clone = (fromds->ds_dir != ds->ds_dir);
dsl_dataset_rele(fromds, FTAG);
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
- embedok, large_block_ok, outfd, vp, off);
+ embedok, large_block_ok, compressok, outfd, 0, 0, vp, off);
} else {
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
- embedok, large_block_ok, outfd, vp, off);
+ embedok, large_block_ok, compressok, outfd, 0, 0, vp, off);
}
dsl_dataset_rele(ds, FTAG);
return (err);
}
int
-dmu_send(const char *tosnap, const char *fromsnap,
- boolean_t embedok, boolean_t large_block_ok,
- int outfd, vnode_t *vp, offset_t *off)
+dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
+ boolean_t large_block_ok, boolean_t compressok, int outfd,
+ uint64_t resumeobj, uint64_t resumeoff,
+ vnode_t *vp, offset_t *off)
{
dsl_pool_t *dp;
dsl_dataset_t *ds;
@@ -830,10 +1131,12 @@ dmu_send(const char *tosnap, const char *fromsnap,
return (err);
}
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
- embedok, large_block_ok, outfd, vp, off);
+ embedok, large_block_ok, compressok,
+ outfd, resumeobj, resumeoff, vp, off);
} else {
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
- embedok, large_block_ok, outfd, vp, off);
+ embedok, large_block_ok, compressok,
+ outfd, resumeobj, resumeoff, vp, off);
}
if (owned)
dsl_dataset_disown(ds, FTAG);
@@ -843,33 +1146,53 @@ dmu_send(const char *tosnap, const char *fromsnap,
}
static int
-dmu_adjust_send_estimate_for_indirects(dsl_dataset_t *ds, uint64_t size,
- uint64_t *sizep)
+dmu_adjust_send_estimate_for_indirects(dsl_dataset_t *ds, uint64_t uncompressed,
+ uint64_t compressed, boolean_t stream_compressed, uint64_t *sizep)
{
int err;
+ uint64_t size;
/*
* Assume that space (both on-disk and in-stream) is dominated by
* data. We will adjust for indirect blocks and the copies property,
* but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
*/
+ uint64_t recordsize;
+ uint64_t record_count;
+ objset_t *os;
+ VERIFY0(dmu_objset_from_ds(ds, &os));
+
+ /* Assume all (uncompressed) blocks are recordsize. */
+ if (os->os_phys->os_type == DMU_OST_ZVOL) {
+ err = dsl_prop_get_int_ds(ds,
+ zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &recordsize);
+ } else {
+ err = dsl_prop_get_int_ds(ds,
+ zfs_prop_to_name(ZFS_PROP_RECORDSIZE), &recordsize);
+ }
+ if (err != 0)
+ return (err);
+ record_count = uncompressed / recordsize;
+
+ /*
+ * If we're estimating a send size for a compressed stream, use the
+ * compressed data size to estimate the stream size. Otherwise, use the
+ * uncompressed data size.
+ */
+ size = stream_compressed ? compressed : uncompressed;
+
/*
* Subtract out approximate space used by indirect blocks.
* Assume most space is used by data blocks (non-indirect, non-dnode).
- * Assume all blocks are recordsize. Assume ditto blocks and
- * internal fragmentation counter out compression.
+ * Assume no ditto blocks or internal fragmentation.
*
* Therefore, space used by indirect blocks is sizeof(blkptr_t) per
- * block, which we observe in practice.
+ * block.
*/
- uint64_t recordsize;
- err = dsl_prop_get_int_ds(ds, "recordsize", &recordsize);
- if (err != 0)
- return (err);
- size -= size / recordsize * sizeof (blkptr_t);
+ size -= record_count * sizeof (blkptr_t);
/* Add in the space for the record associated with each block. */
- size += size / recordsize * sizeof (dmu_replay_record_t);
+ size += record_count * sizeof (dmu_replay_record_t);
*sizep = size;
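
The adjustment is linear in the record count. A worked instance under stated
assumptions (recordsize = 128K, sizeof (blkptr_t) = 128, and taking
sizeof (dmu_replay_record_t) as 312 bytes -- an assumed figure): 1 GiB of
uncompressed data gives record_count = 8192, so the estimate loses
8192 * 128 = 1 MiB to the indirect-block correction and gains roughly
8192 * 312 = 2.4 MiB for per-block stream records, a net adjustment of
about +0.14%:

/* Illustrative sketch, not part of the patch. */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t uncompressed = 1ULL << 30;	/* 1 GiB of data */
	uint64_t recordsize = 128 * 1024;	/* ZFS default */
	uint64_t record_count = uncompressed / recordsize;	/* 8192 */
	uint64_t size = uncompressed;

	size -= record_count * 128;	/* assumed sizeof (blkptr_t) */
	size += record_count * 312;	/* assumed sizeof (dmu_replay_record_t) */
	printf("estimate: %llu bytes\n", (unsigned long long)size);
	return (0);
}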
@@ -877,10 +1200,11 @@ dmu_adjust_send_estimate_for_indirects(dsl_dataset_t *ds, uint64_t size,
}
int
-dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
+dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds,
+ boolean_t stream_compressed, uint64_t *sizep)
{
int err;
- uint64_t size;
+ uint64_t uncomp, comp;
ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
@@ -899,33 +1223,45 @@ dmu_send_estimate(dsl_dataset_t *ds, dsl_dataset_t *fromds, uint64_t *sizep)
if (fromds != NULL && !dsl_dataset_is_before(ds, fromds, 0))
return (SET_ERROR(EXDEV));
- /* Get uncompressed size estimate of changed data. */
+ /* Get compressed and uncompressed size estimates of changed data. */
if (fromds == NULL) {
- size = dsl_dataset_phys(ds)->ds_uncompressed_bytes;
+ uncomp = dsl_dataset_phys(ds)->ds_uncompressed_bytes;
+ comp = dsl_dataset_phys(ds)->ds_compressed_bytes;
} else {
- uint64_t used, comp;
+ uint64_t used;
err = dsl_dataset_space_written(fromds, ds,
- &used, &comp, &size);
+ &used, &comp, &uncomp);
if (err != 0)
return (err);
}
- err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
+ err = dmu_adjust_send_estimate_for_indirects(ds, uncomp, comp,
+ stream_compressed, sizep);
+ /*
+ * Add the size of the BEGIN and END records to the estimate.
+ */
+ *sizep += 2 * sizeof (dmu_replay_record_t);
return (err);
}
+struct calculate_send_arg {
+ uint64_t uncompressed;
+ uint64_t compressed;
+};
+
/*
* Simple callback used to traverse the blocks of a snapshot and sum their
- * uncompressed size
+ * uncompressed and compressed sizes.
*/
/* ARGSUSED */
static int
dmu_calculate_send_traversal(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
- uint64_t *spaceptr = arg;
+ struct calculate_send_arg *space = arg;
if (bp != NULL && !BP_IS_HOLE(bp)) {
- *spaceptr += BP_GET_UCSIZE(bp);
+ space->uncompressed += BP_GET_UCSIZE(bp);
+ space->compressed += BP_GET_PSIZE(bp);
}
return (0);
}
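
The callback is the usual traverse-and-accumulate pattern: the caller hands in a struct, and the callback adds each block's logical and physical size to it. A minimal analogue with hypothetical names:

#include <stdint.h>

struct sums {
	uint64_t uncompressed;
	uint64_t compressed;
};

/* hypothetical per-block visitor, mirroring the traversal callback */
static void
visit_block(struct sums *s, uint64_t lsize, uint64_t psize)
{
	s->uncompressed += lsize;	/* logical size, as BP_GET_UCSIZE() */
	s->compressed += psize;		/* physical size, as BP_GET_PSIZE() */
}
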
@@ -937,10 +1273,10 @@ dmu_calculate_send_traversal(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
*/
int
dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg,
- uint64_t *sizep)
+ boolean_t stream_compressed, uint64_t *sizep)
{
int err;
- uint64_t size = 0;
+ struct calculate_send_arg size = { 0 };
ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
@@ -958,10 +1294,12 @@ dmu_send_estimate_from_txg(dsl_dataset_t *ds, uint64_t from_txg,
*/
err = traverse_dataset(ds, from_txg, TRAVERSE_POST,
dmu_calculate_send_traversal, &size);
+
if (err)
return (err);
- err = dmu_adjust_send_estimate_for_indirects(ds, size, sizep);
+ err = dmu_adjust_send_estimate_for_indirects(ds, size.uncompressed,
+ size.compressed, stream_compressed, sizep);
return (err);
}
@@ -1073,6 +1411,7 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
/* already checked */
ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
+ ASSERT(!(featureflags & DMU_BACKUP_FEATURE_RESUMING));
if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
DMU_COMPOUNDSTREAM ||
@@ -1085,34 +1424,42 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
spa_version(dp->dp_spa) < SPA_VERSION_SA)
return (SET_ERROR(ENOTSUP));
+ if (drba->drba_cookie->drc_resumable &&
+ !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EXTENSIBLE_DATASET))
+ return (SET_ERROR(ENOTSUP));
+
/*
* The receiving code doesn't know how to translate a WRITE_EMBEDDED
- * record to a plan WRITE record, so the pool must have the
+ * record to a plain WRITE record, so the pool must have the
* EMBEDDED_DATA feature enabled if the stream has WRITE_EMBEDDED
* records. Same with WRITE_EMBEDDED records that use LZ4 compression.
*/
if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) &&
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA))
return (SET_ERROR(ENOTSUP));
- if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA_LZ4) &&
+ if ((featureflags & DMU_BACKUP_FEATURE_LZ4) &&
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
return (SET_ERROR(ENOTSUP));
/*
* The receiving code doesn't know how to translate large blocks
* to smaller ones, so the pool must have the LARGE_BLOCKS
- * feature enabled if the stream has LARGE_BLOCKS.
+ * feature enabled if the stream has LARGE_BLOCKS. Same with
+ * large dnodes.
*/
if ((featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS))
return (SET_ERROR(ENOTSUP));
+ if ((featureflags & DMU_BACKUP_FEATURE_LARGE_DNODE) &&
+ !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_DNODE))
+ return (SET_ERROR(ENOTSUP));
error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
if (error == 0) {
/* target fs already exists; recv into temp clone */
/* Can't recv a clone into an existing fs */
- if (flags & DRR_FLAG_CLONE) {
+ if (flags & DRR_FLAG_CLONE || drba->drba_origin) {
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EINVAL));
}
@@ -1121,17 +1468,27 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
dsl_dataset_rele(ds, FTAG);
} else if (error == ENOENT) {
/* target fs does not exist; must be a full backup or clone */
- char buf[MAXNAMELEN];
+ char buf[ZFS_MAX_DATASET_NAME_LEN];
/*
* If it's a non-clone incremental, we are missing the
* target fs, so fail the recv.
*/
- if (fromguid != 0 && !(flags & DRR_FLAG_CLONE))
+ if (fromguid != 0 && !(flags & DRR_FLAG_CLONE ||
+ drba->drba_origin))
return (SET_ERROR(ENOENT));
+ /*
+ * If we're receiving a full send as a clone, and it doesn't
+ * contain all the necessary free records and freeobject
+ * records, reject it.
+ */
+ if (fromguid == 0 && drba->drba_origin &&
+ !(flags & DRR_FLAG_FREERECORDS))
+ return (SET_ERROR(EINVAL));
+
/* Open the parent of tofs */
- ASSERT3U(strlen(tofs), <, MAXNAMELEN);
+ ASSERT3U(strlen(tofs), <, sizeof (buf));
(void) strlcpy(buf, tofs, strrchr(tofs, '/') - tofs + 1);
error = dsl_dataset_hold(dp, buf, FTAG, &ds);
if (error != 0)
@@ -1169,7 +1526,8 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EINVAL));
}
- if (dsl_dataset_phys(origin)->ds_guid != fromguid) {
+ if (dsl_dataset_phys(origin)->ds_guid != fromguid &&
+ fromguid != 0) {
dsl_dataset_rele(origin, FTAG);
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(ENODEV));
@@ -1187,15 +1545,16 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
{
dmu_recv_begin_arg_t *drba = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
+ objset_t *mos = dp->dp_meta_objset;
struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
const char *tofs = drba->drba_cookie->drc_tofs;
dsl_dataset_t *ds, *newds;
uint64_t dsobj;
int error;
- uint64_t crflags;
+ uint64_t crflags = 0;
- crflags = (drrb->drr_flags & DRR_FLAG_CI_DATA) ?
- DS_FLAG_CI_DATASET : 0;
+ if (drrb->drr_flags & DRR_FLAG_CI_DATA)
+ crflags |= DS_FLAG_CI_DATASET;
error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
if (error == 0) {
@@ -1207,7 +1566,8 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
}
dsobj = dsl_dataset_create_sync(ds->ds_dir, recv_clone_name,
snap, crflags, drba->drba_cred, tx);
- dsl_dataset_rele(snap, FTAG);
+ if (drba->drba_snapobj != 0)
+ dsl_dataset_rele(snap, FTAG);
dsl_dataset_rele(ds, FTAG);
} else {
dsl_dir_t *dd;
@@ -1232,11 +1592,40 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
}
VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds));
- if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
- DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
- !newds->ds_large_blocks) {
- dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx);
- newds->ds_large_blocks = B_TRUE;
+ if (drba->drba_cookie->drc_resumable) {
+ uint64_t one = 1;
+ uint64_t zero = 0;
+
+ dsl_dataset_zapify(newds, tx);
+ if (drrb->drr_fromguid != 0) {
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_FROMGUID,
+ 8, 1, &drrb->drr_fromguid, tx));
+ }
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_TOGUID,
+ 8, 1, &drrb->drr_toguid, tx));
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_TONAME,
+ 1, strlen(drrb->drr_toname) + 1, drrb->drr_toname, tx));
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_OBJECT,
+ 8, 1, &one, tx));
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_OFFSET,
+ 8, 1, &zero, tx));
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_BYTES,
+ 8, 1, &zero, tx));
+ if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
+ DMU_BACKUP_FEATURE_LARGE_BLOCKS) {
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_LARGEBLOCK,
+ 8, 1, &one, tx));
+ }
+ if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
+ DMU_BACKUP_FEATURE_EMBED_DATA) {
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_EMBEDOK,
+ 8, 1, &one, tx));
+ }
+ if (DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
+ DMU_BACKUP_FEATURE_COMPRESSED) {
+ VERIFY0(zap_add(mos, dsobj, DS_FIELD_RESUME_COMPRESSOK,
+ 8, 1, &one, tx));
+ }
}
dmu_buf_will_dirty(newds->ds_dbuf, tx);
@@ -1246,96 +1635,308 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
* If we actually created a non-clone, we need to create the
* objset in our new dataset.
*/
+ rrw_enter(&newds->ds_bp_rwlock, RW_READER, FTAG);
if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) {
(void) dmu_objset_create_impl(dp->dp_spa,
newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx);
}
+ rrw_exit(&newds->ds_bp_rwlock, FTAG);
drba->drba_cookie->drc_ds = newds;
spa_history_log_internal_ds(newds, "receive", tx, "");
}
-/*
- * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin()
- * succeeds; otherwise we will leak the holds on the datasets.
- */
-int
-dmu_recv_begin(char *tofs, char *tosnap, struct drr_begin *drrb,
- boolean_t force, char *origin, dmu_recv_cookie_t *drc)
+static int
+dmu_recv_resume_begin_check(void *arg, dmu_tx_t *tx)
{
- dmu_recv_begin_arg_t drba = { 0 };
- dmu_replay_record_t *drr;
+ dmu_recv_begin_arg_t *drba = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ struct drr_begin *drrb = drba->drba_cookie->drc_drrb;
+ int error;
+ uint64_t featureflags = DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo);
+ dsl_dataset_t *ds;
+ const char *tofs = drba->drba_cookie->drc_tofs;
+ uint64_t val;
- bzero(drc, sizeof (dmu_recv_cookie_t));
- drc->drc_drrb = drrb;
- drc->drc_tosnap = tosnap;
- drc->drc_tofs = tofs;
- drc->drc_force = force;
- drc->drc_cred = CRED();
+ /* 6 extra bytes for /%recv */
+ char recvname[ZFS_MAX_DATASET_NAME_LEN + 6];
- if (drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC))
- drc->drc_byteswap = B_TRUE;
- else if (drrb->drr_magic != DMU_BACKUP_MAGIC)
+ /* already checked */
+ ASSERT3U(drrb->drr_magic, ==, DMU_BACKUP_MAGIC);
+ ASSERT(featureflags & DMU_BACKUP_FEATURE_RESUMING);
+
+ if (DMU_GET_STREAM_HDRTYPE(drrb->drr_versioninfo) ==
+ DMU_COMPOUNDSTREAM ||
+ drrb->drr_type >= DMU_OST_NUMTYPES)
return (SET_ERROR(EINVAL));
- drr = kmem_zalloc(sizeof (dmu_replay_record_t), KM_SLEEP);
- drr->drr_type = DRR_BEGIN;
- drr->drr_u.drr_begin = *drc->drc_drrb;
- if (drc->drc_byteswap) {
- fletcher_4_incremental_byteswap(drr,
- sizeof (dmu_replay_record_t), &drc->drc_cksum);
- } else {
- fletcher_4_incremental_native(drr,
- sizeof (dmu_replay_record_t), &drc->drc_cksum);
- }
- kmem_free(drr, sizeof (dmu_replay_record_t));
+ /* Verify pool version supports SA if SA_SPILL feature set */
+ if ((featureflags & DMU_BACKUP_FEATURE_SA_SPILL) &&
+ spa_version(dp->dp_spa) < SPA_VERSION_SA)
+ return (SET_ERROR(ENOTSUP));
- if (drc->drc_byteswap) {
- drrb->drr_magic = BSWAP_64(drrb->drr_magic);
- drrb->drr_versioninfo = BSWAP_64(drrb->drr_versioninfo);
- drrb->drr_creation_time = BSWAP_64(drrb->drr_creation_time);
- drrb->drr_type = BSWAP_32(drrb->drr_type);
- drrb->drr_toguid = BSWAP_64(drrb->drr_toguid);
- drrb->drr_fromguid = BSWAP_64(drrb->drr_fromguid);
- }
+ /*
+ * The receiving code doesn't know how to translate a WRITE_EMBEDDED
+ * record to a plain WRITE record, so the pool must have the
+ * EMBEDDED_DATA feature enabled if the stream has WRITE_EMBEDDED
+ * records. Same with WRITE_EMBEDDED records that use LZ4 compression.
+ */
+ if ((featureflags & DMU_BACKUP_FEATURE_EMBED_DATA) &&
+ !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA))
+ return (SET_ERROR(ENOTSUP));
+ if ((featureflags & DMU_BACKUP_FEATURE_LZ4) &&
+ !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
+ return (SET_ERROR(ENOTSUP));
- drba.drba_origin = origin;
- drba.drba_cookie = drc;
- drba.drba_cred = CRED();
+ /*
+ * The receiving code doesn't know how to translate large blocks
+ * to smaller ones, so the pool must have the LARGE_BLOCKS
+ * feature enabled if the stream has LARGE_BLOCKS. Same with
+ * large dnodes.
+ */
+ if ((featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
+ !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS))
+ return (SET_ERROR(ENOTSUP));
+ if ((featureflags & DMU_BACKUP_FEATURE_LARGE_DNODE) &&
+ !spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_DNODE))
+ return (SET_ERROR(ENOTSUP));
- return (dsl_sync_task(tofs, dmu_recv_begin_check, dmu_recv_begin_sync,
- &drba, 5, ZFS_SPACE_CHECK_NORMAL));
-}
+ (void) snprintf(recvname, sizeof (recvname), "%s/%s",
+ tofs, recv_clone_name);
-struct restorearg {
- int err;
- boolean_t byteswap;
- vnode_t *vp;
- char *buf;
- uint64_t voff;
- int bufsize; /* amount of memory allocated for buf */
- zio_cksum_t cksum;
- avl_tree_t *guid_to_ds_map;
-};
+ if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) {
+ /* %recv does not exist; continue in tofs */
+ error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
+ if (error != 0)
+ return (error);
+ }
-typedef struct guid_map_entry {
- uint64_t guid;
- dsl_dataset_t *gme_ds;
- avl_node_t avlnode;
-} guid_map_entry_t;
+ /* check that ds is marked inconsistent */
+ if (!DS_IS_INCONSISTENT(ds)) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EINVAL));
+ }
+
+ /* check that there is resuming data, and that the toguid matches */
+ if (!dsl_dataset_is_zapified(ds)) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EINVAL));
+ }
+ error = zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val);
+ if (error != 0 || drrb->drr_toguid != val) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EINVAL));
+ }
+
+ /*
+ * Check if the receive is still running. If so, it will be owned.
+ * Note that nothing else can own the dataset (e.g. after the receive
+ * fails) because it will be marked inconsistent.
+ */
+ if (dsl_dataset_has_owner(ds)) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EBUSY));
+ }
+
+ /* There should not be any snapshots of this fs yet. */
+ if (ds->ds_prev != NULL && ds->ds_prev->ds_dir == ds->ds_dir) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EINVAL));
+ }
+
+ /*
+ * Note: resume point will be checked when we process the first WRITE
+ * record.
+ */
+
+ /* check that the origin matches */
+ val = 0;
+ (void) zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val);
+ if (drrb->drr_fromguid != val) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EINVAL));
+ }
+
+ dsl_dataset_rele(ds, FTAG);
+ return (0);
+}
+
+static void
+dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx)
+{
+ dmu_recv_begin_arg_t *drba = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ const char *tofs = drba->drba_cookie->drc_tofs;
+ dsl_dataset_t *ds;
+ uint64_t dsobj;
+ /* 6 extra bytes for /%recv */
+ char recvname[ZFS_MAX_DATASET_NAME_LEN + 6];
+
+ (void) snprintf(recvname, sizeof (recvname), "%s/%s",
+ tofs, recv_clone_name);
+
+ if (dsl_dataset_hold(dp, recvname, FTAG, &ds) != 0) {
+ /* %recv does not exist; continue in tofs */
+ VERIFY0(dsl_dataset_hold(dp, tofs, FTAG, &ds));
+ drba->drba_cookie->drc_newfs = B_TRUE;
+ }
+
+ /* clear the inconsistent flag so that we can own it */
+ ASSERT(DS_IS_INCONSISTENT(ds));
+ dmu_buf_will_dirty(ds->ds_dbuf, tx);
+ dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT;
+ dsobj = ds->ds_object;
+ dsl_dataset_rele(ds, FTAG);
+
+ VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &ds));
+
+ dmu_buf_will_dirty(ds->ds_dbuf, tx);
+ dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;
+
+ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
+ ASSERT(!BP_IS_HOLE(dsl_dataset_get_blkptr(ds)));
+ rrw_exit(&ds->ds_bp_rwlock, FTAG);
+
+ drba->drba_cookie->drc_ds = ds;
+
+ spa_history_log_internal_ds(ds, "resume receive", tx, "");
+}
+
+/*
+ * NB: callers *MUST* call dmu_recv_stream() if dmu_recv_begin()
+ * succeeds; otherwise we will leak the holds on the datasets.
+ */
+int
+dmu_recv_begin(char *tofs, char *tosnap, dmu_replay_record_t *drr_begin,
+ boolean_t force, boolean_t resumable, char *origin, dmu_recv_cookie_t *drc)
+{
+ dmu_recv_begin_arg_t drba = { 0 };
+
+ bzero(drc, sizeof (dmu_recv_cookie_t));
+ drc->drc_drr_begin = drr_begin;
+ drc->drc_drrb = &drr_begin->drr_u.drr_begin;
+ drc->drc_tosnap = tosnap;
+ drc->drc_tofs = tofs;
+ drc->drc_force = force;
+ drc->drc_resumable = resumable;
+ drc->drc_cred = CRED();
+ drc->drc_clone = (origin != NULL);
+
+ if (drc->drc_drrb->drr_magic == BSWAP_64(DMU_BACKUP_MAGIC)) {
+ drc->drc_byteswap = B_TRUE;
+ (void) fletcher_4_incremental_byteswap(drr_begin,
+ sizeof (dmu_replay_record_t), &drc->drc_cksum);
+ byteswap_record(drr_begin);
+ } else if (drc->drc_drrb->drr_magic == DMU_BACKUP_MAGIC) {
+ (void) fletcher_4_incremental_native(drr_begin,
+ sizeof (dmu_replay_record_t), &drc->drc_cksum);
+ } else {
+ return (SET_ERROR(EINVAL));
+ }
+
+ drba.drba_origin = origin;
+ drba.drba_cookie = drc;
+ drba.drba_cred = CRED();
+
+ if (DMU_GET_FEATUREFLAGS(drc->drc_drrb->drr_versioninfo) &
+ DMU_BACKUP_FEATURE_RESUMING) {
+ return (dsl_sync_task(tofs,
+ dmu_recv_resume_begin_check, dmu_recv_resume_begin_sync,
+ &drba, 5, ZFS_SPACE_CHECK_NORMAL));
+ } else {
+ return (dsl_sync_task(tofs,
+ dmu_recv_begin_check, dmu_recv_begin_sync,
+ &drba, 5, ZFS_SPACE_CHECK_NORMAL));
+ }
+}
+
+struct receive_record_arg {
+ dmu_replay_record_t header;
+ void *payload; /* Pointer to a buffer containing the payload */
+ /*
+ * If the record is a write, pointer to the arc_buf_t containing the
+ * payload.
+ */
+ arc_buf_t *write_buf;
+ int payload_size;
+ uint64_t bytes_read; /* bytes read from stream when record created */
+ boolean_t eos_marker; /* Marks the end of the stream */
+ bqueue_node_t node;
+};
+
+struct receive_writer_arg {
+ objset_t *os;
+ boolean_t byteswap;
+ bqueue_t q;
+
+ /*
+ * These three args are used to signal to the main thread that we're
+ * done.
+ */
+ kmutex_t mutex;
+ kcondvar_t cv;
+ boolean_t done;
+
+ int err;
+ /* A map from guid to dataset to help handle dedup'd streams. */
+ avl_tree_t *guid_to_ds_map;
+ boolean_t resumable;
+ uint64_t last_object;
+ uint64_t last_offset;
+ uint64_t max_object; /* highest object ID referenced in stream */
+ uint64_t bytes_read; /* bytes read when current record created */
+};
+
+struct objlist {
+ list_t list; /* List of struct receive_objnode. */
+ /*
+ * Last object looked up. Used to assert that objects are being looked
+ * up in ascending order.
+ */
+ uint64_t last_lookup;
+};
+
+struct receive_objnode {
+ list_node_t node;
+ uint64_t object;
+};
+
+struct receive_arg {
+ objset_t *os;
+ vnode_t *vp; /* The vnode to read the stream from */
+ uint64_t voff; /* The current offset in the stream */
+ uint64_t bytes_read;
+ /*
+ * A record that has had its payload read in, but hasn't yet been handed
+ * off to the worker thread.
+ */
+ struct receive_record_arg *rrd;
+ /* A record that has had its header read in, but not its payload. */
+ struct receive_record_arg *next_rrd;
+ zio_cksum_t cksum;
+ zio_cksum_t prev_cksum;
+ int err;
+ boolean_t byteswap;
+ /* Sorted list of objects not to issue prefetches for. */
+ struct objlist ignore_objlist;
+};
+
+typedef struct guid_map_entry {
+ uint64_t guid;
+ dsl_dataset_t *gme_ds;
+ avl_node_t avlnode;
+} guid_map_entry_t;
static int
guid_compare(const void *arg1, const void *arg2)
{
- const guid_map_entry_t *gmep1 = arg1;
- const guid_map_entry_t *gmep2 = arg2;
+ const guid_map_entry_t *gmep1 = (const guid_map_entry_t *)arg1;
+ const guid_map_entry_t *gmep2 = (const guid_map_entry_t *)arg2;
- if (gmep1->guid < gmep2->guid)
- return (-1);
- else if (gmep1->guid > gmep2->guid)
- return (1);
- return (0);
+ return (AVL_CMP(gmep1->guid, gmep2->guid));
}
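
AVL_CMP replaces the if/else chain with a branch-free three-way compare; assuming the usual definition in the ZFS headers, it is equivalent to:

/* assumed expansion: evaluates to -1, 0, or 1 */
#define	AVL_CMP(a, b)	(((a) > (b)) - ((a) < (b)))
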
static void
@@ -1354,49 +1955,52 @@ free_guid_map_onexit(void *arg)
kmem_free(ca, sizeof (avl_tree_t));
}
-static void *
-restore_read(struct restorearg *ra, int len, char *buf)
+static int
+receive_read(struct receive_arg *ra, int len, void *buf)
{
int done = 0;
- if (buf == NULL)
- buf = ra->buf;
-
- /* some things will require 8-byte alignment, so everything must */
+ /*
+ * The code doesn't rely on this (lengths being multiples of 8). See
+ * comment in dump_bytes.
+ */
ASSERT0(len % 8);
- ASSERT3U(len, <=, ra->bufsize);
while (done < len) {
ssize_t resid;
ra->err = vn_rdwr(UIO_READ, ra->vp,
- buf + done, len - done,
+ (char *)buf + done, len - done,
ra->voff, UIO_SYSSPACE, FAPPEND,
RLIM64_INFINITY, CRED(), &resid);
- if (resid == len - done)
- ra->err = SET_ERROR(EINVAL);
+ if (resid == len - done) {
+ /*
+ * Note: ECKSUM indicates that the receive
+ * was interrupted and can potentially be resumed.
+ */
+ ra->err = SET_ERROR(ECKSUM);
+ }
ra->voff += len - done - resid;
done = len - resid;
if (ra->err != 0)
- return (NULL);
+ return (ra->err);
}
+ ra->bytes_read += len;
+
ASSERT3U(done, ==, len);
- if (ra->byteswap)
- fletcher_4_incremental_byteswap(buf, len, &ra->cksum);
- else
- fletcher_4_incremental_native(buf, len, &ra->cksum);
- return (buf);
+ return (0);
}
noinline static void
-backup_byteswap(dmu_replay_record_t *drr)
+byteswap_record(dmu_replay_record_t *drr)
{
#define DO64(X) (drr->drr_u.X = BSWAP_64(drr->drr_u.X))
#define DO32(X) (drr->drr_u.X = BSWAP_32(drr->drr_u.X))
drr->drr_type = BSWAP_32(drr->drr_type);
drr->drr_payloadlen = BSWAP_32(drr->drr_payloadlen);
+
switch (drr->drr_type) {
case DRR_BEGIN:
DO64(drr_begin.drr_magic);
@@ -1424,13 +2028,11 @@ backup_byteswap(dmu_replay_record_t *drr)
DO64(drr_write.drr_object);
DO32(drr_write.drr_type);
DO64(drr_write.drr_offset);
- DO64(drr_write.drr_length);
+ DO64(drr_write.drr_logical_size);
DO64(drr_write.drr_toguid);
- DO64(drr_write.drr_key.ddk_cksum.zc_word[0]);
- DO64(drr_write.drr_key.ddk_cksum.zc_word[1]);
- DO64(drr_write.drr_key.ddk_cksum.zc_word[2]);
- DO64(drr_write.drr_key.ddk_cksum.zc_word[3]);
+ ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write.drr_key.ddk_cksum);
DO64(drr_write.drr_key.ddk_prop);
+ DO64(drr_write.drr_compressed_size);
break;
case DRR_WRITE_BYREF:
DO64(drr_write_byref.drr_object);
@@ -1440,10 +2042,8 @@ backup_byteswap(dmu_replay_record_t *drr)
DO64(drr_write_byref.drr_refguid);
DO64(drr_write_byref.drr_refobject);
DO64(drr_write_byref.drr_refoffset);
- DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[0]);
- DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[1]);
- DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[2]);
- DO64(drr_write_byref.drr_key.ddk_cksum.zc_word[3]);
+ ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_write_byref.
+ drr_key.ddk_cksum);
DO64(drr_write_byref.drr_key.ddk_prop);
break;
case DRR_WRITE_EMBEDDED:
@@ -1466,15 +2066,17 @@ backup_byteswap(dmu_replay_record_t *drr)
DO64(drr_spill.drr_toguid);
break;
case DRR_END:
- DO64(drr_end.drr_checksum.zc_word[0]);
- DO64(drr_end.drr_checksum.zc_word[1]);
- DO64(drr_end.drr_checksum.zc_word[2]);
- DO64(drr_end.drr_checksum.zc_word[3]);
DO64(drr_end.drr_toguid);
+ ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_end.drr_checksum);
break;
default:
break;
}
+
+ if (drr->drr_type != DRR_BEGIN) {
+ ZIO_CHECKSUM_BSWAP(&drr->drr_u.drr_checksum.drr_checksum);
+ }
+
#undef DO64
#undef DO32
}
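
ZIO_CHECKSUM_BSWAP folds the four repeated DO64 calls on zc_word[0..3] into one macro; assuming the conventional definition, it behaves like:

/* sketch of the assumed macro behavior */
static void
zio_checksum_bswap_sketch(zio_cksum_t *zcp)
{
	int i;

	for (i = 0; i < 4; i++)
		zcp->zc_word[i] = BSWAP_64(zcp->zc_word[i]);
}
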
@@ -1486,16 +2088,54 @@ deduce_nblkptr(dmu_object_type_t bonus_type, uint64_t bonus_size)
return (1);
} else {
return (1 +
- ((DN_MAX_BONUSLEN - bonus_size) >> SPA_BLKPTRSHIFT));
+ ((DN_OLD_MAX_BONUSLEN -
+ MIN(DN_OLD_MAX_BONUSLEN, bonus_size)) >> SPA_BLKPTRSHIFT));
}
}
+static void
+save_resume_state(struct receive_writer_arg *rwa,
+ uint64_t object, uint64_t offset, dmu_tx_t *tx)
+{
+ int txgoff = dmu_tx_get_txg(tx) & TXG_MASK;
+
+ if (!rwa->resumable)
+ return;
+
+ /*
+ * We use ds_resume_bytes[] != 0 to indicate that we need to
+ * update this on disk, so it must not be 0.
+ */
+ ASSERT(rwa->bytes_read != 0);
+
+ /*
+ * We only resume from write records, which have a valid
+ * (non-meta-dnode) object number.
+ */
+ ASSERT(object != 0);
+
+ /*
+ * For resuming to work correctly, we must receive records in order,
+ * sorted by object,offset. This is checked by the callers, but
+ * assert it here for good measure.
+ */
+ ASSERT3U(object, >=, rwa->os->os_dsl_dataset->ds_resume_object[txgoff]);
+ ASSERT(object != rwa->os->os_dsl_dataset->ds_resume_object[txgoff] ||
+ offset >= rwa->os->os_dsl_dataset->ds_resume_offset[txgoff]);
+ ASSERT3U(rwa->bytes_read, >=,
+ rwa->os->os_dsl_dataset->ds_resume_bytes[txgoff]);
+
+ rwa->os->os_dsl_dataset->ds_resume_object[txgoff] = object;
+ rwa->os->os_dsl_dataset->ds_resume_offset[txgoff] = offset;
+ rwa->os->os_dsl_dataset->ds_resume_bytes[txgoff] = rwa->bytes_read;
+}
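
The ds_resume_* fields are per-txg arrays indexed by txg & TXG_MASK, so each open txg tracks the last (object, offset, bytes) triple it will sync out; a quick sketch of the slotting, assuming a TXG_SIZE of 4:

/* assumed: TXG_SIZE == 4, so TXG_MASK == 3 */
uint64_t txg = 1234;
int txgoff = txg & 3;	/* slot 2 */
/*
 * Later records in the same txg overwrite the slot, which is fine:
 * only the values current when that txg syncs ever reach disk.
 */
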
+
noinline static int
-restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
+receive_object(struct receive_writer_arg *rwa, struct drr_object *drro,
+ void *data)
{
dmu_object_info_t doi;
dmu_tx_t *tx;
- void *data = NULL;
uint64_t object;
int err;
@@ -1506,22 +2146,22 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
drro->drr_blksz < SPA_MINBLOCKSIZE ||
- drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(os)) ||
- drro->drr_bonuslen > DN_MAX_BONUSLEN) {
+ drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(rwa->os)) ||
+ drro->drr_bonuslen >
+ DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(rwa->os))) ||
+ drro->drr_dn_slots >
+ (spa_maxdnodesize(dmu_objset_spa(rwa->os)) >> DNODE_SHIFT)) {
return (SET_ERROR(EINVAL));
}
- err = dmu_object_info(os, drro->drr_object, &doi);
+ err = dmu_object_info(rwa->os, drro->drr_object, &doi);
if (err != 0 && err != ENOENT)
return (SET_ERROR(EINVAL));
object = err == 0 ? drro->drr_object : DMU_NEW_OBJECT;
- if (drro->drr_bonuslen) {
- data = restore_read(ra, P2ROUNDUP(drro->drr_bonuslen, 8), NULL);
- if (ra->err != 0)
- return (ra->err);
- }
+ if (drro->drr_object > rwa->max_object)
+ rwa->max_object = drro->drr_object;
/*
* If we are losing blkptrs or changing the block size this must
@@ -1536,14 +2176,14 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
if (drro->drr_blksz != doi.doi_data_block_size ||
nblkptr < doi.doi_nblkptr) {
- err = dmu_free_long_range(os, drro->drr_object,
+ err = dmu_free_long_range(rwa->os, drro->drr_object,
0, DMU_OBJECT_END);
if (err != 0)
return (SET_ERROR(EINVAL));
}
}
- tx = dmu_tx_create(os);
+ tx = dmu_tx_create(rwa->os);
dmu_tx_hold_bonus(tx, object);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err != 0) {
@@ -1553,15 +2193,16 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
if (object == DMU_NEW_OBJECT) {
/* currently free, want to be allocated */
- err = dmu_object_claim(os, drro->drr_object,
+ err = dmu_object_claim_dnsize(rwa->os, drro->drr_object,
drro->drr_type, drro->drr_blksz,
- drro->drr_bonustype, drro->drr_bonuslen, tx);
+ drro->drr_bonustype, drro->drr_bonuslen,
+ drro->drr_dn_slots << DNODE_SHIFT, tx);
} else if (drro->drr_type != doi.doi_type ||
drro->drr_blksz != doi.doi_data_block_size ||
drro->drr_bonustype != doi.doi_bonus_type ||
drro->drr_bonuslen != doi.doi_bonus_size) {
/* currently allocated, but with different properties */
- err = dmu_object_reclaim(os, drro->drr_object,
+ err = dmu_object_reclaim(rwa->os, drro->drr_object,
drro->drr_type, drro->drr_blksz,
drro->drr_bonustype, drro->drr_bonuslen, tx);
}
@@ -1570,19 +2211,20 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
return (SET_ERROR(EINVAL));
}
- dmu_object_set_checksum(os, drro->drr_object, drro->drr_checksumtype,
- tx);
- dmu_object_set_compress(os, drro->drr_object, drro->drr_compress, tx);
+ dmu_object_set_checksum(rwa->os, drro->drr_object,
+ drro->drr_checksumtype, tx);
+ dmu_object_set_compress(rwa->os, drro->drr_object,
+ drro->drr_compress, tx);
if (data != NULL) {
dmu_buf_t *db;
- VERIFY(0 == dmu_bonus_hold(os, drro->drr_object, FTAG, &db));
+ VERIFY0(dmu_bonus_hold(rwa->os, drro->drr_object, FTAG, &db));
dmu_buf_will_dirty(db, tx);
ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
bcopy(data, db->db_data, drro->drr_bonuslen);
- if (ra->byteswap) {
+ if (rwa->byteswap) {
dmu_object_byteswap_t byteswap =
DMU_OT_BYTESWAP(drro->drr_bonustype);
dmu_ot_byteswap[byteswap].ob_func(db->db_data,
@@ -1591,82 +2233,106 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
dmu_buf_rele(db, FTAG);
}
dmu_tx_commit(tx);
+
return (0);
}
/* ARGSUSED */
noinline static int
-restore_freeobjects(struct restorearg *ra, objset_t *os,
+receive_freeobjects(struct receive_writer_arg *rwa,
struct drr_freeobjects *drrfo)
{
uint64_t obj;
+ int next_err = 0;
if (drrfo->drr_firstobj + drrfo->drr_numobjs < drrfo->drr_firstobj)
return (SET_ERROR(EINVAL));
- for (obj = drrfo->drr_firstobj;
- obj < drrfo->drr_firstobj + drrfo->drr_numobjs;
- (void) dmu_object_next(os, &obj, FALSE, 0)) {
+ for (obj = drrfo->drr_firstobj == 0 ? 1 : drrfo->drr_firstobj;
+ obj < drrfo->drr_firstobj + drrfo->drr_numobjs && next_err == 0;
+ next_err = dmu_object_next(rwa->os, &obj, FALSE, 0)) {
+ dmu_object_info_t doi;
int err;
- if (dmu_object_info(os, obj, NULL) != 0)
+ err = dmu_object_info(rwa->os, obj, &doi);
+ if (err == ENOENT)
continue;
+ else if (err != 0)
+ return (err);
- err = dmu_free_long_object(os, obj);
+ err = dmu_free_long_object(rwa->os, obj);
if (err != 0)
return (err);
+
+ if (obj > rwa->max_object)
+ rwa->max_object = obj;
}
+ if (next_err != ESRCH)
+ return (next_err);
return (0);
}
noinline static int
-restore_write(struct restorearg *ra, objset_t *os,
- struct drr_write *drrw)
+receive_write(struct receive_writer_arg *rwa, struct drr_write *drrw,
+ arc_buf_t *abuf)
{
dmu_tx_t *tx;
dmu_buf_t *bonus;
- arc_buf_t *abuf;
- void *data;
int err;
- if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
+ if (drrw->drr_offset + drrw->drr_logical_size < drrw->drr_offset ||
!DMU_OT_IS_VALID(drrw->drr_type))
return (SET_ERROR(EINVAL));
- if (dmu_object_info(os, drrw->drr_object, NULL) != 0)
- return (SET_ERROR(EINVAL));
-
- if (dmu_bonus_hold(os, drrw->drr_object, FTAG, &bonus) != 0)
+ /*
+ * For resuming to work, records must be in increasing order
+ * by (object, offset).
+ */
+ if (drrw->drr_object < rwa->last_object ||
+ (drrw->drr_object == rwa->last_object &&
+ drrw->drr_offset < rwa->last_offset)) {
return (SET_ERROR(EINVAL));
+ }
+ rwa->last_object = drrw->drr_object;
+ rwa->last_offset = drrw->drr_offset;
- abuf = dmu_request_arcbuf(bonus, drrw->drr_length);
+ if (rwa->last_object > rwa->max_object)
+ rwa->max_object = rwa->last_object;
- data = restore_read(ra, drrw->drr_length, abuf->b_data);
- if (data == NULL) {
- dmu_return_arcbuf(abuf);
- dmu_buf_rele(bonus, FTAG);
- return (ra->err);
- }
+ if (dmu_object_info(rwa->os, drrw->drr_object, NULL) != 0)
+ return (SET_ERROR(EINVAL));
- tx = dmu_tx_create(os);
+ tx = dmu_tx_create(rwa->os);
dmu_tx_hold_write(tx, drrw->drr_object,
- drrw->drr_offset, drrw->drr_length);
+ drrw->drr_offset, drrw->drr_logical_size);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err != 0) {
- dmu_return_arcbuf(abuf);
- dmu_buf_rele(bonus, FTAG);
dmu_tx_abort(tx);
return (err);
}
- if (ra->byteswap) {
+ if (rwa->byteswap) {
dmu_object_byteswap_t byteswap =
DMU_OT_BYTESWAP(drrw->drr_type);
- dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length);
+ dmu_ot_byteswap[byteswap].ob_func(abuf->b_data,
+ DRR_WRITE_PAYLOAD_SIZE(drrw));
}
+
+ /* use the bonus buf to look up the dnode in dmu_assign_arcbuf */
+ if (dmu_bonus_hold(rwa->os, drrw->drr_object, FTAG, &bonus) != 0)
+ return (SET_ERROR(EINVAL));
dmu_assign_arcbuf(bonus, drrw->drr_offset, abuf, tx);
+
+ /*
+ * Note: If the receive fails, we want the resume stream to start
+ * with the same record that we last successfully received (as opposed
+ * to the next record), so that we can verify that we are
+ * resuming from the correct location.
+ */
+ save_resume_state(rwa, drrw->drr_object, drrw->drr_offset, tx);
dmu_tx_commit(tx);
dmu_buf_rele(bonus, FTAG);
+
return (0);
}
@@ -1678,7 +2344,7 @@ restore_write(struct restorearg *ra, objset_t *os,
* data from the stream to fulfill this write.
*/
static int
-restore_write_byref(struct restorearg *ra, objset_t *os,
+receive_write_byref(struct receive_writer_arg *rwa,
struct drr_write_byref *drrwbr)
{
dmu_tx_t *tx;
@@ -1698,22 +2364,25 @@ restore_write_byref(struct restorearg *ra, objset_t *os,
*/
if (drrwbr->drr_toguid != drrwbr->drr_refguid) {
gmesrch.guid = drrwbr->drr_refguid;
- if ((gmep = avl_find(ra->guid_to_ds_map, &gmesrch,
+ if ((gmep = avl_find(rwa->guid_to_ds_map, &gmesrch,
&where)) == NULL) {
return (SET_ERROR(EINVAL));
}
if (dmu_objset_from_ds(gmep->gme_ds, &ref_os))
return (SET_ERROR(EINVAL));
} else {
- ref_os = os;
+ ref_os = rwa->os;
}
+ if (drrwbr->drr_object > rwa->max_object)
+ rwa->max_object = drrwbr->drr_object;
+
err = dmu_buf_hold(ref_os, drrwbr->drr_refobject,
drrwbr->drr_refoffset, FTAG, &dbp, DMU_READ_PREFETCH);
if (err != 0)
return (err);
- tx = dmu_tx_create(os);
+ tx = dmu_tx_create(rwa->os);
dmu_tx_hold_write(tx, drrwbr->drr_object,
drrwbr->drr_offset, drrwbr->drr_length);
@@ -1722,81 +2391,83 @@ restore_write_byref(struct restorearg *ra, objset_t *os,
dmu_tx_abort(tx);
return (err);
}
- dmu_write(os, drrwbr->drr_object,
+ dmu_write(rwa->os, drrwbr->drr_object,
drrwbr->drr_offset, drrwbr->drr_length, dbp->db_data, tx);
dmu_buf_rele(dbp, FTAG);
+
+ /* See comment in receive_write. */
+ save_resume_state(rwa, drrwbr->drr_object, drrwbr->drr_offset, tx);
dmu_tx_commit(tx);
return (0);
}
static int
-restore_write_embedded(struct restorearg *ra, objset_t *os,
- struct drr_write_embedded *drrwnp)
+receive_write_embedded(struct receive_writer_arg *rwa,
+ struct drr_write_embedded *drrwe, void *data)
{
dmu_tx_t *tx;
int err;
- void *data;
- if (drrwnp->drr_offset + drrwnp->drr_length < drrwnp->drr_offset)
+ if (drrwe->drr_offset + drrwe->drr_length < drrwe->drr_offset)
return (EINVAL);
- if (drrwnp->drr_psize > BPE_PAYLOAD_SIZE)
+ if (drrwe->drr_psize > BPE_PAYLOAD_SIZE)
return (EINVAL);
- if (drrwnp->drr_etype >= NUM_BP_EMBEDDED_TYPES)
+ if (drrwe->drr_etype >= NUM_BP_EMBEDDED_TYPES)
return (EINVAL);
- if (drrwnp->drr_compression >= ZIO_COMPRESS_FUNCTIONS)
+ if (drrwe->drr_compression >= ZIO_COMPRESS_FUNCTIONS)
return (EINVAL);
- data = restore_read(ra, P2ROUNDUP(drrwnp->drr_psize, 8), NULL);
- if (data == NULL)
- return (ra->err);
+ if (drrwe->drr_object > rwa->max_object)
+ rwa->max_object = drrwe->drr_object;
- tx = dmu_tx_create(os);
+ tx = dmu_tx_create(rwa->os);
- dmu_tx_hold_write(tx, drrwnp->drr_object,
- drrwnp->drr_offset, drrwnp->drr_length);
+ dmu_tx_hold_write(tx, drrwe->drr_object,
+ drrwe->drr_offset, drrwe->drr_length);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err != 0) {
dmu_tx_abort(tx);
return (err);
}
- dmu_write_embedded(os, drrwnp->drr_object,
- drrwnp->drr_offset, data, drrwnp->drr_etype,
- drrwnp->drr_compression, drrwnp->drr_lsize, drrwnp->drr_psize,
- ra->byteswap ^ ZFS_HOST_BYTEORDER, tx);
+ dmu_write_embedded(rwa->os, drrwe->drr_object,
+ drrwe->drr_offset, data, drrwe->drr_etype,
+ drrwe->drr_compression, drrwe->drr_lsize, drrwe->drr_psize,
+ rwa->byteswap ^ ZFS_HOST_BYTEORDER, tx);
+ /* See comment in receive_write. */
+ save_resume_state(rwa, drrwe->drr_object, drrwe->drr_offset, tx);
dmu_tx_commit(tx);
return (0);
}
static int
-restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs)
+receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
+ void *data)
{
dmu_tx_t *tx;
- void *data;
dmu_buf_t *db, *db_spill;
int err;
if (drrs->drr_length < SPA_MINBLOCKSIZE ||
- drrs->drr_length > spa_maxblocksize(dmu_objset_spa(os)))
+ drrs->drr_length > spa_maxblocksize(dmu_objset_spa(rwa->os)))
return (SET_ERROR(EINVAL));
- data = restore_read(ra, drrs->drr_length, NULL);
- if (data == NULL)
- return (ra->err);
-
- if (dmu_object_info(os, drrs->drr_object, NULL) != 0)
+ if (dmu_object_info(rwa->os, drrs->drr_object, NULL) != 0)
return (SET_ERROR(EINVAL));
- VERIFY(0 == dmu_bonus_hold(os, drrs->drr_object, FTAG, &db));
+ if (drrs->drr_object > rwa->max_object)
+ rwa->max_object = drrs->drr_object;
+
+ VERIFY0(dmu_bonus_hold(rwa->os, drrs->drr_object, FTAG, &db));
if ((err = dmu_spill_hold_by_bonus(db, FTAG, &db_spill)) != 0) {
dmu_buf_rele(db, FTAG);
return (err);
}
- tx = dmu_tx_create(os);
+ tx = dmu_tx_create(rwa->os);
dmu_tx_hold_spill(tx, db->db_object);
@@ -1823,8 +2494,7 @@ restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs)
/* ARGSUSED */
noinline static int
-restore_free(struct restorearg *ra, objset_t *os,
- struct drr_free *drrf)
+receive_free(struct receive_writer_arg *rwa, struct drr_free *drrf)
{
int err;
@@ -1832,11 +2502,15 @@ restore_free(struct restorearg *ra, objset_t *os,
drrf->drr_offset + drrf->drr_length < drrf->drr_offset)
return (SET_ERROR(EINVAL));
- if (dmu_object_info(os, drrf->drr_object, NULL) != 0)
+ if (dmu_object_info(rwa->os, drrf->drr_object, NULL) != 0)
return (SET_ERROR(EINVAL));
- err = dmu_free_long_range(os, drrf->drr_object,
+ if (drrf->drr_object > rwa->max_object)
+ rwa->max_object = drrf->drr_object;
+
+ err = dmu_free_long_range(rwa->os, drrf->drr_object,
drrf->drr_offset, drrf->drr_length);
+
return (err);
}
@@ -1844,31 +2518,587 @@ restore_free(struct restorearg *ra, objset_t *os,
static void
dmu_recv_cleanup_ds(dmu_recv_cookie_t *drc)
{
- char name[MAXNAMELEN];
- dsl_dataset_name(drc->drc_ds, name);
- dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
- (void) dsl_destroy_head(name);
+ if (drc->drc_resumable) {
+ /* wait for our resume state to be written to disk */
+ txg_wait_synced(drc->drc_ds->ds_dir->dd_pool, 0);
+ dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
+ } else {
+ char name[ZFS_MAX_DATASET_NAME_LEN];
+ dsl_dataset_name(drc->drc_ds, name);
+ dsl_dataset_disown(drc->drc_ds, dmu_recv_tag);
+ (void) dsl_destroy_head(name);
+ }
+}
+
+static void
+receive_cksum(struct receive_arg *ra, int len, void *buf)
+{
+ if (ra->byteswap) {
+ (void) fletcher_4_incremental_byteswap(buf, len, &ra->cksum);
+ } else {
+ (void) fletcher_4_incremental_native(buf, len, &ra->cksum);
+ }
+}
+
+/*
+ * Read the payload into a buffer of size len, and update the current record's
+ * payload field.
+ * Allocate ra->next_rrd and read the next record's header into
+ * ra->next_rrd->header.
+ * Verify checksum of payload and next record.
+ */
+static int
+receive_read_payload_and_next_header(struct receive_arg *ra, int len, void *buf)
+{
+ int err;
+ zio_cksum_t cksum_orig;
+ zio_cksum_t *cksump;
+
+ if (len != 0) {
+ ASSERT3U(len, <=, SPA_MAXBLOCKSIZE);
+ err = receive_read(ra, len, buf);
+ if (err != 0)
+ return (err);
+ receive_cksum(ra, len, buf);
+
+ /* note: rrd is NULL when reading the begin record's payload */
+ if (ra->rrd != NULL) {
+ ra->rrd->payload = buf;
+ ra->rrd->payload_size = len;
+ ra->rrd->bytes_read = ra->bytes_read;
+ }
+ }
+
+ ra->prev_cksum = ra->cksum;
+
+ ra->next_rrd = kmem_zalloc(sizeof (*ra->next_rrd), KM_SLEEP);
+ err = receive_read(ra, sizeof (ra->next_rrd->header),
+ &ra->next_rrd->header);
+ ra->next_rrd->bytes_read = ra->bytes_read;
+ if (err != 0) {
+ kmem_free(ra->next_rrd, sizeof (*ra->next_rrd));
+ ra->next_rrd = NULL;
+ return (err);
+ }
+ if (ra->next_rrd->header.drr_type == DRR_BEGIN) {
+ kmem_free(ra->next_rrd, sizeof (*ra->next_rrd));
+ ra->next_rrd = NULL;
+ return (SET_ERROR(EINVAL));
+ }
+
+ /*
+ * Note: checksum is of everything up to but not including the
+ * checksum itself.
+ */
+ ASSERT3U(offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+ ==, sizeof (dmu_replay_record_t) - sizeof (zio_cksum_t));
+ receive_cksum(ra,
+ offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
+ &ra->next_rrd->header);
+
+ cksum_orig = ra->next_rrd->header.drr_u.drr_checksum.drr_checksum;
+ cksump = &ra->next_rrd->header.drr_u.drr_checksum.drr_checksum;
+
+ if (ra->byteswap)
+ byteswap_record(&ra->next_rrd->header);
+
+ if ((!ZIO_CHECKSUM_IS_ZERO(cksump)) &&
+ !ZIO_CHECKSUM_EQUAL(ra->cksum, *cksump)) {
+ kmem_free(ra->next_rrd, sizeof (*ra->next_rrd));
+ ra->next_rrd = NULL;
+ return (SET_ERROR(ECKSUM));
+ }
+
+ receive_cksum(ra, sizeof (cksum_orig), &cksum_orig);
+
+ return (0);
+}
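
The ordering here matters: each record's checksum covers the whole stream up to, but not including, its own checksum field, which the offsetof assertion pins to the tail of the header. A sketch of the assumed layout:

/*
 * dmu_replay_record_t (assumed 312 bytes total on this build):
 *
 *	+----------+----------------+-------------+-------------------+
 *	| drr_type | drr_payloadlen | drr_u ...   | checksum (32 B)   |
 *	+----------+----------------+-------------+-------------------+
 *	|<------ covered by the running fletcher-4 ------>|
 *
 * The stored checksum itself is then fed back into receive_cksum() so
 * the running state stays aligned with what the sender computed.
 */
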
+
+static void
+objlist_create(struct objlist *list)
+{
+ list_create(&list->list, sizeof (struct receive_objnode),
+ offsetof(struct receive_objnode, node));
+ list->last_lookup = 0;
+}
+
+static void
+objlist_destroy(struct objlist *list)
+{
+ struct receive_objnode *n;
+
+ for (n = list_remove_head(&list->list);
+ n != NULL; n = list_remove_head(&list->list)) {
+ kmem_free(n, sizeof (*n));
+ }
+ list_destroy(&list->list);
+}
+
+/*
+ * This function looks through the objlist to see if the specified object number
+ * is contained in the objlist. In the process, it will remove all object
+ * numbers in the list that are smaller than the specified object number. Thus,
+ * any lookup of an object number smaller than a previously looked up object
+ * number will always return false; therefore, all lookups should be done in
+ * ascending order.
+ */
+static boolean_t
+objlist_exists(struct objlist *list, uint64_t object)
+{
+ struct receive_objnode *node = list_head(&list->list);
+ ASSERT3U(object, >=, list->last_lookup);
+ list->last_lookup = object;
+ while (node != NULL && node->object < object) {
+ VERIFY3P(node, ==, list_remove_head(&list->list));
+ kmem_free(node, sizeof (*node));
+ node = list_head(&list->list);
+ }
+ return (node != NULL && node->object == object);
+}
+
+/*
+ * The objlist is a list of object numbers stored in ascending order. However,
+ * the insertion of new object numbers does not seek out the correct location to
+ * store a new object number; instead, it appends it to the list for simplicity.
+ * Thus, any users must take care to only insert new object numbers in ascending
+ * order.
+ */
+static void
+objlist_insert(struct objlist *list, uint64_t object)
+{
+ struct receive_objnode *node = kmem_zalloc(sizeof (*node), KM_SLEEP);
+ node->object = object;
+#ifdef ZFS_DEBUG
+ {
+ struct receive_objnode *last_object = list_tail(&list->list);
+ uint64_t last_objnum = (last_object != NULL ? last_object->object : 0);
+ ASSERT3U(node->object, >, last_objnum);
+ }
+#endif
+ list_insert_tail(&list->list, node);
+}
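
Since objlist_exists() frees every node below the queried object, both inserts and lookups must arrive in ascending order; the intended calling pattern looks like this (hypothetical object numbers):

struct objlist list;

objlist_create(&list);
objlist_insert(&list, 5);
objlist_insert(&list, 93);		/* inserts must be ascending too */

(void) objlist_exists(&list, 5);	/* B_TRUE */
(void) objlist_exists(&list, 40);	/* B_FALSE; the node for 5 is freed */
(void) objlist_exists(&list, 93);	/* B_TRUE */
objlist_destroy(&list);
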
+
+/*
+ * Issue the prefetch reads for any necessary indirect blocks.
+ *
+ * We use the object ignore list to tell us whether or not to issue prefetches
+ * for a given object. We do this for both correctness (in case the blocksize
+ * of an object has changed) and performance (if the object doesn't exist, don't
+ * needlessly try to issue prefetches). We also trim the list as we go through
+ * the stream to prevent it from growing to an unbounded size.
+ *
+ * The object numbers within will always be in sorted order, and any write
+ * records we see will also be in sorted order, but they're not sorted with
+ * respect to each other (i.e. we can get several object records before
+ * receiving each object's write records). As a result, once we've reached a
+ * given object number, we can safely remove any reference to lower object
+ * numbers in the ignore list. In practice, we receive up to 32 object records
+ * before receiving write records, so the list can have up to 32 nodes in it.
+ */
+/* ARGSUSED */
+static void
+receive_read_prefetch(struct receive_arg *ra,
+ uint64_t object, uint64_t offset, uint64_t length)
+{
+ if (!objlist_exists(&ra->ignore_objlist, object)) {
+ dmu_prefetch(ra->os, object, 1, offset, length,
+ ZIO_PRIORITY_SYNC_READ);
+ }
+}
+
+/*
+ * Read records off the stream, issuing any necessary prefetches.
+ */
+static int
+receive_read_record(struct receive_arg *ra)
+{
+ int err;
+
+ switch (ra->rrd->header.drr_type) {
+ case DRR_OBJECT:
+ {
+ struct drr_object *drro = &ra->rrd->header.drr_u.drr_object;
+ uint32_t size = P2ROUNDUP(drro->drr_bonuslen, 8);
+ void *buf = kmem_zalloc(size, KM_SLEEP);
+ dmu_object_info_t doi;
+ err = receive_read_payload_and_next_header(ra, size, buf);
+ if (err != 0) {
+ kmem_free(buf, size);
+ return (err);
+ }
+ err = dmu_object_info(ra->os, drro->drr_object, &doi);
+ /*
+ * See receive_read_prefetch for an explanation of why we're
+ * storing this object in the ignore_objlist.
+ */
+ if (err == ENOENT ||
+ (err == 0 && doi.doi_data_block_size != drro->drr_blksz)) {
+ objlist_insert(&ra->ignore_objlist, drro->drr_object);
+ err = 0;
+ }
+ return (err);
+ }
+ case DRR_FREEOBJECTS:
+ {
+ err = receive_read_payload_and_next_header(ra, 0, NULL);
+ return (err);
+ }
+ case DRR_WRITE:
+ {
+ struct drr_write *drrw = &ra->rrd->header.drr_u.drr_write;
+ arc_buf_t *abuf;
+ boolean_t is_meta = DMU_OT_IS_METADATA(drrw->drr_type);
+ if (DRR_WRITE_COMPRESSED(drrw)) {
+ ASSERT3U(drrw->drr_compressed_size, >, 0);
+ ASSERT3U(drrw->drr_logical_size, >=,
+ drrw->drr_compressed_size);
+ ASSERT(!is_meta);
+ abuf = arc_loan_compressed_buf(
+ dmu_objset_spa(ra->os),
+ drrw->drr_compressed_size, drrw->drr_logical_size,
+ drrw->drr_compressiontype);
+ } else {
+ abuf = arc_loan_buf(dmu_objset_spa(ra->os),
+ is_meta, drrw->drr_logical_size);
+ }
+
+ err = receive_read_payload_and_next_header(ra,
+ DRR_WRITE_PAYLOAD_SIZE(drrw), abuf->b_data);
+ if (err != 0) {
+ dmu_return_arcbuf(abuf);
+ return (err);
+ }
+ ra->rrd->write_buf = abuf;
+ receive_read_prefetch(ra, drrw->drr_object, drrw->drr_offset,
+ drrw->drr_logical_size);
+ return (err);
+ }
+ case DRR_WRITE_BYREF:
+ {
+ struct drr_write_byref *drrwb =
+ &ra->rrd->header.drr_u.drr_write_byref;
+ err = receive_read_payload_and_next_header(ra, 0, NULL);
+ receive_read_prefetch(ra, drrwb->drr_object, drrwb->drr_offset,
+ drrwb->drr_length);
+ return (err);
+ }
+ case DRR_WRITE_EMBEDDED:
+ {
+ struct drr_write_embedded *drrwe =
+ &ra->rrd->header.drr_u.drr_write_embedded;
+ uint32_t size = P2ROUNDUP(drrwe->drr_psize, 8);
+ void *buf = kmem_zalloc(size, KM_SLEEP);
+
+ err = receive_read_payload_and_next_header(ra, size, buf);
+ if (err != 0) {
+ kmem_free(buf, size);
+ return (err);
+ }
+
+ receive_read_prefetch(ra, drrwe->drr_object, drrwe->drr_offset,
+ drrwe->drr_length);
+ return (err);
+ }
+ case DRR_FREE:
+ {
+ /*
+ * It might be beneficial to prefetch indirect blocks here, but
+ * we don't really have the data to decide for sure.
+ */
+ err = receive_read_payload_and_next_header(ra, 0, NULL);
+ return (err);
+ }
+ case DRR_END:
+ {
+ struct drr_end *drre = &ra->rrd->header.drr_u.drr_end;
+ if (!ZIO_CHECKSUM_EQUAL(ra->prev_cksum, drre->drr_checksum))
+ return (SET_ERROR(ECKSUM));
+ return (0);
+ }
+ case DRR_SPILL:
+ {
+ struct drr_spill *drrs = &ra->rrd->header.drr_u.drr_spill;
+ void *buf = kmem_zalloc(drrs->drr_length, KM_SLEEP);
+ err = receive_read_payload_and_next_header(ra, drrs->drr_length,
+ buf);
+ if (err != 0)
+ kmem_free(buf, drrs->drr_length);
+ return (err);
+ }
+ default:
+ return (SET_ERROR(EINVAL));
+ }
+}
+
+static void
+dprintf_drr(struct receive_record_arg *rrd, int err)
+{
+ switch (rrd->header.drr_type) {
+ case DRR_OBJECT:
+ {
+ struct drr_object *drro = &rrd->header.drr_u.drr_object;
+ dprintf("drr_type = OBJECT obj = %llu type = %u "
+ "bonustype = %u blksz = %u bonuslen = %u cksumtype = %u "
+ "compress = %u dn_slots = %u err = %d\n",
+ drro->drr_object, drro->drr_type, drro->drr_bonustype,
+ drro->drr_blksz, drro->drr_bonuslen,
+ drro->drr_checksumtype, drro->drr_compress,
+ drro->drr_dn_slots, err);
+ break;
+ }
+ case DRR_FREEOBJECTS:
+ {
+ struct drr_freeobjects *drrfo =
+ &rrd->header.drr_u.drr_freeobjects;
+ dprintf("drr_type = FREEOBJECTS firstobj = %llu "
+ "numobjs = %llu err = %d\n",
+ drrfo->drr_firstobj, drrfo->drr_numobjs, err);
+ break;
+ }
+ case DRR_WRITE:
+ {
+ struct drr_write *drrw = &rrd->header.drr_u.drr_write;
+ dprintf("drr_type = WRITE obj = %llu type = %u offset = %llu "
+ "lsize = %llu cksumtype = %u cksumflags = %u "
+ "compress = %u psize = %llu err = %d\n",
+ drrw->drr_object, drrw->drr_type, drrw->drr_offset,
+ drrw->drr_logical_size, drrw->drr_checksumtype,
+ drrw->drr_checksumflags, drrw->drr_compressiontype,
+ drrw->drr_compressed_size, err);
+ break;
+ }
+ case DRR_WRITE_BYREF:
+ {
+ struct drr_write_byref *drrwbr =
+ &rrd->header.drr_u.drr_write_byref;
+ dprintf("drr_type = WRITE_BYREF obj = %llu offset = %llu "
+ "length = %llu toguid = %llx refguid = %llx "
+ "refobject = %llu refoffset = %llu cksumtype = %u "
+ "cksumflags = %u err = %d\n",
+ drrwbr->drr_object, drrwbr->drr_offset,
+ drrwbr->drr_length, drrwbr->drr_toguid,
+ drrwbr->drr_refguid, drrwbr->drr_refobject,
+ drrwbr->drr_refoffset, drrwbr->drr_checksumtype,
+ drrwbr->drr_checksumflags, err);
+ break;
+ }
+ case DRR_WRITE_EMBEDDED:
+ {
+ struct drr_write_embedded *drrwe =
+ &rrd->header.drr_u.drr_write_embedded;
+ dprintf("drr_type = WRITE_EMBEDDED obj = %llu offset = %llu "
+ "length = %llu compress = %u etype = %u lsize = %u "
+ "psize = %u err = %d\n",
+ drrwe->drr_object, drrwe->drr_offset, drrwe->drr_length,
+ drrwe->drr_compression, drrwe->drr_etype,
+ drrwe->drr_lsize, drrwe->drr_psize, err);
+ break;
+ }
+ case DRR_FREE:
+ {
+ struct drr_free *drrf = &rrd->header.drr_u.drr_free;
+ dprintf("drr_type = FREE obj = %llu offset = %llu "
+ "length = %lld err = %d\n",
+ drrf->drr_object, drrf->drr_offset, drrf->drr_length,
+ err);
+ break;
+ }
+ case DRR_SPILL:
+ {
+ struct drr_spill *drrs = &rrd->header.drr_u.drr_spill;
+ dprintf("drr_type = SPILL obj = %llu length = %llu "
+ "err = %d\n", drrs->drr_object, drrs->drr_length, err);
+ break;
+ }
+ default:
+ return;
+ }
}
/*
+ * Commit the records to the pool.
+ */
+static int
+receive_process_record(struct receive_writer_arg *rwa,
+ struct receive_record_arg *rrd)
+{
+ int err;
+
+ /* Processing in order, therefore bytes_read should be increasing. */
+ ASSERT3U(rrd->bytes_read, >=, rwa->bytes_read);
+ rwa->bytes_read = rrd->bytes_read;
+
+ switch (rrd->header.drr_type) {
+ case DRR_OBJECT:
+ {
+ struct drr_object *drro = &rrd->header.drr_u.drr_object;
+ err = receive_object(rwa, drro, rrd->payload);
+ kmem_free(rrd->payload, rrd->payload_size);
+ rrd->payload = NULL;
+ break;
+ }
+ case DRR_FREEOBJECTS:
+ {
+ struct drr_freeobjects *drrfo =
+ &rrd->header.drr_u.drr_freeobjects;
+ err = receive_freeobjects(rwa, drrfo);
+ break;
+ }
+ case DRR_WRITE:
+ {
+ struct drr_write *drrw = &rrd->header.drr_u.drr_write;
+ err = receive_write(rwa, drrw, rrd->write_buf);
+ /* if receive_write() is successful, it consumes the arc_buf */
+ if (err != 0)
+ dmu_return_arcbuf(rrd->write_buf);
+ rrd->write_buf = NULL;
+ rrd->payload = NULL;
+ break;
+ }
+ case DRR_WRITE_BYREF:
+ {
+ struct drr_write_byref *drrwbr =
+ &rrd->header.drr_u.drr_write_byref;
+ err = receive_write_byref(rwa, drrwbr);
+ break;
+ }
+ case DRR_WRITE_EMBEDDED:
+ {
+ struct drr_write_embedded *drrwe =
+ &rrd->header.drr_u.drr_write_embedded;
+ err = receive_write_embedded(rwa, drrwe, rrd->payload);
+ kmem_free(rrd->payload, rrd->payload_size);
+ rrd->payload = NULL;
+ break;
+ }
+ case DRR_FREE:
+ {
+ struct drr_free *drrf = &rrd->header.drr_u.drr_free;
+ err = receive_free(rwa, drrf);
+ break;
+ }
+ case DRR_SPILL:
+ {
+ struct drr_spill *drrs = &rrd->header.drr_u.drr_spill;
+ err = receive_spill(rwa, drrs, rrd->payload);
+ kmem_free(rrd->payload, rrd->payload_size);
+ rrd->payload = NULL;
+ break;
+ }
+ default:
+ return (SET_ERROR(EINVAL));
+ }
+
+ if (err != 0)
+ dprintf_drr(rrd, err);
+
+ return (err);
+}
+
+/*
+ * dmu_recv_stream's worker thread; pull records off the queue, and then call
+ * receive_process_record. When we're done, signal the main thread and exit.
+ */
+static void
+receive_writer_thread(void *arg)
+{
+ struct receive_writer_arg *rwa = arg;
+ struct receive_record_arg *rrd;
+ fstrans_cookie_t cookie = spl_fstrans_mark();
+
+ for (rrd = bqueue_dequeue(&rwa->q); !rrd->eos_marker;
+ rrd = bqueue_dequeue(&rwa->q)) {
+ /*
+ * If there's an error, the main thread will stop putting things
+ * on the queue, but we need to clear everything in it before we
+ * can exit.
+ */
+ if (rwa->err == 0) {
+ rwa->err = receive_process_record(rwa, rrd);
+ } else if (rrd->write_buf != NULL) {
+ dmu_return_arcbuf(rrd->write_buf);
+ rrd->write_buf = NULL;
+ rrd->payload = NULL;
+ } else if (rrd->payload != NULL) {
+ kmem_free(rrd->payload, rrd->payload_size);
+ rrd->payload = NULL;
+ }
+ kmem_free(rrd, sizeof (*rrd));
+ }
+ kmem_free(rrd, sizeof (*rrd));
+ mutex_enter(&rwa->mutex);
+ rwa->done = B_TRUE;
+ cv_signal(&rwa->cv);
+ mutex_exit(&rwa->mutex);
+ spl_fstrans_unmark(cookie);
+ thread_exit();
+}
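
The shutdown handshake (an eos marker pushed through the queue, then a done flag flipped under the mutex) is a standard producer/consumer pattern; a minimal pthreads analogue, under the assumption that bqueue behaves like a blocking FIFO:

#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t mu = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  cv = PTHREAD_COND_INITIALIZER;
static bool done;

/* consumer: after dequeuing the eos marker, signal completion */
static void
signal_done(void)
{
	pthread_mutex_lock(&mu);
	done = true;
	pthread_cond_signal(&cv);
	pthread_mutex_unlock(&mu);
}

/* producer: block until the consumer has drained everything */
static void
wait_done(void)
{
	pthread_mutex_lock(&mu);
	while (!done)
		pthread_cond_wait(&cv, &mu);
	pthread_mutex_unlock(&mu);
}
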
+
+static int
+resume_check(struct receive_arg *ra, nvlist_t *begin_nvl)
+{
+ uint64_t val;
+ objset_t *mos = dmu_objset_pool(ra->os)->dp_meta_objset;
+ uint64_t dsobj = dmu_objset_id(ra->os);
+ uint64_t resume_obj, resume_off;
+
+ if (nvlist_lookup_uint64(begin_nvl,
+ "resume_object", &resume_obj) != 0 ||
+ nvlist_lookup_uint64(begin_nvl,
+ "resume_offset", &resume_off) != 0) {
+ return (SET_ERROR(EINVAL));
+ }
+ VERIFY0(zap_lookup(mos, dsobj,
+ DS_FIELD_RESUME_OBJECT, sizeof (val), 1, &val));
+ if (resume_obj != val)
+ return (SET_ERROR(EINVAL));
+ VERIFY0(zap_lookup(mos, dsobj,
+ DS_FIELD_RESUME_OFFSET, sizeof (val), 1, &val));
+ if (resume_off != val)
+ return (SET_ERROR(EINVAL));
+
+ return (0);
+}
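
resume_check() expects the BEGIN record's payload to be a packed nvlist carrying the resume point; assuming the standard fnvlist helpers, the sender-side construction is roughly:

nvlist_t *nvl = fnvlist_alloc();

/* hypothetical values; in practice they come from the resume token */
fnvlist_add_uint64(nvl, "resume_object", 127ULL);
fnvlist_add_uint64(nvl, "resume_offset", 1048576ULL);
/* then packed via fnvlist_pack() and sized into drr_payloadlen */
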
+
+/*
+ * Read in the stream's records, one by one, and apply them to the pool. There
+ * are two threads involved; the thread that calls this function will spin up a
+ * worker thread, read the records off the stream one by one, and issue
+ * prefetches for any necessary indirect blocks. It will then push the records
+ * onto an internal blocking queue. The worker thread will pull the records off
+ * the queue, and actually write the data into the DMU. This way, the worker
+ * thread doesn't have to wait for reads to complete, since everything it needs
+ * (the indirect blocks) will be prefetched.
+ *
* NB: callers *must* call dmu_recv_end() if this succeeds.
*/
int
dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
int cleanup_fd, uint64_t *action_handlep)
{
- struct restorearg ra = { 0 };
- dmu_replay_record_t *drr;
- objset_t *os;
- zio_cksum_t pcksum;
+ int err = 0;
+ struct receive_arg *ra;
+ struct receive_writer_arg *rwa;
int featureflags;
+ uint32_t payloadlen;
+ void *payload;
+ nvlist_t *begin_nvl = NULL;
+
+ ra = kmem_zalloc(sizeof (*ra), KM_SLEEP);
+ rwa = kmem_zalloc(sizeof (*rwa), KM_SLEEP);
+
+ ra->byteswap = drc->drc_byteswap;
+ ra->cksum = drc->drc_cksum;
+ ra->vp = vp;
+ ra->voff = *voffp;
+
+ if (dsl_dataset_is_zapified(drc->drc_ds)) {
+ (void) zap_lookup(drc->drc_ds->ds_dir->dd_pool->dp_meta_objset,
+ drc->drc_ds->ds_object, DS_FIELD_RESUME_BYTES,
+ sizeof (ra->bytes_read), 1, &ra->bytes_read);
+ }
- ra.byteswap = drc->drc_byteswap;
- ra.cksum = drc->drc_cksum;
- ra.vp = vp;
- ra.voff = *voffp;
- ra.bufsize = SPA_MAXBLOCKSIZE;
- ra.buf = vmem_alloc(ra.bufsize, KM_SLEEP);
+ objlist_create(&ra->ignore_objlist);
/* these were verified in dmu_recv_begin */
ASSERT3U(DMU_GET_STREAM_HDRTYPE(drc->drc_drrb->drr_versioninfo), ==,
@@ -1878,7 +3108,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
/*
* Open the objset we are modifying.
*/
- VERIFY0(dmu_objset_from_ds(drc->drc_ds, &os));
+ VERIFY0(dmu_objset_from_ds(drc->drc_ds, &ra->os));
ASSERT(dsl_dataset_phys(drc->drc_ds)->ds_flags & DS_FLAG_INCONSISTENT);
@@ -1889,136 +3119,169 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
minor_t minor;
if (cleanup_fd == -1) {
- ra.err = SET_ERROR(EBADF);
+ ra->err = SET_ERROR(EBADF);
goto out;
}
- ra.err = zfs_onexit_fd_hold(cleanup_fd, &minor);
- if (ra.err != 0) {
+ ra->err = zfs_onexit_fd_hold(cleanup_fd, &minor);
+ if (ra->err != 0) {
cleanup_fd = -1;
goto out;
}
if (*action_handlep == 0) {
- ra.guid_to_ds_map =
+ rwa->guid_to_ds_map =
kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
- avl_create(ra.guid_to_ds_map, guid_compare,
+ avl_create(rwa->guid_to_ds_map, guid_compare,
sizeof (guid_map_entry_t),
offsetof(guid_map_entry_t, avlnode));
- ra.err = zfs_onexit_add_cb(minor,
- free_guid_map_onexit, ra.guid_to_ds_map,
+ err = zfs_onexit_add_cb(minor,
+ free_guid_map_onexit, rwa->guid_to_ds_map,
action_handlep);
- if (ra.err != 0)
+ if (err != 0)
goto out;
} else {
- ra.err = zfs_onexit_cb_data(minor, *action_handlep,
- (void **)&ra.guid_to_ds_map);
- if (ra.err != 0)
+ err = zfs_onexit_cb_data(minor, *action_handlep,
+ (void **)&rwa->guid_to_ds_map);
+ if (err != 0)
goto out;
}
- drc->drc_guid_to_ds_map = ra.guid_to_ds_map;
+ drc->drc_guid_to_ds_map = rwa->guid_to_ds_map;
}
+ payloadlen = drc->drc_drr_begin->drr_payloadlen;
+ payload = NULL;
+ if (payloadlen != 0)
+ payload = kmem_alloc(payloadlen, KM_SLEEP);
+
+ err = receive_read_payload_and_next_header(ra, payloadlen, payload);
+ if (err != 0) {
+ if (payloadlen != 0)
+ kmem_free(payload, payloadlen);
+ goto out;
+ }
+ if (payloadlen != 0) {
+ err = nvlist_unpack(payload, payloadlen, &begin_nvl, KM_SLEEP);
+ kmem_free(payload, payloadlen);
+ if (err != 0)
+ goto out;
+ }
+
+ if (featureflags & DMU_BACKUP_FEATURE_RESUMING) {
+ err = resume_check(ra, begin_nvl);
+ if (err != 0)
+ goto out;
+ }
+
+ (void) bqueue_init(&rwa->q, zfs_recv_queue_length,
+ offsetof(struct receive_record_arg, node));
+ cv_init(&rwa->cv, NULL, CV_DEFAULT, NULL);
+ mutex_init(&rwa->mutex, NULL, MUTEX_DEFAULT, NULL);
+ rwa->os = ra->os;
+ rwa->byteswap = drc->drc_byteswap;
+ rwa->resumable = drc->drc_resumable;
+
+ (void) thread_create(NULL, 0, receive_writer_thread, rwa, 0, curproc,
+ TS_RUN, minclsyspri);
/*
- * Read records and process them.
+ * We're reading rwa->err without locks, which is safe since we are the
+ * only reader, and the worker thread is the only writer. It's ok if we
+ * miss a write for an iteration or two of the loop, since the writer
+ * thread will keep freeing records we send it until we send it an eos
+ * marker.
+ *
+ * We can leave this loop in 3 ways: First, if rwa->err is
+ * non-zero. In that case, the writer thread will free the rrd we just
+ * pushed. Second, if we're interrupted; in that case, either it's the
+ * first loop and ra->rrd was never allocated, or it's later and ra->rrd
+ * has been handed off to the writer thread who will free it. Finally,
+ * if receive_read_record fails or we're at the end of the stream, then
+ * we free ra->rrd and exit.
*/
- pcksum = ra.cksum;
- while (ra.err == 0 &&
- NULL != (drr = restore_read(&ra, sizeof (*drr), NULL))) {
+ while (rwa->err == 0) {
if (issig(JUSTLOOKING) && issig(FORREAL)) {
- ra.err = SET_ERROR(EINTR);
- goto out;
+ err = SET_ERROR(EINTR);
+ break;
}
- if (ra.byteswap)
- backup_byteswap(drr);
+ ASSERT3P(ra->rrd, ==, NULL);
+ ra->rrd = ra->next_rrd;
+ ra->next_rrd = NULL;
+ /* Allocates and loads header into ra->next_rrd */
+ err = receive_read_record(ra);
- switch (drr->drr_type) {
- case DRR_OBJECT:
- {
- /*
- * We need to make a copy of the record header,
- * because restore_{object,write} may need to
- * restore_read(), which will invalidate drr.
- */
- struct drr_object drro = drr->drr_u.drr_object;
- ra.err = restore_object(&ra, os, &drro);
- break;
- }
- case DRR_FREEOBJECTS:
- {
- struct drr_freeobjects drrfo =
- drr->drr_u.drr_freeobjects;
- ra.err = restore_freeobjects(&ra, os, &drrfo);
- break;
- }
- case DRR_WRITE:
- {
- struct drr_write drrw = drr->drr_u.drr_write;
- ra.err = restore_write(&ra, os, &drrw);
- break;
- }
- case DRR_WRITE_BYREF:
- {
- struct drr_write_byref drrwbr =
- drr->drr_u.drr_write_byref;
- ra.err = restore_write_byref(&ra, os, &drrwbr);
+ if (ra->rrd->header.drr_type == DRR_END || err != 0) {
+ kmem_free(ra->rrd, sizeof (*ra->rrd));
+ ra->rrd = NULL;
break;
}
- case DRR_WRITE_EMBEDDED:
- {
- struct drr_write_embedded drrwe =
- drr->drr_u.drr_write_embedded;
- ra.err = restore_write_embedded(&ra, os, &drrwe);
- break;
- }
- case DRR_FREE:
- {
- struct drr_free drrf = drr->drr_u.drr_free;
- ra.err = restore_free(&ra, os, &drrf);
- break;
- }
- case DRR_END:
- {
- struct drr_end drre = drr->drr_u.drr_end;
- /*
- * We compare against the *previous* checksum
- * value, because the stored checksum is of
- * everything before the DRR_END record.
- */
- if (!ZIO_CHECKSUM_EQUAL(drre.drr_checksum, pcksum))
- ra.err = SET_ERROR(ECKSUM);
- goto out;
- }
- case DRR_SPILL:
- {
- struct drr_spill drrs = drr->drr_u.drr_spill;
- ra.err = restore_spill(&ra, os, &drrs);
- break;
+
+ bqueue_enqueue(&rwa->q, ra->rrd,
+ sizeof (struct receive_record_arg) + ra->rrd->payload_size);
+ ra->rrd = NULL;
+ }
+ if (ra->next_rrd == NULL)
+ ra->next_rrd = kmem_zalloc(sizeof (*ra->next_rrd), KM_SLEEP);
+ ra->next_rrd->eos_marker = B_TRUE;
+ bqueue_enqueue(&rwa->q, ra->next_rrd, 1);
+
+ mutex_enter(&rwa->mutex);
+ while (!rwa->done) {
+ cv_wait(&rwa->cv, &rwa->mutex);
+ }
+ mutex_exit(&rwa->mutex);
+
+ /*
+ * If we are receiving a full stream as a clone, all object IDs which
+ * are greater than the maximum ID referenced in the stream are
+ * by definition unused and must be freed.
+ */
+ if (drc->drc_clone && drc->drc_drrb->drr_fromguid == 0) {
+ uint64_t obj = rwa->max_object + 1;
+ int free_err = 0;
+ int next_err = 0;
+
+ while (next_err == 0) {
+ free_err = dmu_free_long_object(rwa->os, obj);
+ if (free_err != 0 && free_err != ENOENT)
+ break;
+
+ next_err = dmu_object_next(rwa->os, &obj, FALSE, 0);
}
- default:
- ra.err = SET_ERROR(EINVAL);
- goto out;
+
+ if (err == 0) {
+ if (free_err != 0 && free_err != ENOENT)
+ err = free_err;
+ else if (next_err != ESRCH)
+ err = next_err;
}
- pcksum = ra.cksum;
}
- ASSERT(ra.err != 0);
+
+ cv_destroy(&rwa->cv);
+ mutex_destroy(&rwa->mutex);
+ bqueue_destroy(&rwa->q);
+ if (err == 0)
+ err = rwa->err;
out:
+ nvlist_free(begin_nvl);
if ((featureflags & DMU_BACKUP_FEATURE_DEDUP) && (cleanup_fd != -1))
zfs_onexit_fd_rele(cleanup_fd);
- if (ra.err != 0) {
+ if (err != 0) {
/*
- * destroy what we created, so we don't leave it in the
- * inconsistent restoring state.
+ * Clean up references. If receive is not resumable,
+ * destroy what we created, so we don't leave it in
+ * the inconsistent state.
*/
dmu_recv_cleanup_ds(drc);
}
- vmem_free(ra.buf, ra.bufsize);
- *voffp = ra.voff;
- return (ra.err);
+ *voffp = ra->voff;
+ objlist_destroy(&ra->ignore_objlist);
+ kmem_free(ra, sizeof (*ra));
+ kmem_free(rwa, sizeof (*rwa));
+ return (err);
}
static int
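The hunks above replace the old single-threaded restore loop with a reader/writer pair: dmu_recv_stream() parses records and enqueues them on rwa->q, and receive_writer_thread() drains the queue until it dequeues a record whose eos_marker is set, publishing failures through the locklessly read rwa->err the comment describes. A minimal userspace sketch of the same hand-off pattern, assuming nothing from ZFS (pthreads and C11 atomics stand in for the SPL primitives; every name below is invented):

/* build: cc -std=c11 -pthread reader_writer.c */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct rec {
	struct rec *next;
	int eos;		/* like receive_record_arg.eos_marker */
	int payload;
};

static struct rec *head, *tail;
static pthread_mutex_t qlock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t qcv = PTHREAD_COND_INITIALIZER;
static atomic_int werr;		/* like rwa->err: written by writer only */

static void
enqueue(struct rec *r)
{
	pthread_mutex_lock(&qlock);
	if (tail != NULL)
		tail->next = r;
	else
		head = r;
	tail = r;
	pthread_cond_signal(&qcv);
	pthread_mutex_unlock(&qlock);
}

static void *
writer(void *unused)
{
	for (;;) {
		pthread_mutex_lock(&qlock);
		while (head == NULL)
			pthread_cond_wait(&qcv, &qlock);
		struct rec *r = head;
		head = r->next;
		if (head == NULL)
			tail = NULL;
		pthread_mutex_unlock(&qlock);

		int eos = r->eos;
		if (!eos) {
			if (r->payload < 0)	/* a record that fails to apply */
				atomic_store(&werr, 1);
			else
				printf("applied record %d\n", r->payload);
		}
		free(r);	/* the writer frees everything it dequeues */
		if (eos)
			return (NULL);
	}
}

int
main(void)
{
	pthread_t tid;
	pthread_create(&tid, NULL, writer, NULL);

	/* reader side: stop handing off work once the writer reports an error */
	for (int i = 0; i < 4 && atomic_load(&werr) == 0; i++) {
		struct rec *r = calloc(1, sizeof (*r));
		r->payload = i;
		enqueue(r);
	}
	struct rec *eos = calloc(1, sizeof (*eos));	/* always send eos */
	eos->eos = 1;
	enqueue(eos);
	pthread_join(tid, NULL);
	return (atomic_load(&werr));
}

Sending the eos marker through the same queue that carries data means the writer needs no separate shutdown path, and because the writer frees everything it dequeues, record ownership is unambiguous after enqueue.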
@@ -2145,6 +3408,9 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
dsl_dataset_phys(origin_head)->ds_flags &=
~DS_FLAG_INCONSISTENT;
+ drc->drc_newsnapobj =
+ dsl_dataset_phys(origin_head)->ds_prev_snap_obj;
+
dsl_dataset_rele(origin_head, FTAG);
dsl_destroy_head_sync_impl(drc->drc_ds, tx);
@@ -2166,8 +3432,23 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
dmu_buf_will_dirty(ds->ds_dbuf, tx);
dsl_dataset_phys(ds)->ds_flags &= ~DS_FLAG_INCONSISTENT;
+ if (dsl_dataset_has_resume_receive_state(ds)) {
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_FROMGUID, tx);
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_OBJECT, tx);
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_OFFSET, tx);
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_BYTES, tx);
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_TOGUID, tx);
+ (void) zap_remove(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_TONAME, tx);
+ }
+ drc->drc_newsnapobj =
+ dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj;
}
- drc->drc_newsnapobj = dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj;
zvol_create_minors(dp->dp_spa, drc->drc_tofs, B_TRUE);
/*
* Release the hold from dmu_recv_begin. This must be done before
@@ -2211,38 +3492,40 @@ static int dmu_recv_end_modified_blocks = 3;
static int
dmu_recv_existing_end(dmu_recv_cookie_t *drc)
{
- int error;
-
#ifdef _KERNEL
- char *name;
-
/*
* We will be destroying the ds; make sure its origin is unmounted if
* necessary.
*/
- name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+ char name[ZFS_MAX_DATASET_NAME_LEN];
dsl_dataset_name(drc->drc_ds, name);
zfs_destroy_unmount_origin(name);
- kmem_free(name, MAXNAMELEN);
#endif
- error = dsl_sync_task(drc->drc_tofs,
+ return (dsl_sync_task(drc->drc_tofs,
dmu_recv_end_check, dmu_recv_end_sync, drc,
- dmu_recv_end_modified_blocks, ZFS_SPACE_CHECK_NORMAL);
-
- if (error != 0)
- dmu_recv_cleanup_ds(drc);
- return (error);
+ dmu_recv_end_modified_blocks, ZFS_SPACE_CHECK_NORMAL));
}
static int
dmu_recv_new_end(dmu_recv_cookie_t *drc)
+{
+ return (dsl_sync_task(drc->drc_tofs,
+ dmu_recv_end_check, dmu_recv_end_sync, drc,
+ dmu_recv_end_modified_blocks, ZFS_SPACE_CHECK_NORMAL));
+}
+
+int
+dmu_recv_end(dmu_recv_cookie_t *drc, void *owner)
{
int error;
- error = dsl_sync_task(drc->drc_tofs,
- dmu_recv_end_check, dmu_recv_end_sync, drc,
- dmu_recv_end_modified_blocks, ZFS_SPACE_CHECK_NORMAL);
+ drc->drc_owner = owner;
+
+ if (drc->drc_newfs)
+ error = dmu_recv_new_end(drc);
+ else
+ error = dmu_recv_existing_end(drc);
if (error != 0) {
dmu_recv_cleanup_ds(drc);
@@ -2254,17 +3537,6 @@ dmu_recv_new_end(dmu_recv_cookie_t *drc)
return (error);
}
-int
-dmu_recv_end(dmu_recv_cookie_t *drc, void *owner)
-{
- drc->drc_owner = owner;
-
- if (drc->drc_newfs)
- return (dmu_recv_new_end(drc));
- else
- return (dmu_recv_existing_end(drc));
-}
-
/*
* Return TRUE if this objset is currently being received into.
*/
diff --git a/zfs/module/zfs/dmu_traverse.c b/zfs/module/zfs/dmu_traverse.c
index a58f77f082f3..c78228d74588 100644
--- a/zfs/module/zfs/dmu_traverse.c
+++ b/zfs/module/zfs/dmu_traverse.c
@@ -39,7 +39,7 @@
#include <sys/zfeature.h>
int32_t zfs_pd_bytes_max = 50 * 1024 * 1024; /* 50MB */
-int32_t ignore_hole_birth = 1;
+int32_t send_holes_without_birth_time = 1;
typedef struct prefetch_data {
kmutex_t pd_mtx;
@@ -48,6 +48,7 @@ typedef struct prefetch_data {
int pd_flags;
boolean_t pd_cancel;
boolean_t pd_exited;
+ zbookmark_phys_t pd_resume;
} prefetch_data_t;
typedef struct traverse_data {
@@ -159,7 +160,7 @@ resume_skip_check(traverse_data_t *td, const dnode_phys_t *dnp,
* If we already visited this bp & everything below,
* don't bother doing it again.
*/
- if (zbookmark_is_before(dnp, zb, td->td_resume))
+ if (zbookmark_subtree_completed(dnp, zb, td->td_resume))
return (RESUME_SKIP_ALL);
/*
@@ -251,9 +252,10 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
*
* Note that the meta-dnode cannot be reallocated.
*/
- if (!ignore_hole_birth && (!td->td_realloc_possible ||
- zb->zb_object == DMU_META_DNODE_OBJECT) &&
- td->td_hole_birth_enabled_txg <= td->td_min_txg)
+ if (!send_holes_without_birth_time &&
+ (!td->td_realloc_possible ||
+ zb->zb_object == DMU_META_DNODE_OBJECT) &&
+ td->td_hole_birth_enabled_txg <= td->td_min_txg)
return (0);
} else if (bp->blk_birth <= td->td_min_txg) {
return (0);
@@ -324,30 +326,29 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
uint32_t flags = ARC_FLAG_WAIT;
int32_t i;
int32_t epb = BP_GET_LSIZE(bp) >> DNODE_SHIFT;
- dnode_phys_t *cdnp;
+ dnode_phys_t *child_dnp;
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
if (err != 0)
goto post;
- cdnp = buf->b_data;
+ child_dnp = buf->b_data;
- for (i = 0; i < epb; i++) {
- prefetch_dnode_metadata(td, &cdnp[i], zb->zb_objset,
- zb->zb_blkid * epb + i);
+ for (i = 0; i < epb; i += child_dnp[i].dn_extra_slots + 1) {
+ prefetch_dnode_metadata(td, &child_dnp[i],
+ zb->zb_objset, zb->zb_blkid * epb + i);
}
/* recursively visitbp() blocks below this */
- for (i = 0; i < epb; i++) {
- err = traverse_dnode(td, &cdnp[i], zb->zb_objset,
- zb->zb_blkid * epb + i);
+ for (i = 0; i < epb; i += child_dnp[i].dn_extra_slots + 1) {
+ err = traverse_dnode(td, &child_dnp[i],
+ zb->zb_objset, zb->zb_blkid * epb + i);
if (err != 0)
break;
}
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
arc_flags_t flags = ARC_FLAG_WAIT;
objset_phys_t *osp;
- dnode_phys_t *mdnp, *gdnp, *udnp;
err = arc_read(NULL, td->td_spa, bp, arc_getbuf_func, &buf,
ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb);
@@ -355,11 +356,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
goto post;
osp = buf->b_data;
- mdnp = &osp->os_meta_dnode;
- gdnp = &osp->os_groupused_dnode;
- udnp = &osp->os_userused_dnode;
-
- prefetch_dnode_metadata(td, mdnp, zb->zb_objset,
+ prefetch_dnode_metadata(td, &osp->os_meta_dnode, zb->zb_objset,
DMU_META_DNODE_OBJECT);
/*
* See the block comment above for the goal of this variable.
@@ -371,26 +368,26 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
td->td_realloc_possible = B_FALSE;
if (arc_buf_size(buf) >= sizeof (objset_phys_t)) {
- prefetch_dnode_metadata(td, gdnp, zb->zb_objset,
- DMU_GROUPUSED_OBJECT);
- prefetch_dnode_metadata(td, udnp, zb->zb_objset,
- DMU_USERUSED_OBJECT);
+ prefetch_dnode_metadata(td, &osp->os_groupused_dnode,
+ zb->zb_objset, DMU_GROUPUSED_OBJECT);
+ prefetch_dnode_metadata(td, &osp->os_userused_dnode,
+ zb->zb_objset, DMU_USERUSED_OBJECT);
}
- err = traverse_dnode(td, mdnp, zb->zb_objset,
+ err = traverse_dnode(td, &osp->os_meta_dnode, zb->zb_objset,
DMU_META_DNODE_OBJECT);
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
- err = traverse_dnode(td, gdnp, zb->zb_objset,
- DMU_GROUPUSED_OBJECT);
+ err = traverse_dnode(td, &osp->os_groupused_dnode,
+ zb->zb_objset, DMU_GROUPUSED_OBJECT);
}
if (err == 0 && arc_buf_size(buf) >= sizeof (objset_phys_t)) {
- err = traverse_dnode(td, udnp, zb->zb_objset,
- DMU_USERUSED_OBJECT);
+ err = traverse_dnode(td, &osp->os_userused_dnode,
+ zb->zb_objset, DMU_USERUSED_OBJECT);
}
}
if (buf)
- (void) arc_buf_remove_ref(buf, &buf);
+ arc_buf_destroy(buf, &buf);
post:
if (err == 0 && (td->td_flags & TRAVERSE_POST))
@@ -417,9 +414,15 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
* Set the bookmark to the first level-0 block that we need
* to visit. This way, the resuming code does not need to
* deal with resuming from indirect blocks.
+ *
+ * Note, if zb_level <= 0, dnp may be NULL, so we don't want
+ * to dereference it.
*/
- td->td_resume->zb_blkid = zb->zb_blkid <<
- (zb->zb_level * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
+ td->td_resume->zb_blkid = zb->zb_blkid;
+ if (zb->zb_level > 0) {
+ td->td_resume->zb_blkid <<= zb->zb_level *
+ (dnp->dn_indblkshift - SPA_BLKPTRSHIFT);
+ }
td->td_paused = B_TRUE;
}
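To make the resume-bookmark shift above concrete, a worked instance with assumed values (not taken from the patch):

/*
 * Assume dn_indblkshift = 17 and SPA_BLKPTRSHIFT = 7, so each
 * indirect level spans epbs = 17 - 7 = 10 bits of blkid.  Pausing
 * at zb_level = 2, zb_blkid = 3 resumes at:
 *
 *	3 << (2 * 10) == 3145728
 *
 * i.e. the first level-0 block covered by that level-2 indirect,
 * which is why the resuming code never restarts mid-indirect.
 */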
@@ -440,7 +443,7 @@ prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *dnp,
if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
- traverse_prefetch_metadata(td, &dnp->dn_spill, &czb);
+ traverse_prefetch_metadata(td, DN_SPILL_BLKPTR(dnp), &czb);
}
}
@@ -451,6 +454,21 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
int j, err = 0;
zbookmark_phys_t czb;
+ if (object != DMU_META_DNODE_OBJECT && td->td_resume != NULL &&
+ object < td->td_resume->zb_object)
+ return (0);
+
+ if (td->td_flags & TRAVERSE_PRE) {
+ SET_BOOKMARK(&czb, objset, object, ZB_DNODE_LEVEL,
+ ZB_DNODE_BLKID);
+ err = td->td_func(td->td_spa, NULL, NULL, &czb, dnp,
+ td->td_arg);
+ if (err == TRAVERSE_VISIT_NO_CHILDREN)
+ return (0);
+ if (err != 0)
+ return (err);
+ }
+
for (j = 0; j < dnp->dn_nblkptr; j++) {
SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j);
err = traverse_visitbp(td, dnp, &dnp->dn_blkptr[j], &czb);
@@ -458,9 +476,20 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp,
break;
}
- if (err == 0 && dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) {
+ if (err == 0 && (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) {
SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID);
- err = traverse_visitbp(td, dnp, &dnp->dn_spill, &czb);
+ err = traverse_visitbp(td, dnp, DN_SPILL_BLKPTR(dnp), &czb);
+ }
+
+ if (err == 0 && (td->td_flags & TRAVERSE_POST)) {
+ SET_BOOKMARK(&czb, objset, object, ZB_DNODE_LEVEL,
+ ZB_DNODE_BLKID);
+ err = td->td_func(td->td_spa, NULL, NULL, &czb, dnp,
+ td->td_arg);
+ if (err == TRAVERSE_VISIT_NO_CHILDREN)
+ return (0);
+ if (err != 0)
+ return (err);
}
return (err);
}
@@ -474,6 +503,8 @@ traverse_prefetcher(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
arc_flags_t aflags = ARC_FLAG_NOWAIT | ARC_FLAG_PREFETCH;
ASSERT(pfd->pd_bytes_fetched >= 0);
+ if (bp == NULL)
+ return (0);
if (pfd->pd_cancel)
return (SET_ERROR(EINTR));
@@ -504,6 +535,7 @@ traverse_prefetch_thread(void *arg)
td.td_func = traverse_prefetcher;
td.td_arg = td_main->td_pfd;
td.td_pfd = NULL;
+ td.td_resume = &td_main->td_pfd->pd_resume;
SET_BOOKMARK(&czb, td.td_objset,
ZB_ROOT_OBJECT, ZB_ROOT_LEVEL, ZB_ROOT_BLKID);
@@ -533,12 +565,6 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
ASSERT(ds == NULL || objset == ds->ds_object);
ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST));
- /*
- * The data prefetching mechanism (the prefetch thread) is incompatible
- * with resuming from a bookmark.
- */
- ASSERT(resume == NULL || !(flags & TRAVERSE_PREFETCH_DATA));
-
td = kmem_alloc(sizeof (traverse_data_t), KM_SLEEP);
pd = kmem_zalloc(sizeof (prefetch_data_t), KM_SLEEP);
czb = kmem_alloc(sizeof (zbookmark_phys_t), KM_SLEEP);
@@ -563,6 +589,8 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
}
pd->pd_flags = flags;
+ if (resume != NULL)
+ pd->pd_resume = *resume;
mutex_init(&pd->pd_mtx, NULL, MUTEX_DEFAULT, NULL);
cv_init(&pd->pd_cv, NULL, CV_DEFAULT, NULL);
@@ -583,12 +611,12 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
osp = buf->b_data;
traverse_zil(td, &osp->os_zil_header);
- (void) arc_buf_remove_ref(buf, &buf);
+ arc_buf_destroy(buf, &buf);
}
if (!(flags & TRAVERSE_PREFETCH_DATA) ||
- 0 == taskq_dispatch(system_taskq, traverse_prefetch_thread,
- td, TQ_NOQUEUE))
+ taskq_dispatch(system_taskq, traverse_prefetch_thread,
+ td, TQ_NOQUEUE) == TASKQID_INVALID)
pd->pd_exited = B_TRUE;
err = traverse_visitbp(td, NULL, rootbp, czb);
@@ -615,11 +643,19 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
* in syncing context).
*/
int
-traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags,
- blkptr_cb_t func, void *arg)
+traverse_dataset_resume(dsl_dataset_t *ds, uint64_t txg_start,
+ zbookmark_phys_t *resume,
+ int flags, blkptr_cb_t func, void *arg)
{
return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object,
- &dsl_dataset_phys(ds)->ds_bp, txg_start, NULL, flags, func, arg));
+ &dsl_dataset_phys(ds)->ds_bp, txg_start, resume, flags, func, arg));
+}
+
+int
+traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start,
+ int flags, blkptr_cb_t func, void *arg)
+{
+ return (traverse_dataset_resume(ds, txg_start, NULL, flags, func, arg));
}
int
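traverse_dataset() is now a thin wrapper over the resumable variant. A hedged caller sketch follows; only the blkptr_cb_t shape and the bp == NULL convention for the new ZB_DNODE_LEVEL visits come from this patch, while the callback, bookmark, and wrapper below are invented:

static int
count_bps_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
    const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
	uint64_t *count = arg;

	/* dnode-level visits pass bp == NULL; skip them here */
	if (bp != NULL && !BP_IS_HOLE(bp))
		(*count)++;
	return (0);
}

/* resume a previously paused walk from a saved bookmark */
static int
example_resume_walk(dsl_dataset_t *ds, uint64_t txg_start,
    zbookmark_phys_t *saved_bookmark, uint64_t *count)
{
	*count = 0;
	return (traverse_dataset_resume(ds, txg_start, saved_bookmark,
	    TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA, count_bps_cb, count));
}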
@@ -652,7 +688,7 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
/* visit each dataset */
for (obj = 1; err == 0;
- err = dmu_object_next(mos, &obj, FALSE, txg_start)) {
+ err = dmu_object_next(mos, &obj, B_FALSE, txg_start)) {
dmu_object_info_t doi;
err = dmu_object_info(mos, obj, &doi);
@@ -694,6 +730,11 @@ EXPORT_SYMBOL(traverse_pool);
module_param(zfs_pd_bytes_max, int, 0644);
MODULE_PARM_DESC(zfs_pd_bytes_max, "Max number of bytes to prefetch");
-module_param(ignore_hole_birth, int, 0644);
-MODULE_PARM_DESC(ignore_hole_birth, "Ignore hole_birth txg for send");
+module_param_named(ignore_hole_birth, send_holes_without_birth_time, int, 0644);
+MODULE_PARM_DESC(ignore_hole_birth, "Alias for send_holes_without_birth_time");
+
+module_param_named(send_holes_without_birth_time,
+ send_holes_without_birth_time, int, 0644);
+MODULE_PARM_DESC(send_holes_without_birth_time,
+ "Ignore hole_birth txg for zfs send");
#endif
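Note how the rename keeps the old ignore_hole_birth name alive: module_param_named() binds a parameter name to a variable independently of the variable's identifier, so one int can back two entries under /sys/module/zfs/parameters/. A stand-alone sketch of the same trick (hypothetical demo module, not ZFS code):

#include <linux/init.h>
#include <linux/module.h>
#include <linux/moduleparam.h>

static int send_holes_without_birth_time = 1;

/* old name, kept so existing scripts and docs don't break */
module_param_named(ignore_hole_birth, send_holes_without_birth_time,
    int, 0644);
MODULE_PARM_DESC(ignore_hole_birth, "Alias for send_holes_without_birth_time");

/* new, preferred name, bound to the same variable */
module_param_named(send_holes_without_birth_time,
    send_holes_without_birth_time, int, 0644);
MODULE_PARM_DESC(send_holes_without_birth_time,
    "Ignore hole_birth txg for zfs send");

static int __init demo_init(void) { return 0; }
static void __exit demo_exit(void) { }
module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

Writing either file updates the same variable, so existing tooling keeps working while documentation migrates to the new name.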
diff --git a/zfs/module/zfs/dmu_tx.c b/zfs/module/zfs/dmu_tx.c
index 5ae429f70866..097fa774ad06 100644
--- a/zfs/module/zfs/dmu_tx.c
+++ b/zfs/module/zfs/dmu_tx.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@@ -29,10 +29,10 @@
#include <sys/dbuf.h>
#include <sys/dmu_tx.h>
#include <sys/dmu_objset.h>
-#include <sys/dsl_dataset.h> /* for dsl_dataset_block_freeable() */
-#include <sys/dsl_dir.h> /* for dsl_dir_tempreserve_*() */
+#include <sys/dsl_dataset.h>
+#include <sys/dsl_dir.h>
#include <sys/dsl_pool.h>
-#include <sys/zap_impl.h> /* for fzap_default_block_shift */
+#include <sys/zap_impl.h>
#include <sys/spa.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
@@ -71,10 +71,6 @@ dmu_tx_create_dd(dsl_dir_t *dd)
list_create(&tx->tx_callbacks, sizeof (dmu_tx_callback_t),
offsetof(dmu_tx_callback_t, dcb_node));
tx->tx_start = gethrtime();
-#ifdef DEBUG_DMU_TX
- refcount_create(&tx->tx_space_written);
- refcount_create(&tx->tx_space_freed);
-#endif
return (tx);
}
@@ -83,7 +79,6 @@ dmu_tx_create(objset_t *os)
{
dmu_tx_t *tx = dmu_tx_create_dd(os->os_dsl_dataset->ds_dir);
tx->tx_objset = os;
- tx->tx_lastsnap_txg = dsl_dataset_prev_snap_txg(os->os_dsl_dataset);
return (tx);
}
@@ -92,7 +87,7 @@ dmu_tx_create_assigned(struct dsl_pool *dp, uint64_t txg)
{
dmu_tx_t *tx = dmu_tx_create_dd(NULL);
- ASSERT3U(txg, <=, dp->dp_tx.tx_open_txg);
+ txg_verify(dp->dp_spa, txg);
tx->tx_pool = dp;
tx->tx_txg = txg;
tx->tx_anyobj = TRUE;
@@ -113,21 +108,14 @@ dmu_tx_private_ok(dmu_tx_t *tx)
}
static dmu_tx_hold_t *
-dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
- enum dmu_tx_hold_type type, uint64_t arg1, uint64_t arg2)
+dmu_tx_hold_dnode_impl(dmu_tx_t *tx, dnode_t *dn, enum dmu_tx_hold_type type,
+ uint64_t arg1, uint64_t arg2)
{
dmu_tx_hold_t *txh;
- dnode_t *dn = NULL;
- int err;
- if (object != DMU_NEW_OBJECT) {
- err = dnode_hold(os, object, tx, &dn);
- if (err) {
- tx->tx_err = err;
- return (NULL);
- }
-
- if (err == 0 && tx->tx_txg != 0) {
+ if (dn != NULL) {
+ (void) refcount_add(&dn->dn_holds, tx);
+ if (tx->tx_txg != 0) {
mutex_enter(&dn->dn_mtx);
/*
* dn->dn_assigned_txg == tx->tx_txg doesn't pose a
@@ -144,29 +132,76 @@ dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
txh = kmem_zalloc(sizeof (dmu_tx_hold_t), KM_SLEEP);
txh->txh_tx = tx;
txh->txh_dnode = dn;
-#ifdef DEBUG_DMU_TX
+ refcount_create(&txh->txh_space_towrite);
+ refcount_create(&txh->txh_memory_tohold);
txh->txh_type = type;
txh->txh_arg1 = arg1;
txh->txh_arg2 = arg2;
-#endif
list_insert_tail(&tx->tx_holds, txh);
return (txh);
}
+static dmu_tx_hold_t *
+dmu_tx_hold_object_impl(dmu_tx_t *tx, objset_t *os, uint64_t object,
+ enum dmu_tx_hold_type type, uint64_t arg1, uint64_t arg2)
+{
+ dnode_t *dn = NULL;
+ dmu_tx_hold_t *txh;
+ int err;
+
+ if (object != DMU_NEW_OBJECT) {
+ err = dnode_hold(os, object, FTAG, &dn);
+ if (err != 0) {
+ tx->tx_err = err;
+ return (NULL);
+ }
+ }
+ txh = dmu_tx_hold_dnode_impl(tx, dn, type, arg1, arg2);
+ if (dn != NULL)
+ dnode_rele(dn, FTAG);
+ return (txh);
+}
+
void
-dmu_tx_add_new_object(dmu_tx_t *tx, objset_t *os, uint64_t object)
+dmu_tx_add_new_object(dmu_tx_t *tx, dnode_t *dn)
{
/*
* If we're syncing, they can manipulate any object anyhow, and
* the hold on the dnode_t can cause problems.
*/
- if (!dmu_tx_is_syncing(tx)) {
- (void) dmu_tx_hold_object_impl(tx, os,
- object, THT_NEWOBJECT, 0, 0);
- }
+ if (!dmu_tx_is_syncing(tx))
+ (void) dmu_tx_hold_dnode_impl(tx, dn, THT_NEWOBJECT, 0, 0);
}
+/*
+ * This function reads specified data from disk. The specified data will
+ * be needed to perform the transaction -- i.e, it will be read after
+ * we do dmu_tx_assign(). There are two reasons that we read the data now
+ * (before dmu_tx_assign()):
+ *
+ * 1. Reading it now has potentially better performance. The transaction
+ * has not yet been assigned, so the TXG is not held open, and also the
+ * caller typically has less locks held when calling dmu_tx_hold_*() than
+ * after the transaction has been assigned. This reduces the lock (and txg)
+ * hold times, thus reducing lock contention.
+ *
+ * 2. It is easier for callers (primarily the ZPL) to handle i/o errors
+ * that are detected before they start making changes to the DMU state
+ * (i.e. now). Once the transaction has been assigned, and some DMU
+ * state has been changed, it can be difficult to recover from an i/o
+ * error (e.g. to undo the changes already made in memory at the DMU
+ * layer). Typically code to do so does not exist in the caller -- it
+ * assumes that the data has already been cached and thus i/o errors are
+ * not possible.
+ *
+ * It has been observed that the i/o initiated here can be a performance
+ * problem, and it appears to be optional, because we don't look at the
+ * data which is read. However, removing this read would only serve to
+ * move the work elsewhere (after the dmu_tx_assign()), where it may
+ * have a greater impact on performance (in addition to the impact on
+ * fault tolerance noted above).
+ */
static int
dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
{
@@ -183,243 +218,84 @@ dmu_tx_check_ioerr(zio_t *zio, dnode_t *dn, int level, uint64_t blkid)
return (err);
}
-static void
-dmu_tx_count_twig(dmu_tx_hold_t *txh, dnode_t *dn, dmu_buf_impl_t *db,
- int level, uint64_t blkid, boolean_t freeable, uint64_t *history)
-{
- objset_t *os = dn->dn_objset;
- dsl_dataset_t *ds = os->os_dsl_dataset;
- int epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- dmu_buf_impl_t *parent = NULL;
- blkptr_t *bp = NULL;
- uint64_t space;
-
- if (level >= dn->dn_nlevels || history[level] == blkid)
- return;
-
- history[level] = blkid;
-
- space = (level == 0) ? dn->dn_datablksz : (1ULL << dn->dn_indblkshift);
-
- if (db == NULL || db == dn->dn_dbuf) {
- ASSERT(level != 0);
- db = NULL;
- } else {
- ASSERT(DB_DNODE(db) == dn);
- ASSERT(db->db_level == level);
- ASSERT(db->db.db_size == space);
- ASSERT(db->db_blkid == blkid);
- bp = db->db_blkptr;
- parent = db->db_parent;
- }
-
- freeable = (bp && (freeable ||
- dsl_dataset_block_freeable(ds, bp, bp->blk_birth)));
-
- if (freeable)
- txh->txh_space_tooverwrite += space;
- else
- txh->txh_space_towrite += space;
- if (bp)
- txh->txh_space_tounref += bp_get_dsize(os->os_spa, bp);
-
- dmu_tx_count_twig(txh, dn, parent, level + 1,
- blkid >> epbs, freeable, history);
-}
-
/* ARGSUSED */
static void
dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
{
dnode_t *dn = txh->txh_dnode;
- uint64_t start, end, i;
- int min_bs, max_bs, min_ibs, max_ibs, epbs, bits;
int err = 0;
- int l;
if (len == 0)
return;
- min_bs = SPA_MINBLOCKSHIFT;
- max_bs = highbit64(txh->txh_tx->tx_objset->os_recordsize) - 1;
- min_ibs = DN_MIN_INDBLKSHIFT;
- max_ibs = DN_MAX_INDBLKSHIFT;
+ (void) refcount_add_many(&txh->txh_space_towrite, len, FTAG);
- if (dn) {
- uint64_t history[DN_MAX_LEVELS];
- int nlvls = dn->dn_nlevels;
- int delta;
-
- /*
- * For i/o error checking, read the first and last level-0
- * blocks (if they are not aligned), and all the level-1 blocks.
- */
- if (dn->dn_maxblkid == 0) {
- delta = dn->dn_datablksz;
- start = (off < dn->dn_datablksz) ? 0 : 1;
- end = (off+len <= dn->dn_datablksz) ? 0 : 1;
- if (start == 0 && (off > 0 || len < dn->dn_datablksz)) {
- err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
- if (err)
- goto out;
- delta -= off;
- }
- } else {
- zio_t *zio = zio_root(dn->dn_objset->os_spa,
- NULL, NULL, ZIO_FLAG_CANFAIL);
-
- /* first level-0 block */
- start = off >> dn->dn_datablkshift;
- if (P2PHASE(off, dn->dn_datablksz) ||
- len < dn->dn_datablksz) {
- err = dmu_tx_check_ioerr(zio, dn, 0, start);
- if (err)
- goto out;
- }
+ if (refcount_count(&txh->txh_space_towrite) > 2 * DMU_MAX_ACCESS)
+ err = SET_ERROR(EFBIG);
- /* last level-0 block */
- end = (off+len-1) >> dn->dn_datablkshift;
- if (end != start && end <= dn->dn_maxblkid &&
- P2PHASE(off+len, dn->dn_datablksz)) {
- err = dmu_tx_check_ioerr(zio, dn, 0, end);
- if (err)
- goto out;
- }
+ if (dn == NULL)
+ return;
- /* level-1 blocks */
- if (nlvls > 1) {
- int shft = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- for (i = (start>>shft)+1; i < end>>shft; i++) {
- err = dmu_tx_check_ioerr(zio, dn, 1, i);
- if (err)
- goto out;
- }
+ /*
+ * For i/o error checking, read the blocks that will be needed
+ * to perform the write: the first and last level-0 blocks (if
+ * they are not aligned, i.e. if they are partial-block writes),
+ * and all the level-1 blocks.
+ */
+ if (dn->dn_maxblkid == 0) {
+ if (off < dn->dn_datablksz &&
+ (off > 0 || len < dn->dn_datablksz)) {
+ err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
+ if (err != 0) {
+ txh->txh_tx->tx_err = err;
}
-
- err = zio_wait(zio);
- if (err)
- goto out;
- delta = P2NPHASE(off, dn->dn_datablksz);
- }
-
- min_ibs = max_ibs = dn->dn_indblkshift;
- if (dn->dn_maxblkid > 0) {
- /*
- * The blocksize can't change,
- * so we can make a more precise estimate.
- */
- ASSERT(dn->dn_datablkshift != 0);
- min_bs = max_bs = dn->dn_datablkshift;
- } else {
- /*
- * The blocksize can increase up to the recordsize,
- * or if it is already more than the recordsize,
- * up to the next power of 2.
- */
- min_bs = highbit64(dn->dn_datablksz - 1);
- max_bs = MAX(max_bs, highbit64(dn->dn_datablksz - 1));
}
+ } else {
+ zio_t *zio = zio_root(dn->dn_objset->os_spa,
+ NULL, NULL, ZIO_FLAG_CANFAIL);
- /*
- * If this write is not off the end of the file
- * we need to account for overwrites/unref.
- */
- if (start <= dn->dn_maxblkid) {
- for (l = 0; l < DN_MAX_LEVELS; l++)
- history[l] = -1ULL;
+ /* first level-0 block */
+ uint64_t start = off >> dn->dn_datablkshift;
+ if (P2PHASE(off, dn->dn_datablksz) || len < dn->dn_datablksz) {
+ err = dmu_tx_check_ioerr(zio, dn, 0, start);
+ if (err != 0) {
+ txh->txh_tx->tx_err = err;
+ }
}
- while (start <= dn->dn_maxblkid) {
- dmu_buf_impl_t *db;
-
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- err = dbuf_hold_impl(dn, 0, start, FALSE, FTAG, &db);
- rw_exit(&dn->dn_struct_rwlock);
- if (err) {
+ /* last level-0 block */
+ uint64_t end = (off + len - 1) >> dn->dn_datablkshift;
+ if (end != start && end <= dn->dn_maxblkid &&
+ P2PHASE(off + len, dn->dn_datablksz)) {
+ err = dmu_tx_check_ioerr(zio, dn, 0, end);
+ if (err != 0) {
txh->txh_tx->tx_err = err;
- return;
}
+ }
- dmu_tx_count_twig(txh, dn, db, 0, start, B_FALSE,
- history);
- dbuf_rele(db, FTAG);
- if (++start > end) {
- /*
- * Account for new indirects appearing
- * before this IO gets assigned into a txg.
- */
- bits = 64 - min_bs;
- epbs = min_ibs - SPA_BLKPTRSHIFT;
- for (bits -= epbs * (nlvls - 1);
- bits >= 0; bits -= epbs)
- txh->txh_fudge += 1ULL << max_ibs;
- goto out;
+ /* level-1 blocks */
+ if (dn->dn_nlevels > 1) {
+ int shft = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
+ for (uint64_t i = (start >> shft) + 1;
+ i < end >> shft; i++) {
+ err = dmu_tx_check_ioerr(zio, dn, 1, i);
+ if (err != 0) {
+ txh->txh_tx->tx_err = err;
+ }
}
- off += delta;
- if (len >= delta)
- len -= delta;
- delta = dn->dn_datablksz;
}
- }
-
- /*
- * 'end' is the last thing we will access, not one past.
- * This way we won't overflow when accessing the last byte.
- */
- start = P2ALIGN(off, 1ULL << max_bs);
- end = P2ROUNDUP(off + len, 1ULL << max_bs) - 1;
- txh->txh_space_towrite += end - start + 1;
-
- start >>= min_bs;
- end >>= min_bs;
- epbs = min_ibs - SPA_BLKPTRSHIFT;
-
- /*
- * The object contains at most 2^(64 - min_bs) blocks,
- * and each indirect level maps 2^epbs.
- */
- for (bits = 64 - min_bs; bits >= 0; bits -= epbs) {
- start >>= epbs;
- end >>= epbs;
- ASSERT3U(end, >=, start);
- txh->txh_space_towrite += (end - start + 1) << max_ibs;
- if (start != 0) {
- /*
- * We also need a new blkid=0 indirect block
- * to reference any existing file data.
- */
- txh->txh_space_towrite += 1ULL << max_ibs;
+ err = zio_wait(zio);
+ if (err != 0) {
+ txh->txh_tx->tx_err = err;
}
}
-
-out:
- if (txh->txh_space_towrite + txh->txh_space_tooverwrite >
- 2 * DMU_MAX_ACCESS)
- err = SET_ERROR(EFBIG);
-
- if (err)
- txh->txh_tx->tx_err = err;
}
static void
dmu_tx_count_dnode(dmu_tx_hold_t *txh)
{
- dnode_t *dn = txh->txh_dnode;
- dnode_t *mdn = DMU_META_DNODE(txh->txh_tx->tx_objset);
- uint64_t space = mdn->dn_datablksz +
- ((mdn->dn_nlevels-1) << mdn->dn_indblkshift);
-
- if (dn && dn->dn_dbuf->db_blkptr &&
- dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
- dn->dn_dbuf->db_blkptr, dn->dn_dbuf->db_blkptr->blk_birth)) {
- txh->txh_space_tooverwrite += space;
- txh->txh_space_tounref += space;
- } else {
- txh->txh_space_towrite += space;
- if (dn && dn->dn_dbuf->db_blkptr)
- txh->txh_space_tounref += space;
- }
+ (void) refcount_add_many(&txh->txh_space_towrite, DNODE_MIN_SIZE, FTAG);
}
void
@@ -427,208 +303,63 @@ dmu_tx_hold_write(dmu_tx_t *tx, uint64_t object, uint64_t off, int len)
{
dmu_tx_hold_t *txh;
- ASSERT(tx->tx_txg == 0);
- ASSERT(len <= DMU_MAX_ACCESS);
+ ASSERT0(tx->tx_txg);
+ ASSERT3U(len, <=, DMU_MAX_ACCESS);
ASSERT(len == 0 || UINT64_MAX - off >= len - 1);
txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
object, THT_WRITE, off, len);
- if (txh == NULL)
- return;
-
- dmu_tx_count_write(txh, off, len);
- dmu_tx_count_dnode(txh);
+ if (txh != NULL) {
+ dmu_tx_count_write(txh, off, len);
+ dmu_tx_count_dnode(txh);
+ }
}
-static void
-dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
+void
+dmu_tx_hold_write_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, int len)
{
- uint64_t blkid, nblks, lastblk;
- uint64_t space = 0, unref = 0, skipped = 0;
- dnode_t *dn = txh->txh_dnode;
- dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
- spa_t *spa = txh->txh_tx->tx_pool->dp_spa;
- int epbs;
- uint64_t l0span = 0, nl1blks = 0;
-
- if (dn->dn_nlevels == 0)
- return;
-
- /*
- * The struct_rwlock protects us against dn_nlevels
- * changing, in case (against all odds) we manage to dirty &
- * sync out the changes after we check for being dirty.
- * Also, dbuf_hold_impl() wants us to have the struct_rwlock.
- */
- rw_enter(&dn->dn_struct_rwlock, RW_READER);
- epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- if (dn->dn_maxblkid == 0) {
- if (off == 0 && len >= dn->dn_datablksz) {
- blkid = 0;
- nblks = 1;
- } else {
- rw_exit(&dn->dn_struct_rwlock);
- return;
- }
- } else {
- blkid = off >> dn->dn_datablkshift;
- nblks = (len + dn->dn_datablksz - 1) >> dn->dn_datablkshift;
-
- if (blkid > dn->dn_maxblkid) {
- rw_exit(&dn->dn_struct_rwlock);
- return;
- }
- if (blkid + nblks > dn->dn_maxblkid)
- nblks = dn->dn_maxblkid - blkid + 1;
-
- }
- l0span = nblks; /* save for later use to calc level > 1 overhead */
- if (dn->dn_nlevels == 1) {
- int i;
- for (i = 0; i < nblks; i++) {
- blkptr_t *bp = dn->dn_phys->dn_blkptr;
- ASSERT3U(blkid + i, <, dn->dn_nblkptr);
- bp += blkid + i;
- if (dsl_dataset_block_freeable(ds, bp, bp->blk_birth)) {
- dprintf_bp(bp, "can free old%s", "");
- space += bp_get_dsize(spa, bp);
- }
- unref += BP_GET_ASIZE(bp);
- }
- nl1blks = 1;
- nblks = 0;
- }
-
- lastblk = blkid + nblks - 1;
- while (nblks) {
- dmu_buf_impl_t *dbuf;
- uint64_t ibyte, new_blkid;
- int epb = 1 << epbs;
- int err, i, blkoff, tochk;
- blkptr_t *bp;
-
- ibyte = blkid << dn->dn_datablkshift;
- err = dnode_next_offset(dn,
- DNODE_FIND_HAVELOCK, &ibyte, 2, 1, 0);
- new_blkid = ibyte >> dn->dn_datablkshift;
- if (err == ESRCH) {
- skipped += (lastblk >> epbs) - (blkid >> epbs) + 1;
- break;
- }
- if (err) {
- txh->txh_tx->tx_err = err;
- break;
- }
- if (new_blkid > lastblk) {
- skipped += (lastblk >> epbs) - (blkid >> epbs) + 1;
- break;
- }
-
- if (new_blkid > blkid) {
- ASSERT((new_blkid >> epbs) > (blkid >> epbs));
- skipped += (new_blkid >> epbs) - (blkid >> epbs) - 1;
- nblks -= new_blkid - blkid;
- blkid = new_blkid;
- }
- blkoff = P2PHASE(blkid, epb);
- tochk = MIN(epb - blkoff, nblks);
-
- err = dbuf_hold_impl(dn, 1, blkid >> epbs, FALSE, FTAG, &dbuf);
- if (err) {
- txh->txh_tx->tx_err = err;
- break;
- }
-
- txh->txh_memory_tohold += dbuf->db.db_size;
-
- /*
- * We don't check memory_tohold against DMU_MAX_ACCESS because
- * memory_tohold is an over-estimation (especially the >L1
- * indirect blocks), so it could fail. Callers should have
- * already verified that they will not be holding too much
- * memory.
- */
-
- err = dbuf_read(dbuf, NULL, DB_RF_HAVESTRUCT | DB_RF_CANFAIL);
- if (err != 0) {
- txh->txh_tx->tx_err = err;
- dbuf_rele(dbuf, FTAG);
- break;
- }
-
- bp = dbuf->db.db_data;
- bp += blkoff;
-
- for (i = 0; i < tochk; i++) {
- if (dsl_dataset_block_freeable(ds, &bp[i],
- bp[i].blk_birth)) {
- dprintf_bp(&bp[i], "can free old%s", "");
- space += bp_get_dsize(spa, &bp[i]);
- }
- unref += BP_GET_ASIZE(bp);
- }
- dbuf_rele(dbuf, FTAG);
-
- ++nl1blks;
- blkid += tochk;
- nblks -= tochk;
- }
- rw_exit(&dn->dn_struct_rwlock);
-
- /*
- * Add in memory requirements of higher-level indirects.
- * This assumes a worst-possible scenario for dn_nlevels and a
- * worst-possible distribution of l1-blocks over the region to free.
- */
- {
- uint64_t blkcnt = 1 + ((l0span >> epbs) >> epbs);
- int level = 2;
- /*
- * Here we don't use DN_MAX_LEVEL, but calculate it with the
- * given datablkshift and indblkshift. This makes the
- * difference between 19 and 8 on large files.
- */
- int maxlevel = 2 + (DN_MAX_OFFSET_SHIFT - dn->dn_datablkshift) /
- (dn->dn_indblkshift - SPA_BLKPTRSHIFT);
+ dmu_tx_hold_t *txh;
- while (level++ < maxlevel) {
- txh->txh_memory_tohold += MAX(MIN(blkcnt, nl1blks), 1)
- << dn->dn_indblkshift;
- blkcnt = 1 + (blkcnt >> epbs);
- }
- }
+ ASSERT0(tx->tx_txg);
+ ASSERT3U(len, <=, DMU_MAX_ACCESS);
+ ASSERT(len == 0 || UINT64_MAX - off >= len - 1);
- /* account for new level 1 indirect blocks that might show up */
- if (skipped > 0) {
- txh->txh_fudge += skipped << dn->dn_indblkshift;
- skipped = MIN(skipped, DMU_MAX_DELETEBLKCNT >> epbs);
- txh->txh_memory_tohold += skipped << dn->dn_indblkshift;
+ txh = dmu_tx_hold_dnode_impl(tx, dn, THT_WRITE, off, len);
+ if (txh != NULL) {
+ dmu_tx_count_write(txh, off, len);
+ dmu_tx_count_dnode(txh);
}
- txh->txh_space_tofree += space;
- txh->txh_space_tounref += unref;
}
+/*
+ * This function marks the transaction as being a "net free". The end
+ * result is that refquotas will be disabled for this transaction, and
+ * this transaction will be able to use half of the pool space overhead
+ * (see dsl_pool_adjustedsize()). Therefore this function should only
+ * be called for transactions that we expect will not cause a net increase
+ * in the amount of space used (but it's OK if that is occasionally not true).
+ */
void
-dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
+dmu_tx_mark_netfree(dmu_tx_t *tx)
{
- dmu_tx_hold_t *txh;
- dnode_t *dn;
+ tx->tx_netfree = B_TRUE;
+}
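A hedged sketch of the intended caller pattern, in the spirit of dmu_free_long_object(): a transaction that only frees space marks itself net-free before assignment, so a dataset sitting at its refquota can still delete (identifiers below are placeholders):

static int
example_free_object(objset_t *os, uint64_t object)
{
	dmu_tx_t *tx = dmu_tx_create(os);
	int err;

	dmu_tx_hold_bonus(tx, object);
	dmu_tx_hold_free(tx, object, 0, DMU_OBJECT_END);
	dmu_tx_mark_netfree(tx);	/* expect a net decrease in usage */
	err = dmu_tx_assign(tx, TXG_WAIT);
	if (err != 0) {
		dmu_tx_abort(tx);
		return (err);
	}
	err = dmu_object_free(os, object, tx);
	dmu_tx_commit(tx);
	return (err);
}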
+
+static void
+dmu_tx_hold_free_impl(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
+{
+ dmu_tx_t *tx = txh->txh_tx;
+ dnode_t *dn = txh->txh_dnode;
int err;
- zio_t *zio;
ASSERT(tx->tx_txg == 0);
- txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
- object, THT_FREE, off, len);
- if (txh == NULL)
- return;
- dn = txh->txh_dnode;
dmu_tx_count_dnode(txh);
- if (off >= (dn->dn_maxblkid+1) * dn->dn_datablksz)
+ if (off >= (dn->dn_maxblkid + 1) * dn->dn_datablksz)
return;
if (len == DMU_OBJECT_END)
- len = (dn->dn_maxblkid+1) * dn->dn_datablksz - off;
+ len = (dn->dn_maxblkid + 1) * dn->dn_datablksz - off;
dmu_tx_count_dnode(txh);
@@ -650,7 +381,7 @@ dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
dmu_tx_count_write(txh, off, 1);
/* last block will be modified if it is not aligned */
if (!IS_P2ALIGNED(off + len, 1 << dn->dn_datablkshift))
- dmu_tx_count_write(txh, off+len, 1);
+ dmu_tx_count_write(txh, off + len, 1);
}
/*
@@ -673,7 +404,7 @@ dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
if (dn->dn_datablkshift == 0)
start = end = 0;
- zio = zio_root(tx->tx_pool->dp_spa,
+ zio_t *zio = zio_root(tx->tx_pool->dp_spa,
NULL, NULL, ZIO_FLAG_CANFAIL);
for (i = start; i <= end; i++) {
uint64_t ibyte = i << shift;
@@ -681,113 +412,125 @@ dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
i = ibyte >> shift;
if (err == ESRCH || i > end)
break;
- if (err) {
+ if (err != 0) {
tx->tx_err = err;
+ (void) zio_wait(zio);
return;
}
+ (void) refcount_add_many(&txh->txh_memory_tohold,
+ 1 << dn->dn_indblkshift, FTAG);
+
err = dmu_tx_check_ioerr(zio, dn, 1, i);
- if (err) {
+ if (err != 0) {
tx->tx_err = err;
+ (void) zio_wait(zio);
return;
}
}
err = zio_wait(zio);
- if (err) {
+ if (err != 0) {
tx->tx_err = err;
return;
}
}
+}
+
+void
+dmu_tx_hold_free(dmu_tx_t *tx, uint64_t object, uint64_t off, uint64_t len)
+{
+ dmu_tx_hold_t *txh;
- dmu_tx_count_free(txh, off, len);
+ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
+ object, THT_FREE, off, len);
+ if (txh != NULL)
+ (void) dmu_tx_hold_free_impl(txh, off, len);
}
void
-dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
+dmu_tx_hold_free_by_dnode(dmu_tx_t *tx, dnode_t *dn, uint64_t off, uint64_t len)
{
dmu_tx_hold_t *txh;
- dnode_t *dn;
- dsl_dataset_phys_t *ds_phys;
- uint64_t nblocks;
- int epbs, err;
- ASSERT(tx->tx_txg == 0);
+ txh = dmu_tx_hold_dnode_impl(tx, dn, THT_FREE, off, len);
+ if (txh != NULL)
+ (void) dmu_tx_hold_free_impl(txh, off, len);
+}
- txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
- object, THT_ZAP, add, (uintptr_t)name);
- if (txh == NULL)
- return;
- dn = txh->txh_dnode;
+static void
+dmu_tx_hold_zap_impl(dmu_tx_hold_t *txh, const char *name)
+{
+ dmu_tx_t *tx = txh->txh_tx;
+ dnode_t *dn = txh->txh_dnode;
+ int err;
+
+ ASSERT(tx->tx_txg == 0);
dmu_tx_count_dnode(txh);
- if (dn == NULL) {
- /*
- * We will be able to fit a new object's entries into one leaf
- * block. So there will be at most 2 blocks total,
- * including the header block.
- */
- dmu_tx_count_write(txh, 0, 2 << fzap_default_block_shift);
+ /*
+ * Modifying an almost-full microzap is close to the worst case (128KB).
+ *
+ * If it is a fat zap, the worst case would be 7*16KB=112KB:
+ * - 3 blocks overwritten: target leaf, ptrtbl block, header block
+ * - 4 new blocks written if adding:
+ * - 2 blocks for possibly split leaves,
+ * - 2 grown ptrtbl blocks
+ */
+ (void) refcount_add_many(&txh->txh_space_towrite,
+ MZAP_MAX_BLKSZ, FTAG);
+
+ if (dn == NULL)
return;
- }
ASSERT3U(DMU_OT_BYTESWAP(dn->dn_type), ==, DMU_BSWAP_ZAP);
- if (dn->dn_maxblkid == 0 && !add) {
- blkptr_t *bp;
-
+ if (dn->dn_maxblkid == 0 || name == NULL) {
/*
- * If there is only one block (i.e. this is a micro-zap)
- * and we are not adding anything, the accounting is simple.
+ * This is a microzap (only one block), or we don't know
+ * the name. Check the first block for i/o errors.
*/
err = dmu_tx_check_ioerr(NULL, dn, 0, 0);
- if (err) {
+ if (err != 0) {
tx->tx_err = err;
- return;
}
-
- /*
- * Use max block size here, since we don't know how much
- * the size will change between now and the dbuf dirty call.
- */
- bp = &dn->dn_phys->dn_blkptr[0];
- if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
- bp, bp->blk_birth))
- txh->txh_space_tooverwrite += MZAP_MAX_BLKSZ;
- else
- txh->txh_space_towrite += MZAP_MAX_BLKSZ;
- if (!BP_IS_HOLE(bp))
- txh->txh_space_tounref += MZAP_MAX_BLKSZ;
- return;
- }
-
- if (dn->dn_maxblkid > 0 && name) {
+ } else {
/*
- * access the name in this fat-zap so that we'll check
- * for i/o errors to the leaf blocks, etc.
+ * Access the name so that we'll check for i/o errors to
+ * the leaf blocks, etc. We ignore ENOENT, as this name
+ * may not yet exist.
*/
- err = zap_lookup(dn->dn_objset, dn->dn_object, name,
- 8, 0, NULL);
- if (err == EIO) {
+ err = zap_lookup_by_dnode(dn, name, 8, 0, NULL);
+ if (err == EIO || err == ECKSUM || err == ENXIO) {
tx->tx_err = err;
- return;
}
}
+}
+
+void
+dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
+{
+ dmu_tx_hold_t *txh;
- err = zap_count_write(dn->dn_objset, dn->dn_object, name, add,
- &txh->txh_space_towrite, &txh->txh_space_tooverwrite);
+ ASSERT0(tx->tx_txg);
- /*
- * If the modified blocks are scattered to the four winds,
- * we'll have to modify an indirect twig for each.
- */
- epbs = dn->dn_indblkshift - SPA_BLKPTRSHIFT;
- ds_phys = dsl_dataset_phys(dn->dn_objset->os_dsl_dataset);
- for (nblocks = dn->dn_maxblkid >> epbs; nblocks != 0; nblocks >>= epbs)
- if (ds_phys->ds_prev_snap_obj)
- txh->txh_space_towrite += 3 << dn->dn_indblkshift;
- else
- txh->txh_space_tooverwrite += 3 << dn->dn_indblkshift;
+ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
+ object, THT_ZAP, add, (uintptr_t)name);
+ if (txh != NULL)
+ dmu_tx_hold_zap_impl(txh, name);
+}
+
+void
+dmu_tx_hold_zap_by_dnode(dmu_tx_t *tx, dnode_t *dn, int add, const char *name)
+{
+ dmu_tx_hold_t *txh;
+
+ ASSERT0(tx->tx_txg);
+ ASSERT(dn != NULL);
+
+ txh = dmu_tx_hold_dnode_impl(tx, dn, THT_ZAP, add, (uintptr_t)name);
+ if (txh != NULL)
+ dmu_tx_hold_zap_impl(txh, name);
}
void
@@ -804,55 +547,39 @@ dmu_tx_hold_bonus(dmu_tx_t *tx, uint64_t object)
}
void
-dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space)
+dmu_tx_hold_bonus_by_dnode(dmu_tx_t *tx, dnode_t *dn)
{
dmu_tx_hold_t *txh;
- ASSERT(tx->tx_txg == 0);
+ ASSERT0(tx->tx_txg);
- txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
- DMU_NEW_OBJECT, THT_SPACE, space, 0);
+ txh = dmu_tx_hold_dnode_impl(tx, dn, THT_BONUS, 0, 0);
if (txh)
- txh->txh_space_towrite += space;
+ dmu_tx_count_dnode(txh);
}
-int
-dmu_tx_holds(dmu_tx_t *tx, uint64_t object)
+void
+dmu_tx_hold_space(dmu_tx_t *tx, uint64_t space)
{
dmu_tx_hold_t *txh;
- int holds = 0;
-
- /*
- * By asserting that the tx is assigned, we're counting the
- * number of dn_tx_holds, which is the same as the number of
- * dn_holds. Otherwise, we'd be counting dn_holds, but
- * dn_tx_holds could be 0.
- */
- ASSERT(tx->tx_txg != 0);
- /* if (tx->tx_anyobj == TRUE) */
- /* return (0); */
-
- for (txh = list_head(&tx->tx_holds); txh;
- txh = list_next(&tx->tx_holds, txh)) {
- if (txh->txh_dnode && txh->txh_dnode->dn_object == object)
- holds++;
- }
+ ASSERT(tx->tx_txg == 0);
- return (holds);
+ txh = dmu_tx_hold_object_impl(tx, tx->tx_objset,
+ DMU_NEW_OBJECT, THT_SPACE, space, 0);
+ if (txh)
+ (void) refcount_add_many(&txh->txh_space_towrite, space, FTAG);
}
-#ifdef DEBUG_DMU_TX
+#ifdef ZFS_DEBUG
void
dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
{
- dmu_tx_hold_t *txh;
- int match_object = FALSE, match_offset = FALSE;
- dnode_t *dn;
+ boolean_t match_object = B_FALSE;
+ boolean_t match_offset = B_FALSE;
DB_DNODE_ENTER(db);
- dn = DB_DNODE(db);
- ASSERT(dn != NULL);
+ dnode_t *dn = DB_DNODE(db);
ASSERT(tx->tx_txg != 0);
ASSERT(tx->tx_objset == NULL || dn->dn_objset == tx->tx_objset);
ASSERT3U(dn->dn_object, ==, db->db.db_object);
@@ -868,7 +595,7 @@ dmu_tx_dirty_buf(dmu_tx_t *tx, dmu_buf_impl_t *db)
return;
}
- for (txh = list_head(&tx->tx_holds); txh;
+ for (dmu_tx_hold_t *txh = list_head(&tx->tx_holds); txh != NULL;
txh = list_next(&tx->tx_holds, txh)) {
ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
if (txh->txh_dnode == dn && txh->txh_type != THT_NEWOBJECT)
@@ -1087,13 +814,49 @@ dmu_tx_delay(dmu_tx_t *tx, uint64_t dirty)
zfs_sleep_until(wakeup);
}
+/*
+ * This routine attempts to assign the transaction to a transaction group.
+ * To do so, we must determine if there is sufficient free space on disk.
+ *
+ * If this is a "netfree" transaction (i.e. we called dmu_tx_mark_netfree()
+ * on it), then it is assumed that there is sufficient free space,
+ * unless there's insufficient slop space in the pool (see the comment
+ * above spa_slop_shift in spa_misc.c).
+ *
+ * If it is not a "netfree" transaction, then if the data already on disk
+ * is over the allowed usage (e.g. quota), this will fail with EDQUOT or
+ * ENOSPC. Otherwise, if the current rough estimate of pending changes,
+ * plus the rough estimate of this transaction's changes, may exceed the
+ * allowed usage, then this will fail with ERESTART, which will cause the
+ * caller to wait for the pending changes to be written to disk (by waiting
+ * for the next TXG to open), and then check the space usage again.
+ *
+ * The rough estimate of pending changes comprises the sum of:
+ *
+ * - this transaction's holds' txh_space_towrite
+ *
+ * - dd_tempreserved[], which is the sum of in-flight transactions'
+ * holds' txh_space_towrite (i.e. those transactions that have called
+ * dmu_tx_assign() but not yet called dmu_tx_commit()).
+ *
+ * - dd_space_towrite[], which is the amount of space used by dirtied dbufs.
+ *
+ * Note that all of these values are inflated by spa_get_worst_case_asize(),
+ * which means that we may get ERESTART well before we are actually in danger
+ * of running out of space, but this also mitigates any small inaccuracies
+ * in the rough estimate (e.g. txh_space_towrite doesn't take into account
+ * indirect blocks, and dd_space_towrite[] doesn't take into account changes
+ * to the MOS).
+ *
+ * Note that due to this algorithm, it is possible to exceed the allowed
+ * usage by one transaction. Also, as we approach the allowed usage,
+ * we will allow a very limited amount of changes into each TXG, thus
+ * decreasing performance.
+ */
static int
dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
{
- dmu_tx_hold_t *txh;
spa_t *spa = tx->tx_pool->dp_spa;
- uint64_t memory, asize, fsize, usize;
- uint64_t towrite, tofree, tooverwrite, tounref, tohold, fudge;
ASSERT0(tx->tx_txg);
@@ -1137,8 +900,9 @@ dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
* dmu_tx_unassign() logic.
*/
- towrite = tofree = tooverwrite = tounref = tohold = fudge = 0;
- for (txh = list_head(&tx->tx_holds); txh;
+ uint64_t towrite = 0;
+ uint64_t tohold = 0;
+ for (dmu_tx_hold_t *txh = list_head(&tx->tx_holds); txh != NULL;
txh = list_next(&tx->tx_holds, txh)) {
dnode_t *dn = txh->txh_dnode;
if (dn != NULL) {
@@ -1155,51 +919,19 @@ dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
(void) refcount_add(&dn->dn_tx_holds, tx);
mutex_exit(&dn->dn_mtx);
}
- towrite += txh->txh_space_towrite;
- tofree += txh->txh_space_tofree;
- tooverwrite += txh->txh_space_tooverwrite;
- tounref += txh->txh_space_tounref;
- tohold += txh->txh_memory_tohold;
- fudge += txh->txh_fudge;
- }
-
- /*
- * If a snapshot has been taken since we made our estimates,
- * assume that we won't be able to free or overwrite anything.
- */
- if (tx->tx_objset &&
- dsl_dataset_prev_snap_txg(tx->tx_objset->os_dsl_dataset) >
- tx->tx_lastsnap_txg) {
- towrite += tooverwrite;
- tooverwrite = tofree = 0;
+ towrite += refcount_count(&txh->txh_space_towrite);
+ tohold += refcount_count(&txh->txh_memory_tohold);
}
/* needed allocation: worst-case estimate of write space */
- asize = spa_get_asize(tx->tx_pool->dp_spa, towrite + tooverwrite);
- /* freed space estimate: worst-case overwrite + free estimate */
- fsize = spa_get_asize(tx->tx_pool->dp_spa, tooverwrite) + tofree;
- /* convert unrefd space to worst-case estimate */
- usize = spa_get_asize(tx->tx_pool->dp_spa, tounref);
+ uint64_t asize = spa_get_worst_case_asize(tx->tx_pool->dp_spa, towrite);
/* calculate memory footprint estimate */
- memory = towrite + tooverwrite + tohold;
-
-#ifdef DEBUG_DMU_TX
- /*
- * Add in 'tohold' to account for our dirty holds on this memory
- * XXX - the "fudge" factor is to account for skipped blocks that
- * we missed because dnode_next_offset() misses in-core-only blocks.
- */
- tx->tx_space_towrite = asize +
- spa_get_asize(tx->tx_pool->dp_spa, tohold + fudge);
- tx->tx_space_tofree = tofree;
- tx->tx_space_tooverwrite = tooverwrite;
- tx->tx_space_tounref = tounref;
-#endif
+ uint64_t memory = towrite + tohold;
- if (tx->tx_dir && asize != 0) {
+ if (tx->tx_dir != NULL && asize != 0) {
int err = dsl_dir_tempreserve_space(tx->tx_dir, memory,
- asize, fsize, usize, &tx->tx_tempreserve_cookie, tx);
- if (err)
+ asize, tx->tx_netfree, &tx->tx_tempreserve_cookie, tx);
+ if (err != 0)
return (err);
}
@@ -1211,8 +943,6 @@ dmu_tx_try_assign(dmu_tx_t *tx, txg_how_t txg_how)
static void
dmu_tx_unassign(dmu_tx_t *tx)
{
- dmu_tx_hold_t *txh;
-
if (tx->tx_txg == 0)
return;
@@ -1222,7 +952,8 @@ dmu_tx_unassign(dmu_tx_t *tx)
* Walk the transaction's hold list, removing the hold on the
* associated dnode, and notifying waiters if the refcount drops to 0.
*/
- for (txh = list_head(&tx->tx_holds); txh != tx->tx_needassign_txh;
+ for (dmu_tx_hold_t *txh = list_head(&tx->tx_holds);
+ txh && txh != tx->tx_needassign_txh;
txh = list_next(&tx->tx_holds, txh)) {
dnode_t *dn = txh->txh_dnode;
@@ -1357,21 +1088,27 @@ dmu_tx_wait(dmu_tx_t *tx)
spa_tx_assign_add_nsecs(spa, gethrtime() - before);
}
-void
-dmu_tx_willuse_space(dmu_tx_t *tx, int64_t delta)
+static void
+dmu_tx_destroy(dmu_tx_t *tx)
{
-#ifdef DEBUG_DMU_TX
- if (tx->tx_dir == NULL || delta == 0)
- return;
+ dmu_tx_hold_t *txh;
- if (delta > 0) {
- ASSERT3U(refcount_count(&tx->tx_space_written) + delta, <=,
- tx->tx_space_towrite);
- (void) refcount_add_many(&tx->tx_space_written, delta, NULL);
- } else {
- (void) refcount_add_many(&tx->tx_space_freed, -delta, NULL);
+ while ((txh = list_head(&tx->tx_holds)) != NULL) {
+ dnode_t *dn = txh->txh_dnode;
+
+ list_remove(&tx->tx_holds, txh);
+ refcount_destroy_many(&txh->txh_space_towrite,
+ refcount_count(&txh->txh_space_towrite));
+ refcount_destroy_many(&txh->txh_memory_tohold,
+ refcount_count(&txh->txh_memory_tohold));
+ kmem_free(txh, sizeof (dmu_tx_hold_t));
+ if (dn != NULL)
+ dnode_rele(dn, tx);
}
-#endif
+
+ list_destroy(&tx->tx_callbacks);
+ list_destroy(&tx->tx_holds);
+ kmem_free(tx, sizeof (dmu_tx_t));
}
void
@@ -1385,13 +1122,13 @@ dmu_tx_commit(dmu_tx_t *tx)
* Go through the transaction's hold list and remove holds on
* associated dnodes, notifying waiters if no holds remain.
*/
- while ((txh = list_head(&tx->tx_holds))) {
+ for (txh = list_head(&tx->tx_holds); txh != NULL;
+ txh = list_next(&tx->tx_holds, txh)) {
dnode_t *dn = txh->txh_dnode;
- list_remove(&tx->tx_holds, txh);
- kmem_free(txh, sizeof (dmu_tx_hold_t));
if (dn == NULL)
continue;
+
mutex_enter(&dn->dn_mtx);
ASSERT3U(dn->dn_assigned_txg, ==, tx->tx_txg);
@@ -1400,7 +1137,6 @@ dmu_tx_commit(dmu_tx_t *tx)
cv_broadcast(&dn->dn_notxholds);
}
mutex_exit(&dn->dn_mtx);
- dnode_rele(dn, tx);
}
if (tx->tx_tempreserve_cookie)
@@ -1412,51 +1148,21 @@ dmu_tx_commit(dmu_tx_t *tx)
if (tx->tx_anyobj == FALSE)
txg_rele_to_sync(&tx->tx_txgh);
- list_destroy(&tx->tx_callbacks);
- list_destroy(&tx->tx_holds);
-#ifdef DEBUG_DMU_TX
- dprintf("towrite=%llu written=%llu tofree=%llu freed=%llu\n",
- tx->tx_space_towrite, refcount_count(&tx->tx_space_written),
- tx->tx_space_tofree, refcount_count(&tx->tx_space_freed));
- refcount_destroy_many(&tx->tx_space_written,
- refcount_count(&tx->tx_space_written));
- refcount_destroy_many(&tx->tx_space_freed,
- refcount_count(&tx->tx_space_freed));
-#endif
- kmem_free(tx, sizeof (dmu_tx_t));
+ dmu_tx_destroy(tx);
}
void
dmu_tx_abort(dmu_tx_t *tx)
{
- dmu_tx_hold_t *txh;
-
ASSERT(tx->tx_txg == 0);
- while ((txh = list_head(&tx->tx_holds))) {
- dnode_t *dn = txh->txh_dnode;
-
- list_remove(&tx->tx_holds, txh);
- kmem_free(txh, sizeof (dmu_tx_hold_t));
- if (dn != NULL)
- dnode_rele(dn, tx);
- }
-
/*
* Call any registered callbacks with an error code.
*/
if (!list_is_empty(&tx->tx_callbacks))
dmu_tx_do_callbacks(&tx->tx_callbacks, ECANCELED);
- list_destroy(&tx->tx_callbacks);
- list_destroy(&tx->tx_holds);
-#ifdef DEBUG_DMU_TX
- refcount_destroy_many(&tx->tx_space_written,
- refcount_count(&tx->tx_space_written));
- refcount_destroy_many(&tx->tx_space_freed,
- refcount_count(&tx->tx_space_freed));
-#endif
- kmem_free(tx, sizeof (dmu_tx_t));
+ dmu_tx_destroy(tx);
}
uint64_t
@@ -1494,7 +1200,7 @@ dmu_tx_do_callbacks(list_t *cb_list, int error)
{
dmu_tx_callback_t *dcb;
- while ((dcb = list_head(cb_list))) {
+ while ((dcb = list_head(cb_list)) != NULL) {
list_remove(cb_list, dcb);
dcb->dcb_func(dcb->dcb_data, error);
kmem_free(dcb, sizeof (dmu_tx_callback_t));
@@ -1518,12 +1224,10 @@ dmu_tx_do_callbacks(list_t *cb_list, int error)
static void
dmu_tx_sa_registration_hold(sa_os_t *sa, dmu_tx_t *tx)
{
- int i;
-
if (!sa->sa_need_attr_registration)
return;
- for (i = 0; i != sa->sa_num_attrs; i++) {
+ for (int i = 0; i != sa->sa_num_attrs; i++) {
if (!sa->sa_attr_table[i].sa_registered) {
if (sa->sa_reg_attr_obj)
dmu_tx_hold_zap(tx, sa->sa_reg_attr_obj,
@@ -1535,38 +1239,16 @@ dmu_tx_sa_registration_hold(sa_os_t *sa, dmu_tx_t *tx)
}
}
-
void
dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object)
{
- dnode_t *dn;
dmu_tx_hold_t *txh;
txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object,
THT_SPILL, 0, 0);
- if (txh == NULL)
- return;
-
- dn = txh->txh_dnode;
-
- if (dn == NULL)
- return;
-
- /* If blkptr doesn't exist then add space to towrite */
- if (!(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) {
- txh->txh_space_towrite += SPA_OLD_MAXBLOCKSIZE;
- } else {
- blkptr_t *bp;
-
- bp = &dn->dn_phys->dn_spill;
- if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
- bp, bp->blk_birth))
- txh->txh_space_tooverwrite += SPA_OLD_MAXBLOCKSIZE;
- else
- txh->txh_space_towrite += SPA_OLD_MAXBLOCKSIZE;
- if (!BP_IS_HOLE(bp))
- txh->txh_space_tounref += SPA_OLD_MAXBLOCKSIZE;
- }
+ if (txh != NULL)
+ (void) refcount_add_many(&txh->txh_space_towrite,
+ SPA_OLD_MAXBLOCKSIZE, FTAG);
}
void
@@ -1579,9 +1261,9 @@ dmu_tx_hold_sa_create(dmu_tx_t *tx, int attrsize)
if (tx->tx_objset->os_sa->sa_master_obj == 0)
return;
- if (tx->tx_objset->os_sa->sa_layout_attr_obj)
+ if (tx->tx_objset->os_sa->sa_layout_attr_obj) {
dmu_tx_hold_zap(tx, sa->sa_layout_attr_obj, B_TRUE, NULL);
- else {
+ } else {
dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_LAYOUTS);
dmu_tx_hold_zap(tx, sa->sa_master_obj, B_TRUE, SA_REGISTRY);
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, B_TRUE, NULL);
@@ -1590,7 +1272,7 @@ dmu_tx_hold_sa_create(dmu_tx_t *tx, int attrsize)
dmu_tx_sa_registration_hold(sa, tx);
- if (attrsize <= DN_MAX_BONUSLEN && !sa->sa_force_spill)
+ if (attrsize <= DN_OLD_MAX_BONUSLEN && !sa->sa_force_spill)
return;
(void) dmu_tx_hold_object_impl(tx, tx->tx_objset, DMU_NEW_OBJECT,
@@ -1676,13 +1358,18 @@ dmu_tx_fini(void)
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(dmu_tx_create);
EXPORT_SYMBOL(dmu_tx_hold_write);
+EXPORT_SYMBOL(dmu_tx_hold_write_by_dnode);
EXPORT_SYMBOL(dmu_tx_hold_free);
+EXPORT_SYMBOL(dmu_tx_hold_free_by_dnode);
EXPORT_SYMBOL(dmu_tx_hold_zap);
+EXPORT_SYMBOL(dmu_tx_hold_zap_by_dnode);
EXPORT_SYMBOL(dmu_tx_hold_bonus);
+EXPORT_SYMBOL(dmu_tx_hold_bonus_by_dnode);
EXPORT_SYMBOL(dmu_tx_abort);
EXPORT_SYMBOL(dmu_tx_assign);
EXPORT_SYMBOL(dmu_tx_wait);
EXPORT_SYMBOL(dmu_tx_commit);
+EXPORT_SYMBOL(dmu_tx_mark_netfree);
EXPORT_SYMBOL(dmu_tx_get_txg);
EXPORT_SYMBOL(dmu_tx_callback_register);
EXPORT_SYMBOL(dmu_tx_do_callbacks);
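For the ERESTART contract spelled out in the dmu_tx_try_assign() comment above, this is the retry loop a TXG_NOWAIT caller is expected to run; a sketch only, with example_write and its arguments invented:

static int
example_write(objset_t *os, uint64_t object, uint64_t off, int len)
{
	dmu_tx_t *tx;
	int err;

again:
	tx = dmu_tx_create(os);
	dmu_tx_hold_write(tx, object, off, len);
	err = dmu_tx_assign(tx, TXG_NOWAIT);
	if (err != 0) {
		if (err == ERESTART) {
			/* pool is tight: wait for dirty data to sync ... */
			dmu_tx_wait(tx);
			/* ... then drop this tx and re-estimate from scratch */
			dmu_tx_abort(tx);
			goto again;
		}
		dmu_tx_abort(tx);	/* hard failure, e.g. EDQUOT/ENOSPC */
		return (err);
	}
	/* ... dirty the held range here, e.g. via dmu_write() ... */
	dmu_tx_commit(tx);
	return (0);
}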
diff --git a/zfs/module/zfs/dmu_zfetch.c b/zfs/module/zfs/dmu_zfetch.c
index 8ff2f0509787..1bf5c4e34d68 100644
--- a/zfs/module/zfs/dmu_zfetch.c
+++ b/zfs/module/zfs/dmu_zfetch.c
@@ -24,7 +24,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -36,209 +36,45 @@
#include <sys/kstat.h>
/*
- * I'm against tune-ables, but these should probably exist as tweakable globals
- * until we can get this working the way we want it to.
+ * This tunable disables predictive prefetch. Note that it leaves "prescient"
+ * prefetch (e.g. prefetch for zfs send) intact. Unlike predictive prefetch,
+ * prescient prefetch never issues i/os that end up not being needed,
+ * so it can't hurt performance.
*/
-int zfs_prefetch_disable = 0;
+int zfs_prefetch_disable = B_FALSE;
/* max # of streams per zfetch */
unsigned int zfetch_max_streams = 8;
/* min time before stream reclaim */
unsigned int zfetch_min_sec_reap = 2;
-/* max number of blocks to fetch at a time */
-unsigned int zfetch_block_cap = 256;
-/* number of bytes in a array_read at which we stop prefetching (1Mb) */
+/* max bytes to prefetch per stream (default 8MB) */
+unsigned int zfetch_max_distance = 8 * 1024 * 1024;
+/* max bytes to prefetch indirects for per stream (default 64MB) */
+unsigned int zfetch_max_idistance = 64 * 1024 * 1024;
+/* max number of bytes in an array_read in which we allow prefetching (1MB) */
unsigned long zfetch_array_rd_sz = 1024 * 1024;
-/* forward decls for static routines */
-static boolean_t dmu_zfetch_colinear(zfetch_t *, zstream_t *);
-static void dmu_zfetch_dofetch(zfetch_t *, zstream_t *);
-static uint64_t dmu_zfetch_fetch(dnode_t *, uint64_t, uint64_t);
-static uint64_t dmu_zfetch_fetchsz(dnode_t *, uint64_t, uint64_t);
-static boolean_t dmu_zfetch_find(zfetch_t *, zstream_t *, int);
-static int dmu_zfetch_stream_insert(zfetch_t *, zstream_t *);
-static zstream_t *dmu_zfetch_stream_reclaim(zfetch_t *);
-static void dmu_zfetch_stream_remove(zfetch_t *, zstream_t *);
-static int dmu_zfetch_streams_equal(zstream_t *, zstream_t *);
-
typedef struct zfetch_stats {
kstat_named_t zfetchstat_hits;
kstat_named_t zfetchstat_misses;
- kstat_named_t zfetchstat_colinear_hits;
- kstat_named_t zfetchstat_colinear_misses;
- kstat_named_t zfetchstat_stride_hits;
- kstat_named_t zfetchstat_stride_misses;
- kstat_named_t zfetchstat_reclaim_successes;
- kstat_named_t zfetchstat_reclaim_failures;
- kstat_named_t zfetchstat_stream_resets;
- kstat_named_t zfetchstat_stream_noresets;
- kstat_named_t zfetchstat_bogus_streams;
+ kstat_named_t zfetchstat_max_streams;
} zfetch_stats_t;
static zfetch_stats_t zfetch_stats = {
{ "hits", KSTAT_DATA_UINT64 },
{ "misses", KSTAT_DATA_UINT64 },
- { "colinear_hits", KSTAT_DATA_UINT64 },
- { "colinear_misses", KSTAT_DATA_UINT64 },
- { "stride_hits", KSTAT_DATA_UINT64 },
- { "stride_misses", KSTAT_DATA_UINT64 },
- { "reclaim_successes", KSTAT_DATA_UINT64 },
- { "reclaim_failures", KSTAT_DATA_UINT64 },
- { "streams_resets", KSTAT_DATA_UINT64 },
- { "streams_noresets", KSTAT_DATA_UINT64 },
- { "bogus_streams", KSTAT_DATA_UINT64 },
+ { "max_streams", KSTAT_DATA_UINT64 },
};
-#define ZFETCHSTAT_INCR(stat, val) \
- atomic_add_64(&zfetch_stats.stat.value.ui64, (val));
-
-#define ZFETCHSTAT_BUMP(stat) ZFETCHSTAT_INCR(stat, 1);
+#define ZFETCHSTAT_BUMP(stat) \
+ atomic_inc_64(&zfetch_stats.stat.value.ui64);
kstat_t *zfetch_ksp;
-/*
- * Given a zfetch structure and a zstream structure, determine whether the
- * blocks to be read are part of a co-linear pair of existing prefetch
- * streams. If a set is found, coalesce the streams, removing one, and
- * configure the prefetch so it looks for a strided access pattern.
- *
- * In other words: if we find two sequential access streams that are
- * the same length and distance N appart, and this read is N from the
- * last stream, then we are probably in a strided access pattern. So
- * combine the two sequential streams into a single strided stream.
- *
- * Returns whether co-linear streams were found.
- */
-static boolean_t
-dmu_zfetch_colinear(zfetch_t *zf, zstream_t *zh)
-{
- zstream_t *z_walk;
- zstream_t *z_comp;
-
- if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
- return (0);
-
- if (zh == NULL) {
- rw_exit(&zf->zf_rwlock);
- return (0);
- }
-
- for (z_walk = list_head(&zf->zf_stream); z_walk;
- z_walk = list_next(&zf->zf_stream, z_walk)) {
- for (z_comp = list_next(&zf->zf_stream, z_walk); z_comp;
- z_comp = list_next(&zf->zf_stream, z_comp)) {
- int64_t diff;
-
- if (z_walk->zst_len != z_walk->zst_stride ||
- z_comp->zst_len != z_comp->zst_stride) {
- continue;
- }
-
- diff = z_comp->zst_offset - z_walk->zst_offset;
- if (z_comp->zst_offset + diff == zh->zst_offset) {
- z_walk->zst_offset = zh->zst_offset;
- z_walk->zst_direction = diff < 0 ?
- ZFETCH_BACKWARD : ZFETCH_FORWARD;
- z_walk->zst_stride =
- diff * z_walk->zst_direction;
- z_walk->zst_ph_offset =
- zh->zst_offset + z_walk->zst_stride;
- dmu_zfetch_stream_remove(zf, z_comp);
- mutex_destroy(&z_comp->zst_lock);
- kmem_free(z_comp, sizeof (zstream_t));
-
- dmu_zfetch_dofetch(zf, z_walk);
-
- rw_exit(&zf->zf_rwlock);
- return (1);
- }
-
- diff = z_walk->zst_offset - z_comp->zst_offset;
- if (z_walk->zst_offset + diff == zh->zst_offset) {
- z_walk->zst_offset = zh->zst_offset;
- z_walk->zst_direction = diff < 0 ?
- ZFETCH_BACKWARD : ZFETCH_FORWARD;
- z_walk->zst_stride =
- diff * z_walk->zst_direction;
- z_walk->zst_ph_offset =
- zh->zst_offset + z_walk->zst_stride;
- dmu_zfetch_stream_remove(zf, z_comp);
- mutex_destroy(&z_comp->zst_lock);
- kmem_free(z_comp, sizeof (zstream_t));
-
- dmu_zfetch_dofetch(zf, z_walk);
-
- rw_exit(&zf->zf_rwlock);
- return (1);
- }
- }
- }
-
- rw_exit(&zf->zf_rwlock);
- return (0);
-}
-
-/*
- * Given a zstream_t, determine the bounds of the prefetch. Then call the
- * routine that actually prefetches the individual blocks.
- */
-static void
-dmu_zfetch_dofetch(zfetch_t *zf, zstream_t *zs)
-{
- uint64_t prefetch_tail;
- uint64_t prefetch_limit;
- uint64_t prefetch_ofst;
- uint64_t prefetch_len;
- uint64_t blocks_fetched;
-
- zs->zst_stride = MAX((int64_t)zs->zst_stride, zs->zst_len);
- zs->zst_cap = MIN(zfetch_block_cap, 2 * zs->zst_cap);
-
- prefetch_tail = MAX((int64_t)zs->zst_ph_offset,
- (int64_t)(zs->zst_offset + zs->zst_stride));
- /*
- * XXX: use a faster division method?
- */
- prefetch_limit = zs->zst_offset + zs->zst_len +
- (zs->zst_cap * zs->zst_stride) / zs->zst_len;
-
- while (prefetch_tail < prefetch_limit) {
- prefetch_ofst = zs->zst_offset + zs->zst_direction *
- (prefetch_tail - zs->zst_offset);
-
- prefetch_len = zs->zst_len;
-
- /*
- * Don't prefetch beyond the end of the file, if working
- * backwards.
- */
- if ((zs->zst_direction == ZFETCH_BACKWARD) &&
- (prefetch_ofst > prefetch_tail)) {
- prefetch_len += prefetch_ofst;
- prefetch_ofst = 0;
- }
-
- /* don't prefetch more than we're supposed to */
- if (prefetch_len > zs->zst_len)
- break;
-
- blocks_fetched = dmu_zfetch_fetch(zf->zf_dnode,
- prefetch_ofst, zs->zst_len);
-
- prefetch_tail += zs->zst_stride;
- /* stop if we've run out of stuff to prefetch */
- if (blocks_fetched < zs->zst_len)
- break;
- }
- zs->zst_ph_offset = prefetch_tail;
- zs->zst_last = ddi_get_lbolt();
-}
-
void
zfetch_init(void)
{
-
zfetch_ksp = kstat_create("zfs", 0, "zfetchstats", "misc",
KSTAT_TYPE_NAMED, sizeof (zfetch_stats) / sizeof (kstat_named_t),
KSTAT_FLAG_VIRTUAL);
@@ -266,272 +102,41 @@ zfetch_fini(void)
void
dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
{
- if (zf == NULL) {
+ if (zf == NULL)
return;
- }
zf->zf_dnode = dno;
- zf->zf_stream_cnt = 0;
- zf->zf_alloc_fail = 0;
list_create(&zf->zf_stream, sizeof (zstream_t),
- offsetof(zstream_t, zst_node));
+ offsetof(zstream_t, zs_node));
rw_init(&zf->zf_rwlock, NULL, RW_DEFAULT, NULL);
}
-/*
- * This function computes the actual size, in blocks, that can be prefetched,
- * and fetches it.
- */
-static uint64_t
-dmu_zfetch_fetch(dnode_t *dn, uint64_t blkid, uint64_t nblks)
-{
- uint64_t fetchsz;
- uint64_t i;
-
- fetchsz = dmu_zfetch_fetchsz(dn, blkid, nblks);
-
- for (i = 0; i < fetchsz; i++) {
- dbuf_prefetch(dn, blkid + i, ZIO_PRIORITY_ASYNC_READ);
- }
-
- return (fetchsz);
-}
-
-/*
- * this function returns the number of blocks that would be prefetched, based
- * upon the supplied dnode, blockid, and nblks. This is used so that we can
- * update streams in place, and then prefetch with their old value after the
- * fact. This way, we can delay the prefetch, but subsequent accesses to the
- * stream won't result in the same data being prefetched multiple times.
- */
-static uint64_t
-dmu_zfetch_fetchsz(dnode_t *dn, uint64_t blkid, uint64_t nblks)
-{
- uint64_t fetchsz;
-
- if (blkid > dn->dn_maxblkid) {
- return (0);
- }
-
- /* compute fetch size */
- if (blkid + nblks + 1 > dn->dn_maxblkid) {
- fetchsz = (dn->dn_maxblkid - blkid) + 1;
- ASSERT(blkid + fetchsz - 1 <= dn->dn_maxblkid);
- } else {
- fetchsz = nblks;
- }
-
-
- return (fetchsz);
-}
-
-/*
- * given a zfetch and a zstream structure, see if there is an associated zstream
- * for this block read. If so, it starts a prefetch for the stream it
- * located and returns true, otherwise it returns false
- */
-static boolean_t
-dmu_zfetch_find(zfetch_t *zf, zstream_t *zh, int prefetched)
+static void
+dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
{
- zstream_t *zs;
- int64_t diff;
- int reset = !prefetched;
- int rc = 0;
-
- if (zh == NULL)
- return (0);
-
- /*
- * XXX: This locking strategy is a bit coarse; however, it's impact has
- * yet to be tested. If this turns out to be an issue, it can be
- * modified in a number of different ways.
- */
-
- rw_enter(&zf->zf_rwlock, RW_READER);
-top:
-
- for (zs = list_head(&zf->zf_stream); zs;
- zs = list_next(&zf->zf_stream, zs)) {
-
- /*
- * XXX - should this be an assert?
- */
- if (zs->zst_len == 0) {
- /* bogus stream */
- ZFETCHSTAT_BUMP(zfetchstat_bogus_streams);
- continue;
- }
-
- /*
- * We hit this case when we are in a strided prefetch stream:
- * we will read "len" blocks before "striding".
- */
- if (zh->zst_offset >= zs->zst_offset &&
- zh->zst_offset < zs->zst_offset + zs->zst_len) {
- if (prefetched) {
- /* already fetched */
- ZFETCHSTAT_BUMP(zfetchstat_stride_hits);
- rc = 1;
- goto out;
- } else {
- ZFETCHSTAT_BUMP(zfetchstat_stride_misses);
- }
- }
-
- /*
- * This is the forward sequential read case: we increment
- * len by one each time we hit here, so we will enter this
- * case on every read.
- */
- if (zh->zst_offset == zs->zst_offset + zs->zst_len) {
-
- reset = !prefetched && zs->zst_len > 1;
-
- mutex_enter(&zs->zst_lock);
-
- if (zh->zst_offset != zs->zst_offset + zs->zst_len) {
- mutex_exit(&zs->zst_lock);
- goto top;
- }
- zs->zst_len += zh->zst_len;
- diff = zs->zst_len - zfetch_block_cap;
- if (diff > 0) {
- zs->zst_offset += diff;
- zs->zst_len = zs->zst_len > diff ?
- zs->zst_len - diff : 0;
- }
- zs->zst_direction = ZFETCH_FORWARD;
-
- break;
-
- /*
- * Same as above, but reading backwards through the file.
- */
- } else if (zh->zst_offset == zs->zst_offset - zh->zst_len) {
- /* backwards sequential access */
-
- reset = !prefetched && zs->zst_len > 1;
-
- mutex_enter(&zs->zst_lock);
-
- if (zh->zst_offset != zs->zst_offset - zh->zst_len) {
- mutex_exit(&zs->zst_lock);
- goto top;
- }
-
- zs->zst_offset = zs->zst_offset > zh->zst_len ?
- zs->zst_offset - zh->zst_len : 0;
- zs->zst_ph_offset = zs->zst_ph_offset > zh->zst_len ?
- zs->zst_ph_offset - zh->zst_len : 0;
- zs->zst_len += zh->zst_len;
-
- diff = zs->zst_len - zfetch_block_cap;
- if (diff > 0) {
- zs->zst_ph_offset = zs->zst_ph_offset > diff ?
- zs->zst_ph_offset - diff : 0;
- zs->zst_len = zs->zst_len > diff ?
- zs->zst_len - diff : zs->zst_len;
- }
- zs->zst_direction = ZFETCH_BACKWARD;
-
- break;
-
- } else if ((zh->zst_offset - zs->zst_offset - zs->zst_stride <
- zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
- /* strided forward access */
-
- mutex_enter(&zs->zst_lock);
-
- if ((zh->zst_offset - zs->zst_offset - zs->zst_stride >=
- zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
- mutex_exit(&zs->zst_lock);
- goto top;
- }
-
- zs->zst_offset += zs->zst_stride;
- zs->zst_direction = ZFETCH_FORWARD;
-
- break;
-
- } else if ((zh->zst_offset - zs->zst_offset + zs->zst_stride <
- zs->zst_len) && (zs->zst_len != zs->zst_stride)) {
- /* strided reverse access */
-
- mutex_enter(&zs->zst_lock);
-
- if ((zh->zst_offset - zs->zst_offset + zs->zst_stride >=
- zs->zst_len) || (zs->zst_len == zs->zst_stride)) {
- mutex_exit(&zs->zst_lock);
- goto top;
- }
-
- zs->zst_offset = zs->zst_offset > zs->zst_stride ?
- zs->zst_offset - zs->zst_stride : 0;
- zs->zst_ph_offset = (zs->zst_ph_offset >
- (2 * zs->zst_stride)) ?
- (zs->zst_ph_offset - (2 * zs->zst_stride)) : 0;
- zs->zst_direction = ZFETCH_BACKWARD;
-
- break;
- }
- }
-
- if (zs) {
- if (reset) {
- zstream_t *remove = zs;
-
- ZFETCHSTAT_BUMP(zfetchstat_stream_resets);
- rc = 0;
- mutex_exit(&zs->zst_lock);
- rw_exit(&zf->zf_rwlock);
- rw_enter(&zf->zf_rwlock, RW_WRITER);
- /*
- * Relocate the stream, in case someone removes
- * it while we were acquiring the WRITER lock.
- */
- for (zs = list_head(&zf->zf_stream); zs;
- zs = list_next(&zf->zf_stream, zs)) {
- if (zs == remove) {
- dmu_zfetch_stream_remove(zf, zs);
- mutex_destroy(&zs->zst_lock);
- kmem_free(zs, sizeof (zstream_t));
- break;
- }
- }
- } else {
- ZFETCHSTAT_BUMP(zfetchstat_stream_noresets);
- rc = 1;
- dmu_zfetch_dofetch(zf, zs);
- mutex_exit(&zs->zst_lock);
- }
- }
-out:
- rw_exit(&zf->zf_rwlock);
- return (rc);
+ ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
+ list_remove(&zf->zf_stream, zs);
+ mutex_destroy(&zs->zs_lock);
+ kmem_free(zs, sizeof (*zs));
}
/*
- * Clean-up state associated with a zfetch structure. This frees allocated
- * structure members, empties the zf_stream tree, and generally makes things
- * nice. This doesn't free the zfetch_t itself, that's left to the caller.
+ * Clean-up state associated with a zfetch structure (e.g. destroy the
+ * streams). This doesn't free the zfetch_t itself, that's left to the caller.
*/
void
-dmu_zfetch_rele(zfetch_t *zf)
+dmu_zfetch_fini(zfetch_t *zf)
{
- zstream_t *zs;
- zstream_t *zs_next;
+ zstream_t *zs;
ASSERT(!RW_LOCK_HELD(&zf->zf_rwlock));
- for (zs = list_head(&zf->zf_stream); zs; zs = zs_next) {
- zs_next = list_next(&zf->zf_stream, zs);
-
- list_remove(&zf->zf_stream, zs);
- mutex_destroy(&zs->zst_lock);
- kmem_free(zs, sizeof (zstream_t));
- }
+ rw_enter(&zf->zf_rwlock, RW_WRITER);
+ while ((zs = list_head(&zf->zf_stream)) != NULL)
+ dmu_zfetch_stream_remove(zf, zs);
+ rw_exit(&zf->zf_rwlock);
list_destroy(&zf->zf_stream);
rw_destroy(&zf->zf_rwlock);
@@ -539,198 +144,199 @@ dmu_zfetch_rele(zfetch_t *zf)
}
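
dmu_zfetch_rele() becomes dmu_zfetch_fini() and now takes zf_rwlock as
writer while tearing the streams down, reusing dmu_zfetch_stream_remove()
rather than open-coding the frees. The pairing with dmu_zfetch_init(), as
used by the dnode code later in this patch, is:

    /* Each dnode embeds a zfetch_t: set up in dnode_create(), torn
     * down in dnode_destroy() (see the dnode.c hunks below). */
    dmu_zfetch_init(&dn->dn_zfetch, dn);
    /* ... reads feed the predictor through dmu_zfetch() ... */
    dmu_zfetch_fini(&dn->dn_zfetch);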
/*
- * Given a zfetch and zstream structure, insert the zstream structure into the
- * AVL tree contained within the zfetch structure. Peform the appropriate
- * book-keeping. It is possible that another thread has inserted a stream which
- * matches one that we are about to insert, so we must be sure to check for this
- * case. If one is found, return failure, and let the caller cleanup the
- * duplicates.
+ * If there aren't too many streams already, create a new stream.
+ * The "blkid" argument is the next block that we expect this stream to access.
+ * While we're here, clean up old streams (which haven't been
+ * accessed for at least zfetch_min_sec_reap seconds).
*/
-static int
-dmu_zfetch_stream_insert(zfetch_t *zf, zstream_t *zs)
+static void
+dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
{
- zstream_t *zs_walk;
- zstream_t *zs_next;
+ zstream_t *zs;
+ zstream_t *zs_next;
+ int numstreams = 0;
+ uint32_t max_streams;
ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
- for (zs_walk = list_head(&zf->zf_stream); zs_walk; zs_walk = zs_next) {
- zs_next = list_next(&zf->zf_stream, zs_walk);
-
- if (dmu_zfetch_streams_equal(zs_walk, zs)) {
- return (0);
- }
- }
-
- list_insert_head(&zf->zf_stream, zs);
- zf->zf_stream_cnt++;
- return (1);
-}
-
-
-/*
- * Walk the list of zstreams in the given zfetch, find an old one (by time), and
- * reclaim it for use by the caller.
- */
-static zstream_t *
-dmu_zfetch_stream_reclaim(zfetch_t *zf)
-{
- zstream_t *zs;
-
- if (! rw_tryenter(&zf->zf_rwlock, RW_WRITER))
- return (0);
-
- for (zs = list_head(&zf->zf_stream); zs;
- zs = list_next(&zf->zf_stream, zs)) {
-
- if (((ddi_get_lbolt() - zs->zst_last)/hz) > zfetch_min_sec_reap)
- break;
+ /*
+ * Clean up old streams.
+ */
+ for (zs = list_head(&zf->zf_stream);
+ zs != NULL; zs = zs_next) {
+ zs_next = list_next(&zf->zf_stream, zs);
+ if (((gethrtime() - zs->zs_atime) / NANOSEC) >
+ zfetch_min_sec_reap)
+ dmu_zfetch_stream_remove(zf, zs);
+ else
+ numstreams++;
}
- if (zs) {
- dmu_zfetch_stream_remove(zf, zs);
- mutex_destroy(&zs->zst_lock);
- bzero(zs, sizeof (zstream_t));
- } else {
- zf->zf_alloc_fail++;
+ /*
+ * The maximum number of streams is normally zfetch_max_streams,
+ * but for small files we lower it such that it's at least possible
+ * for all the streams to be non-overlapping.
+ *
+ * If we are already at the maximum number of streams for this file,
+ * even after removing old streams, then don't create this stream.
+ */
+ max_streams = MAX(1, MIN(zfetch_max_streams,
+ zf->zf_dnode->dn_maxblkid * zf->zf_dnode->dn_datablksz /
+ zfetch_max_distance));
+ if (numstreams >= max_streams) {
+ ZFETCHSTAT_BUMP(zfetchstat_max_streams);
+ return;
}
- rw_exit(&zf->zf_rwlock);
- return (zs);
-}
-
-/*
- * Given a zfetch and zstream structure, remove the zstream structure from its
- * container in the zfetch structure. Perform the appropriate book-keeping.
- */
-static void
-dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
-{
- ASSERT(RW_WRITE_HELD(&zf->zf_rwlock));
-
- list_remove(&zf->zf_stream, zs);
- zf->zf_stream_cnt--;
-}
-
-static int
-dmu_zfetch_streams_equal(zstream_t *zs1, zstream_t *zs2)
-{
- if (zs1->zst_offset != zs2->zst_offset)
- return (0);
-
- if (zs1->zst_len != zs2->zst_len)
- return (0);
+ zs = kmem_zalloc(sizeof (*zs), KM_SLEEP);
+ zs->zs_blkid = blkid;
+ zs->zs_pf_blkid = blkid;
+ zs->zs_ipf_blkid = blkid;
+ zs->zs_atime = gethrtime();
+ mutex_init(&zs->zs_lock, NULL, MUTEX_DEFAULT, NULL);
- if (zs1->zst_stride != zs2->zst_stride)
- return (0);
-
- if (zs1->zst_ph_offset != zs2->zst_ph_offset)
- return (0);
-
- if (zs1->zst_cap != zs2->zst_cap)
- return (0);
-
- if (zs1->zst_direction != zs2->zst_direction)
- return (0);
-
- return (1);
+ list_insert_head(&zf->zf_stream, zs);
}
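
The cap scales down for small files so that the allowed streams can remain
non-overlapping at full prefetch distance. Illustrative numbers, assuming
128K data blocks and the 8MB default zfetch_max_distance:

    /*
     * ~16MB file: dn_maxblkid = 127, dn_datablksz = 131072
     *     127 * 131072 / 8388608 = 1    =>  max_streams = MAX(1, 1) = 1
     * ~1GB file:  dn_maxblkid = 8191
     *     8191 * 131072 / 8388608 = 127 =>  capped at zfetch_max_streams
     */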
/*
- * This is the prefetch entry point. It calls all of the other dmu_zfetch
- * routines to create, delete, find, or operate upon prefetch streams.
+ * This is the predictive prefetch entry point. It associates the dnode
+ * access specified by the blkid and nblks arguments with a prefetch
+ * stream, predicts further accesses based on that stream's statistics,
+ * and initiates speculative prefetch. The fetch_data argument specifies
+ * whether actual data blocks should be fetched:
+ * FALSE -- prefetch only indirect blocks for predicted data blocks;
+ * TRUE -- prefetch predicted data blocks plus following indirect blocks.
*/
void
-dmu_zfetch(zfetch_t *zf, uint64_t offset, uint64_t size, int prefetched)
+dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data)
{
- zstream_t zst;
- zstream_t *newstream;
- boolean_t fetched;
- int inserted;
- unsigned int blkshft;
- uint64_t blksz;
+ zstream_t *zs;
+ int64_t pf_start, ipf_start, ipf_istart, ipf_iend;
+ int64_t pf_ahead_blks, max_blks, iblk;
+ int epbs, max_dist_blks, pf_nblks, ipf_nblks, i;
+ uint64_t end_of_access_blkid;
+ end_of_access_blkid = blkid + nblks;
if (zfs_prefetch_disable)
return;
- /* files that aren't ln2 blocksz are only one block -- nothing to do */
- if (!zf->zf_dnode->dn_datablkshift)
+ /*
+ * As a fast path for small (single-block) files, ignore access
+ * to the first block.
+ */
+ if (blkid == 0)
return;
- /* convert offset and size, into blockid and nblocks */
- blkshft = zf->zf_dnode->dn_datablkshift;
- blksz = (1 << blkshft);
+ rw_enter(&zf->zf_rwlock, RW_READER);
- bzero(&zst, sizeof (zstream_t));
- zst.zst_offset = offset >> blkshft;
- zst.zst_len = (P2ROUNDUP(offset + size, blksz) -
- P2ALIGN(offset, blksz)) >> blkshft;
+ for (zs = list_head(&zf->zf_stream); zs != NULL;
+ zs = list_next(&zf->zf_stream, zs)) {
+ if (blkid == zs->zs_blkid) {
+ mutex_enter(&zs->zs_lock);
+ /*
+ * zs_blkid could have changed before we
+ * acquired zs_lock; re-check them here.
+ */
+ if (blkid != zs->zs_blkid) {
+ mutex_exit(&zs->zs_lock);
+ continue;
+ }
+ break;
+ }
+ }
- fetched = dmu_zfetch_find(zf, &zst, prefetched);
- if (fetched) {
- ZFETCHSTAT_BUMP(zfetchstat_hits);
- } else {
+ if (zs == NULL) {
+ /*
+ * This access is not part of any existing stream. Create
+ * a new stream for it.
+ */
ZFETCHSTAT_BUMP(zfetchstat_misses);
- if ((fetched = dmu_zfetch_colinear(zf, &zst))) {
- ZFETCHSTAT_BUMP(zfetchstat_colinear_hits);
- } else {
- ZFETCHSTAT_BUMP(zfetchstat_colinear_misses);
- }
+ if (rw_tryupgrade(&zf->zf_rwlock))
+ dmu_zfetch_stream_create(zf, end_of_access_blkid);
+ rw_exit(&zf->zf_rwlock);
+ return;
}
- if (!fetched) {
- newstream = dmu_zfetch_stream_reclaim(zf);
+ /*
+ * This access was to a block that we issued a prefetch for on
+ * behalf of this stream. Issue further prefetches for this stream.
+ *
+ * Normally, we start prefetching where we stopped
+ * prefetching last (zs_pf_blkid). But when we get our first
+ * hit on this stream, zs_pf_blkid == zs_blkid, we don't
+ * want to prefetch the block we just accessed. In this case,
+ * start just after the block we just accessed.
+ */
+ pf_start = MAX(zs->zs_pf_blkid, end_of_access_blkid);
+ /*
+ * Double our amount of prefetched data, but don't let the
+ * prefetch get further ahead than zfetch_max_distance.
+ */
+ if (fetch_data) {
+ max_dist_blks =
+ zfetch_max_distance >> zf->zf_dnode->dn_datablkshift;
/*
- * we still couldn't find a stream, drop the lock, and allocate
- * one if possible. Otherwise, give up and go home.
+ * Previously, we were (zs_pf_blkid - blkid) ahead. We
+ * want to now be double that, so read that amount again,
+ * plus the amount we are catching up by (i.e. the amount
+ * read just now).
*/
- if (newstream) {
- ZFETCHSTAT_BUMP(zfetchstat_reclaim_successes);
- } else {
- uint64_t maxblocks;
- uint32_t max_streams;
- uint32_t cur_streams;
-
- ZFETCHSTAT_BUMP(zfetchstat_reclaim_failures);
- cur_streams = zf->zf_stream_cnt;
- maxblocks = zf->zf_dnode->dn_maxblkid;
-
- max_streams = MIN(zfetch_max_streams,
- (maxblocks / zfetch_block_cap));
- if (max_streams == 0) {
- max_streams++;
- }
-
- if (cur_streams >= max_streams) {
- return;
- }
- newstream =
- kmem_zalloc(sizeof (zstream_t), KM_SLEEP);
- }
+ pf_ahead_blks = zs->zs_pf_blkid - blkid + nblks;
+ max_blks = max_dist_blks - (pf_start - end_of_access_blkid);
+ pf_nblks = MIN(pf_ahead_blks, max_blks);
+ } else {
+ pf_nblks = 0;
+ }
- newstream->zst_offset = zst.zst_offset;
- newstream->zst_len = zst.zst_len;
- newstream->zst_stride = zst.zst_len;
- newstream->zst_ph_offset = zst.zst_len + zst.zst_offset;
- newstream->zst_cap = zst.zst_len;
- newstream->zst_direction = ZFETCH_FORWARD;
- newstream->zst_last = ddi_get_lbolt();
+ zs->zs_pf_blkid = pf_start + pf_nblks;
- mutex_init(&newstream->zst_lock, NULL, MUTEX_DEFAULT, NULL);
+ /*
+ * Do the same for indirects, starting from where we stopped last,
+ * or where we will stop reading data blocks (and the indirects
+ * that point to them).
+ */
+ ipf_start = MAX(zs->zs_ipf_blkid, zs->zs_pf_blkid);
+ max_dist_blks = zfetch_max_idistance >> zf->zf_dnode->dn_datablkshift;
+ /*
+ * We want to double our distance ahead of the data prefetch
+ * (or reader, if we are not prefetching data). Previously, we
+ * were (zs_ipf_blkid - blkid) ahead. To double that, we read
+ * that amount again, plus the amount we are catching up by
+ * (i.e. the amount read now + the amount of data prefetched now).
+ */
+ pf_ahead_blks = zs->zs_ipf_blkid - blkid + nblks + pf_nblks;
+ max_blks = max_dist_blks - (ipf_start - end_of_access_blkid);
+ ipf_nblks = MIN(pf_ahead_blks, max_blks);
+ zs->zs_ipf_blkid = ipf_start + ipf_nblks;
+
+ epbs = zf->zf_dnode->dn_indblkshift - SPA_BLKPTRSHIFT;
+ ipf_istart = P2ROUNDUP(ipf_start, 1 << epbs) >> epbs;
+ ipf_iend = P2ROUNDUP(zs->zs_ipf_blkid, 1 << epbs) >> epbs;
+
+ zs->zs_atime = gethrtime();
+ zs->zs_blkid = end_of_access_blkid;
+ mutex_exit(&zs->zs_lock);
+ rw_exit(&zf->zf_rwlock);
- rw_enter(&zf->zf_rwlock, RW_WRITER);
- inserted = dmu_zfetch_stream_insert(zf, newstream);
- rw_exit(&zf->zf_rwlock);
+ /*
+ * dbuf_prefetch() is asynchronous (even when it needs to read
+ * indirect blocks), but we still prefer to drop our locks before
+ * calling it to reduce the time we hold them.
+ */
- if (!inserted) {
- mutex_destroy(&newstream->zst_lock);
- kmem_free(newstream, sizeof (zstream_t));
- }
+ for (i = 0; i < pf_nblks; i++) {
+ dbuf_prefetch(zf->zf_dnode, 0, pf_start + i,
+ ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH);
+ }
+ for (iblk = ipf_istart; iblk < ipf_iend; iblk++) {
+ dbuf_prefetch(zf->zf_dnode, 1, iblk,
+ ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH);
}
+ ZFETCHSTAT_BUMP(zfetchstat_hits);
}
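
To make the distance doubling concrete, here is a worked pass through the
data-prefetch arithmetic above, with assumed values: 128K data blocks, so
max_dist_blks = 8MB >> 17 = 64.

    /*
     * Stream state: zs_blkid = 10, zs_pf_blkid = 14 (4 blocks ahead).
     * Access: blkid = 10, nblks = 2   =>  end_of_access_blkid = 12.
     *
     *     pf_start      = MAX(14, 12)     = 14
     *     pf_ahead_blks = 14 - 10 + 2     = 6
     *     max_blks      = 64 - (14 - 12)  = 62
     *     pf_nblks      = MIN(6, 62)      = 6
     *     zs_pf_blkid   = 14 + 6          = 20
     *
     * The stream was 4 blocks ahead of the reader and is now
     * 20 - 12 = 8 blocks ahead: the prefetch distance doubled.
     */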
#if defined(_KERNEL) && defined(HAVE_SPL)
+/* BEGIN CSTYLED */
module_param(zfs_prefetch_disable, int, 0644);
MODULE_PARM_DESC(zfs_prefetch_disable, "Disable all ZFS prefetching");
@@ -740,9 +346,11 @@ MODULE_PARM_DESC(zfetch_max_streams, "Max number of streams per zfetch");
module_param(zfetch_min_sec_reap, uint, 0644);
MODULE_PARM_DESC(zfetch_min_sec_reap, "Min time before stream reclaim");
-module_param(zfetch_block_cap, uint, 0644);
-MODULE_PARM_DESC(zfetch_block_cap, "Max number of blocks to fetch at a time");
+module_param(zfetch_max_distance, uint, 0644);
+MODULE_PARM_DESC(zfetch_max_distance,
+ "Max bytes to prefetch per stream (default 8MB)");
module_param(zfetch_array_rd_sz, ulong, 0644);
MODULE_PARM_DESC(zfetch_array_rd_sz, "Number of bytes in an array_read");
+/* END CSTYLED */
#endif
diff --git a/zfs/module/zfs/dnode.c b/zfs/module/zfs/dnode.c
index 2858bbfb492e..e05a4d0a5538 100644
--- a/zfs/module/zfs/dnode.c
+++ b/zfs/module/zfs/dnode.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -39,20 +39,39 @@
#include <sys/range_tree.h>
#include <sys/trace_dnode.h>
+dnode_stats_t dnode_stats = {
+ { "dnode_hold_dbuf_hold", KSTAT_DATA_UINT64 },
+ { "dnode_hold_dbuf_read", KSTAT_DATA_UINT64 },
+ { "dnode_hold_alloc_hits", KSTAT_DATA_UINT64 },
+ { "dnode_hold_alloc_misses", KSTAT_DATA_UINT64 },
+ { "dnode_hold_alloc_interior", KSTAT_DATA_UINT64 },
+ { "dnode_hold_alloc_lock_retry", KSTAT_DATA_UINT64 },
+ { "dnode_hold_alloc_lock_misses", KSTAT_DATA_UINT64 },
+ { "dnode_hold_alloc_type_none", KSTAT_DATA_UINT64 },
+ { "dnode_hold_free_hits", KSTAT_DATA_UINT64 },
+ { "dnode_hold_free_misses", KSTAT_DATA_UINT64 },
+ { "dnode_hold_free_lock_misses", KSTAT_DATA_UINT64 },
+ { "dnode_hold_free_lock_retry", KSTAT_DATA_UINT64 },
+ { "dnode_hold_free_overflow", KSTAT_DATA_UINT64 },
+ { "dnode_hold_free_refcount", KSTAT_DATA_UINT64 },
+ { "dnode_hold_free_txg", KSTAT_DATA_UINT64 },
+ { "dnode_allocate", KSTAT_DATA_UINT64 },
+ { "dnode_reallocate", KSTAT_DATA_UINT64 },
+ { "dnode_buf_evict", KSTAT_DATA_UINT64 },
+ { "dnode_alloc_next_chunk", KSTAT_DATA_UINT64 },
+ { "dnode_alloc_race", KSTAT_DATA_UINT64 },
+ { "dnode_alloc_next_block", KSTAT_DATA_UINT64 },
+ { "dnode_move_invalid", KSTAT_DATA_UINT64 },
+ { "dnode_move_recheck1", KSTAT_DATA_UINT64 },
+ { "dnode_move_recheck2", KSTAT_DATA_UINT64 },
+ { "dnode_move_special", KSTAT_DATA_UINT64 },
+ { "dnode_move_handle", KSTAT_DATA_UINT64 },
+ { "dnode_move_rwlock", KSTAT_DATA_UINT64 },
+ { "dnode_move_active", KSTAT_DATA_UINT64 },
+};
+
+static kstat_t *dnode_ksp;
static kmem_cache_t *dnode_cache;
-/*
- * Define DNODE_STATS to turn on statistic gathering. By default, it is only
- * turned on when DEBUG is also defined.
- */
-#ifdef DEBUG
-#define DNODE_STATS
-#endif /* DEBUG */
-
-#ifdef DNODE_STATS
-#define DNODE_STAT_ADD(stat) ((stat)++)
-#else
-#define DNODE_STAT_ADD(stat) /* nothing */
-#endif /* DNODE_STATS */
ASSERTV(static dnode_phys_t dnode_phys_zero);
@@ -69,19 +88,13 @@ dbuf_compare(const void *x1, const void *x2)
const dmu_buf_impl_t *d1 = x1;
const dmu_buf_impl_t *d2 = x2;
- if (d1->db_level < d2->db_level) {
- return (-1);
- }
- if (d1->db_level > d2->db_level) {
- return (1);
- }
+ int cmp = AVL_CMP(d1->db_level, d2->db_level);
+ if (likely(cmp))
+ return (cmp);
- if (d1->db_blkid < d2->db_blkid) {
- return (-1);
- }
- if (d1->db_blkid > d2->db_blkid) {
- return (1);
- }
+ cmp = AVL_CMP(d1->db_blkid, d2->db_blkid);
+ if (likely(cmp))
+ return (cmp);
if (d1->db_state == DB_SEARCH) {
ASSERT3S(d2->db_state, !=, DB_SEARCH);
@@ -91,13 +104,7 @@ dbuf_compare(const void *x1, const void *x2)
return (1);
}
- if ((uintptr_t)d1 < (uintptr_t)d2) {
- return (-1);
- }
- if ((uintptr_t)d1 > (uintptr_t)d2) {
- return (1);
- }
- return (0);
+ return (AVL_PCMP(d1, d2));
}
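
dbuf_compare() now leans on branch-free three-way comparison helpers.
Assuming they follow the usual idiom (the real definitions live in the
AVL headers), they reduce to:

    #include <stdint.h>

    /* Yields -1, 0 or 1 without branches; likely(cmp) in the caller
     * fast-paths the common case where the levels already differ. */
    #define AVL_CMP(a, b)   (((a) > (b)) - ((a) < (b)))
    #define AVL_PCMP(a, b)  AVL_CMP((uintptr_t)(a), (uintptr_t)(b))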
/* ARGSUSED */
@@ -107,7 +114,7 @@ dnode_cons(void *arg, void *unused, int kmflag)
dnode_t *dn = arg;
int i;
- rw_init(&dn->dn_struct_rwlock, NULL, RW_DEFAULT, NULL);
+ rw_init(&dn->dn_struct_rwlock, NULL, RW_NOLOCKDEP, NULL);
mutex_init(&dn->dn_mtx, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&dn->dn_dbufs_mtx, NULL, MUTEX_DEFAULT, NULL);
cv_init(&dn->dn_notxholds, NULL, CV_DEFAULT, NULL);
@@ -153,7 +160,6 @@ dnode_cons(void *arg, void *unused, int kmflag)
dn->dn_id_flags = 0;
dn->dn_dbufs_count = 0;
- dn->dn_unlisted_l0_blkid = 0;
avl_create(&dn->dn_dbufs, dbuf_compare, sizeof (dmu_buf_impl_t),
offsetof(dmu_buf_impl_t, db_link));
@@ -206,7 +212,6 @@ dnode_dest(void *arg, void *unused)
ASSERT0(dn->dn_id_flags);
ASSERT0(dn->dn_dbufs_count);
- ASSERT0(dn->dn_unlisted_l0_blkid);
avl_destroy(&dn->dn_dbufs);
}
@@ -217,11 +222,24 @@ dnode_init(void)
dnode_cache = kmem_cache_create("dnode_t", sizeof (dnode_t),
0, dnode_cons, dnode_dest, NULL, NULL, NULL, 0);
kmem_cache_set_move(dnode_cache, dnode_move);
+
+ dnode_ksp = kstat_create("zfs", 0, "dnodestats", "misc",
+ KSTAT_TYPE_NAMED, sizeof (dnode_stats) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL);
+ if (dnode_ksp != NULL) {
+ dnode_ksp->ks_data = &dnode_stats;
+ kstat_install(dnode_ksp);
+ }
}
void
dnode_fini(void)
{
+ if (dnode_ksp != NULL) {
+ kstat_delete(dnode_ksp);
+ dnode_ksp = NULL;
+ }
+
kmem_cache_destroy(dnode_cache);
dnode_cache = NULL;
}
@@ -248,6 +266,7 @@ dnode_verify(dnode_t *dn)
}
if (dn->dn_phys->dn_type != DMU_OT_NONE || dn->dn_allocated_txg != 0) {
int i;
+ int max_bonuslen = DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots);
ASSERT3U(dn->dn_indblkshift, <=, SPA_MAXBLOCKSHIFT);
if (dn->dn_datablkshift) {
ASSERT3U(dn->dn_datablkshift, >=, SPA_MINBLOCKSHIFT);
@@ -258,12 +277,12 @@ dnode_verify(dnode_t *dn)
ASSERT(DMU_OT_IS_VALID(dn->dn_type));
ASSERT3U(dn->dn_nblkptr, >=, 1);
ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR);
- ASSERT3U(dn->dn_bonuslen, <=, DN_MAX_BONUSLEN);
+ ASSERT3U(dn->dn_bonuslen, <=, max_bonuslen);
ASSERT3U(dn->dn_datablksz, ==,
dn->dn_datablkszsec << SPA_MINBLOCKSHIFT);
ASSERT3U(ISP2(dn->dn_datablksz), ==, dn->dn_datablkshift != 0);
ASSERT3U((dn->dn_nblkptr - 1) * sizeof (blkptr_t) +
- dn->dn_bonuslen, <=, DN_MAX_BONUSLEN);
+ dn->dn_bonuslen, <=, max_bonuslen);
for (i = 0; i < TXG_SIZE; i++) {
ASSERT3U(dn->dn_next_nlevels[i], <=, dn->dn_nlevels);
}
@@ -294,6 +313,7 @@ dnode_byteswap(dnode_phys_t *dnp)
dnp->dn_datablkszsec = BSWAP_16(dnp->dn_datablkszsec);
dnp->dn_bonuslen = BSWAP_16(dnp->dn_bonuslen);
+ dnp->dn_extra_slots = BSWAP_8(dnp->dn_extra_slots);
dnp->dn_maxblkid = BSWAP_64(dnp->dn_maxblkid);
dnp->dn_used = BSWAP_64(dnp->dn_used);
@@ -320,7 +340,8 @@ dnode_byteswap(dnode_phys_t *dnp)
* dnode buffer).
*/
int off = (dnp->dn_nblkptr-1) * sizeof (blkptr_t);
- size_t len = DN_MAX_BONUSLEN - off;
+ int slots = dnp->dn_extra_slots + 1;
+ size_t len = DN_SLOTS_TO_BONUSLEN(slots) - off;
dmu_object_byteswap_t byteswap;
ASSERT(DMU_OT_IS_VALID(dnp->dn_bonustype));
byteswap = DMU_OT_BYTESWAP(dnp->dn_bonustype);
@@ -329,23 +350,24 @@ dnode_byteswap(dnode_phys_t *dnp)
/* Swap SPILL block if we have one */
if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR)
- byteswap_uint64_array(&dnp->dn_spill, sizeof (blkptr_t));
-
+ byteswap_uint64_array(DN_SPILL_BLKPTR(dnp), sizeof (blkptr_t));
}
void
dnode_buf_byteswap(void *vbuf, size_t size)
{
- dnode_phys_t *buf = vbuf;
- int i;
+ int i = 0;
ASSERT3U(sizeof (dnode_phys_t), ==, (1<<DNODE_SHIFT));
ASSERT((size & (sizeof (dnode_phys_t)-1)) == 0);
- size >>= DNODE_SHIFT;
- for (i = 0; i < size; i++) {
- dnode_byteswap(buf);
- buf++;
+ while (i < size) {
+ dnode_phys_t *dnp = (void *)(((char *)vbuf) + i);
+ dnode_byteswap(dnp);
+
+ i += DNODE_MIN_SIZE;
+ if (dnp->dn_type != DMU_OT_NONE)
+ i += dnp->dn_extra_slots * DNODE_MIN_SIZE;
}
}
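
Since dnodes are now variable-width, the byteswap walk advances by each
dnode's actual footprint instead of a fixed stride. A worked pass,
assuming DNODE_MIN_SIZE is 512 bytes:

    /*
     * i = 0:    allocated 2-slot dnode (dn_extra_slots = 1)
     *           i += 512; i += 1 * 512   =>  next dnode at i = 1024
     * i = 1024: free slot (dn_type == DMU_OT_NONE)
     *           i += 512                 =>  next probe at i = 1536
     */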
@@ -356,7 +378,7 @@ dnode_setbonuslen(dnode_t *dn, int newsize, dmu_tx_t *tx)
dnode_setdirty(dn, tx);
rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
- ASSERT3U(newsize, <=, DN_MAX_BONUSLEN -
+ ASSERT3U(newsize, <=, DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots) -
(dn->dn_nblkptr-1) * sizeof (blkptr_t));
dn->dn_bonuslen = newsize;
if (newsize == 0)
@@ -434,6 +456,7 @@ dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
dn->dn_compress = dnp->dn_compress;
dn->dn_bonustype = dnp->dn_bonustype;
dn->dn_bonuslen = dnp->dn_bonuslen;
+ dn->dn_num_slots = dnp->dn_extra_slots + 1;
dn->dn_maxblkid = dnp->dn_maxblkid;
dn->dn_have_spill = ((dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) != 0);
dn->dn_id_flags = 0;
@@ -441,14 +464,10 @@ dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
dmu_zfetch_init(&dn->dn_zfetch, dn);
ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
+ ASSERT(zrl_is_locked(&dnh->dnh_zrlock));
+ ASSERT(!DN_SLOT_IS_PTR(dnh->dnh_dnode));
mutex_enter(&os->os_lock);
- if (dnh->dnh_dnode != NULL) {
- /* Lost the allocation race. */
- mutex_exit(&os->os_lock);
- kmem_cache_free(dnode_cache, dn);
- return (dnh->dnh_dnode);
- }
/*
* Exclude special dnodes from os_dnodes so an empty os_dnodes
@@ -470,7 +489,8 @@ dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
dnh->dnh_dnode = dn;
mutex_exit(&os->os_lock);
- arc_space_consume(sizeof (dnode_t), ARC_SPACE_OTHER);
+ arc_space_consume(sizeof (dnode_t), ARC_SPACE_DNODE);
+
return (dn);
}
@@ -509,7 +529,7 @@ dnode_destroy(dnode_t *dn)
}
if (dn->dn_bonus != NULL) {
mutex_enter(&dn->dn_bonus->db_mtx);
- dbuf_evict(dn->dn_bonus);
+ dbuf_destroy(dn->dn_bonus);
dn->dn_bonus = NULL;
}
dn->dn_zio = NULL;
@@ -522,11 +542,10 @@ dnode_destroy(dnode_t *dn)
dn->dn_newuid = 0;
dn->dn_newgid = 0;
dn->dn_id_flags = 0;
- dn->dn_unlisted_l0_blkid = 0;
- dmu_zfetch_rele(&dn->dn_zfetch);
+ dmu_zfetch_fini(&dn->dn_zfetch);
kmem_cache_free(dnode_cache, dn);
- arc_space_return(sizeof (dnode_t), ARC_SPACE_OTHER);
+ arc_space_return(sizeof (dnode_t), ARC_SPACE_DNODE);
if (complete_os_eviction)
dmu_objset_evict_done(os);
@@ -534,10 +553,13 @@ dnode_destroy(dnode_t *dn)
void
dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
- dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
+ dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx)
{
int i;
+ ASSERT3U(dn_slots, >, 0);
+ ASSERT3U(dn_slots << DNODE_SHIFT, <=,
+ spa_maxdnodesize(dmu_objset_spa(dn->dn_objset)));
ASSERT3U(blocksize, <=,
spa_maxblocksize(dmu_objset_spa(dn->dn_objset)));
if (blocksize == 0)
@@ -550,8 +572,9 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ibs = MIN(MAX(ibs, DN_MIN_INDBLKSHIFT), DN_MAX_INDBLKSHIFT);
- dprintf("os=%p obj=%llu txg=%llu blocksize=%d ibs=%d\n", dn->dn_objset,
- dn->dn_object, tx->tx_txg, blocksize, ibs);
+ dprintf("os=%p obj=%llu txg=%llu blocksize=%d ibs=%d dn_slots=%d\n",
+ dn->dn_objset, dn->dn_object, tx->tx_txg, blocksize, ibs, dn_slots);
+ DNODE_STAT_BUMP(dnode_allocate);
ASSERT(dn->dn_type == DMU_OT_NONE);
ASSERT(bcmp(dn->dn_phys, &dnode_phys_zero, sizeof (dnode_phys_t)) == 0);
@@ -562,7 +585,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
(bonustype == DMU_OT_SA && bonuslen == 0) ||
(bonustype != DMU_OT_NONE && bonuslen != 0));
ASSERT(DMU_OT_IS_VALID(bonustype));
- ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
+ ASSERT3U(bonuslen, <=, DN_SLOTS_TO_BONUSLEN(dn_slots));
ASSERT(dn->dn_type == DMU_OT_NONE);
ASSERT0(dn->dn_maxblkid);
ASSERT0(dn->dn_allocated_txg);
@@ -588,11 +611,15 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
dnode_setdblksz(dn, blocksize);
dn->dn_indblkshift = ibs;
dn->dn_nlevels = 1;
+ dn->dn_num_slots = dn_slots;
if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */
dn->dn_nblkptr = 1;
- else
- dn->dn_nblkptr = 1 +
- ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
+ else {
+ dn->dn_nblkptr = MIN(DN_MAX_NBLKPTR,
+ 1 + ((DN_SLOTS_TO_BONUSLEN(dn_slots) - bonuslen) >>
+ SPA_BLKPTRSHIFT));
+ }
+
dn->dn_bonustype = bonustype;
dn->dn_bonuslen = bonuslen;
dn->dn_checksum = ZIO_CHECKSUM_INHERIT;
@@ -617,7 +644,7 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
void
dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
- dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
+ dmu_object_type_t bonustype, int bonuslen, int dn_slots, dmu_tx_t *tx)
{
int nblkptr;
@@ -631,7 +658,11 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
(bonustype != DMU_OT_NONE && bonuslen != 0) ||
(bonustype == DMU_OT_SA && bonuslen == 0));
ASSERT(DMU_OT_IS_VALID(bonustype));
- ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
+ ASSERT3U(bonuslen, <=,
+ DN_BONUS_SIZE(spa_maxdnodesize(dmu_objset_spa(dn->dn_objset))));
+
+ dn_slots = dn_slots > 0 ? dn_slots : DNODE_MIN_SLOTS;
+ DNODE_STAT_BUMP(dnode_reallocate);
/* clean up any unreferenced dbufs */
dnode_evict_dbufs(dn);
@@ -654,7 +685,9 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
if (bonustype == DMU_OT_SA) /* Maximize bonus space for SA */
nblkptr = 1;
else
- nblkptr = 1 + ((DN_MAX_BONUSLEN - bonuslen) >> SPA_BLKPTRSHIFT);
+ nblkptr = MIN(DN_MAX_NBLKPTR,
+ 1 + ((DN_SLOTS_TO_BONUSLEN(dn_slots) - bonuslen) >>
+ SPA_BLKPTRSHIFT));
if (dn->dn_bonustype != bonustype)
dn->dn_next_bonustype[tx->tx_txg&TXG_MASK] = bonustype;
if (dn->dn_nblkptr != nblkptr)
@@ -672,6 +705,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
mutex_enter(&dn->dn_mtx);
dn->dn_bonustype = bonustype;
dn->dn_bonuslen = bonuslen;
+ dn->dn_num_slots = dn_slots;
dn->dn_nblkptr = nblkptr;
dn->dn_checksum = ZIO_CHECKSUM_INHERIT;
dn->dn_compress = ZIO_COMPRESS_INHERIT;
@@ -680,7 +714,8 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
/* fix up the bonus db_size */
if (dn->dn_bonus) {
dn->dn_bonus->db.db_size =
- DN_MAX_BONUSLEN - (dn->dn_nblkptr-1) * sizeof (blkptr_t);
+ DN_SLOTS_TO_BONUSLEN(dn->dn_num_slots) -
+ (dn->dn_nblkptr-1) * sizeof (blkptr_t);
ASSERT(dn->dn_bonuslen <= dn->dn_bonus->db.db_size);
}
@@ -689,18 +724,6 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
}
#ifdef _KERNEL
-#ifdef DNODE_STATS
-static struct {
- uint64_t dms_dnode_invalid;
- uint64_t dms_dnode_recheck1;
- uint64_t dms_dnode_recheck2;
- uint64_t dms_dnode_special;
- uint64_t dms_dnode_handle;
- uint64_t dms_dnode_rwlock;
- uint64_t dms_dnode_active;
-} dnode_move_stats;
-#endif /* DNODE_STATS */
-
static void
dnode_move_impl(dnode_t *odn, dnode_t *ndn)
{
@@ -729,6 +752,7 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
ndn->dn_datablkszsec = odn->dn_datablkszsec;
ndn->dn_datablksz = odn->dn_datablksz;
ndn->dn_maxblkid = odn->dn_maxblkid;
+ ndn->dn_num_slots = odn->dn_num_slots;
bcopy(&odn->dn_next_nblkptr[0], &ndn->dn_next_nblkptr[0],
sizeof (odn->dn_next_nblkptr));
bcopy(&odn->dn_next_nlevels[0], &ndn->dn_next_nlevels[0],
@@ -759,7 +783,6 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
ASSERT(avl_is_empty(&ndn->dn_dbufs));
avl_swap(&ndn->dn_dbufs, &odn->dn_dbufs);
ndn->dn_dbufs_count = odn->dn_dbufs_count;
- ndn->dn_unlisted_l0_blkid = odn->dn_unlisted_l0_blkid;
ndn->dn_bonus = odn->dn_bonus;
ndn->dn_have_spill = odn->dn_have_spill;
ndn->dn_zio = odn->dn_zio;
@@ -773,8 +796,6 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
dmu_zfetch_init(&ndn->dn_zfetch, NULL);
list_move_tail(&ndn->dn_zfetch.zf_stream, &odn->dn_zfetch.zf_stream);
ndn->dn_zfetch.zf_dnode = odn->dn_zfetch.zf_dnode;
- ndn->dn_zfetch.zf_stream_cnt = odn->dn_zfetch.zf_stream_cnt;
- ndn->dn_zfetch.zf_alloc_fail = odn->dn_zfetch.zf_alloc_fail;
/*
* Update back pointers. Updating the handle fixes the back pointer of
@@ -794,7 +815,6 @@ dnode_move_impl(dnode_t *odn, dnode_t *ndn)
avl_create(&odn->dn_dbufs, dbuf_compare, sizeof (dmu_buf_impl_t),
offsetof(dmu_buf_impl_t, db_link));
odn->dn_dbufs_count = 0;
- odn->dn_unlisted_l0_blkid = 0;
odn->dn_bonus = NULL;
odn->dn_zfetch.zf_dnode = NULL;
@@ -861,7 +881,7 @@ dnode_move(void *buf, void *newbuf, size_t size, void *arg)
*/
os = odn->dn_objset;
if (!POINTER_IS_VALID(os)) {
- DNODE_STAT_ADD(dnode_move_stats.dms_dnode_invalid);
+ DNODE_STAT_BUMP(dnode_move_invalid);
return (KMEM_CBRC_DONT_KNOW);
}
@@ -871,7 +891,7 @@ dnode_move(void *buf, void *newbuf, size_t size, void *arg)
rw_enter(&os_lock, RW_WRITER);
if (os != odn->dn_objset) {
rw_exit(&os_lock);
- DNODE_STAT_ADD(dnode_move_stats.dms_dnode_recheck1);
+ DNODE_STAT_BUMP(dnode_move_recheck1);
return (KMEM_CBRC_DONT_KNOW);
}
@@ -889,7 +909,7 @@ dnode_move(void *buf, void *newbuf, size_t size, void *arg)
if (os != odn->dn_objset) {
mutex_exit(&os->os_lock);
rw_exit(&os_lock);
- DNODE_STAT_ADD(dnode_move_stats.dms_dnode_recheck2);
+ DNODE_STAT_BUMP(dnode_move_recheck2);
return (KMEM_CBRC_DONT_KNOW);
}
@@ -902,7 +922,7 @@ dnode_move(void *buf, void *newbuf, size_t size, void *arg)
rw_exit(&os_lock);
if (DMU_OBJECT_IS_SPECIAL(odn->dn_object)) {
mutex_exit(&os->os_lock);
- DNODE_STAT_ADD(dnode_move_stats.dms_dnode_special);
+ DNODE_STAT_BUMP(dnode_move_special);
return (KMEM_CBRC_NO);
}
ASSERT(odn->dn_dbuf != NULL); /* only "special" dnodes have no parent */
@@ -917,7 +937,7 @@ dnode_move(void *buf, void *newbuf, size_t size, void *arg)
*/
if (!zrl_tryenter(&odn->dn_handle->dnh_zrlock)) {
mutex_exit(&os->os_lock);
- DNODE_STAT_ADD(dnode_move_stats.dms_dnode_handle);
+ DNODE_STAT_BUMP(dnode_move_handle);
return (KMEM_CBRC_LATER);
}
@@ -933,7 +953,7 @@ dnode_move(void *buf, void *newbuf, size_t size, void *arg)
if (!rw_tryenter(&odn->dn_struct_rwlock, RW_WRITER)) {
zrl_exit(&odn->dn_handle->dnh_zrlock);
mutex_exit(&os->os_lock);
- DNODE_STAT_ADD(dnode_move_stats.dms_dnode_rwlock);
+ DNODE_STAT_BUMP(dnode_move_rwlock);
return (KMEM_CBRC_LATER);
}
@@ -959,7 +979,7 @@ dnode_move(void *buf, void *newbuf, size_t size, void *arg)
rw_exit(&odn->dn_struct_rwlock);
zrl_exit(&odn->dn_handle->dnh_zrlock);
mutex_exit(&os->os_lock);
- DNODE_STAT_ADD(dnode_move_stats.dms_dnode_active);
+ DNODE_STAT_BUMP(dnode_move_active);
return (KMEM_CBRC_LATER);
}
@@ -983,6 +1003,78 @@ dnode_move(void *buf, void *newbuf, size_t size, void *arg)
}
#endif /* _KERNEL */
+static void
+dnode_slots_hold(dnode_children_t *children, int idx, int slots)
+{
+ ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
+
+ for (int i = idx; i < idx + slots; i++) {
+ dnode_handle_t *dnh = &children->dnc_children[i];
+ zrl_add(&dnh->dnh_zrlock);
+ }
+}
+
+static void
+dnode_slots_rele(dnode_children_t *children, int idx, int slots)
+{
+ ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
+
+ for (int i = idx; i < idx + slots; i++) {
+ dnode_handle_t *dnh = &children->dnc_children[i];
+
+ if (zrl_is_locked(&dnh->dnh_zrlock))
+ zrl_exit(&dnh->dnh_zrlock);
+ else
+ zrl_remove(&dnh->dnh_zrlock);
+ }
+}
+
+static int
+dnode_slots_tryenter(dnode_children_t *children, int idx, int slots)
+{
+ ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
+
+ for (int i = idx; i < idx + slots; i++) {
+ dnode_handle_t *dnh = &children->dnc_children[i];
+
+ if (!zrl_tryenter(&dnh->dnh_zrlock)) {
+ for (int j = idx; j < i; j++) {
+ dnh = &children->dnc_children[j];
+ zrl_exit(&dnh->dnh_zrlock);
+ }
+
+ return (0);
+ }
+ }
+
+ return (1);
+}
+
+static void
+dnode_set_slots(dnode_children_t *children, int idx, int slots, void *ptr)
+{
+ ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
+
+ for (int i = idx; i < idx + slots; i++) {
+ dnode_handle_t *dnh = &children->dnc_children[i];
+ dnh->dnh_dnode = ptr;
+ }
+}
+
+static boolean_t
+dnode_check_slots(dnode_children_t *children, int idx, int slots, void *ptr)
+{
+ ASSERT3S(idx + slots, <=, DNODES_PER_BLOCK);
+
+ for (int i = idx; i < idx + slots; i++) {
+ dnode_handle_t *dnh = &children->dnc_children[i];
+ if (dnh->dnh_dnode != ptr)
+ return (B_FALSE);
+ }
+
+ return (B_TRUE);
+}
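+
These helpers implement a check, lock, re-check protocol over a run of
consecutive slots. Condensed from the DNODE_MUST_BE_FREE path of
dnode_hold_impl() below, the pattern is:

    dnode_slots_hold(dnc, idx, slots);           /* cheap shared holds */
    if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) {
            dnode_slots_rele(dnc, idx, slots);
            return (SET_ERROR(ENOSPC));          /* already in use */
    }
    dnode_slots_rele(dnc, idx, slots);
    if (!dnode_slots_tryenter(dnc, idx, slots))
            continue;                            /* lost the lock race */
    if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) {
            dnode_slots_rele(dnc, idx, slots);
            return (SET_ERROR(ENOSPC));          /* changed before lock */
    }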
+
void
dnode_special_close(dnode_handle_t *dnh)
{
@@ -990,7 +1082,7 @@ dnode_special_close(dnode_handle_t *dnh)
/*
* Wait for final references to the dnode to clear. This can
- * only happen if the arc is asyncronously evicting state that
+ * only happen if the arc is asynchronously evicting state that
* has a hold on this dnode while we are trying to evict this
* dnode.
*/
@@ -1010,19 +1102,24 @@ dnode_special_open(objset_t *os, dnode_phys_t *dnp, uint64_t object,
{
dnode_t *dn;
- dn = dnode_create(os, dnp, NULL, object, dnh);
zrl_init(&dnh->dnh_zrlock);
+ zrl_tryenter(&dnh->dnh_zrlock);
+
+ dn = dnode_create(os, dnp, NULL, object, dnh);
DNODE_VERIFY(dn);
+
+ zrl_exit(&dnh->dnh_zrlock);
}
static void
-dnode_buf_pageout(void *dbu)
+dnode_buf_evict_async(void *dbu)
{
- dnode_children_t *children_dnodes = dbu;
- int i;
+ dnode_children_t *dnc = dbu;
- for (i = 0; i < children_dnodes->dnc_count; i++) {
- dnode_handle_t *dnh = &children_dnodes->dnc_children[i];
+ DNODE_STAT_BUMP(dnode_buf_evict);
+
+ for (int i = 0; i < dnc->dnc_count; i++) {
+ dnode_handle_t *dnh = &dnc->dnc_children[i];
dnode_t *dn;
/*
@@ -1030,8 +1127,9 @@ dnode_buf_pageout(void *dbu)
* another valid address, so there is no need here to guard
* against changes to or from NULL.
*/
- if (dnh->dnh_dnode == NULL) {
+ if (!DN_SLOT_IS_PTR(dnh->dnh_dnode)) {
zrl_destroy(&dnh->dnh_zrlock);
+ dnh->dnh_dnode = DN_SLOT_UNINIT;
continue;
}
@@ -1046,22 +1144,27 @@ dnode_buf_pageout(void *dbu)
ASSERT(refcount_is_zero(&dn->dn_holds));
ASSERT(refcount_is_zero(&dn->dn_tx_holds));
- dnode_destroy(dn); /* implicit zrl_remove() */
+ dnode_destroy(dn); /* implicit zrl_remove() for first slot */
zrl_destroy(&dnh->dnh_zrlock);
- dnh->dnh_dnode = NULL;
+ dnh->dnh_dnode = DN_SLOT_UNINIT;
}
- kmem_free(children_dnodes, sizeof (dnode_children_t) +
- children_dnodes->dnc_count * sizeof (dnode_handle_t));
+ kmem_free(dnc, sizeof (dnode_children_t) +
+ dnc->dnc_count * sizeof (dnode_handle_t));
}
/*
* errors:
- * EINVAL - invalid object number.
- * EIO - i/o error.
+ * EINVAL - Invalid object number or flags.
+ * ENOSPC - Hole too small to fulfill "slots" request (DNODE_MUST_BE_FREE)
+ * EEXIST - Refers to an allocated dnode (DNODE_MUST_BE_FREE)
+ * - Refers to an interior dnode slot (DNODE_MUST_BE_ALLOCATED)
+ * ENOENT - The requested dnode is not allocated (DNODE_MUST_BE_ALLOCATED)
+ * EIO - I/O error when reading the meta dnode dbuf.
+ *
* succeeds even for free dnodes.
*/
int
-dnode_hold_impl(objset_t *os, uint64_t object, int flag,
+dnode_hold_impl(objset_t *os, uint64_t object, int flag, int slots,
void *tag, dnode_t **dnp)
{
int epb, idx, err;
@@ -1070,9 +1173,13 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
uint64_t blk;
dnode_t *mdn, *dn;
dmu_buf_impl_t *db;
- dnode_children_t *children_dnodes;
+ dnode_children_t *dnc;
+ dnode_phys_t *dn_block;
dnode_handle_t *dnh;
+ ASSERT(!(flag & DNODE_MUST_BE_ALLOCATED) || (slots == 0));
+ ASSERT(!(flag & DNODE_MUST_BE_FREE) || (slots > 0));
+
/*
* If you are holding the spa config lock as writer, you shouldn't
* be asking the DMU to do *anything* unless it's the root pool
@@ -1112,15 +1219,18 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
drop_struct_lock = TRUE;
}
- blk = dbuf_whichblock(mdn, object * sizeof (dnode_phys_t));
+ blk = dbuf_whichblock(mdn, 0, object * sizeof (dnode_phys_t));
db = dbuf_hold(mdn, blk, FTAG);
if (drop_struct_lock)
rw_exit(&mdn->dn_struct_rwlock);
- if (db == NULL)
+ if (db == NULL) {
+ DNODE_STAT_BUMP(dnode_hold_dbuf_hold);
return (SET_ERROR(EIO));
+ }
err = dbuf_read(db, NULL, DB_RF_CANFAIL);
if (err) {
+ DNODE_STAT_BUMP(dnode_hold_dbuf_read);
dbuf_rele(db, FTAG);
return (err);
}
@@ -1128,62 +1238,179 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
ASSERT3U(db->db.db_size, >=, 1<<DNODE_SHIFT);
epb = db->db.db_size >> DNODE_SHIFT;
- idx = object & (epb-1);
+ idx = object & (epb - 1);
+ dn_block = (dnode_phys_t *)db->db.db_data;
ASSERT(DB_DNODE(db)->dn_type == DMU_OT_DNODE);
- children_dnodes = dmu_buf_get_user(&db->db);
- if (children_dnodes == NULL) {
- int i;
+ dnc = dmu_buf_get_user(&db->db);
+ dnh = NULL;
+ if (dnc == NULL) {
dnode_children_t *winner;
- children_dnodes = kmem_zalloc(sizeof (dnode_children_t) +
+ int skip = 0;
+
+ dnc = kmem_zalloc(sizeof (dnode_children_t) +
epb * sizeof (dnode_handle_t), KM_SLEEP);
- children_dnodes->dnc_count = epb;
- dnh = &children_dnodes->dnc_children[0];
- for (i = 0; i < epb; i++) {
+ dnc->dnc_count = epb;
+ dnh = &dnc->dnc_children[0];
+
+ /* Initialize dnode slot status from dnode_phys_t */
+ for (int i = 0; i < epb; i++) {
zrl_init(&dnh[i].dnh_zrlock);
+
+ if (skip) {
+ skip--;
+ continue;
+ }
+
+ if (dn_block[i].dn_type != DMU_OT_NONE) {
+ int interior = dn_block[i].dn_extra_slots;
+
+ dnode_set_slots(dnc, i, 1, DN_SLOT_ALLOCATED);
+ dnode_set_slots(dnc, i + 1, interior,
+ DN_SLOT_INTERIOR);
+ skip = interior;
+ } else {
+ dnh[i].dnh_dnode = DN_SLOT_FREE;
+ skip = 0;
+ }
}
- dmu_buf_init_user(&children_dnodes->dnc_dbu,
- dnode_buf_pageout, NULL);
- winner = dmu_buf_set_user(&db->db, &children_dnodes->dnc_dbu);
+
+ dmu_buf_init_user(&dnc->dnc_dbu, NULL,
+ dnode_buf_evict_async, NULL);
+ winner = dmu_buf_set_user(&db->db, &dnc->dnc_dbu);
if (winner != NULL) {
- for (i = 0; i < epb; i++) {
+ for (int i = 0; i < epb; i++)
zrl_destroy(&dnh[i].dnh_zrlock);
- }
- kmem_free(children_dnodes, sizeof (dnode_children_t) +
+ kmem_free(dnc, sizeof (dnode_children_t) +
epb * sizeof (dnode_handle_t));
- children_dnodes = winner;
+ dnc = winner;
}
}
- ASSERT(children_dnodes->dnc_count == epb);
- dnh = &children_dnodes->dnc_children[idx];
- zrl_add(&dnh->dnh_zrlock);
- dn = dnh->dnh_dnode;
- if (dn == NULL) {
- dnode_phys_t *phys = (dnode_phys_t *)db->db.db_data+idx;
+ ASSERT(dnc->dnc_count == epb);
+ dn = DN_SLOT_UNINIT;
+
+ if (flag & DNODE_MUST_BE_ALLOCATED) {
+ slots = 1;
+
+ while (dn == DN_SLOT_UNINIT) {
+ dnode_slots_hold(dnc, idx, slots);
+ dnh = &dnc->dnc_children[idx];
+
+ if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) {
+ dn = dnh->dnh_dnode;
+ break;
+ } else if (dnh->dnh_dnode == DN_SLOT_INTERIOR) {
+ DNODE_STAT_BUMP(dnode_hold_alloc_interior);
+ dnode_slots_rele(dnc, idx, slots);
+ dbuf_rele(db, FTAG);
+ return (SET_ERROR(EEXIST));
+ } else if (dnh->dnh_dnode != DN_SLOT_ALLOCATED) {
+ DNODE_STAT_BUMP(dnode_hold_alloc_misses);
+ dnode_slots_rele(dnc, idx, slots);
+ dbuf_rele(db, FTAG);
+ return (SET_ERROR(ENOENT));
+ }
+
+ dnode_slots_rele(dnc, idx, slots);
+ if (!dnode_slots_tryenter(dnc, idx, slots)) {
+ DNODE_STAT_BUMP(dnode_hold_alloc_lock_retry);
+ continue;
+ }
+
+ /*
+ * Someone else won the race and called dnode_create()
+ * after we checked DN_SLOT_IS_PTR() above but before
+ * we acquired the lock.
+ */
+ if (DN_SLOT_IS_PTR(dnh->dnh_dnode)) {
+ DNODE_STAT_BUMP(dnode_hold_alloc_lock_misses);
+ dn = dnh->dnh_dnode;
+ } else {
+ dn = dnode_create(os, dn_block + idx, db,
+ object, dnh);
+ }
+ }
+
+ mutex_enter(&dn->dn_mtx);
+ if (dn->dn_type == DMU_OT_NONE) {
+ DNODE_STAT_BUMP(dnode_hold_alloc_type_none);
+ mutex_exit(&dn->dn_mtx);
+ dnode_slots_rele(dnc, idx, slots);
+ dbuf_rele(db, FTAG);
+ return (SET_ERROR(ENOENT));
+ }
+
+ DNODE_STAT_BUMP(dnode_hold_alloc_hits);
+ } else if (flag & DNODE_MUST_BE_FREE) {
+
+ if (idx + slots - 1 >= DNODES_PER_BLOCK) {
+ DNODE_STAT_BUMP(dnode_hold_free_overflow);
+ dbuf_rele(db, FTAG);
+ return (SET_ERROR(ENOSPC));
+ }
+
+ while (dn == DN_SLOT_UNINIT) {
+ dnode_slots_hold(dnc, idx, slots);
- dn = dnode_create(os, phys, db, object, dnh);
+ if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) {
+ DNODE_STAT_BUMP(dnode_hold_free_misses);
+ dnode_slots_rele(dnc, idx, slots);
+ dbuf_rele(db, FTAG);
+ return (SET_ERROR(ENOSPC));
+ }
+
+ dnode_slots_rele(dnc, idx, slots);
+ if (!dnode_slots_tryenter(dnc, idx, slots)) {
+ DNODE_STAT_BUMP(dnode_hold_free_lock_retry);
+ continue;
+ }
+
+ if (!dnode_check_slots(dnc, idx, slots, DN_SLOT_FREE)) {
+ DNODE_STAT_BUMP(dnode_hold_free_lock_misses);
+ dnode_slots_rele(dnc, idx, slots);
+ dbuf_rele(db, FTAG);
+ return (SET_ERROR(ENOSPC));
+ }
+
+ dnh = &dnc->dnc_children[idx];
+ dn = dnode_create(os, dn_block + idx, db, object, dnh);
+ }
+
+ mutex_enter(&dn->dn_mtx);
+ if (!refcount_is_zero(&dn->dn_holds)) {
+ DNODE_STAT_BUMP(dnode_hold_free_refcount);
+ mutex_exit(&dn->dn_mtx);
+ dnode_slots_rele(dnc, idx, slots);
+ dbuf_rele(db, FTAG);
+ return (SET_ERROR(EEXIST));
+ }
+
+ dnode_set_slots(dnc, idx + 1, slots - 1, DN_SLOT_INTERIOR);
+ DNODE_STAT_BUMP(dnode_hold_free_hits);
+ } else {
+ dbuf_rele(db, FTAG);
+ return (SET_ERROR(EINVAL));
}
- mutex_enter(&dn->dn_mtx);
- type = dn->dn_type;
- if (dn->dn_free_txg ||
- ((flag & DNODE_MUST_BE_ALLOCATED) && type == DMU_OT_NONE) ||
- ((flag & DNODE_MUST_BE_FREE) &&
- (type != DMU_OT_NONE || !refcount_is_zero(&dn->dn_holds)))) {
+ if (dn->dn_free_txg) {
+ DNODE_STAT_BUMP(dnode_hold_free_txg);
+ type = dn->dn_type;
mutex_exit(&dn->dn_mtx);
- zrl_remove(&dnh->dnh_zrlock);
+ dnode_slots_rele(dnc, idx, slots);
dbuf_rele(db, FTAG);
return (type == DMU_OT_NONE ? ENOENT : EEXIST);
}
+
if (refcount_add(&dn->dn_holds, tag) == 1)
dbuf_add_ref(db, dnh);
+
mutex_exit(&dn->dn_mtx);
/* Now we can rely on the hold to prevent the dnode from moving. */
- zrl_remove(&dnh->dnh_zrlock);
+ dnode_slots_rele(dnc, idx, slots);
DNODE_VERIFY(dn);
ASSERT3P(dn->dn_dbuf, ==, db);
@@ -1200,7 +1427,8 @@ dnode_hold_impl(objset_t *os, uint64_t object, int flag,
int
dnode_hold(objset_t *os, uint64_t object, void *tag, dnode_t **dnp)
{
- return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, tag, dnp));
+ return (dnode_hold_impl(os, object, DNODE_MUST_BE_ALLOCATED, 0, tag,
+ dnp));
}
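
In the free case, callers pass the number of contiguous slots the new
dnode needs. A hypothetical caller sketch against the error contract
documented above:

    dnode_t *dn;
    int err;

    /* Reserve two consecutive free slots (a 1K dnode with 512-byte
     * slots); trailing slots are marked DN_SLOT_INTERIOR on success. */
    err = dnode_hold_impl(os, object, DNODE_MUST_BE_FREE, 2, FTAG, &dn);
    if (err == ENOSPC) {
            /* hole too small, or one of the slots is already in use */
    } else if (err == 0) {
            dnode_rele(dn, FTAG);
    }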
/*
@@ -1289,13 +1517,14 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
*/
dmu_objset_userquota_get_ids(dn, B_TRUE, tx);
- mutex_enter(&os->os_lock);
+ multilist_t *dirtylist = os->os_dirty_dnodes[txg & TXG_MASK];
+ multilist_sublist_t *mls = multilist_sublist_lock_obj(dirtylist, dn);
/*
* If we are already marked dirty, we're done.
*/
if (list_link_active(&dn->dn_dirty_link[txg & TXG_MASK])) {
- mutex_exit(&os->os_lock);
+ multilist_sublist_unlock(mls);
return;
}
@@ -1309,13 +1538,9 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
dprintf_ds(os->os_dsl_dataset, "obj=%llu txg=%llu\n",
dn->dn_object, txg);
- if (dn->dn_free_txg > 0 && dn->dn_free_txg <= txg) {
- list_insert_tail(&os->os_free_dnodes[txg&TXG_MASK], dn);
- } else {
- list_insert_tail(&os->os_dirty_dnodes[txg&TXG_MASK], dn);
- }
+ multilist_sublist_insert_head(mls, dn);
- mutex_exit(&os->os_lock);
+ multilist_sublist_unlock(mls);
/*
* The dnode maintains a hold on its containing dbuf as
@@ -1336,13 +1561,6 @@ dnode_setdirty(dnode_t *dn, dmu_tx_t *tx)
void
dnode_free(dnode_t *dn, dmu_tx_t *tx)
{
- int txgoff = tx->tx_txg & TXG_MASK;
-
- dprintf("dn=%p txg=%llu\n", dn, tx->tx_txg);
-
- /* we should be the only holder... hopefully */
- /* ASSERT3U(refcount_count(&dn->dn_holds), ==, 1); */
-
mutex_enter(&dn->dn_mtx);
if (dn->dn_type == DMU_OT_NONE || dn->dn_free_txg) {
mutex_exit(&dn->dn_mtx);
@@ -1351,19 +1569,7 @@ dnode_free(dnode_t *dn, dmu_tx_t *tx)
dn->dn_free_txg = tx->tx_txg;
mutex_exit(&dn->dn_mtx);
- /*
- * If the dnode is already dirty, it needs to be moved from
- * the dirty list to the free list.
- */
- mutex_enter(&dn->dn_objset->os_lock);
- if (list_link_active(&dn->dn_dirty_link[txgoff])) {
- list_remove(&dn->dn_objset->os_dirty_dnodes[txgoff], dn);
- list_insert_tail(&dn->dn_objset->os_free_dnodes[txgoff], dn);
- mutex_exit(&dn->dn_objset->os_lock);
- } else {
- mutex_exit(&dn->dn_objset->os_lock);
- dnode_setdirty(dn, tx);
- }
+ dnode_setdirty(dn, tx);
}
/*
@@ -1409,7 +1615,7 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
goto fail;
/* resize the old block */
- err = dbuf_hold_impl(dn, 0, 0, TRUE, FTAG, &db);
+ err = dbuf_hold_impl(dn, 0, 0, TRUE, FALSE, FTAG, &db);
if (err == 0)
dbuf_new_size(db, size, tx);
else if (err != ENOENT)
@@ -1476,6 +1682,8 @@ dnode_new_blkid(dnode_t *dn, uint64_t blkid, dmu_tx_t *tx, boolean_t have_read)
sz <= blkid && sz >= dn->dn_nblkptr; sz <<= epbs)
new_nlevels++;
+ ASSERT3U(new_nlevels, <=, DN_MAX_LEVELS);
+
if (new_nlevels > dn->dn_nlevels) {
int old_nlevels = dn->dn_nlevels;
dmu_buf_impl_t *db;
@@ -1582,8 +1790,8 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
ASSERT3U(blkoff + head, ==, blksz);
if (len < head)
head = len;
- if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, off), TRUE,
- FTAG, &db) == 0) {
+ if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off),
+ TRUE, FALSE, FTAG, &db) == 0) {
caddr_t data;
/* don't dirty if it isn't on disk and isn't dirty */
@@ -1620,8 +1828,8 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx)
if (tail) {
if (len < tail)
tail = len;
- if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, off+len),
- TRUE, FTAG, &db) == 0) {
+ if (dbuf_hold_impl(dn, 0, dbuf_whichblock(dn, 0, off+len),
+ TRUE, FALSE, FTAG, &db) == 0) {
/* don't dirty if not on disk and not dirty */
if (db->db_last_dirty ||
(db->db_blkptr && !BP_IS_HOLE(db->db_blkptr))) {
@@ -1814,25 +2022,6 @@ dnode_diduse_space(dnode_t *dn, int64_t delta)
mutex_exit(&dn->dn_mtx);
}
-/*
- * Call when we think we're going to write/free space in open context to track
- * the amount of memory in use by the currently open txg.
- */
-void
-dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx)
-{
- objset_t *os = dn->dn_objset;
- dsl_dataset_t *ds = os->os_dsl_dataset;
- int64_t aspace = spa_get_asize(os->os_spa, space);
-
- if (ds != NULL) {
- dsl_dir_willuse_space(ds->ds_dir, aspace, tx);
- dsl_pool_dirty_space(dmu_tx_pool(tx), space, tx);
- }
-
- dmu_tx_willuse_space(tx, aspace);
-}
-
/*
* Scans a block at the indicated "level" looking for a hole or data,
* depending on 'flags'.
@@ -1853,7 +2042,7 @@ dnode_willuse_space(dnode_t *dn, int64_t space, dmu_tx_t *tx)
*/
static int
dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
- int lvl, uint64_t blkfill, uint64_t txg)
+ int lvl, uint64_t blkfill, uint64_t txg)
{
dmu_buf_impl_t *db = NULL;
void *data = NULL;
@@ -1863,9 +2052,6 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
boolean_t hole;
int i, inc, error, span;
- dprintf("probing object %llu offset %llx level %d of %u\n",
- dn->dn_object, *offset, lvl, dn->dn_phys->dn_nlevels);
-
hole = ((flags & DNODE_FIND_HOLE) != 0);
inc = (flags & DNODE_FIND_BACKWARDS) ? -1 : 1;
ASSERT(txg == 0 || !hole);
@@ -1875,8 +2061,8 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
epb = dn->dn_phys->dn_nblkptr;
data = dn->dn_phys->dn_blkptr;
} else {
- uint64_t blkid = dbuf_whichblock(dn, *offset) >> (epbs * lvl);
- error = dbuf_hold_impl(dn, lvl, blkid, TRUE, FTAG, &db);
+ uint64_t blkid = dbuf_whichblock(dn, lvl, *offset);
+ error = dbuf_hold_impl(dn, lvl, blkid, TRUE, FALSE, FTAG, &db);
if (error) {
if (error != ENOENT)
return (error);
@@ -1910,17 +2096,21 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
error = SET_ERROR(ESRCH);
} else if (lvl == 0) {
dnode_phys_t *dnp = data;
- span = DNODE_SHIFT;
+
ASSERT(dn->dn_type == DMU_OT_DNODE);
+ ASSERT(!(flags & DNODE_FIND_BACKWARDS));
- for (i = (*offset >> span) & (blkfill - 1);
- i >= 0 && i < blkfill; i += inc) {
+ for (i = (*offset >> DNODE_SHIFT) & (blkfill - 1);
+ i < blkfill; i += dnp[i].dn_extra_slots + 1) {
if ((dnp[i].dn_type == DMU_OT_NONE) == hole)
break;
- *offset += (1ULL << span) * inc;
}
- if (i < 0 || i == blkfill)
+
+ if (i == blkfill)
error = SET_ERROR(ESRCH);
+
+ *offset = (*offset & ~(DNODE_BLOCK_SIZE - 1)) +
+ (i << DNODE_SHIFT);
} else {
blkptr_t *bp = data;
uint64_t start = *offset;
@@ -1933,7 +2123,14 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
else
minfill++;
- *offset = *offset >> span;
+ if (span >= 8 * sizeof (*offset)) {
+ /* This only happens on the highest indirection level */
+ ASSERT3U((lvl - 1), ==, dn->dn_phys->dn_nlevels - 1);
+ *offset = 0;
+ } else {
+ *offset = *offset >> span;
+ }
+
for (i = BF64_GET(*offset, 0, epbs);
i >= 0 && i < epb; i += inc) {
if (BP_GET_FILL(&bp[i]) >= minfill &&
@@ -1943,7 +2140,13 @@ dnode_next_offset_level(dnode_t *dn, int flags, uint64_t *offset,
if (inc > 0 || *offset > 0)
*offset += inc;
}
- *offset = *offset << span;
+
+ if (span >= 8 * sizeof (*offset)) {
+ *offset = start;
+ } else {
+ *offset = *offset << span;
+ }
+
if (inc < 0) {
/* traversing backwards; position offset at the end */
ASSERT3U(*offset, <=, start);
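The new span checks in this hunk guard against undefined behavior:
shifting a 64-bit offset right or left by 64 or more bits is undefined
in C, and on the highest indirection level span can now reach
8 * sizeof (*offset). A standalone illustration of the guard:

#include <stdint.h>
#include <stdio.h>

uint64_t
safe_shift_down(uint64_t off, unsigned span)
{
	/* span can reach 64 on the highest indirection level */
	if (span >= 8 * sizeof (off))
		return (0);	/* the whole offset collapses to slot 0 */
	return (off >> span);
}

int
main(void)
{
	/* shifting "down by 64" must not be computed with >> directly */
	printf("%llu\n", (unsigned long long)safe_shift_down(1ULL << 40, 64));
	return (0);
}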
diff --git a/zfs/module/zfs/dnode_sync.c b/zfs/module/zfs/dnode_sync.c
index df5c8e4ee6c4..742d962bc232 100644
--- a/zfs/module/zfs/dnode_sync.c
+++ b/zfs/module/zfs/dnode_sync.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -60,20 +60,14 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx)
dprintf("os=%p obj=%llu, increase to %d\n", dn->dn_objset,
dn->dn_object, dn->dn_phys->dn_nlevels);
- /* check for existing blkptrs in the dnode */
- for (i = 0; i < nblkptr; i++)
- if (!BP_IS_HOLE(&dn->dn_phys->dn_blkptr[i]))
- break;
- if (i != nblkptr) {
- /* transfer dnode's block pointers to new indirect block */
- (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
- ASSERT(db->db.db_data);
- ASSERT(arc_released(db->db_buf));
- ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
- bcopy(dn->dn_phys->dn_blkptr, db->db.db_data,
- sizeof (blkptr_t) * nblkptr);
- arc_buf_freeze(db->db_buf);
- }
+ /* transfer dnode's block pointers to new indirect block */
+ (void) dbuf_read(db, NULL, DB_RF_MUST_SUCCEED|DB_RF_HAVESTRUCT);
+ ASSERT(db->db.db_data);
+ ASSERT(arc_released(db->db_buf));
+ ASSERT3U(sizeof (blkptr_t) * nblkptr, <=, db->db.db_size);
+ bcopy(dn->dn_phys->dn_blkptr, db->db.db_data,
+ sizeof (blkptr_t) * nblkptr);
+ arc_buf_freeze(db->db_buf);
/* set dbuf's parent pointers to new indirect buf */
for (i = 0; i < nblkptr; i++) {
@@ -192,7 +186,7 @@ free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx)
rw_enter(&dn->dn_struct_rwlock, RW_READER);
err = dbuf_hold_impl(dn, db->db_level-1,
- (db->db_blkid << epbs) + i, TRUE, FTAG, &child);
+ (db->db_blkid << epbs) + i, TRUE, FALSE, FTAG, &child);
rw_exit(&dn->dn_struct_rwlock);
if (err == ENOENT)
continue;
@@ -246,8 +240,9 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
dnode_t *dn;
blkptr_t *bp;
dmu_buf_impl_t *subdb;
- uint64_t start, end, dbstart, dbend, i;
- int epbs, shift;
+ uint64_t start, end, dbstart, dbend;
+ unsigned int epbs, shift, i;
+ uint64_t id;
/*
* There is a small possibility that this block will not be cached:
@@ -264,6 +259,7 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
epbs = dn->dn_phys->dn_indblkshift - SPA_BLKPTRSHIFT;
+ ASSERT3U(epbs, <, 31);
shift = (db->db_level - 1) * epbs;
dbstart = db->db_blkid << epbs;
start = blkid >> shift;
@@ -283,12 +279,12 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
FREE_VERIFY(db, start, end, tx);
free_blocks(dn, bp, end-start+1, tx);
} else {
- for (i = start; i <= end; i++, bp++) {
+ for (id = start; id <= end; id++, bp++) {
if (BP_IS_HOLE(bp))
continue;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
VERIFY0(dbuf_hold_impl(dn, db->db_level - 1,
- i, B_TRUE, FTAG, &subdb));
+ id, TRUE, FALSE, FTAG, &subdb));
rw_exit(&dn->dn_struct_rwlock);
ASSERT3P(bp, ==, subdb->db_blkptr);
@@ -298,13 +294,19 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks,
}
/* If this whole block is free, free ourself too. */
- for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) {
+ for (i = 0, bp = db->db.db_data; i < 1ULL << epbs; i++, bp++) {
if (!BP_IS_HOLE(bp))
break;
}
if (i == 1 << epbs) {
- /* didn't find any non-holes */
+ /*
+ * We only found holes. Grab the rwlock to prevent
+ * anybody from reading the blocks we're about to
+ * zero out.
+ */
+ rw_enter(&dn->dn_struct_rwlock, RW_WRITER);
bzero(db->db.db_data, db->db.db_size);
+ rw_exit(&dn->dn_struct_rwlock);
free_blocks(dn, db->db_blkptr, 1, tx);
} else {
/*
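The rw_enter(..., RW_WRITER) added around the bzero() above closes a
race: a concurrent reader holding dn_struct_rwlock as reader could
otherwise observe a partially zeroed indirect block. The same rule in
a standalone pthread sketch:

#include <pthread.h>
#include <string.h>

pthread_rwlock_t struct_rwlock = PTHREAD_RWLOCK_INITIALIZER;
char block[4096];

void
zero_block_safely(void)
{
	/* take the write side so no reader sees a half-zeroed block */
	pthread_rwlock_wrlock(&struct_rwlock);
	memset(block, 0, sizeof (block));
	pthread_rwlock_unlock(&struct_rwlock);
}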
@@ -362,7 +364,7 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks,
continue;
rw_enter(&dn->dn_struct_rwlock, RW_READER);
VERIFY0(dbuf_hold_impl(dn, dnlevel - 1, i,
- TRUE, FTAG, &db));
+ TRUE, FALSE, FTAG, &db));
rw_exit(&dn->dn_struct_rwlock);
free_children(db, blkid, nblks, tx);
@@ -427,7 +429,7 @@ dnode_evict_dbufs(dnode_t *dn)
avl_insert_here(&dn->dn_dbufs, db_marker, db,
AVL_BEFORE);
- dbuf_clear(db);
+ dbuf_destroy(db);
db_next = AVL_NEXT(&dn->dn_dbufs, db_marker);
avl_remove(&dn->dn_dbufs, db_marker);
@@ -451,7 +453,7 @@ dnode_evict_bonus(dnode_t *dn)
if (dn->dn_bonus != NULL) {
if (refcount_is_zero(&dn->dn_bonus->db_holds)) {
mutex_enter(&dn->dn_bonus->db_mtx);
- dbuf_evict(dn->dn_bonus);
+ dbuf_destroy(dn->dn_bonus);
dn->dn_bonus = NULL;
} else {
dn->dn_bonus->db_pending_evict = TRUE;
@@ -530,7 +532,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
ASSERT(dn->dn_free_txg > 0);
if (dn->dn_allocated_txg != dn->dn_free_txg)
dmu_buf_will_dirty(&dn->dn_dbuf->db, tx);
- bzero(dn->dn_phys, sizeof (dnode_phys_t));
+ bzero(dn->dn_phys, sizeof (dnode_phys_t) * dn->dn_num_slots);
mutex_enter(&dn->dn_mtx);
dn->dn_type = DMU_OT_NONE;
@@ -545,7 +547,7 @@ dnode_sync_free(dnode_t *dn, dmu_tx_t *tx)
dnode_rele(dn, (void *)(uintptr_t)tx->tx_txg);
/*
* Now that we've released our hold, the dnode may
- * be evicted, so we musn't access it.
+ * be evicted, so we mustn't access it.
*/
}
@@ -565,7 +567,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(dnp->dn_type != DMU_OT_NONE || dn->dn_allocated_txg);
ASSERT(dnp->dn_type != DMU_OT_NONE ||
- bcmp(dnp, &zerodn, DNODE_SIZE) == 0);
+ bcmp(dnp, &zerodn, DNODE_MIN_SIZE) == 0);
DNODE_VERIFY(dn);
ASSERT(dn->dn_dbuf == NULL || arc_released(dn->dn_dbuf->db_buf));
@@ -576,12 +578,17 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
dn->dn_oldused = DN_USED_BYTES(dn->dn_phys);
dn->dn_oldflags = dn->dn_phys->dn_flags;
dn->dn_phys->dn_flags |= DNODE_FLAG_USERUSED_ACCOUNTED;
+ if (dmu_objset_userobjused_enabled(dn->dn_objset))
+ dn->dn_phys->dn_flags |=
+ DNODE_FLAG_USEROBJUSED_ACCOUNTED;
mutex_exit(&dn->dn_mtx);
dmu_objset_userquota_get_ids(dn, B_FALSE, tx);
} else {
/* Once we account for it, we should always account for it. */
ASSERT(!(dn->dn_phys->dn_flags &
DNODE_FLAG_USERUSED_ACCOUNTED));
+ ASSERT(!(dn->dn_phys->dn_flags &
+ DNODE_FLAG_USEROBJUSED_ACCOUNTED));
}
mutex_enter(&dn->dn_mtx);
@@ -597,6 +604,9 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
dnp->dn_bonustype = dn->dn_bonustype;
dnp->dn_bonuslen = dn->dn_bonuslen;
}
+
+ dnp->dn_extra_slots = dn->dn_num_slots - 1;
+
ASSERT(dnp->dn_nlevels > 1 ||
BP_IS_HOLE(&dnp->dn_blkptr[0]) ||
BP_IS_EMBEDDED(&dnp->dn_blkptr[0]) ||
@@ -629,7 +639,8 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
dnp->dn_bonuslen = 0;
else
dnp->dn_bonuslen = dn->dn_next_bonuslen[txgoff];
- ASSERT(dnp->dn_bonuslen <= DN_MAX_BONUSLEN);
+ ASSERT(dnp->dn_bonuslen <=
+ DN_SLOTS_TO_BONUSLEN(dnp->dn_extra_slots + 1));
dn->dn_next_bonuslen[txgoff] = 0;
}
@@ -668,7 +679,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
mutex_exit(&dn->dn_mtx);
if (kill_spill) {
- free_blocks(dn, &dn->dn_phys->dn_spill, 1, tx);
+ free_blocks(dn, DN_SPILL_BLKPTR(dn->dn_phys), 1, tx);
mutex_enter(&dn->dn_mtx);
dnp->dn_flags &= ~DNODE_FLAG_SPILL_BLKPTR;
mutex_exit(&dn->dn_mtx);
@@ -688,10 +699,19 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
}
if (freeing_dnode) {
+ dn->dn_objset->os_freed_dnodes++;
dnode_sync_free(dn, tx);
return;
}
+ if (dn->dn_num_slots > DNODE_MIN_SLOTS) {
+ dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset;
+ mutex_enter(&ds->ds_lock);
+ ds->ds_feature_activation_needed[SPA_FEATURE_LARGE_DNODE] =
+ B_TRUE;
+ mutex_exit(&ds->ds_lock);
+ }
+
if (dn->dn_next_nlevels[txgoff]) {
dnode_increase_indirection(dn, tx);
dn->dn_next_nlevels[txgoff] = 0;
diff --git a/zfs/module/zfs/dsl_bookmark.c b/zfs/module/zfs/dsl_bookmark.c
index 447a3a2dc3a2..a6ca1ebab3b2 100644
--- a/zfs/module/zfs/dsl_bookmark.c
+++ b/zfs/module/zfs/dsl_bookmark.c
@@ -12,8 +12,10 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2013, 2014 by Delphix. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
*/
#include <sys/zfs_context.h>
@@ -34,10 +36,10 @@ static int
dsl_bookmark_hold_ds(dsl_pool_t *dp, const char *fullname,
dsl_dataset_t **dsp, void *tag, char **shortnamep)
{
- char buf[MAXNAMELEN];
+ char buf[ZFS_MAX_DATASET_NAME_LEN];
char *hashp;
- if (strlen(fullname) >= MAXNAMELEN)
+ if (strlen(fullname) >= ZFS_MAX_DATASET_NAME_LEN)
return (SET_ERROR(ENAMETOOLONG));
hashp = strchr(fullname, '#');
if (hashp == NULL)
@@ -59,16 +61,14 @@ dsl_dataset_bmark_lookup(dsl_dataset_t *ds, const char *shortname,
{
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
uint64_t bmark_zapobj = ds->ds_bookmarks;
- matchtype_t mt;
+ matchtype_t mt = 0;
int err;
if (bmark_zapobj == 0)
return (SET_ERROR(ESRCH));
if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
- mt = MT_FIRST;
- else
- mt = MT_EXACT;
+ mt = MT_NORMALIZE;
err = zap_lookup_norm(mos, bmark_zapobj, shortname, sizeof (uint64_t),
sizeof (*bmark_phys) / sizeof (uint64_t), bmark_phys, mt,
@@ -342,12 +342,10 @@ dsl_dataset_bookmark_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx)
{
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
uint64_t bmark_zapobj = ds->ds_bookmarks;
- matchtype_t mt;
+ matchtype_t mt = 0;
if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
- mt = MT_FIRST;
- else
- mt = MT_EXACT;
+ mt = MT_NORMALIZE;
return (zap_remove_norm(mos, bmark_zapobj, name, mt, tx));
}
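These hunks track the upstream change of matchtype_t from an enum
(MT_FIRST/MT_EXACT) to a flag word: 0 now means an exact match and
MT_NORMALIZE asks for a normalized lookup, which is why the ENOTSUP
fallbacks elsewhere in this patch test it with &. A toy illustration;
the flag value is an assumption, not the real header definition:

#include <stdio.h>

#define MT_NORMALIZE	(1 << 0)	/* hypothetical value */

const char *
lookup_kind(unsigned mt)
{
	return ((mt & MT_NORMALIZE) ? "normalized" : "exact");
}

int
main(void)
{
	unsigned mt = 0;	/* default: exact match */
	int ci_dataset = 1;	/* case-insensitive dataset? */

	if (ci_dataset)
		mt |= MT_NORMALIZE;
	printf("%s\n", lookup_kind(mt));
	return (0);
}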
@@ -360,6 +358,9 @@ dsl_bookmark_destroy_check(void *arg, dmu_tx_t *tx)
int rv = 0;
nvpair_t *pair;
+ ASSERT(nvlist_empty(dbda->dbda_success));
+ ASSERT(nvlist_empty(dbda->dbda_errors));
+
if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_BOOKMARKS))
return (0);
@@ -389,7 +390,10 @@ dsl_bookmark_destroy_check(void *arg, dmu_tx_t *tx)
}
}
if (error == 0) {
- fnvlist_add_boolean(dbda->dbda_success, fullname);
+ if (dmu_tx_is_syncing(tx)) {
+ fnvlist_add_boolean(dbda->dbda_success,
+ fullname);
+ }
} else {
fnvlist_add_int32(dbda->dbda_errors, fullname, error);
rv = error;
diff --git a/zfs/module/zfs/dsl_dataset.c b/zfs/module/zfs/dsl_dataset.c
index 9bbb6fae23c1..bd03b486858b 100644
--- a/zfs/module/zfs/dsl_dataset.c
+++ b/zfs/module/zfs/dsl_dataset.c
@@ -18,13 +18,16 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
* Copyright (c) 2014 RackTop Systems.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
+ * Copyright 2016, OmniTI Computer Consulting, Inc. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
*/
#include <sys/dmu_objset.h>
@@ -51,6 +54,11 @@
#include <sys/dsl_destroy.h>
#include <sys/dsl_userhold.h>
#include <sys/dsl_bookmark.h>
+#include <sys/policy.h>
+#include <sys/dmu_send.h>
+#include <sys/zio_compress.h>
+#include <zfs_fletcher.h>
+#include <sys/zio_checksum.h>
/*
* The SPA supports block sizes up to 16MB. However, very large blocks
@@ -74,8 +82,12 @@ int zfs_max_recordsize = 1 * 1024 * 1024;
extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds);
+extern int spa_asize_inflation;
+
+static zil_header_t zero_zil;
+
/*
- * Figure out how much of this delta should be propogated to the dsl_dir
+ * Figure out how much of this delta should be propagated to the dsl_dir
* layer. If there's a refreservation, that space has already been
* partially accounted for in our ancestors.
*/
@@ -101,6 +113,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
{
int used, compressed, uncompressed;
int64_t delta;
+ spa_feature_t f;
used = bp_get_dsize_sync(tx->tx_pool->dp_spa, bp);
compressed = BP_GET_PSIZE(bp);
@@ -120,6 +133,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
return;
}
+ ASSERT3U(bp->blk_birth, >, dsl_dataset_phys(ds)->ds_prev_snap_txg);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
mutex_enter(&ds->ds_lock);
delta = parent_delta(ds, used);
@@ -127,8 +141,16 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
dsl_dataset_phys(ds)->ds_compressed_bytes += compressed;
dsl_dataset_phys(ds)->ds_uncompressed_bytes += uncompressed;
dsl_dataset_phys(ds)->ds_unique_bytes += used;
- if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE)
- ds->ds_need_large_blocks = B_TRUE;
+
+ if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) {
+ ds->ds_feature_activation_needed[SPA_FEATURE_LARGE_BLOCKS] =
+ B_TRUE;
+ }
+
+ f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp));
+ if (f != SPA_FEATURE_NONE)
+ ds->ds_feature_activation_needed[f] = B_TRUE;
+
mutex_exit(&ds->ds_lock);
dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
compressed, uncompressed, tx);
@@ -220,44 +242,24 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
return (used);
}
-uint64_t
-dsl_dataset_prev_snap_txg(dsl_dataset_t *ds)
-{
- uint64_t trysnap = 0;
-
- if (ds == NULL)
- return (0);
- /*
- * The snapshot creation could fail, but that would cause an
- * incorrect FALSE return, which would only result in an
- * overestimation of the amount of space that an operation would
- * consume, which is OK.
- *
- * There's also a small window where we could miss a pending
- * snapshot, because we could set the sync task in the quiescing
- * phase. So this should only be used as a guess.
- */
- if (ds->ds_trysnap_txg >
- spa_last_synced_txg(ds->ds_dir->dd_pool->dp_spa))
- trysnap = ds->ds_trysnap_txg;
- return (MAX(dsl_dataset_phys(ds)->ds_prev_snap_txg, trysnap));
-}
-
-boolean_t
-dsl_dataset_block_freeable(dsl_dataset_t *ds, const blkptr_t *bp,
- uint64_t blk_birth)
+/*
+ * We have to release the fsid synchronously or we risk that a subsequent
+ * mount of the same dataset will fail to unique_insert the fsid. This
+ * failure would manifest itself as the fsid of this dataset changing
+ * between mounts, which makes NFS clients quite unhappy.
+ */
+static void
+dsl_dataset_evict_sync(void *dbu)
{
- if (blk_birth <= dsl_dataset_prev_snap_txg(ds) ||
- (bp != NULL && BP_IS_HOLE(bp)))
- return (B_FALSE);
+ dsl_dataset_t *ds = dbu;
- ddt_prefetch(dsl_dataset_get_spa(ds), bp);
+ ASSERT(ds->ds_owner == NULL);
- return (B_TRUE);
+ unique_remove(ds->ds_fsid_guid);
}
static void
-dsl_dataset_evict(void *dbu)
+dsl_dataset_evict_async(void *dbu)
{
dsl_dataset_t *ds = dbu;
@@ -265,8 +267,6 @@ dsl_dataset_evict(void *dbu)
ds->ds_dbuf = NULL;
- unique_remove(ds->ds_fsid_guid);
-
if (ds->ds_objset != NULL)
dmu_objset_evict(ds->ds_objset);
@@ -283,10 +283,12 @@ dsl_dataset_evict(void *dbu)
ASSERT(!list_link_active(&ds->ds_synced_link));
+ list_destroy(&ds->ds_prop_cbs);
mutex_destroy(&ds->ds_lock);
mutex_destroy(&ds->ds_opening_lock);
mutex_destroy(&ds->ds_sendstream_lock);
refcount_destroy(&ds->ds_longholds);
+ rrw_destroy(&ds->ds_bp_rwlock);
kmem_free(ds, sizeof (dsl_dataset_t));
}
@@ -327,17 +329,15 @@ dsl_dataset_snap_lookup(dsl_dataset_t *ds, const char *name, uint64_t *value)
{
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;
- matchtype_t mt;
+ matchtype_t mt = 0;
int err;
if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
- mt = MT_FIRST;
- else
- mt = MT_EXACT;
+ mt = MT_NORMALIZE;
err = zap_lookup_norm(mos, snapobj, name, 8, 1,
value, mt, NULL, 0, NULL);
- if (err == ENOTSUP && mt == MT_FIRST)
+ if (err == ENOTSUP && (mt & MT_NORMALIZE))
err = zap_lookup(mos, snapobj, name, 8, 1, value);
return (err);
}
@@ -348,18 +348,16 @@ dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx,
{
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
uint64_t snapobj = dsl_dataset_phys(ds)->ds_snapnames_zapobj;
- matchtype_t mt;
+ matchtype_t mt = 0;
int err;
dsl_dir_snap_cmtime_update(ds->ds_dir);
if (dsl_dataset_phys(ds)->ds_flags & DS_FLAG_CI_DATASET)
- mt = MT_FIRST;
- else
- mt = MT_EXACT;
+ mt = MT_NORMALIZE;
err = zap_remove_norm(mos, snapobj, name, mt, tx);
- if (err == ENOTSUP && mt == MT_FIRST)
+ if (err == ENOTSUP && (mt & MT_NORMALIZE))
err = zap_remove(mos, snapobj, name, tx);
if (err == 0 && adj_cnt)
@@ -423,6 +421,7 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
+ rrw_init(&ds->ds_bp_rwlock, B_FALSE);
refcount_create(&ds->ds_longholds);
bplist_create(&ds->ds_pending_deadlist);
@@ -432,20 +431,29 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
offsetof(dmu_sendarg_t, dsa_link));
+ list_create(&ds->ds_prop_cbs, sizeof (dsl_prop_cb_record_t),
+ offsetof(dsl_prop_cb_record_t, cbr_ds_node));
+
if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
- int zaperr = zap_contains(mos, dsobj,
- DS_FIELD_LARGE_BLOCKS);
- if (zaperr != ENOENT) {
- VERIFY0(zaperr);
- ds->ds_large_blocks = B_TRUE;
+ spa_feature_t f;
+
+ for (f = 0; f < SPA_FEATURES; f++) {
+ if (!(spa_feature_table[f].fi_flags &
+ ZFEATURE_FLAG_PER_DATASET))
+ continue;
+ err = zap_contains(mos, dsobj,
+ spa_feature_table[f].fi_guid);
+ if (err == 0) {
+ ds->ds_feature_inuse[f] = B_TRUE;
+ } else {
+ ASSERT3U(err, ==, ENOENT);
+ err = 0;
+ }
}
}
- if (err == 0) {
- err = dsl_dir_hold_obj(dp,
- dsl_dataset_phys(ds)->ds_dir_obj, NULL, ds,
- &ds->ds_dir);
- }
+ err = dsl_dir_hold_obj(dp,
+ dsl_dataset_phys(ds)->ds_dir_obj, NULL, ds, &ds->ds_dir);
if (err != 0) {
mutex_destroy(&ds->ds_lock);
mutex_destroy(&ds->ds_opening_lock);
@@ -498,7 +506,8 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
ds->ds_reserved = ds->ds_quota = 0;
}
- dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict, &ds->ds_dbuf);
+ dmu_buf_init_user(&ds->ds_dbu, dsl_dataset_evict_sync,
+ dsl_dataset_evict_async, &ds->ds_dbuf);
if (err == 0)
winner = dmu_buf_set_user_ie(dbuf, &ds->ds_dbu);
@@ -521,6 +530,16 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
} else {
ds->ds_fsid_guid =
unique_insert(dsl_dataset_phys(ds)->ds_fsid_guid);
+ if (ds->ds_fsid_guid !=
+ dsl_dataset_phys(ds)->ds_fsid_guid) {
+ zfs_dbgmsg("ds_fsid_guid changed from "
+ "%llx to %llx for pool %s dataset id %llu",
+ (long long)
+ dsl_dataset_phys(ds)->ds_fsid_guid,
+ (long long)ds->ds_fsid_guid,
+ spa_name(dp->dp_spa),
+ dsobj);
+ }
}
}
ASSERT3P(ds->ds_dbuf, ==, dbuf);
@@ -540,6 +559,7 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name,
const char *snapname;
uint64_t obj;
int err = 0;
+ dsl_dataset_t *ds;
err = dsl_dir_hold(dp, name, FTAG, &dd, &snapname);
if (err != 0)
@@ -548,36 +568,37 @@ dsl_dataset_hold(dsl_pool_t *dp, const char *name,
ASSERT(dsl_pool_config_held(dp));
obj = dsl_dir_phys(dd)->dd_head_dataset_obj;
if (obj != 0)
- err = dsl_dataset_hold_obj(dp, obj, tag, dsp);
+ err = dsl_dataset_hold_obj(dp, obj, tag, &ds);
else
err = SET_ERROR(ENOENT);
/* we may be looking for a snapshot */
if (err == 0 && snapname != NULL) {
- dsl_dataset_t *ds;
+ dsl_dataset_t *snap_ds;
if (*snapname++ != '@') {
- dsl_dataset_rele(*dsp, tag);
+ dsl_dataset_rele(ds, tag);
dsl_dir_rele(dd, FTAG);
return (SET_ERROR(ENOENT));
}
dprintf("looking for snapshot '%s'\n", snapname);
- err = dsl_dataset_snap_lookup(*dsp, snapname, &obj);
+ err = dsl_dataset_snap_lookup(ds, snapname, &obj);
if (err == 0)
- err = dsl_dataset_hold_obj(dp, obj, tag, &ds);
- dsl_dataset_rele(*dsp, tag);
+ err = dsl_dataset_hold_obj(dp, obj, tag, &snap_ds);
+ dsl_dataset_rele(ds, tag);
if (err == 0) {
- mutex_enter(&ds->ds_lock);
- if (ds->ds_snapname[0] == 0)
- (void) strlcpy(ds->ds_snapname, snapname,
- sizeof (ds->ds_snapname));
- mutex_exit(&ds->ds_lock);
- *dsp = ds;
+ mutex_enter(&snap_ds->ds_lock);
+ if (snap_ds->ds_snapname[0] == 0)
+ (void) strlcpy(snap_ds->ds_snapname, snapname,
+ sizeof (snap_ds->ds_snapname));
+ mutex_exit(&snap_ds->ds_lock);
+ ds = snap_ds;
}
}
-
+ if (err == 0)
+ *dsp = ds;
dsl_dir_rele(dd, FTAG);
return (err);
}
@@ -649,17 +670,22 @@ dsl_dataset_name(dsl_dataset_t *ds, char *name)
dsl_dir_name(ds->ds_dir, name);
VERIFY0(dsl_dataset_get_snapname(ds));
if (ds->ds_snapname[0]) {
- (void) strcat(name, "@");
+ VERIFY3U(strlcat(name, "@", ZFS_MAX_DATASET_NAME_LEN),
+ <, ZFS_MAX_DATASET_NAME_LEN);
/*
* We use a "recursive" mutex so that we
* can call dprintf_ds() with ds_lock held.
*/
if (!MUTEX_HELD(&ds->ds_lock)) {
mutex_enter(&ds->ds_lock);
- (void) strcat(name, ds->ds_snapname);
+ VERIFY3U(strlcat(name, ds->ds_snapname,
+ ZFS_MAX_DATASET_NAME_LEN), <,
+ ZFS_MAX_DATASET_NAME_LEN);
mutex_exit(&ds->ds_lock);
} else {
- (void) strcat(name, ds->ds_snapname);
+ VERIFY3U(strlcat(name, ds->ds_snapname,
+ ZFS_MAX_DATASET_NAME_LEN), <,
+ ZFS_MAX_DATASET_NAME_LEN);
}
}
}
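dsl_dataset_name() above now appends with strlcat() and VERIFYs the
result against ZFS_MAX_DATASET_NAME_LEN instead of calling unbounded
strcat(). strlcat() returns the total length it tried to create, so
result >= size is the truncation test. A standalone sketch with a
minimal strlcat inlined, since it is not part of ISO C:

#include <stdio.h>
#include <string.h>

size_t
my_strlcat(char *dst, const char *src, size_t size)
{
	size_t dlen = strlen(dst);
	size_t slen = strlen(src);
	size_t i = 0;

	if (dlen >= size)
		return (size + slen);
	while (i < slen && dlen + i < size - 1) {
		dst[dlen + i] = src[i];
		i++;
	}
	dst[dlen + i] = '\0';
	return (dlen + slen);	/* >= size means truncated */
}

int
main(void)
{
	char name[16] = "pool/fs";

	if (my_strlcat(name, "@snap", sizeof (name)) >= sizeof (name))
		fprintf(stderr, "dataset name truncated\n");
	printf("%s\n", name);
	return (0);
}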
@@ -704,6 +730,7 @@ dsl_dataset_tryown(dsl_dataset_t *ds, void *tag)
{
boolean_t gotit = FALSE;
+ ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
mutex_enter(&ds->ds_lock);
if (ds->ds_owner == NULL && !DS_IS_INCONSISTENT(ds)) {
ds->ds_owner = tag;
@@ -714,6 +741,44 @@ dsl_dataset_tryown(dsl_dataset_t *ds, void *tag)
return (gotit);
}
+boolean_t
+dsl_dataset_has_owner(dsl_dataset_t *ds)
+{
+ boolean_t rv;
+ mutex_enter(&ds->ds_lock);
+ rv = (ds->ds_owner != NULL);
+ mutex_exit(&ds->ds_lock);
+ return (rv);
+}
+
+static void
+dsl_dataset_activate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx)
+{
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+ objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset;
+ uint64_t zero = 0;
+
+ VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET);
+
+ spa_feature_incr(spa, f, tx);
+ dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx);
+
+ VERIFY0(zap_add(mos, dsobj, spa_feature_table[f].fi_guid,
+ sizeof (zero), 1, &zero, tx));
+}
+
+void
+dsl_dataset_deactivate_feature(uint64_t dsobj, spa_feature_t f, dmu_tx_t *tx)
+{
+ spa_t *spa = dmu_tx_pool(tx)->dp_spa;
+ objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset;
+
+ VERIFY(spa_feature_table[f].fi_flags & ZFEATURE_FLAG_PER_DATASET);
+
+ VERIFY0(zap_remove(mos, dsobj, spa_feature_table[f].fi_guid, tx));
+ spa_feature_decr(spa, f, tx);
+}
+
uint64_t
dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
uint64_t flags, dmu_tx_t *tx)
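dsl_dataset_activate_feature() and dsl_dataset_deactivate_feature()
above generalize the old large-blocks-only path to any per-dataset
feature: activation bumps the pool-wide feature refcount and records a
marker keyed by the feature guid in the dataset's ZAP; deactivation is
the exact inverse, so the refcount counts datasets using the feature.
A standalone sketch of that invariant; the names are hypothetical
stand-ins for the spa_feature_*() and zap_*() calls:

#include <assert.h>
#include <stdint.h>

#define NFEATURES 8

uint64_t pool_refcount[NFEATURES];	/* pool-wide feature refcounts */
int ds_marker[NFEATURES];		/* one dataset's ZAP markers */

void
feature_activate(int f)
{
	assert(!ds_marker[f]);		/* not already active here */
	pool_refcount[f]++;		/* spa_feature_incr() analogue */
	ds_marker[f] = 1;		/* zap_add() of fi_guid analogue */
}

void
feature_deactivate(int f)
{
	assert(ds_marker[f]);		/* must be active to deactivate */
	ds_marker[f] = 0;		/* zap_remove() analogue */
	assert(pool_refcount[f] > 0);
	pool_refcount[f]--;		/* spa_feature_decr() analogue */
}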
@@ -752,6 +817,7 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
if (origin == NULL) {
dsphys->ds_deadlist_obj = dsl_deadlist_alloc(mos, tx);
} else {
+ spa_feature_t f;
dsl_dataset_t *ohds; /* head of the origin snapshot */
dsphys->ds_prev_snap_obj = origin->ds_object;
@@ -763,7 +829,9 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
dsl_dataset_phys(origin)->ds_compressed_bytes;
dsphys->ds_uncompressed_bytes =
dsl_dataset_phys(origin)->ds_uncompressed_bytes;
+ rrw_enter(&origin->ds_bp_rwlock, RW_READER, FTAG);
dsphys->ds_bp = dsl_dataset_phys(origin)->ds_bp;
+ rrw_exit(&origin->ds_bp_rwlock, FTAG);
/*
* Inherit flags that describe the dataset's contents
@@ -772,8 +840,10 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
dsphys->ds_flags |= dsl_dataset_phys(origin)->ds_flags &
(DS_FLAG_INCONSISTENT | DS_FLAG_CI_DATASET);
- if (origin->ds_large_blocks)
- dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx);
+ for (f = 0; f < SPA_FEATURES; f++) {
+ if (origin->ds_feature_inuse[f])
+ dsl_dataset_activate_feature(dsobj, f, tx);
+ }
dmu_buf_will_dirty(origin->ds_dbuf, tx);
dsl_dataset_phys(origin)->ds_num_children++;
@@ -828,8 +898,20 @@ dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx)
objset_t *os;
VERIFY0(dmu_objset_from_ds(ds, &os));
- bzero(&os->os_zil_header, sizeof (os->os_zil_header));
- dsl_dataset_dirty(ds, tx);
+ if (bcmp(&os->os_zil_header, &zero_zil, sizeof (zero_zil)) != 0) {
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
+ zio_t *zio;
+
+ bzero(&os->os_zil_header, sizeof (os->os_zil_header));
+
+ zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
+ dsl_dataset_sync(ds, zio, tx);
+ VERIFY0(zio_wait(zio));
+
+ /* dsl_dataset_sync_done will drop this reference. */
+ dmu_buf_add_ref(ds->ds_dbuf, ds);
+ dsl_dataset_sync_done(ds, tx);
+ }
}
uint64_t
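dsl_dataset_zero_zil() above now skips the work entirely when the
header is already all zeroes, and otherwise pushes the change out with
an explicit sync instead of merely dirtying the dataset. The cheap
guard in isolation; the header size here is illustrative:

#include <string.h>

typedef struct zil_header {
	unsigned char	zh_bytes[192];	/* size illustrative only */
} zil_header_t;

static zil_header_t zero_zil;		/* static, so all zeroes */

int
needs_zeroing(const zil_header_t *hdr)
{
	/* mirrors the bcmp() guard: only act if something is set */
	return (memcmp(hdr, &zero_zil, sizeof (zero_zil)) != 0);
}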
@@ -950,19 +1032,6 @@ dsl_dataset_get_blkptr(dsl_dataset_t *ds)
return (&dsl_dataset_phys(ds)->ds_bp);
}
-void
-dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
-{
- ASSERT(dmu_tx_is_syncing(tx));
- /* If it's the meta-objset, set dp_meta_rootbp */
- if (ds == NULL) {
- tx->tx_pool->dp_meta_rootbp = *bp;
- } else {
- dmu_buf_will_dirty(ds->ds_dbuf, tx);
- dsl_dataset_phys(ds)->ds_bp = *bp;
- }
-}
-
spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
@@ -982,8 +1051,10 @@ dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0)
panic("dirtying snapshot!");
- dp = ds->ds_dir->dd_pool;
+ /* Must not dirty a dataset in the same txg where it got snapshotted. */
+ ASSERT3U(tx->tx_txg, >, dsl_dataset_phys(ds)->ds_prev_snap_txg);
+ dp = ds->ds_dir->dd_pool;
if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
/* up the hold count until we can be written out */
dmu_buf_add_ref(ds->ds_dbuf, ds);
@@ -1206,11 +1277,11 @@ dsl_dataset_snapshot_check(void *arg, dmu_tx_t *tx)
pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
int error = 0;
dsl_dataset_t *ds;
- char *name, *atp;
- char dsname[MAXNAMELEN];
+ char *name, *atp = NULL;
+ char dsname[ZFS_MAX_DATASET_NAME_LEN];
name = nvpair_name(pair);
- if (strlen(name) >= MAXNAMELEN)
+ if (strlen(name) >= ZFS_MAX_DATASET_NAME_LEN)
error = SET_ERROR(ENAMETOOLONG);
if (error == 0) {
atp = strchr(name, '@');
@@ -1249,6 +1320,7 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
dsl_dataset_phys_t *dsphys;
uint64_t dsobj, crtxg;
objset_t *mos = dp->dp_meta_objset;
+ spa_feature_t f;
ASSERTV(static zil_header_t zero_zil);
ASSERTV(objset_t *os);
@@ -1263,6 +1335,10 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
bcmp(&os->os_phys->os_zil_header, &zero_zil,
sizeof (zero_zil)) == 0);
+ /* Should not snapshot a dirty dataset. */
+ ASSERT(!txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets,
+ ds, tx->tx_txg));
+
dsl_fs_ss_count_adjust(ds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx);
/*
@@ -1295,11 +1371,15 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
dsphys->ds_uncompressed_bytes =
dsl_dataset_phys(ds)->ds_uncompressed_bytes;
dsphys->ds_flags = dsl_dataset_phys(ds)->ds_flags;
+ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
dsphys->ds_bp = dsl_dataset_phys(ds)->ds_bp;
+ rrw_exit(&ds->ds_bp_rwlock, FTAG);
dmu_buf_rele(dbuf, FTAG);
- if (ds->ds_large_blocks)
- dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx);
+ for (f = 0; f < SPA_FEATURES; f++) {
+ if (ds->ds_feature_inuse[f])
+ dsl_dataset_activate_feature(dsobj, f, tx);
+ }
ASSERT3U(ds->ds_prev != 0, ==,
dsl_dataset_phys(ds)->ds_prev_snap_obj != 0);
@@ -1380,7 +1460,7 @@ dsl_dataset_snapshot_sync(void *arg, dmu_tx_t *tx)
pair != NULL; pair = nvlist_next_nvpair(ddsa->ddsa_snaps, pair)) {
dsl_dataset_t *ds;
char *name, *atp;
- char dsname[MAXNAMELEN];
+ char dsname[ZFS_MAX_DATASET_NAME_LEN];
name = nvpair_name(pair);
atp = strchr(name, '@');
@@ -1427,7 +1507,7 @@ dsl_dataset_snapshot(nvlist_t *snaps, nvlist_t *props, nvlist_t *errors)
suspended = fnvlist_alloc();
for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
pair = nvlist_next_nvpair(snaps, pair)) {
- char fsname[MAXNAMELEN];
+ char fsname[ZFS_MAX_DATASET_NAME_LEN];
char *snapname = nvpair_name(pair);
char *atp;
void *cookie;
@@ -1517,7 +1597,7 @@ dsl_dataset_snapshot_tmp_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_snapshot_tmp_arg_t *ddsta = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
- dsl_dataset_t *ds;
+ dsl_dataset_t *ds = NULL;
VERIFY0(dsl_dataset_hold(dp, ddsta->ddsta_fsname, FTAG, &ds));
@@ -1568,6 +1648,8 @@ dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
void
dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
{
+ spa_feature_t f;
+
ASSERT(dmu_tx_is_syncing(tx));
ASSERT(ds->ds_objset != NULL);
ASSERT(dsl_dataset_phys(ds)->ds_next_snap_obj == 0);
@@ -1579,12 +1661,57 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
dmu_buf_will_dirty(ds->ds_dbuf, tx);
dsl_dataset_phys(ds)->ds_fsid_guid = ds->ds_fsid_guid;
+ if (ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] != 0) {
+ VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
+ ds->ds_object, DS_FIELD_RESUME_OBJECT, 8, 1,
+ &ds->ds_resume_object[tx->tx_txg & TXG_MASK], tx));
+ VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
+ ds->ds_object, DS_FIELD_RESUME_OFFSET, 8, 1,
+ &ds->ds_resume_offset[tx->tx_txg & TXG_MASK], tx));
+ VERIFY0(zap_update(tx->tx_pool->dp_meta_objset,
+ ds->ds_object, DS_FIELD_RESUME_BYTES, 8, 1,
+ &ds->ds_resume_bytes[tx->tx_txg & TXG_MASK], tx));
+ ds->ds_resume_object[tx->tx_txg & TXG_MASK] = 0;
+ ds->ds_resume_offset[tx->tx_txg & TXG_MASK] = 0;
+ ds->ds_resume_bytes[tx->tx_txg & TXG_MASK] = 0;
+ }
+
dmu_objset_sync(ds->ds_objset, zio, tx);
- if (ds->ds_need_large_blocks && !ds->ds_large_blocks) {
- dsl_dataset_activate_large_blocks_sync_impl(ds->ds_object, tx);
- ds->ds_large_blocks = B_TRUE;
+ for (f = 0; f < SPA_FEATURES; f++) {
+ if (ds->ds_feature_activation_needed[f]) {
+ if (ds->ds_feature_inuse[f])
+ continue;
+ dsl_dataset_activate_feature(ds->ds_object, f, tx);
+ ds->ds_feature_inuse[f] = B_TRUE;
+ }
+ }
+}
+
+static int
+deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
+{
+ dsl_deadlist_t *dl = arg;
+ dsl_deadlist_insert(dl, bp, tx);
+ return (0);
+}
+
+void
+dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx)
+{
+ objset_t *os = ds->ds_objset;
+
+ bplist_iterate(&ds->ds_pending_deadlist,
+ deadlist_enqueue_cb, &ds->ds_deadlist, tx);
+
+ if (os->os_synced_dnodes != NULL) {
+ multilist_destroy(os->os_synced_dnodes);
+ os->os_synced_dnodes = NULL;
}
+
+ ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx)));
+
+ dmu_buf_rele(ds->ds_dbuf, ds);
}
static void
@@ -1595,10 +1722,21 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
zap_cursor_t zc;
zap_attribute_t za;
nvlist_t *propval = fnvlist_alloc();
- nvlist_t *val = fnvlist_alloc();
+ nvlist_t *val;
ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
+ /*
+ * We use nvlist_alloc() instead of fnvlist_alloc() because the
+ * latter would allocate the list with the NV_UNIQUE_NAME flag.
+ * As a result, every time a clone name is appended to the list
+ * it would be (linearly) searched for a duplicate name.
+ * We already know that all clone names must be unique and we
+ * want to avoid the quadratic complexity of double-checking
+ * that because we can have a large number of clones.
+ */
+ VERIFY0(nvlist_alloc(&val, 0, KM_SLEEP));
+
/*
* There may be missing entries in ds_next_clones_obj
* due to a bug in a previous version of the code.
@@ -1615,7 +1753,7 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
dsl_dataset_t *clone;
- char buf[ZFS_MAXNAMELEN];
+ char buf[ZFS_MAX_DATASET_NAME_LEN];
VERIFY0(dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
za.za_first_integer, FTAG, &clone));
dsl_dir_name(clone->ds_dir, buf);
@@ -1630,11 +1768,91 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
nvlist_free(propval);
}
+static void
+get_receive_resume_stats(dsl_dataset_t *ds, nvlist_t *nv)
+{
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
+
+ if (dsl_dataset_has_resume_receive_state(ds)) {
+ char *str;
+ void *packed;
+ uint8_t *compressed;
+ uint64_t val;
+ nvlist_t *token_nv = fnvlist_alloc();
+ size_t packed_size, compressed_size;
+ zio_cksum_t cksum;
+ char *propval;
+ char buf[MAXNAMELEN];
+ int i;
+
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_FROMGUID, sizeof (val), 1, &val) == 0) {
+ fnvlist_add_uint64(token_nv, "fromguid", val);
+ }
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_OBJECT, sizeof (val), 1, &val) == 0) {
+ fnvlist_add_uint64(token_nv, "object", val);
+ }
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_OFFSET, sizeof (val), 1, &val) == 0) {
+ fnvlist_add_uint64(token_nv, "offset", val);
+ }
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_BYTES, sizeof (val), 1, &val) == 0) {
+ fnvlist_add_uint64(token_nv, "bytes", val);
+ }
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_TOGUID, sizeof (val), 1, &val) == 0) {
+ fnvlist_add_uint64(token_nv, "toguid", val);
+ }
+ if (zap_lookup(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_TONAME, 1, sizeof (buf), buf) == 0) {
+ fnvlist_add_string(token_nv, "toname", buf);
+ }
+ if (zap_contains(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_LARGEBLOCK) == 0) {
+ fnvlist_add_boolean(token_nv, "largeblockok");
+ }
+ if (zap_contains(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_EMBEDOK) == 0) {
+ fnvlist_add_boolean(token_nv, "embedok");
+ }
+ if (zap_contains(dp->dp_meta_objset, ds->ds_object,
+ DS_FIELD_RESUME_COMPRESSOK) == 0) {
+ fnvlist_add_boolean(token_nv, "compressok");
+ }
+ packed = fnvlist_pack(token_nv, &packed_size);
+ fnvlist_free(token_nv);
+ compressed = kmem_alloc(packed_size, KM_SLEEP);
+
+ compressed_size = gzip_compress(packed, compressed,
+ packed_size, packed_size, 6);
+
+ fletcher_4_native_varsize(compressed, compressed_size, &cksum);
+
+ str = kmem_alloc(compressed_size * 2 + 1, KM_SLEEP);
+ for (i = 0; i < compressed_size; i++) {
+ (void) sprintf(str + i * 2, "%02x", compressed[i]);
+ }
+ str[compressed_size * 2] = '\0';
+ propval = kmem_asprintf("%u-%llx-%llx-%s",
+ ZFS_SEND_RESUME_TOKEN_VERSION,
+ (longlong_t)cksum.zc_word[0],
+ (longlong_t)packed_size, str);
+ dsl_prop_nvlist_add_string(nv,
+ ZFS_PROP_RECEIVE_RESUME_TOKEN, propval);
+ kmem_free(packed, packed_size);
+ kmem_free(str, compressed_size * 2 + 1);
+ kmem_free(compressed, packed_size);
+ strfree(propval);
+ }
+}
+
void
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
{
+ dsl_pool_t *dp = ds->ds_dir->dd_pool;
uint64_t refd, avail, uobjs, aobjs, ratio;
- ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool);
ASSERT(dsl_pool_config_held(dp));
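get_receive_resume_stats() above assembles the receive_resume_token
property: the resume fields are packed into an nvlist, gzip-compressed
at level 6, fletcher-4 checksummed, hex-encoded, and emitted as
<version>-<checksum-word>-<packed-size>-<hex>. The hex-encoding step
in a standalone sketch, with compression and checksumming elided and a
made-up payload:

#include <stdio.h>
#include <stdlib.h>

char *
hex_encode(const unsigned char *buf, size_t len)
{
	char *str = malloc(len * 2 + 1);
	size_t i;

	if (str == NULL)
		return (NULL);
	for (i = 0; i < len; i++)
		(void) sprintf(str + i * 2, "%02x", buf[i]);
	str[len * 2] = '\0';
	return (str);
}

int
main(void)
{
	unsigned char payload[] = { 0xde, 0xad, 0xbe, 0xef };
	char *hex = hex_encode(payload, sizeof (payload));

	if (hex != NULL) {
		/* checksum word faked; real tokens use fletcher-4 */
		printf("1-%llx-%llx-%s\n", 0x1234ULL,
		    (unsigned long long)sizeof (payload), hex);
		free(hex);
	}
	return (0);
}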
@@ -1652,6 +1870,12 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
dsl_dataset_phys(ds)->ds_unique_bytes);
get_clones_stat(ds, nv);
} else {
+ if (ds->ds_prev != NULL && ds->ds_prev != dp->dp_origin_snap) {
+ char buf[ZFS_MAX_DATASET_NAME_LEN];
+ dsl_dataset_name(ds->ds_prev, buf);
+ dsl_prop_nvlist_add_string(nv, ZFS_PROP_PREV_SNAP, buf);
+ }
+
dsl_dir_stats(ds->ds_dir, nv);
}
@@ -1697,6 +1921,32 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
}
}
+ if (!dsl_dataset_is_snapshot(ds)) {
+ /* 6 extra bytes for /%recv */
+ char recvname[ZFS_MAX_DATASET_NAME_LEN + 6];
+ dsl_dataset_t *recv_ds;
+
+ /*
+ * A failed "newfs" (e.g. full) resumable receive leaves
+ * the stats set on this dataset. Check here for the prop.
+ */
+ get_receive_resume_stats(ds, nv);
+
+ /*
+ * A failed incremental resumable receive leaves the
+ * stats set on our child named "%recv". Check the child
+ * for the prop.
+ */
+ dsl_dataset_name(ds, recvname);
+ if (strlcat(recvname, "/", sizeof (recvname)) <
+ sizeof (recvname) &&
+ strlcat(recvname, recv_clone_name, sizeof (recvname)) <
+ sizeof (recvname) &&
+ dsl_dataset_hold(dp, recvname, FTAG, &recv_ds) == 0) {
+ get_receive_resume_stats(recv_ds, nv);
+ dsl_dataset_rele(recv_ds, FTAG);
+ }
+ }
}
void
@@ -1756,18 +2006,25 @@ dsl_dataset_space(dsl_dataset_t *ds,
else
*availbytesp = 0;
}
+ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
*usedobjsp = BP_GET_FILL(&dsl_dataset_phys(ds)->ds_bp);
+ rrw_exit(&ds->ds_bp_rwlock, FTAG);
*availobjsp = DN_MAX_OBJECT - *usedobjsp;
}
boolean_t
dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap)
{
- ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
+ ASSERTV(dsl_pool_t *dp = ds->ds_dir->dd_pool);
+ uint64_t birth;
+
+ ASSERT(dsl_pool_config_held(dp));
if (snap == NULL)
return (B_FALSE);
- if (dsl_dataset_phys(ds)->ds_bp.blk_birth >
- dsl_dataset_phys(snap)->ds_creation_txg) {
+ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
+ birth = dsl_dataset_get_blkptr(ds)->blk_birth;
+ rrw_exit(&ds->ds_bp_rwlock, FTAG);
+ if (birth > dsl_dataset_phys(snap)->ds_creation_txg) {
objset_t *os, *os_snap;
/*
* It may be that only the ZIL differs, because it was
@@ -1817,7 +2074,7 @@ dsl_dataset_rename_snapshot_check_impl(dsl_pool_t *dp,
/* dataset name + 1 for the "@" + the new snapshot name must fit */
if (dsl_dir_namelen(hds->ds_dir) + 1 +
- strlen(ddrsa->ddrsa_newsnapname) >= MAXNAMELEN)
+ strlen(ddrsa->ddrsa_newsnapname) >= ZFS_MAX_DATASET_NAME_LEN)
error = SET_ERROR(ENAMETOOLONG);
return (error);
@@ -1872,7 +2129,8 @@ dsl_dataset_rename_snapshot_sync_impl(dsl_pool_t *dp,
VERIFY0(dsl_dataset_snap_remove(hds, ddrsa->ddrsa_oldsnapname, tx,
B_FALSE));
mutex_enter(&ds->ds_lock);
- (void) strcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname);
+ (void) strlcpy(ds->ds_snapname, ddrsa->ddrsa_newsnapname,
+ sizeof (ds->ds_snapname));
mutex_exit(&ds->ds_lock);
VERIFY0(zap_add(dp->dp_meta_objset,
dsl_dataset_phys(hds)->ds_snapnames_zapobj,
@@ -1889,7 +2147,7 @@ dsl_dataset_rename_snapshot_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_rename_snapshot_arg_t *ddrsa = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
- dsl_dataset_t *hds;
+ dsl_dataset_t *hds = NULL;
VERIFY0(dsl_dataset_hold(dp, ddrsa->ddrsa_fsname, FTAG, &hds));
ddrsa->ddrsa_tx = tx;
@@ -1952,6 +2210,7 @@ dsl_dataset_handoff_check(dsl_dataset_t *ds, void *owner, dmu_tx_t *tx)
typedef struct dsl_dataset_rollback_arg {
const char *ddra_fsname;
+ const char *ddra_tosnap;
void *ddra_owner;
nvlist_t *ddra_result;
} dsl_dataset_rollback_arg_t;
@@ -1983,6 +2242,30 @@ dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
return (SET_ERROR(EINVAL));
}
+ /*
+ * No rollback to a snapshot created in the current txg, because
+ * the rollback may dirty the dataset and create blocks that are
+ * not reachable from the rootbp while having a birth txg that
+ * falls into the snapshot's range.
+ */
+ if (dmu_tx_is_syncing(tx) &&
+ dsl_dataset_phys(ds)->ds_prev_snap_txg >= tx->tx_txg) {
+ dsl_dataset_rele(ds, FTAG);
+ return (SET_ERROR(EAGAIN));
+ }
+
+ /*
+ * If the expected target snapshot is specified, then check that
+ * it is the latest snapshot.
+ */
+ if (ddra->ddra_tosnap != NULL) {
+ char namebuf[ZFS_MAX_DATASET_NAME_LEN];
+
+ dsl_dataset_name(ds->ds_prev, namebuf);
+ if (strcmp(namebuf, ddra->ddra_tosnap) != 0)
+ return (SET_ERROR(EXDEV));
+ }
+
/* must not have any bookmarks after the most recent snapshot */
proprequest = fnvlist_alloc();
fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG));
@@ -2049,7 +2332,7 @@ dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx)
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dataset_t *ds, *clone;
uint64_t cloneobj;
- char namebuf[ZFS_MAXNAMELEN];
+ char namebuf[ZFS_MAX_DATASET_NAME_LEN];
VERIFY0(dsl_dataset_hold(dp, ddra->ddra_fsname, FTAG, &ds));
@@ -2084,11 +2367,13 @@ dsl_dataset_rollback_sync(void *arg, dmu_tx_t *tx)
* notes above zfs_suspend_fs() for further details.
*/
int
-dsl_dataset_rollback(const char *fsname, void *owner, nvlist_t *result)
+dsl_dataset_rollback(const char *fsname, const char *tosnap, void *owner,
+ nvlist_t *result)
{
dsl_dataset_rollback_arg_t ddra;
ddra.ddra_fsname = fsname;
+ ddra.ddra_tosnap = tosnap;
ddra.ddra_owner = owner;
ddra.ddra_result = result;
@@ -2152,6 +2437,10 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
}
snap = list_head(&ddpa->shared_snaps);
+ if (snap == NULL) {
+ err = SET_ERROR(ENOENT);
+ goto out;
+ }
origin_ds = snap->ds;
/* compute origin's new unique space */
@@ -2260,6 +2549,10 @@ dsl_dataset_promote_check(void *arg, dmu_tx_t *tx)
* iterate over all bps.
*/
snap = list_head(&ddpa->origin_snaps);
+ if (snap == NULL) {
+ err = SET_ERROR(ENOENT);
+ goto out;
+ }
err = snaplist_space(&ddpa->shared_snaps,
snap->ds->ds_dir->dd_origin_txg, &ddpa->cloneusedsnap);
if (err != 0)
@@ -2602,7 +2895,7 @@ promote_rele(dsl_dataset_promote_arg_t *ddpa, void *tag)
* Promote a clone.
*
* If it fails due to a conflicting snapshot name, "conflsnap" will be filled
- * in with the name. (It must be at least MAXNAMELEN bytes long.)
+ * in with the name. (It must be at least ZFS_MAX_DATASET_NAME_LEN bytes long.)
*/
int
dsl_dataset_promote(const char *name, char *conflsnap)
@@ -2639,6 +2932,12 @@ int
dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
dsl_dataset_t *origin_head, boolean_t force, void *owner, dmu_tx_t *tx)
{
+ /*
+ * "slack" factor for received datasets with refquota set on them.
+ * See the bottom of this function for details on its use.
+ */
+ uint64_t refquota_slack = (uint64_t)DMU_MAX_ACCESS *
+ spa_asize_inflation;
int64_t unused_refres_delta;
/* they should both be heads */
@@ -2681,10 +2980,22 @@ dsl_dataset_clone_swap_check_impl(dsl_dataset_t *clone,
dsl_dir_space_available(origin_head->ds_dir, NULL, 0, TRUE))
return (SET_ERROR(ENOSPC));
- /* clone can't be over the head's refquota */
+ /*
+ * The clone can't be too much over the head's refquota.
+ *
+ * To ensure that the entire refquota can be used, we allow one
+ * transaction to exceed the refquota. Therefore, this check
+ * needs to also allow for the space referenced to be more than the
+ * refquota. The maximum amount of space that one transaction can use
+ * on disk is DMU_MAX_ACCESS * spa_asize_inflation. Allowing this
+ * overage ensures that we are able to receive a filesystem that
+ * exceeds the refquota on the source system.
+ *
+ * So that overage is the refquota_slack we use below.
+ */
if (origin_head->ds_quota != 0 &&
dsl_dataset_phys(clone)->ds_referenced_bytes >
- origin_head->ds_quota)
+ origin_head->ds_quota + refquota_slack)
return (SET_ERROR(EDQUOT));
return (0);
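The refquota_slack bound above is one transaction's worst-case
on-disk footprint: DMU_MAX_ACCESS bytes of dirty data inflated by
spa_asize_inflation. A tiny sketch of the arithmetic with placeholder
values (the real constant and the tunable's default live in the ZFS
headers, not here):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t dmu_max_access = 64ULL * 1024 * 1024;	/* placeholder */
	uint64_t asize_inflation = 24;			/* placeholder */
	uint64_t slack = dmu_max_access * asize_inflation;

	printf("refquota slack: %llu bytes\n",
	    (unsigned long long)slack);
	return (0);
}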
@@ -2696,12 +3007,55 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
{
dsl_pool_t *dp = dmu_tx_pool(tx);
int64_t unused_refres_delta;
+ blkptr_t tmp;
ASSERT(clone->ds_reserved == 0);
+ /*
+ * NOTE: On DEBUG kernels there could be a race between this and
+ * the check function if spa_asize_inflation is adjusted...
+ */
ASSERT(origin_head->ds_quota == 0 ||
- dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota);
+ dsl_dataset_phys(clone)->ds_unique_bytes <= origin_head->ds_quota +
+ DMU_MAX_ACCESS * spa_asize_inflation);
ASSERT3P(clone->ds_prev, ==, origin_head->ds_prev);
+ /*
+ * Swap per-dataset feature flags.
+ */
+ for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
+ boolean_t clone_inuse;
+ boolean_t origin_head_inuse;
+
+ if (!(spa_feature_table[f].fi_flags &
+ ZFEATURE_FLAG_PER_DATASET)) {
+ ASSERT(!clone->ds_feature_inuse[f]);
+ ASSERT(!origin_head->ds_feature_inuse[f]);
+ continue;
+ }
+
+ clone_inuse = clone->ds_feature_inuse[f];
+ origin_head_inuse = origin_head->ds_feature_inuse[f];
+
+ if (clone_inuse) {
+ dsl_dataset_deactivate_feature(clone->ds_object, f, tx);
+ clone->ds_feature_inuse[f] = B_FALSE;
+ }
+ if (origin_head_inuse) {
+ dsl_dataset_deactivate_feature(origin_head->ds_object,
+ f, tx);
+ origin_head->ds_feature_inuse[f] = B_FALSE;
+ }
+ if (clone_inuse) {
+ dsl_dataset_activate_feature(origin_head->ds_object,
+ f, tx);
+ origin_head->ds_feature_inuse[f] = B_TRUE;
+ }
+ if (origin_head_inuse) {
+ dsl_dataset_activate_feature(clone->ds_object, f, tx);
+ clone->ds_feature_inuse[f] = B_TRUE;
+ }
+ }
+
dmu_buf_will_dirty(clone->ds_dbuf, tx);
dmu_buf_will_dirty(origin_head->ds_dbuf, tx);
@@ -2736,11 +3090,14 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
/* swap blkptrs */
{
- blkptr_t tmp;
+ rrw_enter(&clone->ds_bp_rwlock, RW_WRITER, FTAG);
+ rrw_enter(&origin_head->ds_bp_rwlock, RW_WRITER, FTAG);
tmp = dsl_dataset_phys(origin_head)->ds_bp;
dsl_dataset_phys(origin_head)->ds_bp =
dsl_dataset_phys(clone)->ds_bp;
dsl_dataset_phys(clone)->ds_bp = tmp;
+ rrw_exit(&origin_head->ds_bp_rwlock, FTAG);
+ rrw_exit(&clone->ds_bp_rwlock, FTAG);
}
/* set dd_*_bytes */
@@ -2955,7 +3312,7 @@ dsl_dataset_set_refquota_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_set_qr_arg_t *ddsqra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
- dsl_dataset_t *ds;
+ dsl_dataset_t *ds = NULL;
uint64_t newval;
VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
@@ -3082,7 +3439,7 @@ dsl_dataset_set_refreservation_sync(void *arg, dmu_tx_t *tx)
{
dsl_dataset_set_qr_arg_t *ddsqra = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
- dsl_dataset_t *ds;
+ dsl_dataset_t *ds = NULL;
VERIFY0(dsl_dataset_hold(dp, ddsqra->ddsqra_name, FTAG, &ds));
dsl_dataset_set_refreservation_sync_impl(ds,
@@ -3256,77 +3613,6 @@ dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
return (err);
}
-static int
-dsl_dataset_activate_large_blocks_check(void *arg, dmu_tx_t *tx)
-{
- const char *dsname = arg;
- dsl_dataset_t *ds;
- dsl_pool_t *dp = dmu_tx_pool(tx);
- int error = 0;
-
- if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS))
- return (SET_ERROR(ENOTSUP));
-
- ASSERT(spa_feature_is_enabled(dp->dp_spa,
- SPA_FEATURE_EXTENSIBLE_DATASET));
-
- error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
- if (error != 0)
- return (error);
-
- if (ds->ds_large_blocks)
- error = EALREADY;
- dsl_dataset_rele(ds, FTAG);
-
- return (error);
-}
-
-void
-dsl_dataset_activate_large_blocks_sync_impl(uint64_t dsobj, dmu_tx_t *tx)
-{
- spa_t *spa = dmu_tx_pool(tx)->dp_spa;
- objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset;
- uint64_t zero = 0;
-
- spa_feature_incr(spa, SPA_FEATURE_LARGE_BLOCKS, tx);
- dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx);
-
- VERIFY0(zap_add(mos, dsobj, DS_FIELD_LARGE_BLOCKS,
- sizeof (zero), 1, &zero, tx));
-}
-
-static void
-dsl_dataset_activate_large_blocks_sync(void *arg, dmu_tx_t *tx)
-{
- const char *dsname = arg;
- dsl_dataset_t *ds;
-
- VERIFY0(dsl_dataset_hold(dmu_tx_pool(tx), dsname, FTAG, &ds));
-
- dsl_dataset_activate_large_blocks_sync_impl(ds->ds_object, tx);
- ASSERT(!ds->ds_large_blocks);
- ds->ds_large_blocks = B_TRUE;
- dsl_dataset_rele(ds, FTAG);
-}
-
-int
-dsl_dataset_activate_large_blocks(const char *dsname)
-{
- int error;
-
- error = dsl_sync_task(dsname,
- dsl_dataset_activate_large_blocks_check,
- dsl_dataset_activate_large_blocks_sync, (void *)dsname,
- 1, ZFS_SPACE_CHECK_RESERVED);
-
- /*
- * EALREADY indicates that this dataset already supports large blocks.
- */
- if (error == EALREADY)
- error = 0;
- return (error);
-}
-
/*
* Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
* For example, they could both be snapshots of the same filesystem, and
@@ -3338,7 +3624,7 @@ dsl_dataset_activate_large_blocks(const char *dsname)
*/
boolean_t
dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
- uint64_t earlier_txg)
+ uint64_t earlier_txg)
{
dsl_pool_t *dp = later->ds_dir->dd_pool;
int error;
@@ -3371,7 +3657,6 @@ dsl_dataset_is_before(dsl_dataset_t *later, dsl_dataset_t *earlier,
return (ret);
}
-
void
dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx)
{
@@ -3379,6 +3664,23 @@ dsl_dataset_zapify(dsl_dataset_t *ds, dmu_tx_t *tx)
dmu_object_zapify(mos, ds->ds_object, DMU_OT_DSL_DATASET, tx);
}
+boolean_t
+dsl_dataset_is_zapified(dsl_dataset_t *ds)
+{
+ dmu_object_info_t doi;
+
+ dmu_object_info_from_db(ds->ds_dbuf, &doi);
+ return (doi.doi_type == DMU_OTN_ZAP_METADATA);
+}
+
+boolean_t
+dsl_dataset_has_resume_receive_state(dsl_dataset_t *ds)
+{
+ return (dsl_dataset_is_zapified(ds) &&
+ zap_contains(ds->ds_dir->dd_pool->dp_meta_objset,
+ ds->ds_object, DS_FIELD_RESUME_TOGUID) == 0);
+}
+
#if defined(_KERNEL) && defined(HAVE_SPL)
#if defined(_LP64)
module_param(zfs_max_recordsize, int, 0644);
@@ -3406,7 +3708,6 @@ EXPORT_SYMBOL(dsl_dataset_user_hold);
EXPORT_SYMBOL(dsl_dataset_user_release);
EXPORT_SYMBOL(dsl_dataset_get_holds);
EXPORT_SYMBOL(dsl_dataset_get_blkptr);
-EXPORT_SYMBOL(dsl_dataset_set_blkptr);
EXPORT_SYMBOL(dsl_dataset_get_spa);
EXPORT_SYMBOL(dsl_dataset_modified_since_snap);
EXPORT_SYMBOL(dsl_dataset_space_written);
@@ -3414,8 +3715,6 @@ EXPORT_SYMBOL(dsl_dataset_space_wouldfree);
EXPORT_SYMBOL(dsl_dataset_sync);
EXPORT_SYMBOL(dsl_dataset_block_born);
EXPORT_SYMBOL(dsl_dataset_block_kill);
-EXPORT_SYMBOL(dsl_dataset_block_freeable);
-EXPORT_SYMBOL(dsl_dataset_prev_snap_txg);
EXPORT_SYMBOL(dsl_dataset_dirty);
EXPORT_SYMBOL(dsl_dataset_stats);
EXPORT_SYMBOL(dsl_dataset_fast_stat);
diff --git a/zfs/module/zfs/dsl_deadlist.c b/zfs/module/zfs/dsl_deadlist.c
index 8da77ebd7b6e..0be0d7420940 100644
--- a/zfs/module/zfs/dsl_deadlist.c
+++ b/zfs/module/zfs/dsl_deadlist.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -54,15 +54,10 @@
static int
dsl_deadlist_compare(const void *arg1, const void *arg2)
{
- const dsl_deadlist_entry_t *dle1 = arg1;
- const dsl_deadlist_entry_t *dle2 = arg2;
+ const dsl_deadlist_entry_t *dle1 = (const dsl_deadlist_entry_t *)arg1;
+ const dsl_deadlist_entry_t *dle2 = (const dsl_deadlist_entry_t *)arg2;
- if (dle1->dle_mintxg < dle2->dle_mintxg)
- return (-1);
- else if (dle1->dle_mintxg > dle2->dle_mintxg)
- return (+1);
- else
- return (0);
+ return (AVL_CMP(dle1->dle_mintxg, dle2->dle_mintxg));
}
static void
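dsl_deadlist_compare() above now delegates to the AVL_CMP() helper
instead of the chained if/else; both forms yield the required -1/0/+1
three-way result. A standalone sketch; the macro body shown is an
assumed equivalent, not the actual header definition:

#include <stdint.h>
#include <stdio.h>

#define AVL_CMP(a, b)	(((a) > (b)) - ((a) < (b)))

int
mintxg_compare(uint64_t a, uint64_t b)
{
	return (AVL_CMP(a, b));
}

int
main(void)
{
	printf("%d %d %d\n", mintxg_compare(1, 2),
	    mintxg_compare(2, 2), mintxg_compare(3, 2));
	return (0);
}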
@@ -71,6 +66,8 @@ dsl_deadlist_load_tree(dsl_deadlist_t *dl)
zap_cursor_t zc;
zap_attribute_t za;
+ ASSERT(MUTEX_HELD(&dl->dl_lock));
+
ASSERT(!dl->dl_oldfmt);
if (dl->dl_havetree)
return;
@@ -81,10 +78,8 @@ dsl_deadlist_load_tree(dsl_deadlist_t *dl)
for (zap_cursor_init(&zc, dl->dl_os, dl->dl_object);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
- dsl_deadlist_entry_t *dle;
-
- dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
- dle->dle_mintxg = strtonum(za.za_name, NULL);
+ dsl_deadlist_entry_t *dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
+ dle->dle_mintxg = zfs_strtonum(za.za_name, NULL);
VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os,
za.za_first_integer));
avl_add(&dl->dl_tree, dle);
@@ -123,6 +118,7 @@ dsl_deadlist_close(dsl_deadlist_t *dl)
dsl_deadlist_entry_t *dle;
dl->dl_os = NULL;
+ mutex_destroy(&dl->dl_lock);
if (dl->dl_oldfmt) {
dl->dl_oldfmt = B_FALSE;
@@ -139,7 +135,6 @@ dsl_deadlist_close(dsl_deadlist_t *dl)
avl_destroy(&dl->dl_tree);
}
dmu_buf_rele(dl->dl_dbuf, dl);
- mutex_destroy(&dl->dl_lock);
dl->dl_dbuf = NULL;
dl->dl_phys = NULL;
}
@@ -183,6 +178,7 @@ static void
dle_enqueue(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
const blkptr_t *bp, dmu_tx_t *tx)
{
+ ASSERT(MUTEX_HELD(&dl->dl_lock));
if (dle->dle_bpobj.bpo_object ==
dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) {
uint64_t obj = bpobj_alloc(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
@@ -199,6 +195,7 @@ static void
dle_enqueue_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
uint64_t obj, dmu_tx_t *tx)
{
+ ASSERT(MUTEX_HELD(&dl->dl_lock));
if (dle->dle_bpobj.bpo_object !=
dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) {
bpobj_enqueue_subobj(&dle->dle_bpobj, obj, tx);
@@ -223,15 +220,14 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx)
return;
}
+ mutex_enter(&dl->dl_lock);
dsl_deadlist_load_tree(dl);
dmu_buf_will_dirty(dl->dl_dbuf, tx);
- mutex_enter(&dl->dl_lock);
dl->dl_phys->dl_used +=
bp_get_dsize_sync(dmu_objset_spa(dl->dl_os), bp);
dl->dl_phys->dl_comp += BP_GET_PSIZE(bp);
dl->dl_phys->dl_uncomp += BP_GET_UCSIZE(bp);
- mutex_exit(&dl->dl_lock);
dle_tofind.dle_mintxg = bp->blk_birth;
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
@@ -239,7 +235,16 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx)
dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE);
else
dle = AVL_PREV(&dl->dl_tree, dle);
+
+ if (dle == NULL) {
+ zfs_panic_recover("blkptr at %p has invalid BLK_BIRTH %llu",
+ bp, (longlong_t)bp->blk_birth);
+ dle = avl_first(&dl->dl_tree);
+ }
+
+ ASSERT3P(dle, !=, NULL);
dle_enqueue(dl, dle, bp, tx);
+ mutex_exit(&dl->dl_lock);
}
/*
@@ -255,16 +260,19 @@ dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
if (dl->dl_oldfmt)
return;
- dsl_deadlist_load_tree(dl);
-
dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
dle->dle_mintxg = mintxg;
+
+ mutex_enter(&dl->dl_lock);
+ dsl_deadlist_load_tree(dl);
+
obj = bpobj_alloc_empty(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
avl_add(&dl->dl_tree, dle);
VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, dl->dl_object,
mintxg, obj, tx));
+ mutex_exit(&dl->dl_lock);
}
/*
@@ -279,6 +287,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
if (dl->dl_oldfmt)
return;
+ mutex_enter(&dl->dl_lock);
dsl_deadlist_load_tree(dl);
dle_tofind.dle_mintxg = mintxg;
@@ -292,6 +301,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
kmem_free(dle, sizeof (*dle));
VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx));
+ mutex_exit(&dl->dl_lock);
}
/*
@@ -335,6 +345,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
return (newobj);
}
+ mutex_enter(&dl->dl_lock);
dsl_deadlist_load_tree(dl);
for (dle = avl_first(&dl->dl_tree); dle;
@@ -348,6 +359,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj,
dle->dle_mintxg, obj, tx));
}
+ mutex_exit(&dl->dl_lock);
return (newobj);
}
@@ -425,6 +437,8 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth,
uint64_t used, comp, uncomp;
bpobj_t bpo;
+ ASSERT(MUTEX_HELD(&dl->dl_lock));
+
VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj));
VERIFY3U(0, ==, bpobj_space(&bpo, &used, &comp, &uncomp));
bpobj_close(&bpo);
@@ -432,11 +446,9 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth,
dsl_deadlist_load_tree(dl);
dmu_buf_will_dirty(dl->dl_dbuf, tx);
- mutex_enter(&dl->dl_lock);
dl->dl_phys->dl_used += used;
dl->dl_phys->dl_comp += comp;
dl->dl_phys->dl_uncomp += uncomp;
- mutex_exit(&dl->dl_lock);
dle_tofind.dle_mintxg = birth;
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
@@ -476,10 +488,11 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
return;
}
+ mutex_enter(&dl->dl_lock);
for (zap_cursor_init(&zc, dl->dl_os, obj);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
- uint64_t mintxg = strtonum(za.za_name, NULL);
+ uint64_t mintxg = zfs_strtonum(za.za_name, NULL);
dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx);
VERIFY3U(0, ==, zap_remove_int(dl->dl_os, obj, mintxg, tx));
}
@@ -490,6 +503,7 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
dmu_buf_will_dirty(bonus, tx);
bzero(dlp, sizeof (*dlp));
dmu_buf_rele(bonus, FTAG);
+ mutex_exit(&dl->dl_lock);
}
/*
@@ -504,6 +518,8 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
avl_index_t where;
ASSERT(!dl->dl_oldfmt);
+
+ mutex_enter(&dl->dl_lock);
dmu_buf_will_dirty(dl->dl_dbuf, tx);
dsl_deadlist_load_tree(dl);
@@ -519,14 +535,12 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj,
&used, &comp, &uncomp));
- mutex_enter(&dl->dl_lock);
ASSERT3U(dl->dl_phys->dl_used, >=, used);
ASSERT3U(dl->dl_phys->dl_comp, >=, comp);
ASSERT3U(dl->dl_phys->dl_uncomp, >=, uncomp);
dl->dl_phys->dl_used -= used;
dl->dl_phys->dl_comp -= comp;
dl->dl_phys->dl_uncomp -= uncomp;
- mutex_exit(&dl->dl_lock);
VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object,
dle->dle_mintxg, tx));
@@ -537,4 +551,5 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
kmem_free(dle, sizeof (*dle));
dle = dle_next;
}
+ mutex_exit(&dl->dl_lock);
}
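
Taken together, the dsl_deadlist.c hunks promote dl_lock from guarding only
the dl_phys space accounting to guarding the in-core AVL tree itself: every
caller of dsl_deadlist_load_tree() now enters the lock first, internal
helpers such as dle_enqueue() and dsl_deadlist_insert_bpobj() assert it with
MUTEX_HELD, and dsl_deadlist_close() destroys the lock up front. The
resulting pattern, in sketch form:

	mutex_enter(&dl->dl_lock);
	dsl_deadlist_load_tree(dl);	/* demand-loads dl->dl_tree */
	/* ... mutate dl->dl_tree and dl->dl_phys under the lock ... */
	mutex_exit(&dl->dl_lock);
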
diff --git a/zfs/module/zfs/dsl_deleg.c b/zfs/module/zfs/dsl_deleg.c
index 952422be2381..f17cedb08213 100644
--- a/zfs/module/zfs/dsl_deleg.c
+++ b/zfs/module/zfs/dsl_deleg.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
*/
/*
@@ -330,7 +330,7 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
za = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
basezc = kmem_alloc(sizeof (zap_cursor_t), KM_SLEEP);
baseza = kmem_alloc(sizeof (zap_attribute_t), KM_SLEEP);
- source = kmem_alloc(MAXNAMELEN + strlen(MOS_DIR_NAME) + 1, KM_SLEEP);
+ source = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
for (dd = startdd; dd != NULL; dd = dd->dd_parent) {
@@ -370,7 +370,7 @@ dsl_deleg_get(const char *ddname, nvlist_t **nvp)
nvlist_free(sp_nvp);
}
- kmem_free(source, MAXNAMELEN + strlen(MOS_DIR_NAME) + 1);
+ kmem_free(source, ZFS_MAX_DATASET_NAME_LEN);
kmem_free(baseza, sizeof (zap_attribute_t));
kmem_free(basezc, sizeof (zap_cursor_t));
kmem_free(za, sizeof (zap_attribute_t));
@@ -393,14 +393,13 @@ typedef struct perm_set {
static int
perm_set_compare(const void *arg1, const void *arg2)
{
- const perm_set_t *node1 = arg1;
- const perm_set_t *node2 = arg2;
+ const perm_set_t *node1 = (const perm_set_t *)arg1;
+ const perm_set_t *node2 = (const perm_set_t *)arg2;
int val;
val = strcmp(node1->p_setname, node2->p_setname);
- if (val == 0)
- return (0);
- return (val > 0 ? 1 : -1);
+
+ return (AVL_ISIGN(val));
}
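
The comparator rewrite routes strcmp() through AVL_ISIGN, the avl.h idiom
(roughly ((x > 0) - (x < 0))) that clamps any integer to -1/0/1, replacing
the hand-rolled branching. The same shape works for any string-keyed AVL
node; a sketch with a hypothetical node type:

	/* Hypothetical string-keyed comparator using the AVL_ISIGN idiom. */
	static int
	name_compare(const void *a, const void *b)
	{
		const my_node_t *n1 = (const my_node_t *)a;	/* assumed type */
		const my_node_t *n2 = (const my_node_t *)b;

		return (AVL_ISIGN(strcmp(n1->n_name, n2->n_name)));
	}
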
/*
diff --git a/zfs/module/zfs/dsl_destroy.c b/zfs/module/zfs/dsl_destroy.c
index 34d076e9f2ae..d980f7d1fd78 100644
--- a/zfs/module/zfs/dsl_destroy.c
+++ b/zfs/module/zfs/dsl_destroy.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2013 by Joyent, Inc. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
@@ -245,6 +245,7 @@ dsl_dataset_remove_clones_key(dsl_dataset_t *ds, uint64_t mintxg, dmu_tx_t *tx)
void
dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
{
+ spa_feature_t f;
int after_branch_point = FALSE;
dsl_pool_t *dp = ds->ds_dir->dd_pool;
objset_t *mos = dp->dp_meta_objset;
@@ -254,7 +255,9 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
+ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
+ rrw_exit(&ds->ds_bp_rwlock, FTAG);
ASSERT(refcount_is_zero(&ds->ds_longholds));
if (defer &&
@@ -276,9 +279,11 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
obj = ds->ds_object;
- if (ds->ds_large_blocks) {
- ASSERT0(zap_contains(mos, obj, DS_FIELD_LARGE_BLOCKS));
- spa_feature_decr(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS, tx);
+ for (f = 0; f < SPA_FEATURES; f++) {
+ if (ds->ds_feature_inuse[f]) {
+ dsl_dataset_deactivate_feature(obj, f, tx);
+ ds->ds_feature_inuse[f] = B_FALSE;
+ }
}
if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
ASSERT3P(ds->ds_prev, ==, NULL);
@@ -558,7 +563,7 @@ kill_blkptr(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
struct killarg *ka = arg;
dmu_tx_t *tx = ka->tx;
- if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
+ if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
return (0);
if (zb->zb_level == ZB_ZIL_LEVEL) {
@@ -716,6 +721,7 @@ void
dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
{
dsl_pool_t *dp = dmu_tx_pool(tx);
+ spa_feature_t f;
objset_t *mos = dp->dp_meta_objset;
uint64_t obj, ddobj, prevobj = 0;
boolean_t rmorigin;
@@ -724,7 +730,9 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
ASSERT(ds->ds_prev == NULL ||
dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object);
+ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
+ rrw_exit(&ds->ds_bp_rwlock, FTAG);
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
/* We need to log before removing it from the namespace. */
@@ -743,12 +751,16 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
ASSERT0(ds->ds_reserved);
}
- if (ds->ds_large_blocks)
- spa_feature_decr(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS, tx);
+ obj = ds->ds_object;
- dsl_scan_ds_destroyed(ds, tx);
+ for (f = 0; f < SPA_FEATURES; f++) {
+ if (ds->ds_feature_inuse[f]) {
+ dsl_dataset_deactivate_feature(obj, f, tx);
+ ds->ds_feature_inuse[f] = B_FALSE;
+ }
+ }
- obj = ds->ds_object;
+ dsl_scan_ds_destroyed(ds, tx);
if (dsl_dataset_phys(ds)->ds_prev_snap_obj != 0) {
/* This is a clone */
@@ -811,10 +823,12 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
dsl_dataset_phys(ds)->ds_unique_bytes == used);
+ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
bptree_add(mos, dp->dp_bptree_obj,
&dsl_dataset_phys(ds)->ds_bp,
dsl_dataset_phys(ds)->ds_prev_snap_txg,
used, comp, uncomp, tx);
+ rrw_exit(&ds->ds_bp_rwlock, FTAG);
dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
-used, -comp, -uncomp, tx);
dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
@@ -970,9 +984,17 @@ dsl_destroy_inconsistent(const char *dsname, void *arg)
objset_t *os;
if (dmu_objset_hold(dsname, FTAG, &os) == 0) {
- boolean_t inconsistent = DS_IS_INCONSISTENT(dmu_objset_ds(os));
+ boolean_t need_destroy = DS_IS_INCONSISTENT(dmu_objset_ds(os));
+
+ /*
+ * If the dataset is inconsistent because a resumable receive
+ * has failed, then do not destroy it.
+ */
+ if (dsl_dataset_has_resume_receive_state(dmu_objset_ds(os)))
+ need_destroy = B_FALSE;
+
dmu_objset_rele(os, FTAG);
- if (inconsistent)
+ if (need_destroy)
(void) dsl_destroy_head(dsname);
}
return (0);
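
Three themes run through the dsl_destroy.c hunks: the SPA_FEATURE_LARGE_BLOCKS
special case becomes a loop that deactivates every per-dataset feature
recorded in ds_feature_inuse[]; reads of ds_bp are now bracketed by
ds_bp_rwlock taken as a reader; and dsl_destroy_inconsistent() spares
datasets that are inconsistent only because a resumable receive was
interrupted, so 'zfs receive -s' can be resumed later. The reader-lock
bracket, as used above:

	rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
	/* ds_bp may be inspected or copied here */
	ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
	rrw_exit(&ds->ds_bp_rwlock, FTAG);
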
diff --git a/zfs/module/zfs/dsl_dir.c b/zfs/module/zfs/dsl_dir.c
index 762e2e5ff230..a3ef5896a3f2 100644
--- a/zfs/module/zfs/dsl_dir.c
+++ b/zfs/module/zfs/dsl_dir.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2013 Martin Matuska. All rights reserved.
* Copyright (c) 2014 Joyent, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
@@ -129,7 +129,7 @@ extern inline dsl_dir_phys_t *dsl_dir_phys(dsl_dir_t *dd);
static uint64_t dsl_dir_space_towrite(dsl_dir_t *dd);
static void
-dsl_dir_evict(void *dbu)
+dsl_dir_evict_async(void *dbu)
{
dsl_dir_t *dd = dbu;
int t;
@@ -148,11 +148,7 @@ dsl_dir_evict(void *dbu)
spa_async_close(dd->dd_pool->dp_spa, dd);
- /*
- * The props callback list should have been cleaned up by
- * objset_evict().
- */
- list_destroy(&dd->dd_prop_cbs);
+ dsl_prop_fini(dd);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
}
@@ -187,9 +183,7 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
dd->dd_dbuf = dbuf;
dd->dd_pool = dp;
mutex_init(&dd->dd_lock, NULL, MUTEX_DEFAULT, NULL);
-
- list_create(&dd->dd_prop_cbs, sizeof (dsl_prop_cb_record_t),
- offsetof(dsl_prop_cb_record_t, cbr_node));
+ dsl_prop_init(dd);
dsl_dir_snap_cmtime_update(dd);
@@ -209,7 +203,8 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
sizeof (foundobj), 1, &foundobj);
ASSERT(err || foundobj == ddobj);
#endif
- (void) strcpy(dd->dd_myname, tail);
+ (void) strlcpy(dd->dd_myname, tail,
+ sizeof (dd->dd_myname));
} else {
err = zap_value_search(dp->dp_meta_objset,
dsl_dir_phys(dd->dd_parent)->
@@ -242,11 +237,13 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
dmu_buf_rele(origin_bonus, FTAG);
}
- dmu_buf_init_user(&dd->dd_dbu, dsl_dir_evict, &dd->dd_dbuf);
+ dmu_buf_init_user(&dd->dd_dbu, NULL, dsl_dir_evict_async,
+ &dd->dd_dbuf);
winner = dmu_buf_set_user_ie(dbuf, &dd->dd_dbu);
if (winner != NULL) {
if (dd->dd_parent)
dsl_dir_rele(dd->dd_parent, dd);
+ dsl_prop_fini(dd);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
dd = winner;
@@ -274,6 +271,7 @@ dsl_dir_hold_obj(dsl_pool_t *dp, uint64_t ddobj,
errout:
if (dd->dd_parent)
dsl_dir_rele(dd->dd_parent, dd);
+ dsl_prop_fini(dd);
mutex_destroy(&dd->dd_lock);
kmem_free(dd, sizeof (dsl_dir_t));
dmu_buf_rele(dbuf, tag);
@@ -303,13 +301,14 @@ dsl_dir_async_rele(dsl_dir_t *dd, void *tag)
dmu_buf_rele(dd->dd_dbuf, tag);
}
-/* buf must be long enough (MAXNAMELEN + strlen(MOS_DIR_NAME) + 1 should do) */
+/* buf must be at least ZFS_MAX_DATASET_NAME_LEN bytes */
void
dsl_dir_name(dsl_dir_t *dd, char *buf)
{
if (dd->dd_parent) {
dsl_dir_name(dd->dd_parent, buf);
- (void) strcat(buf, "/");
+ VERIFY3U(strlcat(buf, "/", ZFS_MAX_DATASET_NAME_LEN), <,
+ ZFS_MAX_DATASET_NAME_LEN);
} else {
buf[0] = '\0';
}
@@ -319,10 +318,12 @@ dsl_dir_name(dsl_dir_t *dd, char *buf)
* dprintf_dd() with dd_lock held
*/
mutex_enter(&dd->dd_lock);
- (void) strcat(buf, dd->dd_myname);
+ VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN),
+ <, ZFS_MAX_DATASET_NAME_LEN);
mutex_exit(&dd->dd_lock);
} else {
- (void) strcat(buf, dd->dd_myname);
+ VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN),
+ <, ZFS_MAX_DATASET_NAME_LEN);
}
}
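
Name handling in dsl_dir.c moves from unchecked strcat()/strcpy() onto
MAXNAMELEN-derived buffers to strlcat()/strlcpy() against
ZFS_MAX_DATASET_NAME_LEN, with the strlcat() return value VERIFY'd:
strlcat() reports the total length it tried to build, so a result at or past
the buffer size means truncation, and the VERIFY turns silent overflow into
an immediate assertion failure. The idiom:

	/*
	 * strlcat() returns strlen(src) plus the length of the existing
	 * string; anything >= the buffer size means truncation occurred.
	 */
	VERIFY3U(strlcat(buf, dd->dd_myname, ZFS_MAX_DATASET_NAME_LEN), <,
	    ZFS_MAX_DATASET_NAME_LEN);
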
@@ -371,12 +372,12 @@ getcomponent(const char *path, char *component, const char **nextp)
if (p != NULL &&
(p[0] != '@' || strpbrk(path+1, "/@") || p[1] == '\0'))
return (SET_ERROR(EINVAL));
- if (strlen(path) >= MAXNAMELEN)
+ if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN)
return (SET_ERROR(ENAMETOOLONG));
(void) strcpy(component, path);
p = NULL;
} else if (p[0] == '/') {
- if (p - path >= MAXNAMELEN)
+ if (p - path >= ZFS_MAX_DATASET_NAME_LEN)
return (SET_ERROR(ENAMETOOLONG));
(void) strncpy(component, path, p - path);
component[p - path] = '\0';
@@ -388,7 +389,7 @@ getcomponent(const char *path, char *component, const char **nextp)
*/
if (strchr(path, '/'))
return (SET_ERROR(EINVAL));
- if (p - path >= MAXNAMELEN)
+ if (p - path >= ZFS_MAX_DATASET_NAME_LEN)
return (SET_ERROR(ENAMETOOLONG));
(void) strncpy(component, path, p - path);
component[p - path] = '\0';
@@ -416,7 +417,7 @@ dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
dsl_dir_t *dd;
uint64_t ddobj;
- buf = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+ buf = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
err = getcomponent(name, buf, &next);
if (err != 0)
goto error;
@@ -483,7 +484,7 @@ dsl_dir_hold(dsl_pool_t *dp, const char *name, void *tag,
*tailp = next;
*ddp = dd;
error:
- kmem_free(buf, MAXNAMELEN);
+ kmem_free(buf, ZFS_MAX_DATASET_NAME_LEN);
return (err);
}
@@ -978,7 +979,7 @@ dsl_dir_stats(dsl_dir_t *dd, nvlist_t *nv)
if (dsl_dir_is_clone(dd)) {
dsl_dataset_t *ds;
- char buf[MAXNAMELEN];
+ char buf[ZFS_MAX_DATASET_NAME_LEN];
VERIFY0(dsl_dataset_hold_obj(dd->dd_pool,
dsl_dir_phys(dd)->dd_origin_obj, FTAG, &ds));
@@ -1030,13 +1031,12 @@ static uint64_t
dsl_dir_space_towrite(dsl_dir_t *dd)
{
uint64_t space = 0;
- int i;
ASSERT(MUTEX_HELD(&dd->dd_lock));
- for (i = 0; i < TXG_SIZE; i++) {
- space += dd->dd_space_towrite[i&TXG_MASK];
- ASSERT3U(dd->dd_space_towrite[i&TXG_MASK], >=, 0);
+ for (int i = 0; i < TXG_SIZE; i++) {
+ space += dd->dd_space_towrite[i & TXG_MASK];
+ ASSERT3U(dd->dd_space_towrite[i & TXG_MASK], >=, 0);
}
return (space);
}
@@ -1116,17 +1116,19 @@ struct tempreserve {
static int
dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
- boolean_t ignorequota, boolean_t checkrefquota, list_t *tr_list,
+ boolean_t ignorequota, list_t *tr_list,
dmu_tx_t *tx, boolean_t first)
{
- uint64_t txg = tx->tx_txg;
- uint64_t est_inflight, used_on_disk, quota, parent_rsrv;
- uint64_t deferred = 0;
+ uint64_t txg;
+ uint64_t quota;
struct tempreserve *tr;
- int retval = EDQUOT;
- int txgidx = txg & TXG_MASK;
- int i;
- uint64_t ref_rsrv = 0;
+ int retval;
+ uint64_t ref_rsrv;
+
+top_of_function:
+ txg = tx->tx_txg;
+ retval = EDQUOT;
+ ref_rsrv = 0;
ASSERT3U(txg, !=, 0);
ASSERT3S(asize, >, 0);
@@ -1137,10 +1139,10 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
* Check against the dsl_dir's quota. We don't add in the delta
* when checking for over-quota because they get one free hit.
*/
- est_inflight = dsl_dir_space_towrite(dd);
- for (i = 0; i < TXG_SIZE; i++)
+ uint64_t est_inflight = dsl_dir_space_towrite(dd);
+ for (int i = 0; i < TXG_SIZE; i++)
est_inflight += dd->dd_tempreserved[i];
- used_on_disk = dsl_dir_phys(dd)->dd_used_bytes;
+ uint64_t used_on_disk = dsl_dir_phys(dd)->dd_used_bytes;
/*
* On the first iteration, fetch the dataset's used-on-disk and
@@ -1151,9 +1153,9 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
int error;
dsl_dataset_t *ds = tx->tx_objset->os_dsl_dataset;
- error = dsl_dataset_check_quota(ds, checkrefquota,
+ error = dsl_dataset_check_quota(ds, !netfree,
asize, est_inflight, &used_on_disk, &ref_rsrv);
- if (error) {
+ if (error != 0) {
mutex_exit(&dd->dd_lock);
DMU_TX_STAT_BUMP(dmu_tx_quota);
return (error);
@@ -1179,6 +1181,7 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
* we're very close to full, this will allow a steady trickle of
* removes to get through.
*/
+ uint64_t deferred = 0;
if (dd->dd_parent == NULL) {
spa_t *spa = dd->dd_pool->dp_spa;
uint64_t poolsize = dsl_pool_adjustedsize(dd->dd_pool, netfree);
@@ -1209,9 +1212,9 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
}
/* We need to up our estimated delta before dropping dd_lock */
- dd->dd_tempreserved[txgidx] += asize;
+ dd->dd_tempreserved[txg & TXG_MASK] += asize;
- parent_rsrv = parent_delta(dd, used_on_disk + est_inflight,
+ uint64_t parent_rsrv = parent_delta(dd, used_on_disk + est_inflight,
asize - ref_rsrv);
mutex_exit(&dd->dd_lock);
@@ -1221,11 +1224,19 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
list_insert_tail(tr_list, tr);
/* see if it's OK with our parent */
- if (dd->dd_parent && parent_rsrv) {
- boolean_t ismos = (dsl_dir_phys(dd)->dd_head_dataset_obj == 0);
+ if (dd->dd_parent != NULL && parent_rsrv != 0) {
+ /*
+ * Recurse on our parent without recursion. The recursive version
+ * has been observed to use a large amount of stack, even in the
+ * test suite; the largest stack seen was 7632 bytes on Linux.
+ */
+
+ dd = dd->dd_parent;
+ asize = parent_rsrv;
+ ignorequota = (dsl_dir_phys(dd)->dd_head_dataset_obj == 0);
+ first = B_FALSE;
+ goto top_of_function;
- return (dsl_dir_tempreserve_impl(dd->dd_parent,
- parent_rsrv, netfree, ismos, TRUE, tr_list, tx, FALSE));
} else {
return (0);
}
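
The parent walk in dsl_dir_tempreserve_impl() is converted from
self-recursion to an explicit loop: the function rewrites its arguments
(dd, asize, ignorequota, first) and jumps back to top_of_function, so a
deeply nested dsl_dir hierarchy costs one stack frame instead of one per
ancestor (the comment above records ~7632 bytes observed on the old path).
A hedged sketch of the same transformation on a hypothetical parent chain:

	/*
	 * Manual tail-call elimination (hypothetical types and helpers):
	 * the recursive call at the end of the function becomes an
	 * argument rewrite plus a jump back to the top.
	 */
	static int
	reserve_up(node_t *n, uint64_t asize)
	{
		int err;
	top:
		err = reserve_one(n, asize);		/* assumed helper */
		if (err != 0 || n->n_parent == NULL)
			return (err);
		asize = charge_to_parent(n, asize);	/* assumed helper */
		n = n->n_parent;
		goto top;
	}
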
@@ -1239,7 +1250,7 @@ dsl_dir_tempreserve_impl(dsl_dir_t *dd, uint64_t asize, boolean_t netfree,
*/
int
dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
- uint64_t fsize, uint64_t usize, void **tr_cookiep, dmu_tx_t *tx)
+ boolean_t netfree, void **tr_cookiep, dmu_tx_t *tx)
{
int err;
list_t *tr_list;
@@ -1253,7 +1264,6 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
list_create(tr_list, sizeof (struct tempreserve),
offsetof(struct tempreserve, tr_node));
ASSERT3S(asize, >, 0);
- ASSERT3S(fsize, >=, 0);
err = arc_tempreserve_space(lsize, tx->tx_txg);
if (err == 0) {
@@ -1280,8 +1290,8 @@ dsl_dir_tempreserve_space(dsl_dir_t *dd, uint64_t lsize, uint64_t asize,
}
if (err == 0) {
- err = dsl_dir_tempreserve_impl(dd, asize, fsize >= asize,
- FALSE, asize > usize, tr_list, tx, TRUE);
+ err = dsl_dir_tempreserve_impl(dd, asize, netfree,
+ B_FALSE, tr_list, tx, B_TRUE);
}
if (err != 0)
@@ -1695,11 +1705,11 @@ static int
dsl_valid_rename(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
int *deltap = arg;
- char namebuf[MAXNAMELEN];
+ char namebuf[ZFS_MAX_DATASET_NAME_LEN];
dsl_dataset_name(ds, namebuf);
- if (strlen(namebuf) + *deltap >= MAXNAMELEN)
+ if (strlen(namebuf) + *deltap >= ZFS_MAX_DATASET_NAME_LEN)
return (SET_ERROR(ENAMETOOLONG));
return (0);
}
@@ -1904,7 +1914,8 @@ dsl_dir_rename_sync(void *arg, dmu_tx_t *tx)
dd->dd_myname, tx);
ASSERT0(error);
- (void) strcpy(dd->dd_myname, mynewname);
+ (void) strlcpy(dd->dd_myname, mynewname,
+ sizeof (dd->dd_myname));
dsl_dir_rele(dd->dd_parent, dd);
dsl_dir_phys(dd)->dd_parent_obj = newparent->dd_object;
VERIFY0(dsl_dir_hold_obj(dp,
diff --git a/zfs/module/zfs/dsl_pool.c b/zfs/module/zfs/dsl_pool.c
old mode 100755
new mode 100644
index cf5259acd5b4..c16708048cc5
--- a/zfs/module/zfs/dsl_pool.c
+++ b/zfs/module/zfs/dsl_pool.c
@@ -20,9 +20,10 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
*/
#include <sys/dsl_pool.h>
@@ -47,6 +48,7 @@
#include <sys/zil_impl.h>
#include <sys/dsl_userhold.h>
#include <sys/trace_txg.h>
+#include <sys/mmp.h>
/*
* ZFS Write Throttle
@@ -128,8 +130,10 @@ int zfs_delay_min_dirty_percent = 60;
*/
unsigned long zfs_delay_scale = 1000 * 1000 * 1000 / 2000;
-hrtime_t zfs_throttle_delay = MSEC2NSEC(10);
-hrtime_t zfs_throttle_resolution = MSEC2NSEC(10);
+/*
+ * This determines the number of threads used by the dp_sync_taskq.
+ */
+int zfs_sync_taskq_batch_pct = 75;
int
dsl_pool_open_special_dir(dsl_pool_t *dp, const char *name, dsl_dir_t **ddp)
@@ -157,16 +161,21 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
dp->dp_meta_rootbp = *bp;
rrw_init(&dp->dp_config_rwlock, B_TRUE);
txg_init(dp, txg);
+ mmp_init(spa);
- txg_list_create(&dp->dp_dirty_datasets,
+ txg_list_create(&dp->dp_dirty_datasets, spa,
offsetof(dsl_dataset_t, ds_dirty_link));
- txg_list_create(&dp->dp_dirty_zilogs,
+ txg_list_create(&dp->dp_dirty_zilogs, spa,
offsetof(zilog_t, zl_dirty_link));
- txg_list_create(&dp->dp_dirty_dirs,
+ txg_list_create(&dp->dp_dirty_dirs, spa,
offsetof(dsl_dir_t, dd_dirty_link));
- txg_list_create(&dp->dp_sync_tasks,
+ txg_list_create(&dp->dp_sync_tasks, spa,
offsetof(dsl_sync_task_t, dst_node));
+ dp->dp_sync_taskq = taskq_create("dp_sync_taskq",
+ zfs_sync_taskq_batch_pct, minclsyspri, 1, INT_MAX,
+ TASKQ_THREADS_CPU_PCT);
+
mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&dp->dp_spaceavail_cv, NULL, CV_DEFAULT, NULL);
@@ -325,6 +334,8 @@ dsl_pool_close(dsl_pool_t *dp)
txg_list_destroy(&dp->dp_sync_tasks);
txg_list_destroy(&dp->dp_dirty_dirs);
+ taskq_destroy(dp->dp_sync_taskq);
+
/*
* We can't set retry to TRUE since we're explicitly specifying
* a spa to flush. This is good enough; any missed buffers for
@@ -333,12 +344,14 @@ dsl_pool_close(dsl_pool_t *dp)
*/
arc_flush(dp->dp_spa, FALSE);
+ mmp_fini(dp->dp_spa);
txg_fini(dp);
dsl_scan_fini(dp);
dmu_buf_user_evict_wait();
rrw_destroy(&dp->dp_config_rwlock);
mutex_destroy(&dp->dp_lock);
+ cv_destroy(&dp->dp_spaceavail_cv);
taskq_destroy(dp->dp_iput_taskq);
if (dp->dp_blkstats)
vmem_free(dp->dp_blkstats, sizeof (zfs_all_blkstats_t));
@@ -402,8 +415,10 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
/* create the root objset */
VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
+ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
VERIFY(NULL != (os = dmu_objset_create_impl(dp->dp_spa, ds,
dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx)));
+ rrw_exit(&ds->ds_bp_rwlock, FTAG);
#ifdef _KERNEL
zfs_create_fs(os, kcred, zplprops, tx);
#endif
@@ -431,14 +446,6 @@ dsl_pool_mos_diduse_space(dsl_pool_t *dp,
mutex_exit(&dp->dp_lock);
}
-static int
-deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
-{
- dsl_deadlist_t *dl = arg;
- dsl_deadlist_insert(dl, bp, tx);
- return (0);
-}
-
static void
dsl_pool_sync_mos(dsl_pool_t *dp, dmu_tx_t *tx)
{
@@ -463,7 +470,7 @@ dsl_pool_dirty_delta(dsl_pool_t *dp, int64_t delta)
* Note: we signal even when increasing dp_dirty_total.
* This ensures forward progress -- each thread wakes the next waiter.
*/
- if (dp->dp_dirty_total <= zfs_dirty_data_max)
+ if (dp->dp_dirty_total < zfs_dirty_data_max)
cv_signal(&dp->dp_spaceavail_cv);
}
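
The comparison change from <= to < in dsl_pool_dirty_delta() means the
space-available condvar is signalled only when dp_dirty_total is strictly
below zfs_dirty_data_max, so a writer parked in dmu_tx_wait() is not woken
merely to find the pool still at the limit and block again. A simplified
sketch of the waiter side this pairs with:

	mutex_enter(&dp->dp_lock);
	while (dp->dp_dirty_total >= zfs_dirty_data_max)
		cv_wait(&dp->dp_spaceavail_cv, &dp->dp_lock);
	mutex_exit(&dp->dp_lock);
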
@@ -507,14 +514,27 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
*/
dsl_pool_undirty_space(dp, dp->dp_dirty_pertxg[txg & TXG_MASK], txg);
+ /*
+ * Update the long range free counter after
+ * we're done syncing user data
+ */
+ mutex_enter(&dp->dp_lock);
+ ASSERT(spa_sync_pass(dp->dp_spa) == 1 ||
+ dp->dp_long_free_dirty_pertxg[txg & TXG_MASK] == 0);
+ dp->dp_long_free_dirty_pertxg[txg & TXG_MASK] = 0;
+ mutex_exit(&dp->dp_lock);
+
/*
* After the data blocks have been written (ensured by the zio_wait()
- * above), update the user/group space accounting.
+ * above), update the user/group space accounting. This happens
+ * in tasks dispatched to dp_sync_taskq, so wait for them before
+ * continuing.
*/
for (ds = list_head(&synced_datasets); ds != NULL;
ds = list_next(&synced_datasets, ds)) {
dmu_objset_do_userquota_updates(ds->ds_objset, tx);
}
+ taskq_wait(dp->dp_sync_taskq);
/*
* Sync the datasets again to push out the changes due to
@@ -539,11 +559,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
* - release hold from dsl_dataset_dirty()
*/
while ((ds = list_remove_head(&synced_datasets)) != NULL) {
- ASSERTV(objset_t *os = ds->ds_objset);
- bplist_iterate(&ds->ds_pending_deadlist,
- deadlist_enqueue_cb, &ds->ds_deadlist, tx);
- ASSERT(!dmu_objset_is_dirty(os, txg));
- dmu_buf_rele(ds->ds_dbuf, ds);
+ dsl_dataset_sync_done(ds, tx);
}
while ((dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) != NULL) {
@@ -566,8 +582,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
dp->dp_mos_uncompressed_delta = 0;
}
- if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
- list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) {
+ if (!multilist_is_empty(mos->os_dirty_dnodes[txg & TXG_MASK])) {
dsl_pool_sync_mos(dp, tx);
}
@@ -601,9 +616,16 @@ dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg)
{
zilog_t *zilog;
- while ((zilog = txg_list_remove(&dp->dp_dirty_zilogs, txg))) {
+ while ((zilog = txg_list_head(&dp->dp_dirty_zilogs, txg))) {
dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os);
+ /*
+ * We don't remove the zilog from the dp_dirty_zilogs
+ * list until after we've cleaned it. This ensures that
+ * callers of zilog_is_dirty() receive an accurate
+ * answer when they are racing with the spa sync thread.
+ */
zil_clean(zilog, txg);
+ (void) txg_list_remove_this(&dp->dp_dirty_zilogs, zilog, txg);
ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg));
dmu_buf_rele(ds->ds_dbuf, zilog);
}
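
As the added comment explains, a zilog now stays on dp_dirty_zilogs until
zil_clean() has finished with it, and only then is it removed with
txg_list_remove_this(); previously txg_list_remove() took it off the list
first, opening a window where zilog_is_dirty() could report a clean log
that was still being cleaned. The clean-first, remove-second shape:

	while ((zilog = txg_list_head(&dp->dp_dirty_zilogs, txg)) != NULL) {
		zil_clean(zilog, txg);	/* entry still visibly dirty here */
		(void) txg_list_remove_this(&dp->dp_dirty_zilogs, zilog, txg);
	}
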
@@ -618,7 +640,8 @@ int
dsl_pool_sync_context(dsl_pool_t *dp)
{
return (curthread == dp->dp_tx.tx_sync_thread ||
- spa_is_initializing(dp->dp_spa));
+ spa_is_initializing(dp->dp_spa) ||
+ taskq_member(dp->dp_sync_taskq, curthread));
}
uint64_t
@@ -718,7 +741,9 @@ upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
* The $ORIGIN can't have any data, or the accounting
* will be wrong.
*/
+ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
ASSERT0(dsl_dataset_phys(prev)->ds_bp.blk_birth);
+ rrw_exit(&ds->ds_bp_rwlock, FTAG);
/* The origin doesn't get attached to itself */
if (ds->ds_object == prev->ds_object) {
@@ -1087,6 +1112,7 @@ dsl_pool_config_held_writer(dsl_pool_t *dp)
EXPORT_SYMBOL(dsl_pool_config_enter);
EXPORT_SYMBOL(dsl_pool_config_exit);
+/* BEGIN CSTYLED */
/* zfs_dirty_data_max_percent only applied at module load in arc_init(). */
module_param(zfs_dirty_data_max_percent, int, 0444);
MODULE_PARM_DESC(zfs_dirty_data_max_percent, "percent of ram can be dirty");
@@ -1112,4 +1138,9 @@ MODULE_PARM_DESC(zfs_dirty_data_sync, "sync txg when this much dirty data");
module_param(zfs_delay_scale, ulong, 0644);
MODULE_PARM_DESC(zfs_delay_scale, "how quickly delay approaches infinity");
+
+module_param(zfs_sync_taskq_batch_pct, int, 0644);
+MODULE_PARM_DESC(zfs_sync_taskq_batch_pct,
+ "max percent of CPUs that are used to sync dirty data");
+/* END CSTYLED */
#endif
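
The dp_sync_taskq introduced here is sized as a percentage of online CPUs
(zfs_sync_taskq_batch_pct, default 75, exported as a module parameter) via
TASKQ_THREADS_CPU_PCT; user/group space accounting is dispatched to it
during dsl_pool_sync(), which then taskq_wait()s before continuing, and
dsl_pool_sync_context() counts its workers as sync context via
taskq_member(). The hunks also pair mmp_init()/mmp_fini() with pool
open/close and destroy the previously leaked dp_spaceavail_cv. Creation,
as in the hunk above:

	taskq_t *tq = taskq_create("dp_sync_taskq",
	    zfs_sync_taskq_batch_pct,	/* 75 => threads on 75% of CPUs */
	    minclsyspri, 1, INT_MAX, TASKQ_THREADS_CPU_PCT);
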
diff --git a/zfs/module/zfs/dsl_prop.c b/zfs/module/zfs/dsl_prop.c
index 28b101eee547..0d563a703e55 100644
--- a/zfs/module/zfs/dsl_prop.c
+++ b/zfs/module/zfs/dsl_prop.c
@@ -20,8 +20,9 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 Martin Matuska. All rights reserved.
+ * Copyright 2015, Joyent, Inc.
*/
#include <sys/zfs_context.h>
@@ -42,16 +43,14 @@
#define ZPROP_RECVD_SUFFIX "$recvd"
static int
-dodefault(const char *propname, int intsz, int numints, void *buf)
+dodefault(zfs_prop_t prop, int intsz, int numints, void *buf)
{
- zfs_prop_t prop;
-
/*
* The setonce properties are read-only, BUT they still
* have a default value that can be used as the initial
* value.
*/
- if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL ||
+ if (prop == ZPROP_INVAL ||
(zfs_prop_readonly(prop) && !zfs_prop_setonce(prop)))
return (SET_ERROR(ENOENT));
@@ -149,7 +148,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
}
if (err == ENOENT)
- err = dodefault(propname, intsz, numints, buf);
+ err = dodefault(prop, intsz, numints, buf);
strfree(inheritstr);
strfree(recvdstr);
@@ -216,6 +215,58 @@ dsl_prop_get_ds(dsl_dataset_t *ds, const char *propname,
intsz, numints, buf, setpoint, ds->ds_is_snapshot));
}
+static dsl_prop_record_t *
+dsl_prop_record_find(dsl_dir_t *dd, const char *propname)
+{
+ dsl_prop_record_t *pr = NULL;
+
+ ASSERT(MUTEX_HELD(&dd->dd_lock));
+
+ for (pr = list_head(&dd->dd_props);
+ pr != NULL; pr = list_next(&dd->dd_props, pr)) {
+ if (strcmp(pr->pr_propname, propname) == 0)
+ break;
+ }
+
+ return (pr);
+}
+
+static dsl_prop_record_t *
+dsl_prop_record_create(dsl_dir_t *dd, const char *propname)
+{
+ dsl_prop_record_t *pr;
+
+ ASSERT(MUTEX_HELD(&dd->dd_lock));
+
+ pr = kmem_alloc(sizeof (dsl_prop_record_t), KM_SLEEP);
+ pr->pr_propname = spa_strdup(propname);
+ list_create(&pr->pr_cbs, sizeof (dsl_prop_cb_record_t),
+ offsetof(dsl_prop_cb_record_t, cbr_pr_node));
+ list_insert_head(&dd->dd_props, pr);
+
+ return (pr);
+}
+
+void
+dsl_prop_init(dsl_dir_t *dd)
+{
+ list_create(&dd->dd_props, sizeof (dsl_prop_record_t),
+ offsetof(dsl_prop_record_t, pr_node));
+}
+
+void
+dsl_prop_fini(dsl_dir_t *dd)
+{
+ dsl_prop_record_t *pr;
+
+ while ((pr = list_remove_head(&dd->dd_props)) != NULL) {
+ list_destroy(&pr->pr_cbs);
+ spa_strfree((char *)pr->pr_propname);
+ kmem_free(pr, sizeof (dsl_prop_record_t));
+ }
+ list_destroy(&dd->dd_props);
+}
+
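
dsl_prop callbacks are reorganized from one flat per-directory list into a
dsl_prop_record_t per property name (each holding its own pr_cbs list),
plus a per-dataset ds_prop_cbs list, so notification paths walk only the
callbacks for the property that actually changed. Registration becomes a
find-or-create under dd_lock, as the hunk below shows:

	mutex_enter(&dd->dd_lock);
	pr = dsl_prop_record_find(dd, propname);
	if (pr == NULL)
		pr = dsl_prop_record_create(dd, propname);
	cbr->cbr_pr = pr;
	list_insert_head(&pr->pr_cbs, cbr);
	list_insert_head(&ds->ds_prop_cbs, cbr);
	mutex_exit(&dd->dd_lock);
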
/*
* Register interest in the named property. We'll call the callback
* once to notify it of the current property value, and again each time
@@ -229,6 +280,7 @@ dsl_prop_register(dsl_dataset_t *ds, const char *propname,
{
dsl_dir_t *dd = ds->ds_dir;
uint64_t value;
+ dsl_prop_record_t *pr;
dsl_prop_cb_record_t *cbr;
int err;
ASSERTV(dsl_pool_t *dp = dd->dd_pool);
@@ -241,12 +293,16 @@ dsl_prop_register(dsl_dataset_t *ds, const char *propname,
cbr = kmem_alloc(sizeof (dsl_prop_cb_record_t), KM_SLEEP);
cbr->cbr_ds = ds;
- cbr->cbr_propname = kmem_alloc(strlen(propname)+1, KM_SLEEP);
- (void) strcpy((char *)cbr->cbr_propname, propname);
cbr->cbr_func = callback;
cbr->cbr_arg = cbarg;
+
mutex_enter(&dd->dd_lock);
- list_insert_head(&dd->dd_prop_cbs, cbr);
+ pr = dsl_prop_record_find(dd, propname);
+ if (pr == NULL)
+ pr = dsl_prop_record_create(dd, propname);
+ cbr->cbr_pr = pr;
+ list_insert_head(&pr->pr_cbs, cbr);
+ list_insert_head(&ds->ds_prop_cbs, cbr);
mutex_exit(&dd->dd_lock);
cbr->cbr_func(cbr->cbr_arg, value);
@@ -379,6 +435,9 @@ dsl_prop_predict(dsl_dir_t *dd, const char *propname,
/*
* Unregister this callback. Return 0 on success, ENOENT if ddname is
* invalid, or ENOMSG if no matching callback registered.
+ *
+ * NOTE: This function is no longer used internally but has been preserved
+ * to prevent breaking external consumers (Lustre, etc).
*/
int
dsl_prop_unregister(dsl_dataset_t *ds, const char *propname,
@@ -388,12 +447,12 @@ dsl_prop_unregister(dsl_dataset_t *ds, const char *propname,
dsl_prop_cb_record_t *cbr;
mutex_enter(&dd->dd_lock);
- for (cbr = list_head(&dd->dd_prop_cbs);
- cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) {
+ for (cbr = list_head(&ds->ds_prop_cbs);
+ cbr; cbr = list_next(&ds->ds_prop_cbs, cbr)) {
if (cbr->cbr_ds == ds &&
cbr->cbr_func == callback &&
cbr->cbr_arg == cbarg &&
- strcmp(cbr->cbr_propname, propname) == 0)
+ strcmp(cbr->cbr_pr->pr_propname, propname) == 0)
break;
}
@@ -402,31 +461,43 @@ dsl_prop_unregister(dsl_dataset_t *ds, const char *propname,
return (SET_ERROR(ENOMSG));
}
- list_remove(&dd->dd_prop_cbs, cbr);
+ list_remove(&ds->ds_prop_cbs, cbr);
+ list_remove(&cbr->cbr_pr->pr_cbs, cbr);
mutex_exit(&dd->dd_lock);
- kmem_free((void*)cbr->cbr_propname, strlen(cbr->cbr_propname)+1);
kmem_free(cbr, sizeof (dsl_prop_cb_record_t));
return (0);
}
-boolean_t
-dsl_prop_hascb(dsl_dataset_t *ds)
+/*
+ * Unregister all callbacks that are registered with the
+ * given callback argument.
+ */
+void
+dsl_prop_unregister_all(dsl_dataset_t *ds, void *cbarg)
{
+ dsl_prop_cb_record_t *cbr, *next_cbr;
+
dsl_dir_t *dd = ds->ds_dir;
- boolean_t rv = B_FALSE;
- dsl_prop_cb_record_t *cbr;
mutex_enter(&dd->dd_lock);
- for (cbr = list_head(&dd->dd_prop_cbs); cbr;
- cbr = list_next(&dd->dd_prop_cbs, cbr)) {
- if (cbr->cbr_ds == ds) {
- rv = B_TRUE;
- break;
+ next_cbr = list_head(&ds->ds_prop_cbs);
+ while (next_cbr != NULL) {
+ cbr = next_cbr;
+ next_cbr = list_next(&ds->ds_prop_cbs, cbr);
+ if (cbr->cbr_arg == cbarg) {
+ list_remove(&ds->ds_prop_cbs, cbr);
+ list_remove(&cbr->cbr_pr->pr_cbs, cbr);
+ kmem_free(cbr, sizeof (dsl_prop_cb_record_t));
}
}
mutex_exit(&dd->dd_lock);
- return (rv);
+}
+
+boolean_t
+dsl_prop_hascb(dsl_dataset_t *ds)
+{
+ return (!list_is_empty(&ds->ds_prop_cbs));
}
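
dsl_prop_unregister_all() drops every callback whose cbr_arg matches in a
single pass, using the standard save-next idiom so list_next() is never
called on a freed node, while dsl_prop_hascb() reduces to a
list_is_empty() check on the per-dataset list. The per-name
dsl_prop_unregister() survives only for external consumers such as Lustre,
as the added comment notes. The save-next idiom:

	next = list_head(&ds->ds_prop_cbs);
	while ((cbr = next) != NULL) {
		next = list_next(&ds->ds_prop_cbs, cbr);	/* before free */
		if (cbr->cbr_arg == cbarg) {
			list_remove(&ds->ds_prop_cbs, cbr);
			list_remove(&cbr->cbr_pr->pr_cbs, cbr);
			kmem_free(cbr, sizeof (*cbr));
		}
	}
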
/* ARGSUSED */
@@ -434,38 +505,50 @@ static int
dsl_prop_notify_all_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
dsl_dir_t *dd = ds->ds_dir;
+ dsl_prop_record_t *pr;
dsl_prop_cb_record_t *cbr;
mutex_enter(&dd->dd_lock);
- for (cbr = list_head(&dd->dd_prop_cbs); cbr;
- cbr = list_next(&dd->dd_prop_cbs, cbr)) {
- uint64_t value;
+ for (pr = list_head(&dd->dd_props);
+ pr; pr = list_next(&dd->dd_props, pr)) {
+ for (cbr = list_head(&pr->pr_cbs); cbr;
+ cbr = list_next(&pr->pr_cbs, cbr)) {
+ uint64_t value;
- /*
- * Callback entries do not have holds on their datasets
- * so that datasets with registered callbacks are still
- * eligible for eviction. Unlike operations on callbacks
- * for a single dataset, we are performing a recursive
- * descent of related datasets and the calling context
- * for this iteration only has a dataset hold on the root.
- * Without a hold, the callback's pointer to the dataset
- * could be invalidated by eviction at any time.
- *
- * Use dsl_dataset_try_add_ref() to verify that the
- * dataset has not begun eviction processing and to
- * prevent eviction from occurring for the duration
- * of the callback. If the hold attempt fails, this
- * object is already being evicted and the callback can
- * be safely ignored.
- */
- if (!dsl_dataset_try_add_ref(dp, cbr->cbr_ds, FTAG))
- continue;
+ /*
+ * Callback entries do not have holds on their
+ * datasets so that datasets with registered
+ * callbacks are still eligible for eviction.
+ * Unlike operations to update properties on a
+ * single dataset, we are performing a recursive
+ * descent of related head datasets. The caller
+ * of this function only has a dataset hold on
+ * the passed in head dataset, not the snapshots
+ * associated with this dataset. Without a hold,
+ * the dataset pointer within callback records
+ * for snapshots can be invalidated by eviction
+ * at any time.
+ *
+ * Use dsl_dataset_try_add_ref() to verify
+ * that the dataset for a snapshot has not
+ * begun eviction processing and to prevent
+ * eviction from occurring for the duration of
+ * the callback. If the hold attempt fails,
+ * this object is already being evicted and the
+ * callback can be safely ignored.
+ */
+ if (ds != cbr->cbr_ds &&
+ !dsl_dataset_try_add_ref(dp, cbr->cbr_ds, FTAG))
+ continue;
- if (dsl_prop_get_ds(cbr->cbr_ds, cbr->cbr_propname,
- sizeof (value), 1, &value, NULL) == 0)
- cbr->cbr_func(cbr->cbr_arg, value);
+ if (dsl_prop_get_ds(cbr->cbr_ds,
+ cbr->cbr_pr->pr_propname, sizeof (value), 1,
+ &value, NULL) == 0)
+ cbr->cbr_func(cbr->cbr_arg, value);
- dsl_dataset_rele(cbr->cbr_ds, FTAG);
+ if (ds != cbr->cbr_ds)
+ dsl_dataset_rele(cbr->cbr_ds, FTAG);
+ }
}
mutex_exit(&dd->dd_lock);
@@ -490,6 +573,7 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
const char *propname, uint64_t value, int first)
{
dsl_dir_t *dd;
+ dsl_prop_record_t *pr;
dsl_prop_cb_record_t *cbr;
objset_t *mos = dp->dp_meta_objset;
zap_cursor_t zc;
@@ -516,30 +600,33 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj,
}
mutex_enter(&dd->dd_lock);
- for (cbr = list_head(&dd->dd_prop_cbs); cbr;
- cbr = list_next(&dd->dd_prop_cbs, cbr)) {
- uint64_t propobj;
+ pr = dsl_prop_record_find(dd, propname);
+ if (pr != NULL) {
+ for (cbr = list_head(&pr->pr_cbs); cbr;
+ cbr = list_next(&pr->pr_cbs, cbr)) {
+ uint64_t propobj;
- /*
- * cbr->cbf_ds may be invalidated due to eviction,
- * requiring the use of dsl_dataset_try_add_ref().
- * See comment block in dsl_prop_notify_all_cb()
- * for details.
- */
- if (strcmp(cbr->cbr_propname, propname) != 0 ||
- !dsl_dataset_try_add_ref(dp, cbr->cbr_ds, FTAG))
- continue;
+ /*
+ * cbr->cbr_ds may be invalidated due to eviction,
+ * requiring the use of dsl_dataset_try_add_ref().
+ * See comment block in dsl_prop_notify_all_cb()
+ * for details.
+ */
+ if (!dsl_dataset_try_add_ref(dp, cbr->cbr_ds, FTAG))
+ continue;
- propobj = dsl_dataset_phys(cbr->cbr_ds)->ds_props_obj;
+ propobj = dsl_dataset_phys(cbr->cbr_ds)->ds_props_obj;
- /*
- * If the property is not set on this ds, then it is
- * inherited here; call the callback.
- */
- if (propobj == 0 || zap_contains(mos, propobj, propname) != 0)
- cbr->cbr_func(cbr->cbr_arg, value);
+ /*
+ * If the property is not set on this ds, then it is
+ * inherited here; call the callback.
+ */
+ if (propobj == 0 ||
+ zap_contains(mos, propobj, propname) != 0)
+ cbr->cbr_func(cbr->cbr_arg, value);
- dsl_dataset_rele(cbr->cbr_ds, FTAG);
+ dsl_dataset_rele(cbr->cbr_ds, FTAG);
+ }
}
mutex_exit(&dd->dd_lock);
@@ -572,7 +659,7 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
int err;
uint64_t version = spa_version(ds->ds_dir->dd_pool->dp_spa);
- isint = (dodefault(propname, 8, 1, &intval) == 0);
+ isint = (dodefault(zfs_name_to_prop(propname), 8, 1, &intval) == 0);
if (ds->ds_is_snapshot) {
ASSERT(version >= SPA_VERSION_SNAP_PROPS);
@@ -679,10 +766,10 @@ dsl_prop_set_sync_impl(dsl_dataset_t *ds, const char *propname,
* ds here.
*/
mutex_enter(&ds->ds_dir->dd_lock);
- for (cbr = list_head(&ds->ds_dir->dd_prop_cbs); cbr;
- cbr = list_next(&ds->ds_dir->dd_prop_cbs, cbr)) {
- if (cbr->cbr_ds == ds &&
- strcmp(cbr->cbr_propname, propname) == 0)
+ for (cbr = list_head(&ds->ds_prop_cbs); cbr;
+ cbr = list_next(&ds->ds_prop_cbs, cbr)) {
+ if (strcmp(cbr->cbr_pr->pr_propname,
+ propname) == 0)
cbr->cbr_func(cbr->cbr_arg, intval);
}
mutex_exit(&ds->ds_dir->dd_lock);
@@ -805,11 +892,15 @@ dsl_props_set_sync_impl(dsl_dataset_t *ds, zprop_source_t source,
while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
nvpair_t *pair = elem;
+ const char *name = nvpair_name(pair);
if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
/*
- * dsl_prop_get_all_impl() returns properties in this
- * format.
+ * This usually happens when we reuse the nvlist_t data
+ * returned by the counterpart dsl_prop_get_all_impl().
+ * For instance we do this to restore the original
+ * received properties when an error occurs in the
+ * zfs_ioc_recv() codepath.
*/
nvlist_t *attrs = fnvpair_value_nvlist(pair);
pair = fnvlist_lookup_nvpair(attrs, ZPROP_VALUE);
@@ -817,14 +908,14 @@ dsl_props_set_sync_impl(dsl_dataset_t *ds, zprop_source_t source,
if (nvpair_type(pair) == DATA_TYPE_STRING) {
const char *value = fnvpair_value_string(pair);
- dsl_prop_set_sync_impl(ds, nvpair_name(pair),
+ dsl_prop_set_sync_impl(ds, name,
source, 1, strlen(value) + 1, value, tx);
} else if (nvpair_type(pair) == DATA_TYPE_UINT64) {
uint64_t intval = fnvpair_value_uint64(pair);
- dsl_prop_set_sync_impl(ds, nvpair_name(pair),
+ dsl_prop_set_sync_impl(ds, name,
source, sizeof (intval), 1, &intval, tx);
} else if (nvpair_type(pair) == DATA_TYPE_BOOLEAN) {
- dsl_prop_set_sync_impl(ds, nvpair_name(pair),
+ dsl_prop_set_sync_impl(ds, name,
source, 0, 0, NULL, tx);
} else {
panic("invalid nvpair type");
@@ -1008,7 +1099,7 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
dsl_pool_t *dp = dd->dd_pool;
objset_t *mos = dp->dp_meta_objset;
int err = 0;
- char setpoint[MAXNAMELEN];
+ char setpoint[ZFS_MAX_DATASET_NAME_LEN];
VERIFY(nvlist_alloc(nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
@@ -1040,6 +1131,10 @@ dsl_prop_get_all_ds(dsl_dataset_t *ds, nvlist_t **nvp,
break;
}
out:
+ if (err) {
+ nvlist_free(*nvp);
+ *nvp = NULL;
+ }
return (err);
}
@@ -1130,7 +1225,7 @@ dsl_prop_nvlist_add_uint64(nvlist_t *nv, zfs_prop_t prop, uint64_t value)
VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
VERIFY(nvlist_add_uint64(propval, ZPROP_VALUE, value) == 0);
/* Indicate the default source if we can. */
- if (dodefault(propname, 8, 1, &default_value) == 0 &&
+ if (dodefault(prop, 8, 1, &default_value) == 0 &&
value == default_value) {
VERIFY(nvlist_add_string(propval, ZPROP_SOURCE, "") == 0);
}
@@ -1158,6 +1253,7 @@ dsl_prop_nvlist_add_string(nvlist_t *nv, zfs_prop_t prop, const char *value)
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(dsl_prop_register);
EXPORT_SYMBOL(dsl_prop_unregister);
+EXPORT_SYMBOL(dsl_prop_unregister_all);
EXPORT_SYMBOL(dsl_prop_get);
EXPORT_SYMBOL(dsl_prop_get_integer);
EXPORT_SYMBOL(dsl_prop_get_all);
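
Two smaller fixes ride along in dsl_prop.c: dodefault() now takes an
already-resolved zfs_prop_t, so callers translate a name at most once with
zfs_name_to_prop() instead of re-parsing it inside every default lookup,
and dsl_prop_get_all_ds() frees the partially built nvlist on its error
path rather than leaking it and handing the caller a half-populated list.
The error-path ownership rule, as in the hunk:

	out:
		if (err) {
			nvlist_free(*nvp);
			*nvp = NULL;	/* caller sees a full list or NULL */
		}
		return (err);
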
diff --git a/zfs/module/zfs/dsl_scan.c b/zfs/module/zfs/dsl_scan.c
index b989e763386b..74cbce0d393e 100644
--- a/zfs/module/zfs/dsl_scan.c
+++ b/zfs/module/zfs/dsl_scan.c
@@ -20,7 +20,10 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
+ * Copyright 2016 Gary Mills
+ * Copyright (c) 2017 Datto Inc.
+ * Copyright 2017 Joyent, Inc.
*/
#include <sys/dsl_scan.h>
@@ -46,6 +49,7 @@
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/zfeature.h>
+#include <sys/abd.h>
#ifdef _KERNEL
#include <sys/zfs_vfsops.h>
#endif
@@ -55,7 +59,8 @@ typedef int (scan_cb_t)(dsl_pool_t *, const blkptr_t *,
static scan_cb_t dsl_scan_scrub_cb;
static void dsl_scan_cancel_sync(void *, dmu_tx_t *);
-static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *tx);
+static void dsl_scan_sync_state(dsl_scan_t *, dmu_tx_t *);
+static boolean_t dsl_scan_restarting(dsl_scan_t *, dmu_tx_t *);
int zfs_top_maxinflight = 32; /* maximum I/Os per top-level */
int zfs_resilver_delay = 2; /* number of ticks to delay resilver */
@@ -70,12 +75,17 @@ int zfs_no_scrub_prefetch = B_FALSE; /* set to disable scrub prefetch */
enum ddt_class zfs_scrub_ddt_class_max = DDT_CLASS_DUPLICATE;
int dsl_scan_delay_completion = B_FALSE; /* set to delay scan completion */
/* max number of blocks to free in a single TXG */
-ulong zfs_free_max_blocks = 100000;
+unsigned long zfs_free_max_blocks = 100000;
#define DSL_SCAN_IS_SCRUB_RESILVER(scn) \
((scn)->scn_phys.scn_func == POOL_SCAN_SCRUB || \
(scn)->scn_phys.scn_func == POOL_SCAN_RESILVER)
+/*
+ * Enable/disable the processing of the free_bpobj object.
+ */
+int zfs_free_bpobj_enabled = 1;
+
/* the order has to match pool_scan_type */
static scan_cb_t *scan_funcs[POOL_SCAN_FUNCS] = {
NULL,
@@ -240,11 +250,10 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
if (vdev_resilver_needed(spa->spa_root_vdev,
&scn->scn_phys.scn_min_txg, &scn->scn_phys.scn_max_txg)) {
- spa_event_notify(spa, NULL,
- FM_EREPORT_ZFS_RESILVER_START);
+ spa_event_notify(spa, NULL, NULL,
+ ESC_ZFS_RESILVER_START);
} else {
- spa_event_notify(spa, NULL,
- FM_EREPORT_ZFS_SCRUB_START);
+ spa_event_notify(spa, NULL, NULL, ESC_ZFS_SCRUB_START);
}
spa->spa_scrub_started = B_TRUE;
@@ -311,6 +320,8 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
scn->scn_phys.scn_queue_obj = 0;
}
+ scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED;
+
/*
* If we were "restarted" from a stopped state, don't bother
* with anything else.
@@ -323,8 +334,15 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
else
scn->scn_phys.scn_state = DSS_CANCELED;
- spa_history_log_internal(spa, "scan done", tx,
- "complete=%u", complete);
+ if (dsl_scan_restarting(scn, tx))
+ spa_history_log_internal(spa, "scan aborted, restarting", tx,
+ "errors=%llu", spa_get_errlog_size(spa));
+ else if (!complete)
+ spa_history_log_internal(spa, "scan cancelled", tx,
+ "errors=%llu", spa_get_errlog_size(spa));
+ else
+ spa_history_log_internal(spa, "scan done", tx,
+ "errors=%llu", spa_get_errlog_size(spa));
if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
mutex_enter(&spa->spa_scrub_lock);
@@ -344,9 +362,9 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx)
vdev_dtl_reassess(spa->spa_root_vdev, tx->tx_txg,
complete ? scn->scn_phys.scn_max_txg : 0, B_TRUE);
if (complete) {
- spa_event_notify(spa, NULL, scn->scn_phys.scn_min_txg ?
- FM_EREPORT_ZFS_RESILVER_FINISH :
- FM_EREPORT_ZFS_SCRUB_FINISH);
+ spa_event_notify(spa, NULL, NULL,
+ scn->scn_phys.scn_min_txg ?
+ ESC_ZFS_RESILVER_FINISH : ESC_ZFS_SCRUB_FINISH);
}
spa_errlog_rotate(spa);
@@ -391,6 +409,92 @@ dsl_scan_cancel(dsl_pool_t *dp)
dsl_scan_cancel_sync, NULL, 3, ZFS_SPACE_CHECK_RESERVED));
}
+boolean_t
+dsl_scan_is_paused_scrub(const dsl_scan_t *scn)
+{
+ if (dsl_scan_scrubbing(scn->scn_dp) &&
+ scn->scn_phys.scn_flags & DSF_SCRUB_PAUSED)
+ return (B_TRUE);
+
+ return (B_FALSE);
+}
+
+static int
+dsl_scrub_pause_resume_check(void *arg, dmu_tx_t *tx)
+{
+ pool_scrub_cmd_t *cmd = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_scan_t *scn = dp->dp_scan;
+
+ if (*cmd == POOL_SCRUB_PAUSE) {
+ /* can't pause a scrub when there is no in-progress scrub */
+ if (!dsl_scan_scrubbing(dp))
+ return (SET_ERROR(ENOENT));
+
+ /* can't pause a paused scrub */
+ if (dsl_scan_is_paused_scrub(scn))
+ return (SET_ERROR(EBUSY));
+ } else if (*cmd != POOL_SCRUB_NORMAL) {
+ return (SET_ERROR(ENOTSUP));
+ }
+
+ return (0);
+}
+
+static void
+dsl_scrub_pause_resume_sync(void *arg, dmu_tx_t *tx)
+{
+ pool_scrub_cmd_t *cmd = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ spa_t *spa = dp->dp_spa;
+ dsl_scan_t *scn = dp->dp_scan;
+
+
+ if (*cmd == POOL_SCRUB_PAUSE) {
+ /* can't pause a scrub when there is no in-progress scrub */
+ spa->spa_scan_pass_scrub_pause = gethrestime_sec();
+ scn->scn_phys.scn_flags |= DSF_SCRUB_PAUSED;
+ dsl_scan_sync_state(scn, tx);
+ } else {
+ ASSERT3U(*cmd, ==, POOL_SCRUB_NORMAL);
+ if (dsl_scan_is_paused_scrub(scn)) {
+ /*
+ * We need to keep track of how much time we spend
+ * paused per pass so that we can adjust the scrub rate
+ * shown in the output of 'zpool status'
+ */
+ spa->spa_scan_pass_scrub_spent_paused +=
+ gethrestime_sec() - spa->spa_scan_pass_scrub_pause;
+ spa->spa_scan_pass_scrub_pause = 0;
+ scn->scn_phys.scn_flags &= ~DSF_SCRUB_PAUSED;
+ dsl_scan_sync_state(scn, tx);
+ }
+ }
+}
+
+/*
+ * Set scrub pause/resume state if it makes sense to do so
+ */
+int
+dsl_scrub_set_pause_resume(const dsl_pool_t *dp, pool_scrub_cmd_t cmd)
+{
+ return (dsl_sync_task(spa_name(dp->dp_spa),
+ dsl_scrub_pause_resume_check, dsl_scrub_pause_resume_sync, &cmd, 3,
+ ZFS_SPACE_CHECK_RESERVED));
+}
+
+boolean_t
+dsl_scan_scrubbing(const dsl_pool_t *dp)
+{
+ dsl_scan_t *scn = dp->dp_scan;
+
+ if (scn->scn_phys.scn_state == DSS_SCANNING &&
+ scn->scn_phys.scn_func == POOL_SCAN_SCRUB)
+ return (B_TRUE);
+
+ return (B_FALSE);
+}
+
static void dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
dnode_phys_t *dnp, dsl_dataset_t *ds, dsl_scan_t *scn,
dmu_objset_type_t ostype, dmu_tx_t *tx);
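
Scrub pause/resume arrives as a conventional open-context check plus
syncing-context apply, driven through dsl_sync_task(): pausing sets
DSF_SCRUB_PAUSED in scn_flags and records gethrestime_sec() so 'zpool
status' can subtract paused wall-clock time from the pass rate; resuming
accumulates the paused interval and clears the flag. Invoking the pair, as
dsl_scrub_set_pause_resume() does above:

	pool_scrub_cmd_t cmd = POOL_SCRUB_PAUSE;
	int err = dsl_sync_task(spa_name(dp->dp_spa),
	    dsl_scrub_pause_resume_check,	/* validate, open context */
	    dsl_scrub_pause_resume_sync,	/* apply, syncing context */
	    &cmd, 3, ZFS_SPACE_CHECK_RESERVED);
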
@@ -432,7 +536,7 @@ dsl_scan_sync_state(dsl_scan_t *scn, dmu_tx_t *tx)
extern int zfs_vdev_async_write_active_min_dirty_percent;
static boolean_t
-dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
+dsl_scan_check_suspend(dsl_scan_t *scn, const zbookmark_phys_t *zb)
{
uint64_t elapsed_nanosecs;
int mintime;
@@ -442,8 +546,8 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
if (zb && (int64_t)zb->zb_object < 0)
return (B_FALSE);
- if (scn->scn_pausing)
- return (B_TRUE); /* we're already pausing */
+ if (scn->scn_suspending)
+ return (B_TRUE); /* we're already suspending */
if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark))
return (B_FALSE); /* we're resuming */
@@ -453,7 +557,7 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
return (B_FALSE);
/*
- * We pause if:
+ * We suspend if:
* - we have scanned for the maximum time: an entire txg
* timeout (default 5 sec)
* or
@@ -476,19 +580,19 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_phys_t *zb)
dirty_pct >= zfs_vdev_async_write_active_min_dirty_percent)) ||
spa_shutting_down(scn->scn_dp->dp_spa)) {
if (zb) {
- dprintf("pausing at bookmark %llx/%llx/%llx/%llx\n",
+ dprintf("suspending at bookmark %llx/%llx/%llx/%llx\n",
(longlong_t)zb->zb_objset,
(longlong_t)zb->zb_object,
(longlong_t)zb->zb_level,
(longlong_t)zb->zb_blkid);
scn->scn_phys.scn_bookmark = *zb;
}
- dprintf("pausing at DDT bookmark %llx/%llx/%llx/%llx\n",
+ dprintf("suspending at DDT bookmark %llx/%llx/%llx/%llx\n",
(longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_class,
(longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_type,
(longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_checksum,
(longlong_t)scn->scn_phys.scn_ddt_bookmark.ddb_cursor);
- scn->scn_pausing = B_TRUE;
+ scn->scn_suspending = B_TRUE;
return (B_TRUE);
}
return (B_FALSE);
@@ -619,13 +723,14 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp,
* If we already visited this bp & everything below (in
* a prior txg sync), don't bother doing it again.
*/
- if (zbookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark))
+ if (zbookmark_subtree_completed(dnp, zb,
+ &scn->scn_phys.scn_bookmark))
return (B_TRUE);
/*
* If we found the block we're trying to resume from, or
* we went past it to a different object, zero it out to
- * indicate that it's OK to start checking for pausing
+ * indicate that it's OK to start checking for suspending
* again.
*/
if (bcmp(zb, &scn->scn_phys.scn_bookmark, sizeof (*zb)) == 0 ||
@@ -680,7 +785,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
dsl_scan_visitbp(cbp, &czb, dnp,
ds, scn, ostype, tx);
}
- (void) arc_buf_remove_ref(buf, &buf);
+ arc_buf_destroy(buf, &buf);
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
arc_flags_t flags = ARC_FLAG_WAIT;
dnode_phys_t *cdnp;
@@ -694,19 +799,23 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
scn->scn_phys.scn_errors++;
return (err);
}
- for (i = 0, cdnp = buf->b_data; i < epb; i++, cdnp++) {
+ for (i = 0, cdnp = buf->b_data; i < epb;
+ i += cdnp->dn_extra_slots + 1,
+ cdnp += cdnp->dn_extra_slots + 1) {
for (j = 0; j < cdnp->dn_nblkptr; j++) {
blkptr_t *cbp = &cdnp->dn_blkptr[j];
dsl_scan_prefetch(scn, buf, cbp,
zb->zb_objset, zb->zb_blkid * epb + i, j);
}
}
- for (i = 0, cdnp = buf->b_data; i < epb; i++, cdnp++) {
+ for (i = 0, cdnp = buf->b_data; i < epb;
+ i += cdnp->dn_extra_slots + 1,
+ cdnp += cdnp->dn_extra_slots + 1) {
dsl_scan_visitdnode(scn, ds, ostype,
cdnp, zb->zb_blkid * epb + i, tx);
}
- (void) arc_buf_remove_ref(buf, &buf);
+ arc_buf_destroy(buf, &buf);
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
arc_flags_t flags = ARC_FLAG_WAIT;
objset_phys_t *osp;
@@ -728,7 +837,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
/*
* We also always visit user/group accounting
* objects, and never skip them, even if we are
- * pausing. This is necessary so that the space
+ * suspending. This is necessary so that the space
* deltas from this txg get integrated.
*/
dsl_scan_visitdnode(scn, ds, osp->os_type,
@@ -738,7 +847,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
&osp->os_userused_dnode,
DMU_USERUSED_OBJECT, tx);
}
- (void) arc_buf_remove_ref(buf, &buf);
+ arc_buf_destroy(buf, &buf);
}
return (0);
@@ -764,7 +873,7 @@ dsl_scan_visitdnode(dsl_scan_t *scn, dsl_dataset_t *ds,
zbookmark_phys_t czb;
SET_BOOKMARK(&czb, ds ? ds->ds_object : 0, object,
0, DMU_SPILL_BLKID);
- dsl_scan_visitbp(&dnp->dn_spill,
+ dsl_scan_visitbp(DN_SPILL_BLKPTR(dnp),
&czb, dnp, ds, scn, ostype, tx);
}
}
@@ -786,7 +895,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
/* ASSERT(pbuf == NULL || arc_released(pbuf)); */
- if (dsl_scan_check_pause(scn, zb))
+ if (dsl_scan_check_suspend(scn, zb))
goto out;
if (dsl_scan_check_resume(scn, dnp, zb))
@@ -817,7 +926,7 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
goto out;
/*
- * If dsl_scan_ddt() has aready visited this block, it will have
+ * If dsl_scan_ddt() has already visited this block, it will have
* already done any translations or scrubbing, so don't call the
* callback again.
*/
@@ -866,7 +975,16 @@ dsl_scan_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx)
if (scn->scn_phys.scn_bookmark.zb_objset == ds->ds_object) {
if (ds->ds_is_snapshot) {
- /* Note, scn_cur_{min,max}_txg stays the same. */
+ /*
+ * Note:
+ * - scn_cur_{min,max}_txg stays the same.
+ * - Setting the flag is not really necessary if
+ * scn_cur_max_txg == scn_max_txg, because there
+ * is nothing after this snapshot that we care
+ * about. However, we set it anyway and then
+ * ignore it when we retraverse it in
+ * dsl_scan_visitds().
+ */
scn->scn_phys.scn_bookmark.zb_objset =
dsl_dataset_phys(ds)->ds_next_snap_obj;
zfs_dbgmsg("destroying ds %llu; currently traversing; "
@@ -906,9 +1024,6 @@ dsl_scan_ds_destroyed(dsl_dataset_t *ds, dmu_tx_t *tx)
zfs_dbgmsg("destroying ds %llu; in queue; removing",
(u_longlong_t)ds->ds_object);
}
- } else {
- zfs_dbgmsg("destroying ds %llu; ignoring",
- (u_longlong_t)ds->ds_object);
}
/*
@@ -1062,6 +1177,46 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
VERIFY3U(0, ==, dsl_dataset_hold_obj(dp, dsobj, FTAG, &ds));
+ if (scn->scn_phys.scn_cur_min_txg >=
+ scn->scn_phys.scn_max_txg) {
+ /*
+ * This can happen if this snapshot was created after the
+ * scan started, and we already completed a previous snapshot
+ * that was created after the scan started. This snapshot
+ * only references blocks with:
+ *
+ * birth < our ds_creation_txg
+ * cur_min_txg is no less than ds_creation_txg.
+ * We have already visited these blocks.
+ * or
+ * birth > scn_max_txg
+ * The scan requested not to visit these blocks.
+ *
+ * Subsequent snapshots (and clones) can reference our
+ * blocks, or blocks with even higher birth times.
+ * Therefore we do not need to visit them either,
+ * so we do not add them to the work queue.
+ *
+ * Note that checking for cur_min_txg >= cur_max_txg
+ * is not sufficient, because in that case we may need to
+ * visit subsequent snapshots. This happens when min_txg > 0,
+ * which raises cur_min_txg. In this case we will visit
+ * this dataset but skip all of its blocks, because the
+ * rootbp's birth time is < cur_min_txg. Then we will
+ * add the next snapshots/clones to the work queue.
+ */
+ char *dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+ dsl_dataset_name(ds, dsname);
+ zfs_dbgmsg("scanning dataset %llu (%s) is unnecessary because "
+ "cur_min_txg (%llu) >= max_txg (%llu)",
+ dsobj, dsname,
+ scn->scn_phys.scn_cur_min_txg,
+ scn->scn_phys.scn_max_txg);
+ kmem_free(dsname, MAXNAMELEN);
+
+ goto out;
+ }
+
if (dmu_objset_from_ds(ds, &os))
goto out;
@@ -1079,19 +1234,21 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
* Iterate over the bps in this ds.
*/
dmu_buf_will_dirty(ds->ds_dbuf, tx);
+ rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
dsl_scan_visit_rootbp(scn, ds, &dsl_dataset_phys(ds)->ds_bp, tx);
+ rrw_exit(&ds->ds_bp_rwlock, FTAG);
- dsname = kmem_alloc(ZFS_MAXNAMELEN, KM_SLEEP);
+ dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
dsl_dataset_name(ds, dsname);
zfs_dbgmsg("scanned dataset %llu (%s) with min=%llu max=%llu; "
- "pausing=%u",
+ "suspending=%u",
(longlong_t)dsobj, dsname,
(longlong_t)scn->scn_phys.scn_cur_min_txg,
(longlong_t)scn->scn_phys.scn_cur_max_txg,
- (int)scn->scn_pausing);
- kmem_free(dsname, ZFS_MAXNAMELEN);
+ (int)scn->scn_suspending);
+ kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN);
- if (scn->scn_pausing)
+ if (scn->scn_suspending)
goto out;
/*
@@ -1257,13 +1414,13 @@ dsl_scan_ddt(dsl_scan_t *scn, dmu_tx_t *tx)
dsl_scan_ddt_entry(scn, ddb->ddb_checksum, &dde, tx);
n++;
- if (dsl_scan_check_pause(scn, NULL))
+ if (dsl_scan_check_suspend(scn, NULL))
break;
}
- zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; pausing=%u",
- (longlong_t)n, (int)scn->scn_phys.scn_ddt_class_max,
- (int)scn->scn_pausing);
+ zfs_dbgmsg("scanned %llu ddt entries with class_max = %u; "
+ "suspending=%u", (longlong_t)n,
+ (int)scn->scn_phys.scn_ddt_class_max, (int)scn->scn_suspending);
ASSERT(error == 0 || error == ENOENT);
ASSERT(error != ENOENT ||
@@ -1307,7 +1464,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
scn->scn_phys.scn_cur_min_txg = scn->scn_phys.scn_min_txg;
scn->scn_phys.scn_cur_max_txg = scn->scn_phys.scn_max_txg;
dsl_scan_ddt(scn, tx);
- if (scn->scn_pausing)
+ if (scn->scn_suspending)
return;
}
@@ -1319,7 +1476,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
dsl_scan_visit_rootbp(scn, NULL,
&dp->dp_meta_rootbp, tx);
spa_set_rootblkptr(dp->dp_spa, &dp->dp_meta_rootbp);
- if (scn->scn_pausing)
+ if (scn->scn_suspending)
return;
if (spa_version(dp->dp_spa) < SPA_VERSION_DSL_SCRUB) {
@@ -1329,22 +1486,22 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
dsl_scan_visitds(scn,
dp->dp_origin_snap->ds_object, tx);
}
- ASSERT(!scn->scn_pausing);
+ ASSERT(!scn->scn_suspending);
} else if (scn->scn_phys.scn_bookmark.zb_objset !=
ZB_DESTROYED_OBJSET) {
/*
- * If we were paused, continue from here. Note if the
- * ds we were paused on was deleted, the zb_objset may
+ * If we were suspended, continue from here. Note if the
+ * ds we were suspended on was deleted, the zb_objset may
* be -1, so we will skip this and find a new objset
* below.
*/
dsl_scan_visitds(scn, scn->scn_phys.scn_bookmark.zb_objset, tx);
- if (scn->scn_pausing)
+ if (scn->scn_suspending)
return;
}
/*
- * In case we were paused right at the end of the ds, zero the
+ * In case we were suspended right at the end of the ds, zero the
* bookmark so we don't think that we're still trying to resume.
*/
bzero(&scn->scn_phys.scn_bookmark, sizeof (zbookmark_phys_t));
@@ -1358,7 +1515,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
dsl_dataset_t *ds;
uint64_t dsobj;
- dsobj = strtonum(za->za_name, NULL);
+ dsobj = zfs_strtonum(za->za_name, NULL);
VERIFY3U(0, ==, zap_remove_int(dp->dp_meta_objset,
scn->scn_phys.scn_queue_obj, dsobj, tx));
@@ -1378,7 +1535,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
dsl_scan_visitds(scn, dsobj, tx);
zap_cursor_fini(zc);
- if (scn->scn_pausing)
+ if (scn->scn_suspending)
goto out;
}
zap_cursor_fini(zc);
@@ -1388,7 +1545,7 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
}
static boolean_t
-dsl_scan_free_should_pause(dsl_scan_t *scn)
+dsl_scan_free_should_suspend(dsl_scan_t *scn)
{
uint64_t elapsed_nanosecs;
@@ -1412,7 +1569,7 @@ dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
if (!scn->scn_is_bptree ||
(BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) {
- if (dsl_scan_free_should_pause(scn))
+ if (dsl_scan_free_should_suspend(scn))
return (SET_ERROR(ERESTART));
}
@@ -1435,7 +1592,8 @@ dsl_scan_active(dsl_scan_t *scn)
return (B_FALSE);
if (spa_shutting_down(spa))
return (B_FALSE);
- if (scn->scn_phys.scn_state == DSS_SCANNING ||
+ if ((scn->scn_phys.scn_state == DSS_SCANNING &&
+ !dsl_scan_is_paused_scrub(scn)) ||
(scn->scn_async_destroying && !scn->scn_async_stalled))
return (B_TRUE);
@@ -1446,6 +1604,7 @@ dsl_scan_active(dsl_scan_t *scn)
return (used != 0);
}
+/* Called whenever a txg syncs. */
void
dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
{
@@ -1458,8 +1617,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
* that we can restart an old-style scan while the pool is being
* imported (see dsl_scan_init).
*/
- if (scn->scn_restart_txg != 0 &&
- scn->scn_restart_txg <= tx->tx_txg) {
+ if (dsl_scan_restarting(scn, tx)) {
pool_scan_func_t func = POOL_SCAN_SCRUB;
dsl_scan_done(scn, B_FALSE, tx);
if (vdev_resilver_needed(spa->spa_root_vdev, NULL, NULL))
@@ -1469,26 +1627,40 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
dsl_scan_setup_sync(&func, tx);
}
+ /*
+ * Only process scans in sync pass 1.
+ */
+ if (spa_sync_pass(dp->dp_spa) > 1)
+ return;
+
+ /*
+ * If the spa is shutting down, then stop scanning. This will
+ * ensure that the scan does not dirty any new data during the
+ * shutdown phase.
+ */
+ if (spa_shutting_down(spa))
+ return;
+
/*
* If the scan is inactive due to a stalled async destroy, try again.
*/
- if ((!scn->scn_async_stalled && !dsl_scan_active(scn)) ||
- spa_sync_pass(dp->dp_spa) > 1)
+ if (!scn->scn_async_stalled && !dsl_scan_active(scn))
return;
scn->scn_visited_this_txg = 0;
- scn->scn_pausing = B_FALSE;
+ scn->scn_suspending = B_FALSE;
scn->scn_sync_start_time = gethrtime();
spa->spa_scrub_active = B_TRUE;
/*
- * First process the async destroys. If we pause, don't do
+ * First process the async destroys. If we suspend, don't do
* any scrubbing or resilvering. This ensures that there are no
* async destroys while we are scanning, so the scan code doesn't
* have to worry about traversing it. It is also faster to free the
* blocks than to scrub them.
*/
- if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
+ if (zfs_free_bpobj_enabled &&
+ spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
scn->scn_is_bptree = B_FALSE;
scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
NULL, ZIO_FLAG_MUSTSUCCEED);
@@ -1560,7 +1732,8 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
}
if (err != 0)
return;
- if (!scn->scn_async_destroying && zfs_free_leak_on_eio &&
+ if (dp->dp_free_dir != NULL && !scn->scn_async_destroying &&
+ zfs_free_leak_on_eio &&
(dsl_dir_phys(dp->dp_free_dir)->dd_used_bytes != 0 ||
dsl_dir_phys(dp->dp_free_dir)->dd_compressed_bytes != 0 ||
dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes != 0)) {
@@ -1586,7 +1759,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
-dsl_dir_phys(dp->dp_free_dir)->dd_compressed_bytes,
-dsl_dir_phys(dp->dp_free_dir)->dd_uncompressed_bytes, tx);
}
- if (!scn->scn_async_destroying) {
+ if (dp->dp_free_dir != NULL && !scn->scn_async_destroying) {
/* finished; verify that space accounting went to zero */
ASSERT0(dsl_dir_phys(dp->dp_free_dir)->dd_used_bytes);
ASSERT0(dsl_dir_phys(dp->dp_free_dir)->dd_compressed_bytes);
@@ -1597,7 +1770,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
return;
if (scn->scn_done_txg == tx->tx_txg) {
- ASSERT(!scn->scn_pausing);
+ ASSERT(!scn->scn_suspending);
/* finished with scan. */
zfs_dbgmsg("txg %llu scan complete", tx->tx_txg);
dsl_scan_done(scn, B_TRUE, tx);
@@ -1606,6 +1779,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
return;
}
+ if (dsl_scan_is_paused_scrub(scn))
+ return;
+
if (scn->scn_phys.scn_ddt_bookmark.ddb_class <=
scn->scn_phys.scn_ddt_class_max) {
zfs_dbgmsg("doing scan sync txg %llu; "
@@ -1640,7 +1816,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
(longlong_t)scn->scn_visited_this_txg,
(longlong_t)NSEC2MSEC(gethrtime() - scn->scn_sync_start_time));
- if (!scn->scn_pausing) {
+ if (!scn->scn_suspending) {
scn->scn_done_txg = tx->tx_txg + 1;
zfs_dbgmsg("txg %llu traversal complete, waiting till txg %llu",
tx->tx_txg, scn->scn_done_txg);
@@ -1744,7 +1920,7 @@ dsl_scan_scrub_done(zio_t *zio)
{
spa_t *spa = zio->io_spa;
- zio_data_buf_free(zio->io_data, zio->io_size);
+ abd_free(zio->io_abd);
mutex_enter(&spa->spa_scrub_lock);
spa->spa_scrub_inflight--;
@@ -1757,12 +1933,51 @@ dsl_scan_scrub_done(zio_t *zio)
mutex_exit(&spa->spa_scrub_lock);
}
+static boolean_t
+dsl_scan_need_resilver(spa_t *spa, const dva_t *dva, size_t psize,
+ uint64_t phys_birth)
+{
+ vdev_t *vd;
+
+ if (DVA_GET_GANG(dva)) {
+ /*
+ * Gang members may be spread across multiple
+ * vdevs, so the best estimate we have is the
+ * scrub range, which has already been checked.
+ * XXX -- it would be better to change our
+ * allocation policy to ensure that all
+ * gang members reside on the same vdev.
+ */
+ return (B_TRUE);
+ }
+
+ vd = vdev_lookup_top(spa, DVA_GET_VDEV(dva));
+
+ /*
+ * Check if the txg falls within the range which must be
+ * resilvered. DVAs outside this range can always be skipped.
+ */
+ if (!vdev_dtl_contains(vd, DTL_PARTIAL, phys_birth, 1))
+ return (B_FALSE);
+
+ /*
+ * Check if the top-level vdev must resilver this offset.
+ * When the offset does not intersect with a dirty leaf DTL
+ * then it may be possible to skip the resilver IO. The psize
+ * is provided instead of asize to simplify the check for RAIDZ.
+ */
+ if (!vdev_dtl_need_resilver(vd, DVA_GET_OFFSET(dva), psize))
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
static int
dsl_scan_scrub_cb(dsl_pool_t *dp,
const blkptr_t *bp, const zbookmark_phys_t *zb)
{
dsl_scan_t *scn = dp->dp_scan;
- size_t size = BP_GET_PSIZE(bp);
+ size_t psize = BP_GET_PSIZE(bp);
spa_t *spa = dp->dp_spa;
uint64_t phys_birth = BP_PHYSICAL_BIRTH(bp);
boolean_t needs_io = B_FALSE;
@@ -1796,39 +2011,24 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
zio_flags |= ZIO_FLAG_SPECULATIVE;
for (d = 0; d < BP_GET_NDVAS(bp); d++) {
- vdev_t *vd = vdev_lookup_top(spa,
- DVA_GET_VDEV(&bp->blk_dva[d]));
+ const dva_t *dva = &bp->blk_dva[d];
/*
* Keep track of how much data we've examined so that
* zpool(1M) status can make useful progress reports.
*/
- scn->scn_phys.scn_examined += DVA_GET_ASIZE(&bp->blk_dva[d]);
- spa->spa_scan_pass_exam += DVA_GET_ASIZE(&bp->blk_dva[d]);
+ scn->scn_phys.scn_examined += DVA_GET_ASIZE(dva);
+ spa->spa_scan_pass_exam += DVA_GET_ASIZE(dva);
/* if it's a resilver, this may not be in the target range */
- if (!needs_io) {
- if (DVA_GET_GANG(&bp->blk_dva[d])) {
- /*
- * Gang members may be spread across multiple
- * vdevs, so the best estimate we have is the
- * scrub range, which has already been checked.
- * XXX -- it would be better to change our
- * allocation policy to ensure that all
- * gang members reside on the same vdev.
- */
- needs_io = B_TRUE;
- } else {
- needs_io = vdev_dtl_contains(vd, DTL_PARTIAL,
- phys_birth, 1);
- }
- }
+ if (!needs_io)
+ needs_io = dsl_scan_need_resilver(spa, dva, psize,
+ phys_birth);
}
if (needs_io && !zfs_no_scrub_io) {
vdev_t *rvd = spa->spa_root_vdev;
uint64_t maxinflight = rvd->vdev_children * zfs_top_maxinflight;
- void *data = zio_data_buf_alloc(size);
mutex_enter(&spa->spa_scrub_lock);
while (spa->spa_scrub_inflight >= maxinflight)
@@ -1843,19 +2043,25 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
if (ddi_get_lbolt64() - spa->spa_last_io <= zfs_scan_idle)
delay(scan_delay);
- zio_nowait(zio_read(NULL, spa, bp, data, size,
- dsl_scan_scrub_done, NULL, ZIO_PRIORITY_SCRUB,
- zio_flags, zb));
+ zio_nowait(zio_read(NULL, spa, bp,
+ abd_alloc_for_io(psize, B_FALSE),
+ psize, dsl_scan_scrub_done, NULL,
+ ZIO_PRIORITY_SCRUB, zio_flags, zb));
}
/* do not relocate this block */
return (0);
}
+/*
+ * Called by the ZFS_IOC_POOL_SCAN ioctl to start a scrub or resilver.
+ * Can also be called to resume a paused scrub.
+ */
int
dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
{
spa_t *spa = dp->dp_spa;
+ dsl_scan_t *scn = dp->dp_scan;
/*
* Purge all vdev caches and probe all devices. We do this here
@@ -1870,10 +2076,27 @@ dsl_scan(dsl_pool_t *dp, pool_scan_func_t func)
spa->spa_scrub_reopen = B_FALSE;
(void) spa_vdev_state_exit(spa, NULL, 0);
+ if (func == POOL_SCAN_SCRUB && dsl_scan_is_paused_scrub(scn)) {
+ /* got scrub start cmd, resume paused scrub */
+ int err = dsl_scrub_set_pause_resume(scn->scn_dp,
+ POOL_SCRUB_NORMAL);
+ if (err == 0)
+ return (ECANCELED);
+
+ return (SET_ERROR(err));
+ }
+
return (dsl_sync_task(spa_name(spa), dsl_scan_setup_check,
dsl_scan_setup_sync, &func, 0, ZFS_SPACE_CHECK_NONE));
}
+static boolean_t
+dsl_scan_restarting(dsl_scan_t *scn, dmu_tx_t *tx)
+{
+ return (scn->scn_restart_txg != 0 &&
+ scn->scn_restart_txg <= tx->tx_txg);
+}
+
#if defined(_KERNEL) && defined(HAVE_SPL)
module_param(zfs_top_maxinflight, int, 0644);
MODULE_PARM_DESC(zfs_top_maxinflight, "Max I/Os per top-level");
@@ -1902,6 +2125,10 @@ MODULE_PARM_DESC(zfs_no_scrub_io, "Set to disable scrub I/O");
module_param(zfs_no_scrub_prefetch, int, 0644);
MODULE_PARM_DESC(zfs_no_scrub_prefetch, "Set to disable scrub prefetching");
+/* CSTYLED */
module_param(zfs_free_max_blocks, ulong, 0644);
MODULE_PARM_DESC(zfs_free_max_blocks, "Max number of blocks freed in one txg");
+
+module_param(zfs_free_bpobj_enabled, int, 0644);
+MODULE_PARM_DESC(zfs_free_bpobj_enabled, "Enable processing of the free_bpobj");
#endif
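
As an aside, the per-DVA skip decision that dsl_scan_need_resilver() introduces above reduces to three ordered checks: gang blocks always resilver, then the block's birth txg must fall inside a dirty DTL range, then the offset must intersect a dirty leaf DTL. A minimal standalone sketch of that control flow, with invented stand-ins (toy_is_gang, toy_dtl_contains and toy_dtl_need_resilver are hypothetical; the real code calls DVA_GET_GANG(), vdev_dtl_contains() and vdev_dtl_need_resilver() on ZFS types):

    #include <stdbool.h>
    #include <stdio.h>

    /*
     * Hypothetical stand-ins for DVA_GET_GANG(), vdev_dtl_contains() and
     * vdev_dtl_need_resilver(); the real calls operate on ZFS types.
     */
    static bool toy_is_gang(int dva)            { return (dva == 0); }
    static bool toy_dtl_contains(long birth)    { return (birth >= 100 && birth < 200); }
    static bool toy_dtl_need_resilver(long off) { return ((off % 2) == 0); }

    /*
     * Mirrors the ordering above: gang blocks always resilver; otherwise
     * skip blocks born outside the dirty txg range, then skip offsets
     * that miss every dirty leaf DTL.
     */
    static bool toy_need_resilver(int dva, long birth, long off)
    {
        if (toy_is_gang(dva))
            return (true);
        if (!toy_dtl_contains(birth))
            return (false);
        if (!toy_dtl_need_resilver(off))
            return (false);
        return (true);
    }

    int main(void)
    {
        printf("%d\n", toy_need_resilver(1, 150, 4)); /* 1: dirty txg, dirty leaf */
        printf("%d\n", toy_need_resilver(1, 50, 4));  /* 0: birth txg already clean */
        return (0);
    }

The cheap range check runs first so that most clean blocks never reach the per-leaf DTL walk.
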
diff --git a/zfs/module/zfs/dsl_userhold.c b/zfs/module/zfs/dsl_userhold.c
index 1b234ed480f9..583fbfe47fcc 100644
--- a/zfs/module/zfs/dsl_userhold.c
+++ b/zfs/module/zfs/dsl_userhold.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
*/
@@ -181,7 +181,7 @@ dsl_dataset_user_hold_sync_one_impl(nvlist_t *tmpholds, dsl_dataset_t *ds,
}
typedef struct zfs_hold_cleanup_arg {
- char zhca_spaname[MAXNAMELEN];
+ char zhca_spaname[ZFS_MAX_DATASET_NAME_LEN];
uint64_t zhca_spa_load_guid;
nvlist_t *zhca_holds;
} zfs_hold_cleanup_arg_t;
@@ -342,7 +342,7 @@ static int
dsl_dataset_hold_obj_string(dsl_pool_t *dp, const char *dsobj, void *tag,
dsl_dataset_t **dsp)
{
- return (dsl_dataset_hold_obj(dp, strtonum(dsobj, NULL), tag, dsp));
+ return (dsl_dataset_hold_obj(dp, zfs_strtonum(dsobj, NULL), tag, dsp));
}
static int
@@ -580,7 +580,7 @@ dsl_dataset_user_release_impl(nvlist_t *holds, nvlist_t *errlist,
error = dsl_dataset_hold_obj_string(tmpdp,
nvpair_name(pair), FTAG, &ds);
if (error == 0) {
- char name[MAXNAMELEN];
+ char name[ZFS_MAX_DATASET_NAME_LEN];
dsl_dataset_name(ds, name);
dsl_pool_config_exit(tmpdp, FTAG);
dsl_dataset_rele(ds, FTAG);
diff --git a/zfs/module/zfs/edonr_zfs.c b/zfs/module/zfs/edonr_zfs.c
new file mode 100644
index 000000000000..e92da6d6c1d4
--- /dev/null
+++ b/zfs/module/zfs/edonr_zfs.c
@@ -0,0 +1,115 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ * Use is subject to license terms.
+ */
+/*
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ */
+#include <sys/zfs_context.h>
+#include <sys/zio.h>
+#include <sys/edonr.h>
+#include <sys/zfs_context.h> /* For CTASSERT() */
+#include <sys/abd.h>
+
+#define EDONR_MODE 512
+#define EDONR_BLOCK_SIZE EdonR512_BLOCK_SIZE
+
+static int
+edonr_incremental(void *buf, size_t size, void *arg)
+{
+ EdonRState *ctx = arg;
+ EdonRUpdate(ctx, buf, size * 8);
+ return (0);
+}
+
+/*
+ * Native zio_checksum interface for the Edon-R hash function.
+ */
+/*ARGSUSED*/
+void
+abd_checksum_edonr_native(abd_t *abd, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp)
+{
+ uint8_t digest[EDONR_MODE / 8];
+ EdonRState ctx;
+
+ ASSERT(ctx_template != NULL);
+ bcopy(ctx_template, &ctx, sizeof (ctx));
+ (void) abd_iterate_func(abd, 0, size, edonr_incremental, &ctx);
+ EdonRFinal(&ctx, digest);
+ bcopy(digest, zcp->zc_word, sizeof (zcp->zc_word));
+}
+
+/*
+ * Byteswapped zio_checksum interface for the Edon-R hash function.
+ */
+void
+abd_checksum_edonr_byteswap(abd_t *abd, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp)
+{
+ zio_cksum_t tmp;
+
+ abd_checksum_edonr_native(abd, size, ctx_template, &tmp);
+ zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]);
+ zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]);
+ zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]);
+ zcp->zc_word[3] = BSWAP_64(tmp.zc_word[3]);
+}
+
+void *
+abd_checksum_edonr_tmpl_init(const zio_cksum_salt_t *salt)
+{
+ EdonRState *ctx;
+ uint8_t salt_block[EDONR_BLOCK_SIZE];
+
+ /*
+ * Edon-R needs all but the last hash invocation to be on full-size
+ * blocks, but the salt is too small. Rather than simply padding it
+ * with zeros, we expand the salt into a new salt block of proper
+ * size by double-hashing it (the new salt block will be composed of
+ * H(salt) || H(H(salt))).
+ */
+ CTASSERT(EDONR_BLOCK_SIZE == 2 * (EDONR_MODE / 8));
+ EdonRHash(EDONR_MODE, salt->zcs_bytes, sizeof (salt->zcs_bytes) * 8,
+ salt_block);
+ EdonRHash(EDONR_MODE, salt_block, EDONR_MODE, salt_block +
+ EDONR_MODE / 8);
+
+ /*
+ * Feed the new salt block into the hash function - this will serve
+ * as our MAC key.
+ */
+ ctx = kmem_zalloc(sizeof (*ctx), KM_SLEEP);
+ EdonRInit(ctx, EDONR_MODE);
+ EdonRUpdate(ctx, salt_block, sizeof (salt_block) * 8);
+ return (ctx);
+}
+
+void
+abd_checksum_edonr_tmpl_free(void *ctx_template)
+{
+ EdonRState *ctx = ctx_template;
+
+ bzero(ctx, sizeof (*ctx));
+ kmem_free(ctx, sizeof (*ctx));
+}
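
The salt expansion in abd_checksum_edonr_tmpl_init() above builds one full-size hash block as H(salt) || H(H(salt)). A minimal sketch of the same construction, assuming an invented stand-in hash (toy_hash is purely illustrative; the real code uses EdonRHash() in 512-bit mode, so the digest is 64 bytes and the block 128):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define TOY_DIGEST_LEN  8                    /* Edon-R/512 digests are 64 bytes */
    #define TOY_BLOCK_LEN   (2 * TOY_DIGEST_LEN) /* one full-size hash block */

    /* Trivial stand-in hash (xor-fold); not Edon-R. */
    static void toy_hash(const uint8_t *in, size_t len, uint8_t *out)
    {
        size_t i;

        memset(out, 0, TOY_DIGEST_LEN);
        for (i = 0; i < len; i++)
            out[i % TOY_DIGEST_LEN] ^= in[i];
    }

    int main(void)
    {
        uint8_t salt[4] = { 1, 2, 3, 4 };
        uint8_t block[TOY_BLOCK_LEN];

        /* block = H(salt) || H(H(salt)), exactly one full-size block */
        toy_hash(salt, sizeof (salt), block);
        toy_hash(block, TOY_DIGEST_LEN, block + TOY_DIGEST_LEN);

        printf("halves start with %u and %u\n",
            (unsigned)block[0], (unsigned)block[TOY_DIGEST_LEN]);
        return (0);
    }

Double-hashing rather than zero-padding keeps every bit of the salt block dependent on the salt itself.
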
diff --git a/zfs/module/zfs/fm.c b/zfs/module/zfs/fm.c
index 999bd8adc518..f6ae1628480d 100644
--- a/zfs/module/zfs/fm.c
+++ b/zfs/module/zfs/fm.c
@@ -84,6 +84,9 @@ static int zevent_len_cur = 0;
static int zevent_waiters = 0;
static int zevent_flags = 0;
+/* Num events rate limited since the last time zfs_zevent_next() was called */
+static uint64_t ratelimit_dropped = 0;
+
/*
* The EID (Event IDentifier) is used to uniquely tag a zevent when it is
* posted. The posted EIDs are monotonically increasing but not persistent.
@@ -97,7 +100,6 @@ static list_t zevent_list;
static kcondvar_t zevent_cv;
#endif /* _KERNEL */
-extern void fastreboot_disable_highpil(void);
/*
* Common fault management kstats to record event generation failures
@@ -154,7 +156,7 @@ fm_printf(int depth, int c, int cols, const char *format, ...)
}
/*
- * Recursively print a nvlist in the specified column width and return the
+ * Recursively print an nvlist in the specified column width and return the
* column we end up in. This function is called recursively by fm_nvprint(),
* below. We generically format the entire nvpair using hexadecimal
* integers and strings, and elide any integer arrays. Arrays are basically
@@ -427,11 +429,9 @@ zfs_zevent_alloc(void)
zevent_t *ev;
ev = kmem_zalloc(sizeof (zevent_t), KM_SLEEP);
- if (ev == NULL)
- return (NULL);
list_create(&ev->ev_ze_list, sizeof (zfs_zevent_t),
- offsetof(zfs_zevent_t, ze_node));
+ offsetof(zfs_zevent_t, ze_node));
list_link_init(&ev->ev_node);
return (ev);
@@ -523,25 +523,25 @@ zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
error = nvlist_add_int64_array(nvl, FM_EREPORT_TIME, tv_array, 2);
if (error) {
- atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
goto out;
}
eid = atomic_inc_64_nv(&zevent_eid);
error = nvlist_add_uint64(nvl, FM_EREPORT_EID, eid);
if (error) {
- atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
goto out;
}
error = nvlist_size(nvl, &nvl_size, NV_ENCODE_NATIVE);
if (error) {
- atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
goto out;
}
if (nvl_size > ERPT_DATA_SZ || nvl_size == 0) {
- atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
error = EOVERFLOW;
goto out;
}
@@ -551,7 +551,7 @@ zfs_zevent_post(nvlist_t *nvl, nvlist_t *detector, zevent_cb_t *cb)
ev = zfs_zevent_alloc();
if (ev == NULL) {
- atomic_add_64(&erpt_kstat_data.erpt_dropped.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.erpt_dropped.value.ui64);
error = ENOMEM;
goto out;
}
@@ -655,8 +655,14 @@ zfs_zevent_next(zfs_zevent_t *ze, nvlist_t **event, uint64_t *event_size,
ze->ze_zevent = ev;
list_insert_head(&ev->ev_ze_list, ze);
- nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
+ (void) nvlist_dup(ev->ev_nvl, event, KM_SLEEP);
*dropped = ze->ze_dropped;
+
+#ifdef _KERNEL
+ /* Include events dropped due to rate limiting */
+ *dropped += ratelimit_dropped;
+ ratelimit_dropped = 0;
+#endif
ze->ze_dropped = 0;
out:
mutex_exit(&zevent_lock);
@@ -1025,8 +1031,7 @@ fm_payload_set(nvlist_t *payload, ...)
va_end(ap);
if (ret)
- atomic_add_64(
- &erpt_kstat_data.payload_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.payload_set_failed.value.ui64);
}
/*
@@ -1059,24 +1064,24 @@ fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
int ret;
if (version != FM_EREPORT_VERS0) {
- atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
return;
}
(void) snprintf(ereport_class, FM_MAX_CLASS, "%s.%s",
FM_EREPORT_CLASS, erpt_class);
if (nvlist_add_string(ereport, FM_CLASS, ereport_class) != 0) {
- atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
return;
}
if (nvlist_add_uint64(ereport, FM_EREPORT_ENA, ena)) {
- atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
}
if (nvlist_add_nvlist(ereport, FM_EREPORT_DETECTOR,
(nvlist_t *)detector) != 0) {
- atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
}
va_start(ap, detector);
@@ -1085,7 +1090,7 @@ fm_ereport_set(nvlist_t *ereport, int version, const char *erpt_class,
va_end(ap);
if (ret)
- atomic_add_64(&erpt_kstat_data.erpt_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.erpt_set_failed.value.ui64);
}
/*
@@ -1108,19 +1113,19 @@ static int
fm_fmri_hc_set_common(nvlist_t *fmri, int version, const nvlist_t *auth)
{
if (version != FM_HC_SCHEME_VERSION) {
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
return (0);
}
if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0 ||
nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_HC) != 0) {
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
return (0);
}
if (auth != NULL && nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
(nvlist_t *)auth) != 0) {
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
return (0);
}
@@ -1152,22 +1157,22 @@ fm_fmri_hc_set(nvlist_t *fmri, int version, const nvlist_t *auth,
pairs[i] = fm_nvlist_create(nva);
if (nvlist_add_string(pairs[i], FM_FMRI_HC_NAME, name) != 0 ||
nvlist_add_string(pairs[i], FM_FMRI_HC_ID, idstr) != 0) {
- atomic_add_64(
- &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(
+ &erpt_kstat_data.fmri_set_failed.value.ui64);
}
}
va_end(ap);
if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs, npairs) != 0)
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
for (i = 0; i < npairs; i++)
fm_nvlist_destroy(pairs[i], FM_NVA_RETAIN);
if (snvl != NULL) {
if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
- atomic_add_64(
- &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(
+ &erpt_kstat_data.fmri_set_failed.value.ui64);
}
}
}
@@ -1192,20 +1197,20 @@ fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
*/
if (nvlist_lookup_nvlist_array(bboard, FM_FMRI_HC_LIST, &hcl, &n)
!= 0) {
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
return;
}
for (i = 0; i < n; i++) {
if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_NAME,
&hcname) != 0) {
- atomic_add_64(
- &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(
+ &erpt_kstat_data.fmri_set_failed.value.ui64);
return;
}
if (nvlist_lookup_string(hcl[i], FM_FMRI_HC_ID, &hcid) != 0) {
- atomic_add_64(
- &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(
+ &erpt_kstat_data.fmri_set_failed.value.ui64);
return;
}
@@ -1217,8 +1222,8 @@ fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
fm_nvlist_destroy(pairs[j],
FM_NVA_RETAIN);
}
- atomic_add_64(
- &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(
+ &erpt_kstat_data.fmri_set_failed.value.ui64);
return;
}
}
@@ -1242,8 +1247,8 @@ fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
fm_nvlist_destroy(pairs[j],
FM_NVA_RETAIN);
}
- atomic_add_64(
- &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(
+ &erpt_kstat_data.fmri_set_failed.value.ui64);
return;
}
}
@@ -1254,7 +1259,7 @@ fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
*/
if (nvlist_add_nvlist_array(fmri, FM_FMRI_HC_LIST, pairs,
npairs + n) != 0) {
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
return;
}
@@ -1264,8 +1269,8 @@ fm_fmri_hc_create(nvlist_t *fmri, int version, const nvlist_t *auth,
if (snvl != NULL) {
if (nvlist_add_nvlist(fmri, FM_FMRI_HC_SPECIFIC, snvl) != 0) {
- atomic_add_64(
- &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(
+ &erpt_kstat_data.fmri_set_failed.value.ui64);
return;
}
}
@@ -1291,7 +1296,7 @@ fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
int err = 0;
if (version != DEV_SCHEME_VERSION0) {
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
return;
}
@@ -1312,7 +1317,7 @@ fm_fmri_dev_set(nvlist_t *fmri_dev, int version, const nvlist_t *auth,
err |= nvlist_add_string(fmri_dev, FM_FMRI_DEV_TGTPTLUN0, tpl0);
if (err)
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
}
@@ -1337,35 +1342,35 @@ fm_fmri_cpu_set(nvlist_t *fmri_cpu, int version, const nvlist_t *auth,
uint64_t *failedp = &erpt_kstat_data.fmri_set_failed.value.ui64;
if (version < CPU_SCHEME_VERSION1) {
- atomic_add_64(failedp, 1);
+ atomic_inc_64(failedp);
return;
}
if (nvlist_add_uint8(fmri_cpu, FM_VERSION, version) != 0) {
- atomic_add_64(failedp, 1);
+ atomic_inc_64(failedp);
return;
}
if (nvlist_add_string(fmri_cpu, FM_FMRI_SCHEME,
FM_FMRI_SCHEME_CPU) != 0) {
- atomic_add_64(failedp, 1);
+ atomic_inc_64(failedp);
return;
}
if (auth != NULL && nvlist_add_nvlist(fmri_cpu, FM_FMRI_AUTHORITY,
(nvlist_t *)auth) != 0)
- atomic_add_64(failedp, 1);
+ atomic_inc_64(failedp);
if (nvlist_add_uint32(fmri_cpu, FM_FMRI_CPU_ID, cpu_id) != 0)
- atomic_add_64(failedp, 1);
+ atomic_inc_64(failedp);
if (cpu_maskp != NULL && nvlist_add_uint8(fmri_cpu, FM_FMRI_CPU_MASK,
*cpu_maskp) != 0)
- atomic_add_64(failedp, 1);
+ atomic_inc_64(failedp);
if (serial_idp == NULL || nvlist_add_string(fmri_cpu,
FM_FMRI_CPU_SERIAL_ID, (char *)serial_idp) != 0)
- atomic_add_64(failedp, 1);
+ atomic_inc_64(failedp);
}
/*
@@ -1386,49 +1391,47 @@ fm_fmri_mem_set(nvlist_t *fmri, int version, const nvlist_t *auth,
const char *unum, const char *serial, uint64_t offset)
{
if (version != MEM_SCHEME_VERSION0) {
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
return;
}
if (!serial && (offset != (uint64_t)-1)) {
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
return;
}
if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
return;
}
if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM) != 0) {
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
return;
}
if (auth != NULL) {
if (nvlist_add_nvlist(fmri, FM_FMRI_AUTHORITY,
(nvlist_t *)auth) != 0) {
- atomic_add_64(
- &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(
+ &erpt_kstat_data.fmri_set_failed.value.ui64);
}
}
if (nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, unum) != 0) {
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
}
if (serial != NULL) {
if (nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
(char **)&serial, 1) != 0) {
- atomic_add_64(
- &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(
+ &erpt_kstat_data.fmri_set_failed.value.ui64);
}
- if (offset != (uint64_t)-1) {
- if (nvlist_add_uint64(fmri, FM_FMRI_MEM_OFFSET,
- offset) != 0) {
- atomic_add_64(&erpt_kstat_data.
- fmri_set_failed.value.ui64, 1);
- }
+ if (offset != (uint64_t)-1 && nvlist_add_uint64(fmri,
+ FM_FMRI_MEM_OFFSET, offset) != 0) {
+ atomic_inc_64(
+ &erpt_kstat_data.fmri_set_failed.value.ui64);
}
}
}
@@ -1438,28 +1441,28 @@ fm_fmri_zfs_set(nvlist_t *fmri, int version, uint64_t pool_guid,
uint64_t vdev_guid)
{
if (version != ZFS_SCHEME_VERSION0) {
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
return;
}
if (nvlist_add_uint8(fmri, FM_VERSION, version) != 0) {
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
return;
}
if (nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_ZFS) != 0) {
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
return;
}
if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_POOL, pool_guid) != 0) {
- atomic_add_64(&erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(&erpt_kstat_data.fmri_set_failed.value.ui64);
}
if (vdev_guid != 0) {
if (nvlist_add_uint64(fmri, FM_FMRI_ZFS_VDEV, vdev_guid) != 0) {
- atomic_add_64(
- &erpt_kstat_data.fmri_set_failed.value.ui64, 1);
+ atomic_inc_64(
+ &erpt_kstat_data.fmri_set_failed.value.ui64);
}
}
}
@@ -1592,6 +1595,19 @@ fm_ena_time_get(uint64_t ena)
return (time);
}
+#ifdef _KERNEL
+/*
+ * Helper function to increment ereport dropped count. Used by the event
+ * rate limiting code to give feedback to the user about how many events were
+ * rate limited by including them in the 'dropped' count.
+ */
+void
+fm_erpt_dropped_increment(void)
+{
+ atomic_inc_64(&ratelimit_dropped);
+}
+#endif
+
#ifdef _KERNEL
void
fm_init(void)
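
The ratelimit_dropped plumbing added to fm.c above follows a simple pattern: producers bump a module-global counter via fm_erpt_dropped_increment(), and the next zfs_zevent_next() call folds that count into the reader's dropped total and resets it. A single-threaded toy model of the accounting (toy_drop_event and toy_next_event are invented names; the kernel code serializes on zevent_lock):

    #include <stdint.h>
    #include <stdio.h>

    /* Module-global counter, as ratelimit_dropped is above. */
    static uint64_t toy_ratelimited;

    /* Producer side: an event was rate limited rather than queued. */
    static void toy_drop_event(void)
    {
        toy_ratelimited++;
    }

    /* Consumer side: fold rate-limited drops into the reader's count. */
    static uint64_t toy_next_event(uint64_t *reader_dropped)
    {
        *reader_dropped += toy_ratelimited;
        toy_ratelimited = 0;
        return (*reader_dropped);
    }

    int main(void)
    {
        uint64_t dropped = 0;

        toy_drop_event();
        toy_drop_event();
        printf("%llu\n", (unsigned long long)toy_next_event(&dropped)); /* 2 */
        printf("%llu\n", (unsigned long long)toy_next_event(&dropped)); /* still 2 */
        return (0);
    }
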
diff --git a/zfs/module/zfs/gzip.c b/zfs/module/zfs/gzip.c
index 011fb918812a..6c8fdd308a06 100644
--- a/zfs/module/zfs/gzip.c
+++ b/zfs/module/zfs/gzip.c
@@ -28,6 +28,7 @@
#include <sys/debug.h>
#include <sys/types.h>
+#include "qat_compress.h"
#ifdef _KERNEL
@@ -56,6 +57,14 @@ gzip_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
ASSERT(d_len <= s_len);
+ /* check if hardware accelerator can be used */
+ if (qat_use_accel(s_len)) {
+ if (qat_compress(QAT_COMPRESS, s_start,
+ s_len, d_start, d_len, &dstlen) == CPA_STATUS_SUCCESS)
+ return ((size_t)dstlen);
+ /* if hardware compression fails, do it again with software */
+ }
+
if (compress_func(d_start, &dstlen, s_start, s_len, n) != Z_OK) {
if (d_len != s_len)
return (s_len);
@@ -64,7 +73,7 @@ gzip_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
return (s_len);
}
- return ((size_t) dstlen);
+ return ((size_t)dstlen);
}
/*ARGSUSED*/
@@ -75,6 +84,14 @@ gzip_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
ASSERT(d_len >= s_len);
+ /* check if hardware accelerator can be used */
+ if (qat_use_accel(d_len)) {
+ if (qat_compress(QAT_DECOMPRESS, s_start, s_len,
+ d_start, d_len, &dstlen) == CPA_STATUS_SUCCESS)
+ return (0);
+ /* if hardware decompression fails, do it again with software */
+ }
+
if (uncompress_func(d_start, &dstlen, s_start, s_len) != Z_OK)
return (-1);
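
Both gzip paths above use the same try-hardware-then-fall-back shape: attempt QAT first, and on any failure redo the work in software as if the accelerator were absent. A generic sketch of the pattern with hypothetical stubs (hw_compress and sw_compress are invented; the real calls are qat_compress() and zlib's compress routines):

    #include <stddef.h>
    #include <stdio.h>

    /* Invented stubs: hw_compress() plays the qat_compress() role and is
     * made to fail; sw_compress() plays the zlib role. */
    static int hw_compress(const char *src, size_t n, size_t *outlen)
    {
        (void) src; (void) n; (void) outlen;
        return (-1);            /* pretend the accelerator is unavailable */
    }

    static int sw_compress(const char *src, size_t n, size_t *outlen)
    {
        (void) src;
        *outlen = n / 2;        /* pretend 2:1 compression */
        return (0);
    }

    static size_t toy_compress(const char *src, size_t n)
    {
        size_t outlen;

        /* Prefer hardware; on any failure, redo the work in software. */
        if (hw_compress(src, n, &outlen) == 0)
            return (outlen);
        if (sw_compress(src, n, &outlen) == 0)
            return (outlen);
        return (n);             /* incompressible: caller stores as-is */
    }

    int main(void)
    {
        printf("%zu\n", toy_compress("hello world", 11)); /* 5 */
        return (0);
    }

Because the fallback reruns the whole operation, accelerator failures cost time but never correctness.
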
diff --git a/zfs/module/zfs/lz4.c b/zfs/module/zfs/lz4.c
index cf406b9368ff..9b9a2e6936e0 100644
--- a/zfs/module/zfs/lz4.c
+++ b/zfs/module/zfs/lz4.c
@@ -63,7 +63,7 @@ lz4_compress_zfs(void *s_start, void *d_start, size_t s_len,
return (s_len);
/*
- * Encode the compresed buffer size at the start. We'll need this in
+ * Encode the compressed buffer size at the start. We'll need this in
* decompression to counter the effects of padding which might be
* added to the compressed buffer and which, if unhandled, would
* confuse the hell out of our decompression function.
@@ -87,7 +87,7 @@ lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
/*
* Returns 0 on success (decompression function returned non-negative)
- * and non-zero on failure (decompression function returned negative.
+ * and non-zero on failure (decompression function returned negative).
*/
return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
d_start, bufsiz, d_len) < 0);
@@ -205,7 +205,7 @@ lz4_decompress_zfs(void *s_start, void *d_start, size_t s_len,
/*
* Little Endian or Big Endian?
- * Note: overwrite the below #define if you know your architecture endianess.
+ * Note: overwrite the below #define if you know your architecture endianness.
*/
#if defined(_BIG_ENDIAN)
#define LZ4_BIG_ENDIAN 1
@@ -873,6 +873,11 @@ real_LZ4_compress(const char *source, char *dest, int isize, int osize)
* its code is not present here.
*/
+static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
+#if LZ4_ARCH64
+static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+#endif
+
static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
int maxOutputSize)
@@ -886,11 +891,6 @@ LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
BYTE *const oend = op + maxOutputSize;
BYTE *cpy;
- size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
-#if LZ4_ARCH64
- size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
-#endif
-
/* Main Loop */
while (ip < iend) {
unsigned token;
@@ -902,6 +902,8 @@ LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
int s = 255;
while ((ip < iend) && (s == 255)) {
s = *ip++;
+ if (unlikely(length > (size_t)(length + s)))
+ goto _output_error;
length += s;
}
}
@@ -944,6 +946,8 @@ LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
if ((length = (token & ML_MASK)) == ML_MASK) {
while (ip < iend) {
int s = *ip++;
+ if (unlikely(length > (size_t)(length + s)))
+ goto _output_error;
length += s;
if (s == 255)
continue;
@@ -953,7 +957,7 @@ LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
/* copy repeated sequence */
if (unlikely(op - ref < STEPSIZE)) {
#if LZ4_ARCH64
- size_t dec64 = dec64table[op-ref];
+ int dec64 = dec64table[op - ref];
#else
const int dec64 = 0;
#endif
@@ -963,7 +967,7 @@ LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
op[3] = ref[3];
op += 4;
ref += 4;
- ref -= dec32table[op-ref];
+ ref -= dec32table[op - ref];
A32(op) = A32(ref);
op += STEPSIZE - 4;
ref -= dec64;
@@ -978,6 +982,13 @@ LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
* destination buffer
*/
goto _output_error;
+#if LZ4_ARCH64
+ if ((ref + COPYLENGTH) > oend)
+#else
+ if ((ref + COPYLENGTH) > oend ||
+ (op + COPYLENGTH) > oend)
+#endif
+ goto _output_error;
LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
while (op < cpy)
*op++ = *ref++;
@@ -999,14 +1010,14 @@ LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
/* write overflow error detected */
_output_error:
- return (int)(-(((char *)ip) - source));
+ return (-1);
}
void
lz4_init(void)
{
lz4_cache = kmem_cache_create("lz4_cache",
- sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
+ sizeof (struct refTables), 0, NULL, NULL, NULL, NULL, NULL, 0);
}
void
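
The two guards added to LZ4_uncompress_unknownOutputSize() above catch unsigned wraparound while accumulating 255-valued run-length extension bytes: once length + s wraps, the sum compares smaller than the running total. A self-contained sketch of the same check (toy_add_len is an invented helper):

    #include <stddef.h>
    #include <stdio.h>

    /*
     * Accumulate run-length extension bytes, failing on wraparound:
     * once length + s overflows, the sum compares smaller than length.
     */
    static int toy_add_len(size_t *length, unsigned s)
    {
        if (*length > (size_t)(*length + s))
            return (-1);        /* wrapped: corrupt or hostile stream */
        *length += s;
        return (0);
    }

    int main(void)
    {
        size_t len = (size_t)-4;        /* nearly SIZE_MAX */

        printf("%d\n", toy_add_len(&len, 3));   /* 0: no wrap yet */
        printf("%d\n", toy_add_len(&len, 255)); /* -1: wrap detected */
        return (0);
    }
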
diff --git a/zfs/module/zfs/metaslab.c b/zfs/module/zfs/metaslab.c
index 59bcefd346c0..5e413c06518b 100644
--- a/zfs/module/zfs/metaslab.c
+++ b/zfs/module/zfs/metaslab.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
*/
@@ -36,22 +36,8 @@
#define WITH_DF_BLOCK_ALLOCATOR
-/*
- * Allow allocations to switch to gang blocks quickly. We do this to
- * avoid having to load lots of space_maps in a given txg. There are,
- * however, some cases where we want to avoid "fast" ganging and instead
- * we want to do an exhaustive search of all metaslabs on this device.
- * Currently we don't allow any gang, slog, or dump device related allocations
- * to "fast" gang.
- */
-#define CAN_FASTGANG(flags) \
- (!((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER | \
- METASLAB_GANG_AVOID)))
-
-#define METASLAB_WEIGHT_PRIMARY (1ULL << 63)
-#define METASLAB_WEIGHT_SECONDARY (1ULL << 62)
-#define METASLAB_ACTIVE_MASK \
- (METASLAB_WEIGHT_PRIMARY | METASLAB_WEIGHT_SECONDARY)
+#define GANG_ALLOCATION(flags) \
+ ((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER))
/*
* Metaslab granularity, in bytes. This is roughly similar to what would be
@@ -66,7 +52,7 @@ uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */
/*
* The in-core space map representation is more compact than its on-disk form.
* The zfs_condense_pct determines how much more compact the in-core
- * space_map representation must be before we compact it on-disk.
+ * space map representation must be before we compact it on-disk.
* Values should be greater than or equal to 100.
*/
int zfs_condense_pct = 200;
@@ -134,12 +120,12 @@ int metaslab_debug_unload = 0;
 * an allocation of this size then it switches to using a more
 * aggressive strategy (i.e. search by size rather than offset).
*/
-uint64_t metaslab_df_alloc_threshold = SPA_MAXBLOCKSIZE;
+uint64_t metaslab_df_alloc_threshold = SPA_OLD_MAXBLOCKSIZE;
/*
* The minimum free space, in percent, which must be available
* in a space map to continue allocations in a first-fit fashion.
- * Once the space_map's free space drops below this level we dynamically
+ * Once the space map's free space drops below this level we dynamically
* switch to using best-fit allocations.
*/
int metaslab_df_free_pct = 4;
@@ -181,7 +167,45 @@ int metaslab_lba_weighting_enabled = B_TRUE;
*/
int metaslab_bias_enabled = B_TRUE;
-static uint64_t metaslab_fragmentation(metaslab_t *);
+
+/*
+ * Enable/disable segment-based metaslab selection.
+ */
+int zfs_metaslab_segment_weight_enabled = B_TRUE;
+
+/*
+ * When using segment-based metaslab selection, we will continue
+ * allocating from the active metaslab until we have exhausted
+ * zfs_metaslab_switch_threshold of its buckets.
+ */
+int zfs_metaslab_switch_threshold = 2;
+
+/*
+ * Internal switch to enable/disable the metaslab allocation tracing
+ * facility.
+ */
+#ifdef _METASLAB_TRACING
+boolean_t metaslab_trace_enabled = B_TRUE;
+#endif
+
+/*
+ * Maximum entries that the metaslab allocation tracing facility will keep
+ * in a given list when running in non-debug mode. We limit the number
+ * of entries in non-debug mode to prevent us from using up too much memory.
+ * The limit should be sufficiently large that we don't expect any allocation
+ * to ever exceed this value. In debug mode, the system will panic if this
+ * limit is ever reached, allowing for further investigation.
+ */
+#ifdef _METASLAB_TRACING
+uint64_t metaslab_trace_max_entries = 5000;
+#endif
+
+static uint64_t metaslab_weight(metaslab_t *);
+static void metaslab_set_fragmentation(metaslab_t *);
+
+#ifdef _METASLAB_TRACING
+kmem_cache_t *metaslab_alloc_trace_cache;
+#endif
/*
* ==========================================================================
@@ -198,7 +222,8 @@ metaslab_class_create(spa_t *spa, metaslab_ops_t *ops)
mc->mc_spa = spa;
mc->mc_rotor = NULL;
mc->mc_ops = ops;
- mutex_init(&mc->mc_fastwrite_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&mc->mc_lock, NULL, MUTEX_DEFAULT, NULL);
+ refcount_create_tracked(&mc->mc_alloc_slots);
return (mc);
}
@@ -212,7 +237,8 @@ metaslab_class_destroy(metaslab_class_t *mc)
ASSERT(mc->mc_space == 0);
ASSERT(mc->mc_dspace == 0);
- mutex_destroy(&mc->mc_fastwrite_lock);
+ refcount_destroy(&mc->mc_alloc_slots);
+ mutex_destroy(&mc->mc_lock);
kmem_free(mc, sizeof (metaslab_class_t));
}
@@ -387,47 +413,90 @@ metaslab_class_expandable_space(metaslab_class_t *mc)
continue;
}
- space += tvd->vdev_max_asize - tvd->vdev_asize;
+ /*
+ * Calculate if we have enough space to add additional
+ * metaslabs. We report the expandable space in terms
+ * of the metaslab size since that's the unit of expansion.
+ */
+ space += P2ALIGN(tvd->vdev_max_asize - tvd->vdev_asize,
+ 1ULL << tvd->vdev_ms_shift);
}
spa_config_exit(mc->mc_spa, SCL_VDEV, FTAG);
return (space);
}
-/*
- * ==========================================================================
- * Metaslab groups
- * ==========================================================================
- */
static int
metaslab_compare(const void *x1, const void *x2)
{
- const metaslab_t *m1 = x1;
- const metaslab_t *m2 = x2;
+ const metaslab_t *m1 = (const metaslab_t *)x1;
+ const metaslab_t *m2 = (const metaslab_t *)x2;
+
+ int cmp = AVL_CMP(m2->ms_weight, m1->ms_weight);
+ if (likely(cmp))
+ return (cmp);
+
+ IMPLY(AVL_CMP(m1->ms_start, m2->ms_start) == 0, m1 == m2);
+
+ return (AVL_CMP(m1->ms_start, m2->ms_start));
+}
+
+/*
+ * Verify that the space accounting on disk matches the in-core range_trees.
+ */
+void
+metaslab_verify_space(metaslab_t *msp, uint64_t txg)
+{
+ spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+ uint64_t allocated = 0;
+ uint64_t sm_free_space, msp_free_space;
+ int t;
+
+ ASSERT(MUTEX_HELD(&msp->ms_lock));
- if (m1->ms_weight < m2->ms_weight)
- return (1);
- if (m1->ms_weight > m2->ms_weight)
- return (-1);
+ if ((zfs_flags & ZFS_DEBUG_METASLAB_VERIFY) == 0)
+ return;
/*
- * If the weights are identical, use the offset to force uniqueness.
+ * We can only verify the metaslab space when we're called
+ * from syncing context with a loaded metaslab that has an allocated
+ * space map. Calling this in non-syncing context does not
+ * provide a consistent view of the metaslab since we're performing
+ * allocations in the future.
*/
- if (m1->ms_start < m2->ms_start)
- return (-1);
- if (m1->ms_start > m2->ms_start)
- return (1);
+ if (txg != spa_syncing_txg(spa) || msp->ms_sm == NULL ||
+ !msp->ms_loaded)
+ return;
- ASSERT3P(m1, ==, m2);
+ sm_free_space = msp->ms_size - space_map_allocated(msp->ms_sm) -
+ space_map_alloc_delta(msp->ms_sm);
- return (0);
+ /*
+ * Account for future allocations since we would have already
+ * deducted that space from the ms_freetree.
+ */
+ for (t = 0; t < TXG_CONCURRENT_STATES; t++) {
+ allocated +=
+ range_tree_space(msp->ms_alloctree[(txg + t) & TXG_MASK]);
+ }
+
+ msp_free_space = range_tree_space(msp->ms_tree) + allocated +
+ msp->ms_deferspace + range_tree_space(msp->ms_freedtree);
+
+ VERIFY3U(sm_free_space, ==, msp_free_space);
}
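
To make the identity concrete with toy numbers: if ms_size = 1000 while the space map reports 300 allocated plus an alloc delta of 50, then sm_free_space = 1000 - 300 - 50 = 650; that must equal range_tree_space(ms_tree) + the pending alloctree space for the next TXG_CONCURRENT_STATES txgs + ms_deferspace + range_tree_space(ms_freedtree), e.g. 500 + 100 + 30 + 20 = 650.
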
+/*
+ * ==========================================================================
+ * Metaslab groups
+ * ==========================================================================
+ */
/*
* Update the allocatable flag and the metaslab group's capacity.
* The allocatable flag is set to true if the capacity is below
- * the zfs_mg_noalloc_threshold. If a metaslab group transitions
- * from allocatable to non-allocatable or vice versa then the metaslab
- * group's class is updated to reflect the transition.
+ * the zfs_mg_noalloc_threshold or has a fragmentation value that is
+ * greater than zfs_mg_fragmentation_threshold. If a metaslab group
+ * transitions from allocatable to non-allocatable or vice versa then the
+ * metaslab group's class is updated to reflect the transition.
*/
static void
metaslab_group_alloc_update(metaslab_group_t *mg)
@@ -436,22 +505,45 @@ metaslab_group_alloc_update(metaslab_group_t *mg)
metaslab_class_t *mc = mg->mg_class;
vdev_stat_t *vs = &vd->vdev_stat;
boolean_t was_allocatable;
+ boolean_t was_initialized;
ASSERT(vd == vd->vdev_top);
mutex_enter(&mg->mg_lock);
was_allocatable = mg->mg_allocatable;
+ was_initialized = mg->mg_initialized;
mg->mg_free_capacity = ((vs->vs_space - vs->vs_alloc) * 100) /
(vs->vs_space + 1);
+ mutex_enter(&mc->mc_lock);
+
+ /*
+ * If the metaslab group was just added then it won't
+ * have any space until we finish syncing out this txg.
+ * At that point we will consider it initialized and available
+ * for allocations. We also don't consider non-activated
+ * metaslab groups (e.g. vdevs that are in the middle of being removed)
+ * to be initialized, because they can't be used for allocation.
+ */
+ mg->mg_initialized = metaslab_group_initialized(mg);
+ if (!was_initialized && mg->mg_initialized) {
+ mc->mc_groups++;
+ } else if (was_initialized && !mg->mg_initialized) {
+ ASSERT3U(mc->mc_groups, >, 0);
+ mc->mc_groups--;
+ }
+ if (mg->mg_initialized)
+ mg->mg_no_free_space = B_FALSE;
+
/*
* A metaslab group is considered allocatable if it has plenty
* of free space or is not heavily fragmented. We only take
* fragmentation into account if the metaslab group has a valid
* fragmentation metric (i.e. a value between 0 and 100).
*/
- mg->mg_allocatable = (mg->mg_free_capacity > zfs_mg_noalloc_threshold &&
+ mg->mg_allocatable = (mg->mg_activation_count > 0 &&
+ mg->mg_free_capacity > zfs_mg_noalloc_threshold &&
(mg->mg_fragmentation == ZFS_FRAG_INVALID ||
mg->mg_fragmentation <= zfs_mg_fragmentation_threshold));
@@ -474,6 +566,7 @@ metaslab_group_alloc_update(metaslab_group_t *mg)
mc->mc_alloc_groups--;
else if (!was_allocatable && mg->mg_allocatable)
mc->mc_alloc_groups++;
+ mutex_exit(&mc->mc_lock);
mutex_exit(&mg->mg_lock);
}
@@ -490,6 +583,9 @@ metaslab_group_create(metaslab_class_t *mc, vdev_t *vd)
mg->mg_vd = vd;
mg->mg_class = mc;
mg->mg_activation_count = 0;
+ mg->mg_initialized = B_FALSE;
+ mg->mg_no_free_space = B_TRUE;
+ refcount_create_tracked(&mg->mg_alloc_queue_depth);
mg->mg_taskq = taskq_create("metaslab_group_taskq", metaslab_load_pct,
maxclsyspri, 10, INT_MAX, TASKQ_THREADS_CPU_PCT | TASKQ_DYNAMIC);
@@ -512,6 +608,7 @@ metaslab_group_destroy(metaslab_group_t *mg)
taskq_destroy(mg->mg_taskq);
avl_destroy(&mg->mg_metaslab_tree);
mutex_destroy(&mg->mg_lock);
+ refcount_destroy(&mg->mg_alloc_queue_depth);
kmem_free(mg, sizeof (metaslab_group_t));
}
@@ -581,6 +678,15 @@ metaslab_group_passivate(metaslab_group_t *mg)
mg->mg_next = NULL;
}
+boolean_t
+metaslab_group_initialized(metaslab_group_t *mg)
+{
+ vdev_t *vd = mg->mg_vd;
+ vdev_stat_t *vs = &vd->vdev_stat;
+
+ return (vs->vs_space != 0 && mg->mg_activation_count > 0);
+}
+
uint64_t
metaslab_group_get_space(metaslab_group_t *mg)
{
@@ -753,30 +859,97 @@ metaslab_group_fragmentation(metaslab_group_t *mg)
* group should avoid allocations if its free capacity is less than the
* zfs_mg_noalloc_threshold or its fragmentation metric is greater than
* zfs_mg_fragmentation_threshold and there is at least one metaslab group
- * that can still handle allocations.
+ * that can still handle allocations. If the allocation throttle is enabled
+ * then we skip allocations to devices that have reached their maximum
+ * allocation queue depth unless the selected metaslab group is the only
+ * eligible group remaining.
*/
static boolean_t
-metaslab_group_allocatable(metaslab_group_t *mg)
+metaslab_group_allocatable(metaslab_group_t *mg, metaslab_group_t *rotor,
+ uint64_t psize)
{
- vdev_t *vd = mg->mg_vd;
- spa_t *spa = vd->vdev_spa;
+ spa_t *spa = mg->mg_vd->vdev_spa;
metaslab_class_t *mc = mg->mg_class;
/*
- * We use two key metrics to determine if a metaslab group is
- * considered allocatable -- free space and fragmentation. If
- * the free space is greater than the free space threshold and
- * the fragmentation is less than the fragmentation threshold then
- * consider the group allocatable. There are two case when we will
- * not consider these key metrics. The first is if the group is
- * associated with a slog device and the second is if all groups
- * in this metaslab class have already been consider ineligible
+ * We can only consider skipping this metaslab group if it's
+ * in the normal metaslab class and there are other metaslab
+ * groups to select from. Otherwise, we always consider it eligible
* for allocations.
*/
- return ((mg->mg_free_capacity > zfs_mg_noalloc_threshold &&
- (mg->mg_fragmentation == ZFS_FRAG_INVALID ||
- mg->mg_fragmentation <= zfs_mg_fragmentation_threshold)) ||
- mc != spa_normal_class(spa) || mc->mc_alloc_groups == 0);
+ if (mc != spa_normal_class(spa) || mc->mc_groups <= 1)
+ return (B_TRUE);
+
+ /*
+ * If the metaslab group's mg_allocatable flag is set (see comments
+ * in metaslab_group_alloc_update() for more information) and
+ * the allocation throttle is disabled then allow allocations to this
+ * device. However, if the allocation throttle is enabled then
+ * check if we have reached our allocation limit (mg_alloc_queue_depth)
+ * to determine if we should allow allocations to this metaslab group.
+ * If all metaslab groups are no longer considered allocatable
+ * (mc_alloc_groups == 0) or we're trying to allocate the smallest
+ * gang block size then we allow allocations on this metaslab group
+ * regardless of the mg_allocatable or throttle settings.
+ */
+ if (mg->mg_allocatable) {
+ metaslab_group_t *mgp;
+ int64_t qdepth;
+ uint64_t qmax = mg->mg_max_alloc_queue_depth;
+
+ if (!mc->mc_alloc_throttle_enabled)
+ return (B_TRUE);
+
+ /*
+ * If this metaslab group does not have any free space, then
+ * there is no point in looking further.
+ */
+ if (mg->mg_no_free_space)
+ return (B_FALSE);
+
+ qdepth = refcount_count(&mg->mg_alloc_queue_depth);
+
+ /*
+ * If this metaslab group is below its qmax or it's
+ * the only allocatable metaslab group, then attempt
+ * to allocate from it.
+ */
+ if (qdepth < qmax || mc->mc_alloc_groups == 1)
+ return (B_TRUE);
+ ASSERT3U(mc->mc_alloc_groups, >, 1);
+
+ /*
+ * Since this metaslab group is at or over its qmax, we
+ * need to determine if there are metaslab groups after this
+ * one that might be able to handle this allocation. This is
+ * racy since we can't hold the locks for all metaslab
+ * groups at the same time when we make this check.
+ */
+ for (mgp = mg->mg_next; mgp != rotor; mgp = mgp->mg_next) {
+ qmax = mgp->mg_max_alloc_queue_depth;
+
+ qdepth = refcount_count(&mgp->mg_alloc_queue_depth);
+
+ /*
+ * If there is another metaslab group that
+ * might be able to handle the allocation, then
+ * we return false so that we skip this group.
+ */
+ if (qdepth < qmax && !mgp->mg_no_free_space)
+ return (B_FALSE);
+ }
+
+ /*
+ * We didn't find another group to handle the allocation
+ * so we can't skip this metaslab group even though
+ * we are at or over our qmax.
+ */
+ return (B_TRUE);
+
+ } else if (mc->mc_alloc_groups == 0 || psize == SPA_MINBLOCKSIZE) {
+ return (B_TRUE);
+ }
+ return (B_FALSE);
}
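
The throttle branch above only skips a group that is at or over its queue-depth limit when some other group in the rotor looks able to take the allocation; otherwise the allocation stays here despite the limit. A toy model of that decision (struct toy_group and toy_allocatable are invented; qdepth, qmax and no_free_space stand in for mg_alloc_queue_depth, mg_max_alloc_queue_depth and mg_no_free_space):

    #include <stdbool.h>
    #include <stdio.h>

    /* Invented model of a metaslab group in the rotor. */
    struct toy_group {
        int     qdepth;
        int     qmax;
        bool    no_free_space;
    };

    /* Skip group i only when it is at/over qmax and some other rotor
     * member is under its qmax with free space, as in the loop above. */
    static bool toy_allocatable(const struct toy_group *g, int n, int i)
    {
        int j;

        if (g[i].no_free_space)
            return (false);
        if (g[i].qdepth < g[i].qmax || n == 1)
            return (true);
        for (j = (i + 1) % n; j != i; j = (j + 1) % n) {
            if (g[j].qdepth < g[j].qmax && !g[j].no_free_space)
                return (false); /* another group can take it */
        }
        return (true);  /* nowhere better: allocate here anyway */
    }

    int main(void)
    {
        struct toy_group g[2] = {
            { 8, 8, false },    /* throttled */
            { 2, 8, false },    /* has headroom */
        };

        printf("%d %d\n", toy_allocatable(g, 2, 0),
            toy_allocatable(g, 2, 1)); /* prints "0 1" */
        return (0);
    }

As the real comment notes, the scan of the other groups is racy by design; the worst case is an allocation landing on a busier device, never a spurious failure.
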
/*
@@ -797,18 +970,11 @@ metaslab_rangesize_compare(const void *x1, const void *x2)
uint64_t rs_size1 = r1->rs_end - r1->rs_start;
uint64_t rs_size2 = r2->rs_end - r2->rs_start;
- if (rs_size1 < rs_size2)
- return (-1);
- if (rs_size1 > rs_size2)
- return (1);
+ int cmp = AVL_CMP(rs_size1, rs_size2);
+ if (likely(cmp))
+ return (cmp);
- if (r1->rs_start < r2->rs_start)
- return (-1);
-
- if (r1->rs_start > r2->rs_start)
- return (1);
-
- return (0);
+ return (AVL_CMP(r1->rs_start, r2->rs_start));
}
/*
@@ -892,7 +1058,7 @@ static range_tree_ops_t metaslab_rt_ops = {
/*
* ==========================================================================
- * Metaslab block operations
+ * Common allocator routines
* ==========================================================================
*/
@@ -911,31 +1077,22 @@ metaslab_block_maxsize(metaslab_t *msp)
return (rs->rs_end - rs->rs_start);
}
-uint64_t
-metaslab_block_alloc(metaslab_t *msp, uint64_t size)
+static range_seg_t *
+metaslab_block_find(avl_tree_t *t, uint64_t start, uint64_t size)
{
- uint64_t start;
- range_tree_t *rt = msp->ms_tree;
-
- VERIFY(!msp->ms_condensing);
+ range_seg_t *rs, rsearch;
+ avl_index_t where;
- start = msp->ms_ops->msop_alloc(msp, size);
- if (start != -1ULL) {
- vdev_t *vd = msp->ms_group->mg_vd;
+ rsearch.rs_start = start;
+ rsearch.rs_end = start + size;
- VERIFY0(P2PHASE(start, 1ULL << vd->vdev_ashift));
- VERIFY0(P2PHASE(size, 1ULL << vd->vdev_ashift));
- VERIFY3U(range_tree_space(rt) - size, <=, msp->ms_size);
- range_tree_remove(rt, start, size);
+ rs = avl_find(t, &rsearch, &where);
+ if (rs == NULL) {
+ rs = avl_nearest(t, where, AVL_AFTER);
}
- return (start);
-}
-/*
- * ==========================================================================
- * Common allocator routines
- * ==========================================================================
- */
+ return (rs);
+}
#if defined(WITH_FF_BLOCK_ALLOCATOR) || \
defined(WITH_DF_BLOCK_ALLOCATOR) || \
@@ -949,15 +1106,7 @@ static uint64_t
metaslab_block_picker(avl_tree_t *t, uint64_t *cursor, uint64_t size,
uint64_t align)
{
- range_seg_t *rs, rsearch;
- avl_index_t where;
-
- rsearch.rs_start = *cursor;
- rsearch.rs_end = *cursor + size;
-
- rs = avl_find(t, &rsearch, &where);
- if (rs == NULL)
- rs = avl_nearest(t, where, AVL_AFTER);
+ range_seg_t *rs = metaslab_block_find(t, *cursor, size);
while (rs != NULL) {
uint64_t offset = P2ROUNDUP(rs->rs_start, align);
@@ -1199,6 +1348,7 @@ metaslab_load(metaslab_t *msp)
{
int error = 0;
int t;
+ boolean_t success = B_FALSE;
ASSERT(MUTEX_HELD(&msp->ms_lock));
ASSERT(!msp->ms_loaded);
@@ -1216,14 +1366,18 @@ metaslab_load(metaslab_t *msp)
else
range_tree_add(msp->ms_tree, msp->ms_start, msp->ms_size);
- msp->ms_loaded = (error == 0);
+ success = (error == 0);
msp->ms_loading = B_FALSE;
- if (msp->ms_loaded) {
+ if (success) {
+ ASSERT3P(msp->ms_group, !=, NULL);
+ msp->ms_loaded = B_TRUE;
+
for (t = 0; t < TXG_DEFER_SIZE; t++) {
range_tree_walk(msp->ms_defertree[t],
range_tree_remove, msp->ms_tree);
}
+ msp->ms_max_size = metaslab_block_maxsize(msp);
}
cv_broadcast(&msp->ms_load_cv);
return (error);
@@ -1236,6 +1390,7 @@ metaslab_unload(metaslab_t *msp)
range_tree_vacate(msp->ms_tree, NULL, NULL);
msp->ms_loaded = B_FALSE;
msp->ms_weight &= ~METASLAB_ACTIVE_MASK;
+ msp->ms_max_size = 0;
}
int
@@ -1272,7 +1427,7 @@ metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object, uint64_t txg,
/*
* We create the main range tree here, but we don't create the
- * alloctree and freetree until metaslab_sync_done(). This serves
+ * other range trees until metaslab_sync_done(). This serves
* two purposes: it allows metaslab_sync_done() to detect the
* addition of new space; and for debugging, it ensures that we'd
* data fault on any attempt to use this metaslab before it's ready.
@@ -1280,21 +1435,23 @@ metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object, uint64_t txg,
ms->ms_tree = range_tree_create(&metaslab_rt_ops, ms, &ms->ms_lock);
metaslab_group_add(mg, ms);
- ms->ms_fragmentation = metaslab_fragmentation(ms);
- ms->ms_ops = mg->mg_class->mc_ops;
+ metaslab_set_fragmentation(ms);
/*
* If we're opening an existing pool (txg == 0) or creating
* a new one (txg == TXG_INITIAL), all space is available now.
* If we're adding space to an existing pool, the new space
* does not become available until after this txg has synced.
+ * The metaslab's weight will also be initialized when we sync
+ * out this txg. This ensures that we don't attempt to allocate
+ * from it before we have initialized it completely.
*/
if (txg <= TXG_INITIAL)
metaslab_sync_done(ms, 0);
/*
* If metaslab_debug_load is set and we're initializing a metaslab
- * that has an allocated space_map object then load the its space
+ * that has an allocated space map object then load its space
 * map so that we can verify frees.
*/
if (metaslab_debug_load && ms->ms_sm != NULL) {
@@ -1323,7 +1480,6 @@ metaslab_fini(metaslab_t *msp)
metaslab_group_remove(mg, msp);
mutex_enter(&msp->ms_lock);
-
VERIFY(msp->ms_group == NULL);
vdev_space_update(mg->mg_vd, -space_map_allocated(msp->ms_sm),
0, -msp->ms_size);
@@ -1331,10 +1487,11 @@ metaslab_fini(metaslab_t *msp)
metaslab_unload(msp);
range_tree_destroy(msp->ms_tree);
+ range_tree_destroy(msp->ms_freeingtree);
+ range_tree_destroy(msp->ms_freedtree);
for (t = 0; t < TXG_SIZE; t++) {
range_tree_destroy(msp->ms_alloctree[t]);
- range_tree_destroy(msp->ms_freetree[t]);
}
for (t = 0; t < TXG_DEFER_SIZE; t++) {
@@ -1396,8 +1553,8 @@ int zfs_frag_table[FRAGMENTATION_TABLE_SIZE] = {
* not support this metric. Otherwise, the return value should be in the
* range [0, 100].
*/
-static uint64_t
-metaslab_fragmentation(metaslab_t *msp)
+static void
+metaslab_set_fragmentation(metaslab_t *msp)
{
spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
uint64_t fragmentation = 0;
@@ -1406,37 +1563,50 @@ metaslab_fragmentation(metaslab_t *msp)
SPA_FEATURE_SPACEMAP_HISTOGRAM);
int i;
- if (!feature_enabled)
- return (ZFS_FRAG_INVALID);
+ if (!feature_enabled) {
+ msp->ms_fragmentation = ZFS_FRAG_INVALID;
+ return;
+ }
/*
* A null space map means that the entire metaslab is free
* and thus is not fragmented.
*/
- if (msp->ms_sm == NULL)
- return (0);
+ if (msp->ms_sm == NULL) {
+ msp->ms_fragmentation = 0;
+ return;
+ }
/*
- * If this metaslab's space_map has not been upgraded, flag it
+ * If this metaslab's space map has not been upgraded, flag it
* so that we upgrade next time we encounter it.
*/
if (msp->ms_sm->sm_dbuf->db_size != sizeof (space_map_phys_t)) {
+ uint64_t txg = spa_syncing_txg(spa);
vdev_t *vd = msp->ms_group->mg_vd;
- if (spa_writeable(vd->vdev_spa)) {
- uint64_t txg = spa_syncing_txg(spa);
-
+ /*
+ * If we've reached the final dirty txg, then we must
+ * be shutting down the pool. We don't want to dirty
+ * any data past this point so skip setting the condense
+ * flag. We can retry this action the next time the pool
+ * is imported.
+ */
+ if (spa_writeable(spa) && txg < spa_final_dirty_txg(spa)) {
msp->ms_condense_wanted = B_TRUE;
vdev_dirty(vd, VDD_METASLAB, msp, txg + 1);
spa_dbgmsg(spa, "txg %llu, requesting force condense: "
- "msp %p, vd %p", txg, msp, vd);
+ "ms_id %llu, vdev_id %llu", txg, msp->ms_id,
+ vd->vdev_id);
}
- return (ZFS_FRAG_INVALID);
+ msp->ms_fragmentation = ZFS_FRAG_INVALID;
+ return;
}
for (i = 0; i < SPACE_MAP_HISTOGRAM_SIZE; i++) {
uint64_t space = 0;
uint8_t shift = msp->ms_sm->sm_shift;
+
int idx = MIN(shift - SPA_MINBLOCKSHIFT + i,
FRAGMENTATION_TABLE_SIZE - 1);
@@ -1453,7 +1623,8 @@ metaslab_fragmentation(metaslab_t *msp)
if (total > 0)
fragmentation /= total;
ASSERT3U(fragmentation, <=, 100);
- return (fragmentation);
+
+ msp->ms_fragmentation = fragmentation;
}
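/*
 * Illustrative stand-alone sketch (not part of the patch) of the
 * space-weighted average computed above: each histogram bucket's free
 * space is weighted by its fragmentation factor and the result is
 * normalized by the total free space. The two-bucket table below is
 * hypothetical; the real zfs_frag_table has one entry per bucket.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t space[2] = { 100, 300 };	/* MiB free per bucket */
	uint64_t frag[2] = { 50, 10 };		/* table value per bucket */
	uint64_t total = 0, fragmentation = 0;
	int i;

	for (i = 0; i < 2; i++) {
		fragmentation += space[i] * frag[i];
		total += space[i];
	}
	if (total > 0)
		fragmentation /= total;

	/* (100*50 + 300*10) / 400 = 20% */
	printf("fragmentation = %llu%%\n", (unsigned long long)fragmentation);
	return (0);
}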
/*
@@ -1462,30 +1633,20 @@ metaslab_fragmentation(metaslab_t *msp)
* the LBA range, and whether the metaslab is loaded.
*/
static uint64_t
-metaslab_weight(metaslab_t *msp)
+metaslab_space_weight(metaslab_t *msp)
{
metaslab_group_t *mg = msp->ms_group;
vdev_t *vd = mg->mg_vd;
uint64_t weight, space;
ASSERT(MUTEX_HELD(&msp->ms_lock));
-
- /*
- * This vdev is in the process of being removed so there is nothing
- * for us to do here.
- */
- if (vd->vdev_removing) {
- ASSERT0(space_map_allocated(msp->ms_sm));
- ASSERT0(vd->vdev_ms_shift);
- return (0);
- }
+ ASSERT(!vd->vdev_removing);
/*
* The baseline weight is the metaslab's free space.
*/
space = msp->ms_size - space_map_allocated(msp->ms_sm);
- msp->ms_fragmentation = metaslab_fragmentation(msp);
if (metaslab_fragmentation_factor_enabled &&
msp->ms_fragmentation != ZFS_FRAG_INVALID) {
/*
@@ -1534,117 +1695,339 @@ metaslab_weight(metaslab_t *msp)
weight |= (msp->ms_weight & METASLAB_ACTIVE_MASK);
}
+ WEIGHT_SET_SPACEBASED(weight);
return (weight);
}
-static int
-metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
+/*
+ * Return the weight of the specified metaslab, according to the segment-based
+ * weighting algorithm. The metaslab must be loaded. This function can
+ * be called within a sync pass since it relies only on the metaslab's
+ * range tree which is always accurate when the metaslab is loaded.
+ */
+static uint64_t
+metaslab_weight_from_range_tree(metaslab_t *msp)
{
- ASSERT(MUTEX_HELD(&msp->ms_lock));
-
- if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) {
- metaslab_load_wait(msp);
- if (!msp->ms_loaded) {
- int error = metaslab_load(msp);
- if (error) {
- metaslab_group_sort(msp->ms_group, msp, 0);
- return (error);
- }
- }
+ uint64_t weight = 0;
+ uint32_t segments = 0;
+ int i;
- metaslab_group_sort(msp->ms_group, msp,
- msp->ms_weight | activation_weight);
- }
ASSERT(msp->ms_loaded);
- ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK);
- return (0);
+ for (i = RANGE_TREE_HISTOGRAM_SIZE - 1; i >= SPA_MINBLOCKSHIFT; i--) {
+ uint8_t shift = msp->ms_group->mg_vd->vdev_ashift;
+ int max_idx = SPACE_MAP_HISTOGRAM_SIZE + shift - 1;
+
+ segments <<= 1;
+ segments += msp->ms_tree->rt_histogram[i];
+
+ /*
+ * The range tree provides more precision than the space map
+ * and must be downgraded so that all values fit within the
+ * space map's histogram. This allows us to compare loaded
+ * vs. unloaded metaslabs to determine which metaslab is
+ * considered "best".
+ */
+ if (i > max_idx)
+ continue;
+
+ if (segments != 0) {
+ WEIGHT_SET_COUNT(weight, segments);
+ WEIGHT_SET_INDEX(weight, i);
+ WEIGHT_SET_ACTIVE(weight, 0);
+ break;
+ }
+ }
+ return (weight);
}
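/*
 * Stand-alone sketch (not part of the patch) of how a segment-based
 * weight packs its fields. The exact bit positions are an assumption
 * modelled on the WEIGHT_SET_* macros this series adds to
 * metaslab_impl.h: a 6-bit bucket index above a segment count in the
 * low bits, with the activation and type bits at the top of the word.
 */
#include <stdint.h>
#include <stdio.h>

#define	EX_INDEX_SHIFT	55			/* assumed index position */
#define	EX_COUNT_MASK	((1ULL << EX_INDEX_SHIFT) - 1)

static uint64_t
segment_weight_pack(uint64_t count, unsigned idx)
{
	return (((uint64_t)idx << EX_INDEX_SHIFT) | (count & EX_COUNT_MASK));
}

int
main(void)
{
	/* 37 free segments whose largest power-of-two bucket is 2^17 */
	uint64_t w = segment_weight_pack(37, 17);

	printf("idx=%u count=%llu\n",
	    (unsigned)((w >> EX_INDEX_SHIFT) & 0x3f),
	    (unsigned long long)(w & EX_COUNT_MASK));
	return (0);
}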
-static void
-metaslab_passivate(metaslab_t *msp, uint64_t size)
+/*
+ * Calculate the weight based on the on-disk histogram. This should only
+ * be called after a sync pass has completely finished since the on-disk
+ * information is updated in metaslab_sync().
+ */
+static uint64_t
+metaslab_weight_from_spacemap(metaslab_t *msp)
{
- /*
- * If size < SPA_MINBLOCKSIZE, then we will not allocate from
- * this metaslab again. In that case, it had better be empty,
- * or we would be leaving space on the table.
- */
- ASSERT(size >= SPA_MINBLOCKSIZE || range_tree_space(msp->ms_tree) == 0);
- metaslab_group_sort(msp->ms_group, msp, MIN(msp->ms_weight, size));
- ASSERT((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0);
+ uint64_t weight = 0;
+ int i;
+
+ for (i = SPACE_MAP_HISTOGRAM_SIZE - 1; i >= 0; i--) {
+ if (msp->ms_sm->sm_phys->smp_histogram[i] != 0) {
+ WEIGHT_SET_COUNT(weight,
+ msp->ms_sm->sm_phys->smp_histogram[i]);
+ WEIGHT_SET_INDEX(weight, i +
+ msp->ms_sm->sm_shift);
+ WEIGHT_SET_ACTIVE(weight, 0);
+ break;
+ }
+ }
+ return (weight);
}
-static void
-metaslab_preload(void *arg)
+/*
+ * Compute a segment-based weight for the specified metaslab. The weight
+ * is determined by the highest bucket in the histogram. The information
+ * for the highest bucket is encoded into the weight value.
+ */
+static uint64_t
+metaslab_segment_weight(metaslab_t *msp)
{
- metaslab_t *msp = arg;
- spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
- fstrans_cookie_t cookie = spl_fstrans_mark();
-
- ASSERT(!MUTEX_HELD(&msp->ms_group->mg_lock));
+ metaslab_group_t *mg = msp->ms_group;
+ uint64_t weight = 0;
+ uint8_t shift = mg->mg_vd->vdev_ashift;
- mutex_enter(&msp->ms_lock);
- metaslab_load_wait(msp);
- if (!msp->ms_loaded)
- (void) metaslab_load(msp);
+ ASSERT(MUTEX_HELD(&msp->ms_lock));
/*
- * Set the ms_access_txg value so that we don't unload it right away.
+ * The metaslab is completely free.
*/
- msp->ms_access_txg = spa_syncing_txg(spa) + metaslab_unload_delay + 1;
- mutex_exit(&msp->ms_lock);
- spl_fstrans_unmark(cookie);
-}
+ if (space_map_allocated(msp->ms_sm) == 0) {
+ int idx = highbit64(msp->ms_size) - 1;
+ int max_idx = SPACE_MAP_HISTOGRAM_SIZE + shift - 1;
-static void
-metaslab_group_preload(metaslab_group_t *mg)
-{
- spa_t *spa = mg->mg_vd->vdev_spa;
- metaslab_t *msp;
- avl_tree_t *t = &mg->mg_metaslab_tree;
- int m = 0;
+ if (idx < max_idx) {
+ WEIGHT_SET_COUNT(weight, 1ULL);
+ WEIGHT_SET_INDEX(weight, idx);
+ } else {
+ WEIGHT_SET_COUNT(weight, 1ULL << (idx - max_idx));
+ WEIGHT_SET_INDEX(weight, max_idx);
+ }
+ WEIGHT_SET_ACTIVE(weight, 0);
+ ASSERT(!WEIGHT_IS_SPACEBASED(weight));
- if (spa_shutting_down(spa) || !metaslab_preload_enabled) {
- taskq_wait_outstanding(mg->mg_taskq, 0);
- return;
+ return (weight);
}
- mutex_enter(&mg->mg_lock);
+ ASSERT3U(msp->ms_sm->sm_dbuf->db_size, ==, sizeof (space_map_phys_t));
+
/*
- * Load the next potential metaslabs
+ * If the metaslab is fully allocated then just make the weight 0.
+ */
+ if (space_map_allocated(msp->ms_sm) == msp->ms_size)
+ return (0);
+ /*
+ * If the metaslab is already loaded, then use the range tree to
+ * determine the weight. Otherwise, we rely on the space map information
+ * to generate the weight.
*/
- msp = avl_first(t);
- while (msp != NULL) {
- metaslab_t *msp_next = AVL_NEXT(t, msp);
+ if (msp->ms_loaded) {
+ weight = metaslab_weight_from_range_tree(msp);
+ } else {
+ weight = metaslab_weight_from_spacemap(msp);
+ }
- /*
- * We preload only the maximum number of metaslabs specified
- * by metaslab_preload_limit. If a metaslab is being forced
- * to condense then we preload it too. This will ensure
- * that force condensing happens in the next txg.
- */
- if (++m > metaslab_preload_limit && !msp->ms_condense_wanted) {
- msp = msp_next;
- continue;
+ /*
+ * If the metaslab was active the last time we calculated its weight
+ * then keep it active. We want to consume the entire region that
+ * is associated with this weight.
+ */
+ if (msp->ms_activation_weight != 0 && weight != 0)
+ WEIGHT_SET_ACTIVE(weight, WEIGHT_GET_ACTIVE(msp->ms_weight));
+ return (weight);
+}
+
+/*
+ * Determine if we should attempt to allocate from this metaslab. If the
+ * metaslab has a maximum size then we can quickly determine if the desired
+ * allocation size can be satisfied. Otherwise, if we're using segment-based
+ * weighting then we can determine the maximum allocation that this metaslab
+ * can accommodate based on the index encoded in the weight. If we're using
+ * space-based weights then rely on the entire weight (excluding the weight
+ * type bit).
+ */
+boolean_t
+metaslab_should_allocate(metaslab_t *msp, uint64_t asize)
+{
+ boolean_t should_allocate;
+
+ if (msp->ms_max_size != 0)
+ return (msp->ms_max_size >= asize);
+
+ if (!WEIGHT_IS_SPACEBASED(msp->ms_weight)) {
+ /*
+ * The metaslab segment weight indicates segments in the
+ * range [2^i, 2^(i+1)), where i is the index in the weight.
+ * Since the asize might be in the middle of the range, we
+ * should attempt the allocation if asize < 2^(i+1).
+ */
+ should_allocate = (asize <
+ 1ULL << (WEIGHT_GET_INDEX(msp->ms_weight) + 1));
+ } else {
+ should_allocate = (asize <=
+ (msp->ms_weight & ~METASLAB_WEIGHT_TYPE));
+ }
+ return (should_allocate);
+}
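/*
 * Stand-alone illustration (not part of the patch) of the segment-weight
 * admission test above. An index of 16 means the tracked segments fall
 * in [64K, 128K), so any request under 128K is worth attempting even
 * though an individual segment may prove too small once loaded.
 */
#include <stdint.h>
#include <stdio.h>

static int
segment_should_allocate(unsigned weight_idx, uint64_t asize)
{
	/* segments at index i lie in the range [2^i, 2^(i+1)) */
	return (asize < (1ULL << (weight_idx + 1)));
}

int
main(void)
{
	printf("%d\n", segment_should_allocate(16, 96 * 1024));	/* 1 */
	printf("%d\n", segment_should_allocate(16, 192 * 1024));	/* 0 */
	return (0);
}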
+static uint64_t
+metaslab_weight(metaslab_t *msp)
+{
+ vdev_t *vd = msp->ms_group->mg_vd;
+ spa_t *spa = vd->vdev_spa;
+ uint64_t weight;
+
+ ASSERT(MUTEX_HELD(&msp->ms_lock));
+
+ /*
+ * This vdev is in the process of being removed so there is nothing
+ * for us to do here.
+ */
+ if (vd->vdev_removing) {
+ ASSERT0(space_map_allocated(msp->ms_sm));
+ ASSERT0(vd->vdev_ms_shift);
+ return (0);
+ }
+
+ metaslab_set_fragmentation(msp);
+
+ /*
+ * Update the maximum size if the metaslab is loaded. This will
+ * ensure that we get an accurate maximum size if newly freed space
+ * has been added back into the free tree.
+ */
+ if (msp->ms_loaded)
+ msp->ms_max_size = metaslab_block_maxsize(msp);
+
+ /*
+ * Segment-based weighting requires space map histogram support.
+ */
+ if (zfs_metaslab_segment_weight_enabled &&
+ spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM) &&
+ (msp->ms_sm == NULL || msp->ms_sm->sm_dbuf->db_size ==
+ sizeof (space_map_phys_t))) {
+ weight = metaslab_segment_weight(msp);
+ } else {
+ weight = metaslab_space_weight(msp);
+ }
+ return (weight);
+}
+
+static int
+metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
+{
+ ASSERT(MUTEX_HELD(&msp->ms_lock));
+
+ if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) {
+ metaslab_load_wait(msp);
+ if (!msp->ms_loaded) {
+ int error = metaslab_load(msp);
+ if (error) {
+ metaslab_group_sort(msp->ms_group, msp, 0);
+ return (error);
+ }
}
+ msp->ms_activation_weight = msp->ms_weight;
+ metaslab_group_sort(msp->ms_group, msp,
+ msp->ms_weight | activation_weight);
+ }
+ ASSERT(msp->ms_loaded);
+ ASSERT(msp->ms_weight & METASLAB_ACTIVE_MASK);
+
+ return (0);
+}
+
+static void
+metaslab_passivate(metaslab_t *msp, uint64_t weight)
+{
+ ASSERTV(uint64_t size = weight & ~METASLAB_WEIGHT_TYPE);
+
+ /*
+ * If size < SPA_MINBLOCKSIZE, then we will not allocate from
+ * this metaslab again. In that case, it had better be empty,
+ * or we would be leaving space on the table.
+ */
+ ASSERT(size >= SPA_MINBLOCKSIZE ||
+ range_tree_space(msp->ms_tree) == 0);
+ ASSERT0(weight & METASLAB_ACTIVE_MASK);
+
+ msp->ms_activation_weight = 0;
+ metaslab_group_sort(msp->ms_group, msp, weight);
+ ASSERT((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0);
+}
+
+/*
+ * Segment-based metaslabs are activated once and remain active until
+ * we either fail an allocation attempt (similar to space-based metaslabs)
+ * or have exhausted the free space in zfs_metaslab_switch_threshold
+ * buckets since the metaslab was activated. This function checks to see
+ * if we've exhausted the zfs_metaslab_switch_threshold buckets in the
+ * metaslab and passivates it proactively. This will allow us to select a
+ * metaslab with a larger contiguous region, if any, remaining within this
+ * metaslab group. If we're in sync pass > 1, then we continue using this
+ * metaslab so that we don't dirty more blocks and cause more sync passes.
+ */
+void
+metaslab_segment_may_passivate(metaslab_t *msp)
+{
+ spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+ uint64_t weight;
+ int activation_idx, current_idx;
+
+ if (WEIGHT_IS_SPACEBASED(msp->ms_weight) || spa_sync_pass(spa) > 1)
+ return;
+
+ /*
+ * Since we are in the middle of a sync pass, the most accurate
+ * information that is accessible to us is the in-core range tree
+ * histogram; calculate the new weight based on that information.
+ */
+ weight = metaslab_weight_from_range_tree(msp);
+ activation_idx = WEIGHT_GET_INDEX(msp->ms_activation_weight);
+ current_idx = WEIGHT_GET_INDEX(weight);
+
+ if (current_idx <= activation_idx - zfs_metaslab_switch_threshold)
+ metaslab_passivate(msp, weight);
+}
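/*
 * Worked example (not part of the patch) of the passivation rule above:
 * with zfs_metaslab_switch_threshold of 2 and an activation index of 17
 * (segments of 128K-256K), the metaslab stays active until its largest
 * bucket drops to 2^15, i.e. two powers of two below activation.
 */
#include <stdio.h>

int
main(void)
{
	int activation_idx = 17;
	int threshold = 2;	/* zfs_metaslab_switch_threshold */
	int idx;

	for (idx = 17; idx >= 14; idx--)
		printf("idx %d: %s\n", idx,
		    (idx <= activation_idx - threshold) ?
		    "passivate" : "keep");
	return (0);
}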
+
+static void
+metaslab_preload(void *arg)
+{
+ metaslab_t *msp = arg;
+ spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
+ fstrans_cookie_t cookie = spl_fstrans_mark();
+
+ ASSERT(!MUTEX_HELD(&msp->ms_group->mg_lock));
+
+ mutex_enter(&msp->ms_lock);
+ metaslab_load_wait(msp);
+ if (!msp->ms_loaded)
+ (void) metaslab_load(msp);
+ msp->ms_selected_txg = spa_syncing_txg(spa);
+ mutex_exit(&msp->ms_lock);
+ spl_fstrans_unmark(cookie);
+}
+
+static void
+metaslab_group_preload(metaslab_group_t *mg)
+{
+ spa_t *spa = mg->mg_vd->vdev_spa;
+ metaslab_t *msp;
+ avl_tree_t *t = &mg->mg_metaslab_tree;
+ int m = 0;
+
+ if (spa_shutting_down(spa) || !metaslab_preload_enabled) {
+ taskq_wait_outstanding(mg->mg_taskq, 0);
+ return;
+ }
+
+ mutex_enter(&mg->mg_lock);
+ /*
+ * Load the next potential metaslabs
+ */
+ for (msp = avl_first(t); msp != NULL; msp = AVL_NEXT(t, msp)) {
/*
- * We must drop the metaslab group lock here to preserve
- * lock ordering with the ms_lock (when grabbing both
- * the mg_lock and the ms_lock, the ms_lock must be taken
- * first). As a result, it is possible that the ordering
- * of the metaslabs within the avl tree may change before
- * we reacquire the lock. The metaslab cannot be removed from
- * the tree while we're in syncing context so it is safe to
- * drop the mg_lock here. If the metaslabs are reordered
- * nothing will break -- we just may end up loading a
- * less than optimal one.
+ * We preload only the maximum number of metaslabs specified
+ * by metaslab_preload_limit. If a metaslab is being forced
+ * to condense then we preload it too. This will ensure
+ * that force condensing happens in the next txg.
*/
- mutex_exit(&mg->mg_lock);
+ if (++m > metaslab_preload_limit && !msp->ms_condense_wanted) {
+ continue;
+ }
+
VERIFY(taskq_dispatch(mg->mg_taskq, metaslab_preload,
- msp, TQ_SLEEP) != 0);
- mutex_enter(&mg->mg_lock);
- msp = msp_next;
+ msp, TQ_SLEEP) != TASKQID_INVALID);
}
mutex_exit(&mg->mg_lock);
}
@@ -1687,7 +2070,7 @@ metaslab_should_condense(metaslab_t *msp)
range_seg_t *rs;
uint64_t size, entries, segsz, object_size, optimal_size, record_size;
dmu_object_info_t doi;
- uint64_t vdev_blocksize = 1 << msp->ms_group->mg_vd->vdev_ashift;
+ uint64_t vdev_blocksize = 1ULL << msp->ms_group->mg_vd->vdev_ashift;
ASSERT(MUTEX_HELD(&msp->ms_lock));
ASSERT(msp->ms_loaded);
@@ -1732,7 +2115,6 @@ static void
metaslab_condense(metaslab_t *msp, uint64_t txg, dmu_tx_t *tx)
{
spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
- range_tree_t *freetree = msp->ms_freetree[txg & TXG_MASK];
range_tree_t *condense_tree;
space_map_t *sm = msp->ms_sm;
int t;
@@ -1742,10 +2124,11 @@ metaslab_condense(metaslab_t *msp, uint64_t txg, dmu_tx_t *tx)
ASSERT(msp->ms_loaded);
- spa_dbgmsg(spa, "condensing: txg %llu, msp[%llu] %p, "
- "smp size %llu, segments %lu, forcing condense=%s", txg,
- msp->ms_id, msp, space_map_length(msp->ms_sm),
- avl_numnodes(&msp->ms_tree->rt_root),
+ spa_dbgmsg(spa, "condensing: txg %llu, msp[%llu] %p, vdev id %llu, "
+ "spa %s, smp size %llu, segments %lu, forcing condense=%s", txg,
+ msp->ms_id, msp, msp->ms_group->mg_vd->vdev_id,
+ msp->ms_group->mg_vd->vdev_spa->spa_name,
+ space_map_length(msp->ms_sm), avl_numnodes(&msp->ms_tree->rt_root),
msp->ms_condense_wanted ? "TRUE" : "FALSE");
msp->ms_condense_wanted = B_FALSE;
@@ -1763,9 +2146,9 @@ metaslab_condense(metaslab_t *msp, uint64_t txg, dmu_tx_t *tx)
/*
* Remove what's been freed in this txg from the condense_tree.
* Since we're in sync_pass 1, we know that all the frees from
- * this txg are in the freetree.
+ * this txg are in the freeingtree.
*/
- range_tree_walk(freetree, range_tree_remove, condense_tree);
+ range_tree_walk(msp->ms_freeingtree, range_tree_remove, condense_tree);
for (t = 0; t < TXG_DEFER_SIZE; t++) {
range_tree_walk(msp->ms_defertree[t],
@@ -1793,7 +2176,7 @@ metaslab_condense(metaslab_t *msp, uint64_t txg, dmu_tx_t *tx)
mutex_enter(&msp->ms_lock);
/*
- * While we would ideally like to create a space_map representation
+ * While we would ideally like to create a space map representation
* that consists only of allocation records, doing so can be
* prohibitively expensive because the in-core free tree can be
* large, and therefore computationally expensive to subtract
@@ -1821,9 +2204,6 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
spa_t *spa = vd->vdev_spa;
objset_t *mos = spa_meta_objset(spa);
range_tree_t *alloctree = msp->ms_alloctree[txg & TXG_MASK];
- range_tree_t **freetree = &msp->ms_freetree[txg & TXG_MASK];
- range_tree_t **freed_tree =
- &msp->ms_freetree[TXG_CLEAN(txg) & TXG_MASK];
dmu_tx_t *tx;
uint64_t object = space_map_object(msp->ms_sm);
@@ -1832,31 +2212,35 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
/*
* This metaslab has just been added so there's no work to do now.
*/
- if (*freetree == NULL) {
+ if (msp->ms_freeingtree == NULL) {
ASSERT3P(alloctree, ==, NULL);
return;
}
ASSERT3P(alloctree, !=, NULL);
- ASSERT3P(*freetree, !=, NULL);
- ASSERT3P(*freed_tree, !=, NULL);
+ ASSERT3P(msp->ms_freeingtree, !=, NULL);
+ ASSERT3P(msp->ms_freedtree, !=, NULL);
/*
* Normally, we don't want to process a metaslab if there
* are no allocations or frees to perform. However, if the metaslab
- * is being forced to condense we need to let it through.
+ * is being forced to condense and it's loaded, we need to let it
+ * through.
*/
if (range_tree_space(alloctree) == 0 &&
- range_tree_space(*freetree) == 0 &&
- !msp->ms_condense_wanted)
+ range_tree_space(msp->ms_freeingtree) == 0 &&
+ !(msp->ms_loaded && msp->ms_condense_wanted))
return;
+
+ VERIFY(txg <= spa_final_dirty_txg(spa));
+
/*
* The only state that can actually be changing concurrently with
* metaslab_sync() is the metaslab's ms_tree. No other thread can
- * be modifying this txg's alloctree, freetree, freed_tree, or
+ * be modifying this txg's alloctree, freeingtree, freedtree, or
 * space_map_phys_t. Therefore, we only hold ms_lock to satisfy
- * space_map ASSERTs. We drop it whenever we call into the DMU,
+ * space map ASSERTs. We drop it whenever we call into the DMU,
* because the DMU can call down to us (e.g. via zio_free()) at
* any time.
*/
@@ -1878,8 +2262,8 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
mutex_enter(&msp->ms_lock);
/*
- * Note: metaslab_condense() clears the space_map's histogram.
- * Therefore we muse verify and remove this histogram before
+ * Note: metaslab_condense() clears the space map's histogram.
+ * Therefore we must verify and remove this histogram before
* condensing.
*/
metaslab_group_histogram_verify(mg);
@@ -1891,10 +2275,12 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
metaslab_condense(msp, txg, tx);
} else {
space_map_write(msp->ms_sm, alloctree, SM_ALLOC, tx);
- space_map_write(msp->ms_sm, *freetree, SM_FREE, tx);
+ space_map_write(msp->ms_sm, msp->ms_freeingtree, SM_FREE, tx);
}
if (msp->ms_loaded) {
+ int t;
+
/*
 * When the space map is loaded, we have an accurate
* histogram in the range tree. This gives us an opportunity
@@ -1903,35 +2289,59 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
*/
space_map_histogram_clear(msp->ms_sm);
space_map_histogram_add(msp->ms_sm, msp->ms_tree, tx);
- } else {
+
/*
- * Since the space map is not loaded we simply update the
- * exisiting histogram with what was freed in this txg. This
- * means that the on-disk histogram may not have an accurate
- * view of the free space but it's close enough to allow
- * us to make allocation decisions.
+ * Since we've cleared the histogram we need to add back
+ * any free space that has already been processed, plus
+ * any deferred space. This allows the on-disk histogram
+ * to accurately reflect all free space even if some space
+ * is not yet available for allocation (i.e. deferred).
*/
- space_map_histogram_add(msp->ms_sm, *freetree, tx);
+ space_map_histogram_add(msp->ms_sm, msp->ms_freedtree, tx);
+
+ /*
+ * Add back any deferred free space that has not been
+ * added back into the in-core free tree yet. This will
+ * ensure that we don't end up with a space map histogram
+ * that is completely empty unless the metaslab is fully
+ * allocated.
+ */
+ for (t = 0; t < TXG_DEFER_SIZE; t++) {
+ space_map_histogram_add(msp->ms_sm,
+ msp->ms_defertree[t], tx);
+ }
}
+
+ /*
+ * Always add the free space from this sync pass to the space
+ * map histogram. We want to make sure that the on-disk histogram
+ * accounts for all free space. If the space map is not loaded,
+ * then we will lose some accuracy but will correct it the next
+ * time we load the space map.
+ */
+ space_map_histogram_add(msp->ms_sm, msp->ms_freeingtree, tx);
+
metaslab_group_histogram_add(mg, msp);
metaslab_group_histogram_verify(mg);
metaslab_class_histogram_verify(mg->mg_class);
/*
* For sync pass 1, we avoid traversing this txg's free range tree
- * and instead will just swap the pointers for freetree and
- * freed_tree. We can safely do this since the freed_tree is
+ * and instead will just swap the pointers for freeingtree and
+ * freedtree. We can safely do this since the freedtree is
* guaranteed to be empty on the initial pass.
*/
if (spa_sync_pass(spa) == 1) {
- range_tree_swap(freetree, freed_tree);
+ range_tree_swap(&msp->ms_freeingtree, &msp->ms_freedtree);
} else {
- range_tree_vacate(*freetree, range_tree_add, *freed_tree);
+ range_tree_vacate(msp->ms_freeingtree,
+ range_tree_add, msp->ms_freedtree);
}
range_tree_vacate(alloctree, NULL, NULL);
ASSERT0(range_tree_space(msp->ms_alloctree[txg & TXG_MASK]));
- ASSERT0(range_tree_space(msp->ms_freetree[txg & TXG_MASK]));
+ ASSERT0(range_tree_space(msp->ms_alloctree[TXG_CLEAN(txg) & TXG_MASK]));
+ ASSERT0(range_tree_space(msp->ms_freeingtree));
mutex_exit(&msp->ms_lock);
@@ -1952,9 +2362,11 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
{
metaslab_group_t *mg = msp->ms_group;
vdev_t *vd = mg->mg_vd;
- range_tree_t **freed_tree;
+ spa_t *spa = vd->vdev_spa;
range_tree_t **defer_tree;
int64_t alloc_delta, defer_delta;
+ uint64_t free_space;
+ boolean_t defer_allowed = B_TRUE;
int t;
ASSERT(!vd->vdev_ishole);
@@ -1963,20 +2375,24 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
/*
* If this metaslab is just becoming available, initialize its
- * alloctrees, freetrees, and defertree and add its capacity to
- * the vdev.
+ * range trees and add its capacity to the vdev.
*/
- if (msp->ms_freetree[TXG_CLEAN(txg) & TXG_MASK] == NULL) {
+ if (msp->ms_freedtree == NULL) {
for (t = 0; t < TXG_SIZE; t++) {
ASSERT(msp->ms_alloctree[t] == NULL);
- ASSERT(msp->ms_freetree[t] == NULL);
msp->ms_alloctree[t] = range_tree_create(NULL, msp,
&msp->ms_lock);
- msp->ms_freetree[t] = range_tree_create(NULL, msp,
- &msp->ms_lock);
}
+ ASSERT3P(msp->ms_freeingtree, ==, NULL);
+ msp->ms_freeingtree = range_tree_create(NULL, msp,
+ &msp->ms_lock);
+
+ ASSERT3P(msp->ms_freedtree, ==, NULL);
+ msp->ms_freedtree = range_tree_create(NULL, msp,
+ &msp->ms_lock);
+
for (t = 0; t < TXG_DEFER_SIZE; t++) {
ASSERT(msp->ms_defertree[t] == NULL);
@@ -1987,18 +2403,25 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
vdev_space_update(vd, 0, 0, msp->ms_size);
}
- freed_tree = &msp->ms_freetree[TXG_CLEAN(txg) & TXG_MASK];
defer_tree = &msp->ms_defertree[txg % TXG_DEFER_SIZE];
+ free_space = metaslab_class_get_space(spa_normal_class(spa)) -
+ metaslab_class_get_alloc(spa_normal_class(spa));
+ if (free_space <= spa_get_slop_space(spa)) {
+ defer_allowed = B_FALSE;
+ }
+
+ defer_delta = 0;
alloc_delta = space_map_alloc_delta(msp->ms_sm);
- defer_delta = range_tree_space(*freed_tree) -
- range_tree_space(*defer_tree);
+ if (defer_allowed) {
+ defer_delta = range_tree_space(msp->ms_freedtree) -
+ range_tree_space(*defer_tree);
+ } else {
+ defer_delta -= range_tree_space(*defer_tree);
+ }
vdev_space_update(vd, alloc_delta + defer_delta, defer_delta, 0);
- ASSERT0(range_tree_space(msp->ms_alloctree[txg & TXG_MASK]));
- ASSERT0(range_tree_space(msp->ms_freetree[txg & TXG_MASK]));
-
/*
* If there's a metaslab_load() in progress, wait for it to complete
* so that we have a consistent view of the in-core space map.
@@ -2013,7 +2436,12 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
*/
range_tree_vacate(*defer_tree,
msp->ms_loaded ? range_tree_add : NULL, msp->ms_tree);
- range_tree_swap(freed_tree, defer_tree);
+ if (defer_allowed) {
+ range_tree_swap(&msp->ms_freedtree, defer_tree);
+ } else {
+ range_tree_vacate(msp->ms_freedtree,
+ msp->ms_loaded ? range_tree_add : NULL, msp->ms_tree);
+ }
space_map_update(msp->ms_sm);
@@ -2028,7 +2456,19 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
vdev_dirty(vd, VDD_METASLAB, msp, txg + 1);
}
- if (msp->ms_loaded && msp->ms_access_txg < txg) {
+ /*
+ * Calculate the new weights before unloading any metaslabs.
+ * This will give us the most accurate weighting.
+ */
+ metaslab_group_sort(mg, msp, metaslab_weight(msp));
+
+ /*
+ * If the metaslab is loaded and we've not tried to load or allocate
+ * from it in 'metaslab_unload_delay' txgs, then unload it.
+ */
+ if (msp->ms_loaded &&
+ msp->ms_selected_txg + metaslab_unload_delay < txg) {
+
for (t = 1; t < TXG_CONCURRENT_STATES; t++) {
VERIFY0(range_tree_space(
msp->ms_alloctree[(txg + t) & TXG_MASK]));
@@ -2038,7 +2478,6 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
metaslab_unload(msp);
}
- metaslab_group_sort(mg, msp, metaslab_weight(msp));
mutex_exit(&msp->ms_lock);
}
@@ -2071,14 +2510,238 @@ metaslab_distance(metaslab_t *msp, dva_t *dva)
return (0);
}
+/*
+ * ==========================================================================
+ * Metaslab allocation tracing facility
+ * ==========================================================================
+ */
+#ifdef _METASLAB_TRACING
+kstat_t *metaslab_trace_ksp;
+kstat_named_t metaslab_trace_over_limit;
+
+void
+metaslab_alloc_trace_init(void)
+{
+ ASSERT(metaslab_alloc_trace_cache == NULL);
+ metaslab_alloc_trace_cache = kmem_cache_create(
+ "metaslab_alloc_trace_cache", sizeof (metaslab_alloc_trace_t),
+ 0, NULL, NULL, NULL, NULL, NULL, 0);
+ metaslab_trace_ksp = kstat_create("zfs", 0, "metaslab_trace_stats",
+ "misc", KSTAT_TYPE_NAMED, 1, KSTAT_FLAG_VIRTUAL);
+ if (metaslab_trace_ksp != NULL) {
+ metaslab_trace_ksp->ks_data = &metaslab_trace_over_limit;
+ kstat_named_init(&metaslab_trace_over_limit,
+ "metaslab_trace_over_limit", KSTAT_DATA_UINT64);
+ kstat_install(metaslab_trace_ksp);
+ }
+}
+
+void
+metaslab_alloc_trace_fini(void)
+{
+ if (metaslab_trace_ksp != NULL) {
+ kstat_delete(metaslab_trace_ksp);
+ metaslab_trace_ksp = NULL;
+ }
+ kmem_cache_destroy(metaslab_alloc_trace_cache);
+ metaslab_alloc_trace_cache = NULL;
+}
+
+/*
+ * Add an allocation trace element to the allocation tracing list.
+ */
+static void
+metaslab_trace_add(zio_alloc_list_t *zal, metaslab_group_t *mg,
+ metaslab_t *msp, uint64_t psize, uint32_t dva_id, uint64_t offset)
+{
+ metaslab_alloc_trace_t *mat;
+
+ if (!metaslab_trace_enabled)
+ return;
+
+ /*
+ * When the tracing list reaches its maximum we remove
+ * the second element in the list before adding a new one.
+ * By removing the second element we preserve the original
+ * entry as a clue to what allocation steps have already been
+ * performed.
+ */
+ if (zal->zal_size == metaslab_trace_max_entries) {
+ metaslab_alloc_trace_t *mat_next;
+#ifdef DEBUG
+ panic("too many entries in allocation list");
+#endif
+ atomic_inc_64(&metaslab_trace_over_limit.value.ui64);
+ zal->zal_size--;
+ mat_next = list_next(&zal->zal_list, list_head(&zal->zal_list));
+ list_remove(&zal->zal_list, mat_next);
+ kmem_cache_free(metaslab_alloc_trace_cache, mat_next);
+ }
+
+ mat = kmem_cache_alloc(metaslab_alloc_trace_cache, KM_SLEEP);
+ list_link_init(&mat->mat_list_node);
+ mat->mat_mg = mg;
+ mat->mat_msp = msp;
+ mat->mat_size = psize;
+ mat->mat_dva_id = dva_id;
+ mat->mat_offset = offset;
+ mat->mat_weight = 0;
+
+ if (msp != NULL)
+ mat->mat_weight = msp->ms_weight;
+
+ /*
+ * The list is part of the zio so locking is not required. Only
+ * a single thread will perform allocations for a given zio.
+ */
+ list_insert_tail(&zal->zal_list, mat);
+ zal->zal_size++;
+
+ ASSERT3U(zal->zal_size, <=, metaslab_trace_max_entries);
+}
+
+void
+metaslab_trace_init(zio_alloc_list_t *zal)
+{
+ list_create(&zal->zal_list, sizeof (metaslab_alloc_trace_t),
+ offsetof(metaslab_alloc_trace_t, mat_list_node));
+ zal->zal_size = 0;
+}
+
+void
+metaslab_trace_fini(zio_alloc_list_t *zal)
+{
+ metaslab_alloc_trace_t *mat;
+
+ while ((mat = list_remove_head(&zal->zal_list)) != NULL)
+ kmem_cache_free(metaslab_alloc_trace_cache, mat);
+ list_destroy(&zal->zal_list);
+ zal->zal_size = 0;
+}
+#else
+
+#define metaslab_trace_add(zal, mg, msp, psize, id, off)
+
+void
+metaslab_alloc_trace_init(void)
+{
+}
+
+void
+metaslab_alloc_trace_fini(void)
+{
+}
+
+void
+metaslab_trace_init(zio_alloc_list_t *zal)
+{
+}
+
+void
+metaslab_trace_fini(zio_alloc_list_t *zal)
+{
+}
+
+#endif /* _METASLAB_TRACING */
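/*
 * Stand-alone sketch (not part of the patch) of the bounded-trace policy
 * implemented by metaslab_trace_add(): when the list is full, the oldest
 * entry is kept as a clue to how the allocation began and the
 * second-oldest is evicted. A plain array stands in for the kernel
 * list_t used above.
 */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define	TRACE_MAX	4

static uint64_t trace[TRACE_MAX];
static int trace_len;

static void
trace_add(uint64_t entry)
{
	if (trace_len == TRACE_MAX) {
		/* evict trace[1], preserving the original trace[0] */
		memmove(&trace[1], &trace[2],
		    (TRACE_MAX - 2) * sizeof (trace[0]));
		trace_len--;
	}
	trace[trace_len++] = entry;
}

int
main(void)
{
	int i;

	for (i = 0; i < 7; i++)
		trace_add(i);
	for (i = 0; i < trace_len; i++)
		printf("%llu ", (unsigned long long)trace[i]);
	printf("\n");	/* prints: 0 4 5 6 */
	return (0);
}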
+
+/*
+ * ==========================================================================
+ * Metaslab block operations
+ * ==========================================================================
+ */
+
+static void
+metaslab_group_alloc_increment(spa_t *spa, uint64_t vdev, void *tag, int flags)
+{
+ metaslab_group_t *mg;
+
+ if (!(flags & METASLAB_ASYNC_ALLOC) ||
+ flags & METASLAB_DONT_THROTTLE)
+ return;
+
+ mg = vdev_lookup_top(spa, vdev)->vdev_mg;
+ if (!mg->mg_class->mc_alloc_throttle_enabled)
+ return;
+
+ (void) refcount_add(&mg->mg_alloc_queue_depth, tag);
+}
+
+void
+metaslab_group_alloc_decrement(spa_t *spa, uint64_t vdev, void *tag, int flags)
+{
+ metaslab_group_t *mg;
+
+ if (!(flags & METASLAB_ASYNC_ALLOC) ||
+ flags & METASLAB_DONT_THROTTLE)
+ return;
+
+ mg = vdev_lookup_top(spa, vdev)->vdev_mg;
+ if (!mg->mg_class->mc_alloc_throttle_enabled)
+ return;
+
+ (void) refcount_remove(&mg->mg_alloc_queue_depth, tag);
+}
+
+void
+metaslab_group_alloc_verify(spa_t *spa, const blkptr_t *bp, void *tag)
+{
+#ifdef ZFS_DEBUG
+ const dva_t *dva = bp->blk_dva;
+ int ndvas = BP_GET_NDVAS(bp);
+ int d;
+
+ for (d = 0; d < ndvas; d++) {
+ uint64_t vdev = DVA_GET_VDEV(&dva[d]);
+ metaslab_group_t *mg = vdev_lookup_top(spa, vdev)->vdev_mg;
+ VERIFY(refcount_not_held(&mg->mg_alloc_queue_depth, tag));
+ }
+#endif
+}
+
static uint64_t
-metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
- uint64_t txg, uint64_t min_distance, dva_t *dva, int d)
+metaslab_block_alloc(metaslab_t *msp, uint64_t size, uint64_t txg)
+{
+ uint64_t start;
+ range_tree_t *rt = msp->ms_tree;
+ metaslab_class_t *mc = msp->ms_group->mg_class;
+
+ VERIFY(!msp->ms_condensing);
+
+ start = mc->mc_ops->msop_alloc(msp, size);
+ if (start != -1ULL) {
+ metaslab_group_t *mg = msp->ms_group;
+ vdev_t *vd = mg->mg_vd;
+
+ VERIFY0(P2PHASE(start, 1ULL << vd->vdev_ashift));
+ VERIFY0(P2PHASE(size, 1ULL << vd->vdev_ashift));
+ VERIFY3U(range_tree_space(rt) - size, <=, msp->ms_size);
+ range_tree_remove(rt, start, size);
+
+ if (range_tree_space(msp->ms_alloctree[txg & TXG_MASK]) == 0)
+ vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg);
+
+ range_tree_add(msp->ms_alloctree[txg & TXG_MASK], start, size);
+
+ /* Track the last successful allocation */
+ msp->ms_alloc_txg = txg;
+ metaslab_verify_space(msp, txg);
+ }
+
+ /*
+ * Now that we've attempted the allocation we need to update the
+ * metaslab's maximum block size since it may have changed.
+ */
+ msp->ms_max_size = metaslab_block_maxsize(msp);
+ return (start);
+}
+
+static uint64_t
+metaslab_group_alloc_normal(metaslab_group_t *mg, zio_alloc_list_t *zal,
+ uint64_t asize, uint64_t txg, uint64_t min_distance, dva_t *dva, int d)
{
- spa_t *spa = mg->mg_vd->vdev_spa;
metaslab_t *msp = NULL;
+ metaslab_t *search;
uint64_t offset = -1ULL;
- avl_tree_t *t = &mg->mg_metaslab_tree;
uint64_t activation_weight;
uint64_t target_distance;
int i;
@@ -2091,20 +2754,39 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
}
}
+ search = kmem_alloc(sizeof (*search), KM_SLEEP);
+ search->ms_weight = UINT64_MAX;
+ search->ms_start = 0;
for (;;) {
boolean_t was_active;
+ avl_tree_t *t = &mg->mg_metaslab_tree;
+ avl_index_t idx;
mutex_enter(&mg->mg_lock);
- for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) {
- if (msp->ms_weight < asize) {
- spa_dbgmsg(spa, "%s: failed to meet weight "
- "requirement: vdev %llu, txg %llu, mg %p, "
- "msp %p, psize %llu, asize %llu, "
- "weight %llu", spa_name(spa),
- mg->mg_vd->vdev_id, txg,
- mg, msp, psize, asize, msp->ms_weight);
- mutex_exit(&mg->mg_lock);
- return (-1ULL);
+
+ /*
+ * Find the metaslab with the highest weight that is less
+ * than what we've already tried. In the common case, this
+ * means that we will examine each metaslab at most once.
+ * Note that concurrent callers could reorder metaslabs
+ * by activation/passivation once we have dropped the mg_lock.
+ * If a metaslab is activated by another thread, and we fail
+ * to allocate from the metaslab we have selected, we may
+ * not try the newly-activated metaslab, and instead activate
+ * another metaslab. This is not optimal, but generally
+ * does not cause any problems (a possible exception being
+ * if every metaslab is completely full except for the
+ * newly-activated metaslab which we fail to examine).
+ */
+ msp = avl_find(t, search, &idx);
+ if (msp == NULL)
+ msp = avl_nearest(t, idx, AVL_AFTER);
+ for (; msp != NULL; msp = AVL_NEXT(t, msp)) {
+
+ if (!metaslab_should_allocate(msp, asize)) {
+ metaslab_trace_add(zal, mg, msp, asize, d,
+ TRACE_TOO_SMALL);
+ continue;
}
/*
@@ -2121,16 +2803,21 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
(space_map_allocated(msp->ms_sm) != 0 ? 0 :
min_distance >> 1);
- for (i = 0; i < d; i++)
+ for (i = 0; i < d; i++) {
if (metaslab_distance(msp, &dva[i]) <
target_distance)
break;
+ }
if (i == d)
break;
}
mutex_exit(&mg->mg_lock);
- if (msp == NULL)
+ if (msp == NULL) {
+ kmem_free(search, sizeof (*search));
return (-1ULL);
+ }
+ search->ms_weight = msp->ms_weight;
+ search->ms_start = msp->ms_start + 1;
mutex_enter(&msp->ms_lock);
@@ -2138,11 +2825,11 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
* Ensure that the metaslab we have selected is still
* capable of handling our request. It's possible that
* another thread may have changed the weight while we
- * were blocked on the metaslab lock.
+ * were blocked on the metaslab lock. We check the
+ * active status first to see if we need to reselect
+ * a new metaslab.
*/
- if (msp->ms_weight < asize || (was_active &&
- !(msp->ms_weight & METASLAB_ACTIVE_MASK) &&
- activation_weight == METASLAB_WEIGHT_PRIMARY)) {
+ if (was_active && !(msp->ms_weight & METASLAB_ACTIVE_MASK)) {
mutex_exit(&msp->ms_lock);
continue;
}
@@ -2159,6 +2846,22 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
mutex_exit(&msp->ms_lock);
continue;
}
+ msp->ms_selected_txg = txg;
+
+ /*
+ * Now that we have the lock, recheck to see if we should
+ * continue to use this metaslab for this allocation. The
+ * the metaslab is now loaded so metaslab_should_allocate() can
+ * accurately determine if the allocation attempt should
+ * proceed.
+ */
+ if (!metaslab_should_allocate(msp, asize)) {
+ /* Passivate this metaslab and select a new one. */
+ metaslab_trace_add(zal, mg, msp, asize, d,
+ TRACE_TOO_SMALL);
+ goto next;
+ }
+
/*
* If this metaslab is currently condensing then pick again as
@@ -2166,55 +2869,131 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
* to disk.
*/
if (msp->ms_condensing) {
+ metaslab_trace_add(zal, mg, msp, asize, d,
+ TRACE_CONDENSING);
mutex_exit(&msp->ms_lock);
continue;
}
- if ((offset = metaslab_block_alloc(msp, asize)) != -1ULL)
+ offset = metaslab_block_alloc(msp, asize, txg);
+ metaslab_trace_add(zal, mg, msp, asize, d, offset);
+
+ if (offset != -1ULL) {
+ /* Proactively passivate the metaslab, if needed */
+ metaslab_segment_may_passivate(msp);
break;
+ }
+next:
+ ASSERT(msp->ms_loaded);
- metaslab_passivate(msp, metaslab_block_maxsize(msp));
+ /*
+ * We were unable to allocate from this metaslab so determine
+ * a new weight for this metaslab. Now that we have loaded
+ * the metaslab we can provide a better hint to the metaslab
+ * selector.
+ *
+ * For space-based metaslabs, we use the maximum block size.
+ * This information is only available when the metaslab
+ * is loaded and is more accurate than the generic free
+ * space weight that was calculated by metaslab_weight().
+ * This information allows us to quickly compare the maximum
+ * available allocation in the metaslab to the allocation
+ * size being requested.
+ *
+ * For segment-based metaslabs, determine the new weight
+ * based on the highest bucket in the range tree. We
+ * explicitly use the loaded segment weight (i.e. the range
+ * tree histogram) since it contains the space that is
+ * currently available for allocation and is accurate
+ * even within a sync pass.
+ */
+ if (WEIGHT_IS_SPACEBASED(msp->ms_weight)) {
+ uint64_t weight = metaslab_block_maxsize(msp);
+ WEIGHT_SET_SPACEBASED(weight);
+ metaslab_passivate(msp, weight);
+ } else {
+ metaslab_passivate(msp,
+ metaslab_weight_from_range_tree(msp));
+ }
+
+ /*
+ * We have just failed an allocation attempt, check
+ * that metaslab_should_allocate() agrees. Otherwise,
+ * we may end up in an infinite loop retrying the same
+ * metaslab.
+ */
+ ASSERT(!metaslab_should_allocate(msp, asize));
mutex_exit(&msp->ms_lock);
}
+ mutex_exit(&msp->ms_lock);
+ kmem_free(search, sizeof (*search));
+ return (offset);
+}
- if (range_tree_space(msp->ms_alloctree[txg & TXG_MASK]) == 0)
- vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg);
-
- range_tree_add(msp->ms_alloctree[txg & TXG_MASK], offset, asize);
- msp->ms_access_txg = txg + metaslab_unload_delay;
+static uint64_t
+metaslab_group_alloc(metaslab_group_t *mg, zio_alloc_list_t *zal,
+ uint64_t asize, uint64_t txg, uint64_t min_distance, dva_t *dva, int d)
+{
+ uint64_t offset;
+ ASSERT(mg->mg_initialized);
- mutex_exit(&msp->ms_lock);
+ offset = metaslab_group_alloc_normal(mg, zal, asize, txg,
+ min_distance, dva, d);
+ mutex_enter(&mg->mg_lock);
+ if (offset == -1ULL) {
+ mg->mg_failed_allocations++;
+ metaslab_trace_add(zal, mg, NULL, asize, d,
+ TRACE_GROUP_FAILURE);
+ if (asize == SPA_GANGBLOCKSIZE) {
+ /*
+ * This metaslab group was unable to allocate
+ * the minimum gang block size so it must be out of
+ * space. We must notify the allocation throttle
+ * to start skipping allocation attempts to this
+ * metaslab group until more space becomes available.
+ * Note: this failure cannot be caused by the
+ * allocation throttle since the allocation throttle
+ * is only responsible for skipping devices and
+ * not failing block allocations.
+ */
+ mg->mg_no_free_space = B_TRUE;
+ }
+ }
+ mg->mg_allocations++;
+ mutex_exit(&mg->mg_lock);
return (offset);
}
+/*
+ * If we have to write a ditto block (i.e. more than one DVA for a given BP)
+ * on the same vdev as an existing DVA of this BP, then try to allocate it
+ * at least (vdev_asize / (2 ^ ditto_same_vdev_distance_shift)) away from the
+ * existing DVAs.
+ */
+int ditto_same_vdev_distance_shift = 3;
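/*
 * Worked example (not part of the patch): with the default shift of 3,
 * two DVAs of the same BP placed on a 1 TiB vdev must land at least
 * 1 TiB / 2^3 = 128 GiB apart, unless the allocator is trying hard,
 * in which case the required distance drops to 0.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t vdev_asize = 1ULL << 40;	/* 1 TiB */
	int shift = 3;				/* ditto_same_vdev_distance_shift */

	printf("min distance = %llu GiB\n",
	    (unsigned long long)((vdev_asize >> shift) >> 30));
	return (0);
}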
+
/*
* Allocate a block for the specified i/o.
*/
static int
metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
- dva_t *dva, int d, dva_t *hintdva, uint64_t txg, int flags)
+ dva_t *dva, int d, dva_t *hintdva, uint64_t txg, int flags,
+ zio_alloc_list_t *zal)
{
metaslab_group_t *mg, *fast_mg, *rotor;
vdev_t *vd;
- int dshift = 3;
- int all_zero;
- int zio_lock = B_FALSE;
- boolean_t allocatable;
- uint64_t offset = -1ULL;
- uint64_t asize;
- uint64_t distance;
+ boolean_t try_hard = B_FALSE;
ASSERT(!DVA_IS_VALID(&dva[d]));
/*
* For testing, make some blocks above a certain size be gang blocks.
*/
- if (psize >= metaslab_gang_bang && (ddi_get_lbolt() & 3) == 0)
+ if (psize >= metaslab_gang_bang && (ddi_get_lbolt() & 3) == 0) {
+ metaslab_trace_add(zal, NULL, NULL, psize, d, TRACE_FORCE_GANG);
return (SET_ERROR(ENOSPC));
-
- if (flags & METASLAB_FASTWRITE)
- mutex_enter(&mc->mc_fastwrite_lock);
+ }
/*
* Start at the rotor and loop through all mgs until we find something.
@@ -2280,16 +3059,18 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
rotor = mg;
top:
- all_zero = B_TRUE;
do {
- ASSERT(mg->mg_activation_count == 1);
+ boolean_t allocatable;
+ uint64_t offset;
+ uint64_t distance, asize;
+ ASSERT(mg->mg_activation_count == 1);
vd = mg->mg_vd;
/*
* Don't allocate from faulted devices.
*/
- if (zio_lock) {
+ if (try_hard) {
spa_config_enter(spa, SCL_ZIO, FTAG, RW_READER);
allocatable = vdev_allocatable(vd);
spa_config_exit(spa, SCL_ZIO, FTAG);
@@ -2299,45 +3080,59 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
/*
* Determine if the selected metaslab group is eligible
- * for allocations. If we're ganging or have requested
- * an allocation for the smallest gang block size
- * then we don't want to avoid allocating to the this
- * metaslab group. If we're in this condition we should
- * try to allocate from any device possible so that we
- * don't inadvertently return ENOSPC and suspend the pool
+ * for allocations. If we're ganging then don't allow
+ * this metaslab group to skip allocations since that would
+ * inadvertently return ENOSPC and suspend the pool
* even though space is still available.
*/
- if (allocatable && CAN_FASTGANG(flags) &&
- psize > SPA_GANGBLOCKSIZE)
- allocatable = metaslab_group_allocatable(mg);
+ if (allocatable && !GANG_ALLOCATION(flags) && !try_hard) {
+ allocatable = metaslab_group_allocatable(mg, rotor,
+ psize);
+ }
- if (!allocatable)
+ if (!allocatable) {
+ metaslab_trace_add(zal, mg, NULL, psize, d,
+ TRACE_NOT_ALLOCATABLE);
goto next;
+ }
+
+ ASSERT(mg->mg_initialized);
/*
- * Avoid writing single-copy data to a failing vdev
- * unless the user instructs us that it is okay.
+ * Avoid writing single-copy data to a failing,
+ * non-redundant vdev, unless we've already tried all
+ * other vdevs.
*/
if ((vd->vdev_stat.vs_write_errors > 0 ||
vd->vdev_state < VDEV_STATE_HEALTHY) &&
- d == 0 && dshift == 3 && vd->vdev_children == 0) {
- all_zero = B_FALSE;
+ d == 0 && !try_hard && vd->vdev_children == 0) {
+ metaslab_trace_add(zal, mg, NULL, psize, d,
+ TRACE_VDEV_ERROR);
goto next;
}
ASSERT(mg->mg_class == mc);
- distance = vd->vdev_asize >> dshift;
- if (distance <= (1ULL << vd->vdev_ms_shift))
- distance = 0;
- else
- all_zero = B_FALSE;
+ /*
+ * If we don't need to try hard, then require that the
+ * block be 1/8th of the device away from any other DVAs
+ * in this BP. If we are trying hard, allow any offset
+ * to be used (distance=0).
+ */
+ distance = 0;
+ if (!try_hard) {
+ distance = vd->vdev_asize >>
+ ditto_same_vdev_distance_shift;
+ if (distance <= (1ULL << vd->vdev_ms_shift))
+ distance = 0;
+ }
asize = vdev_psize_to_asize(vd, psize);
ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0);
- offset = metaslab_group_alloc(mg, psize, asize, txg, distance,
+ offset = metaslab_group_alloc(mg, zal, asize, txg, distance,
dva, d);
+
if (offset != -1ULL) {
/*
* If we've just selected this metaslab group,
@@ -2394,13 +3189,13 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
DVA_SET_VDEV(&dva[d], vd->vdev_id);
DVA_SET_OFFSET(&dva[d], offset);
- DVA_SET_GANG(&dva[d], !!(flags & METASLAB_GANG_HEADER));
+ DVA_SET_GANG(&dva[d],
+ ((flags & METASLAB_GANG_HEADER) ? 1 : 0));
DVA_SET_ASIZE(&dva[d], asize);
if (flags & METASLAB_FASTWRITE) {
atomic_add_64(&vd->vdev_pending_fastwrite,
psize);
- mutex_exit(&mc->mc_fastwrite_lock);
}
return (0);
@@ -2410,23 +3205,17 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
mc->mc_aliquot = 0;
} while ((mg = mg->mg_next) != rotor);
- if (!all_zero) {
- dshift++;
- ASSERT(dshift < 64);
- goto top;
- }
-
- if (!allocatable && !zio_lock) {
- dshift = 3;
- zio_lock = B_TRUE;
+ /*
+ * If we haven't tried hard, do so now.
+ */
+ if (!try_hard) {
+ try_hard = B_TRUE;
goto top;
}
bzero(&dva[d], sizeof (dva_t));
- if (flags & METASLAB_FASTWRITE)
- mutex_exit(&mc->mc_fastwrite_lock);
-
+ metaslab_trace_add(zal, rotor, NULL, psize, d, TRACE_ENOSPC);
return (SET_ERROR(ENOSPC));
}
@@ -2473,11 +3262,12 @@ metaslab_free_dva(spa_t *spa, const dva_t *dva, uint64_t txg, boolean_t now)
VERIFY0(P2PHASE(offset, 1ULL << vd->vdev_ashift));
VERIFY0(P2PHASE(size, 1ULL << vd->vdev_ashift));
range_tree_add(msp->ms_tree, offset, size);
+ msp->ms_max_size = metaslab_block_maxsize(msp);
} else {
- if (range_tree_space(msp->ms_freetree[txg & TXG_MASK]) == 0)
+ VERIFY3U(txg, ==, spa->spa_syncing_txg);
+ if (range_tree_space(msp->ms_freeingtree) == 0)
vdev_dirty(vd, VDD_METASLAB, msp, txg);
- range_tree_add(msp->ms_freetree[txg & TXG_MASK],
- offset, size);
+ range_tree_add(msp->ms_freeingtree, offset, size);
}
mutex_exit(&msp->ms_lock);
@@ -2540,9 +3330,63 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
return (0);
}
+/*
+ * Reserve some allocation slots. The reservation system must be called
+ * before we call into the allocator. If there aren't any available slots
+ * then the I/O will be throttled until an I/O completes and its slots are
+ * freed up. The function returns true if it was successful in placing
+ * the reservation.
+ */
+boolean_t
+metaslab_class_throttle_reserve(metaslab_class_t *mc, int slots, zio_t *zio,
+ int flags)
+{
+ uint64_t available_slots = 0;
+ uint64_t reserved_slots;
+ boolean_t slot_reserved = B_FALSE;
+
+ ASSERT(mc->mc_alloc_throttle_enabled);
+ mutex_enter(&mc->mc_lock);
+
+ reserved_slots = refcount_count(&mc->mc_alloc_slots);
+ if (reserved_slots < mc->mc_alloc_max_slots)
+ available_slots = mc->mc_alloc_max_slots - reserved_slots;
+
+ if (slots <= available_slots || GANG_ALLOCATION(flags)) {
+ int d;
+
+ /*
+ * We reserve the slots individually so that we can unreserve
+ * them individually when an I/O completes.
+ */
+ for (d = 0; d < slots; d++) {
+ reserved_slots = refcount_add(&mc->mc_alloc_slots, zio);
+ }
+ zio->io_flags |= ZIO_FLAG_IO_ALLOCATING;
+ slot_reserved = B_TRUE;
+ }
+
+ mutex_exit(&mc->mc_lock);
+ return (slot_reserved);
+}
+
+void
+metaslab_class_throttle_unreserve(metaslab_class_t *mc, int slots, zio_t *zio)
+{
+ int d;
+
+ ASSERT(mc->mc_alloc_throttle_enabled);
+ mutex_enter(&mc->mc_lock);
+ for (d = 0; d < slots; d++) {
+ (void) refcount_remove(&mc->mc_alloc_slots, zio);
+ }
+ mutex_exit(&mc->mc_lock);
+}
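/*
 * Stand-alone sketch (not part of the patch) of the reservation idea: a
 * bounded counter that callers reserve from before allocating and release
 * when the I/O completes. The kernel code tracks slots with a refcount_t
 * keyed by the zio and lets gang allocations bypass the limit; this
 * stand-in is a plain counter.
 */
#include <stdint.h>
#include <stdio.h>

typedef struct {
	uint64_t max_slots;
	uint64_t reserved;
} alloc_throttle_t;

static int
throttle_reserve(alloc_throttle_t *t, int slots)
{
	if (t->reserved + slots > t->max_slots)
		return (0);	/* throttled: caller waits for completions */
	t->reserved += slots;
	return (1);
}

static void
throttle_unreserve(alloc_throttle_t *t, int slots)
{
	t->reserved -= slots;
}

int
main(void)
{
	alloc_throttle_t t = { 2, 0 };

	printf("%d\n", throttle_reserve(&t, 2));	/* 1: granted */
	printf("%d\n", throttle_reserve(&t, 1));	/* 0: throttled */
	throttle_unreserve(&t, 2);
	printf("%d\n", throttle_reserve(&t, 1));	/* 1: granted */
	return (0);
}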
+
int
metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp,
- int ndvas, uint64_t txg, blkptr_t *hintbp, int flags)
+ int ndvas, uint64_t txg, blkptr_t *hintbp, int flags,
+ zio_alloc_list_t *zal, zio_t *zio)
{
dva_t *dva = bp->blk_dva;
dva_t *hintdva = hintbp->blk_dva;
@@ -2561,25 +3405,36 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp,
ASSERT(ndvas > 0 && ndvas <= spa_max_replication(spa));
ASSERT(BP_GET_NDVAS(bp) == 0);
ASSERT(hintbp == NULL || ndvas <= BP_GET_NDVAS(hintbp));
+ ASSERT3P(zal, !=, NULL);
for (d = 0; d < ndvas; d++) {
error = metaslab_alloc_dva(spa, mc, psize, dva, d, hintdva,
- txg, flags);
+ txg, flags, zal);
if (error != 0) {
for (d--; d >= 0; d--) {
metaslab_free_dva(spa, &dva[d], txg, B_TRUE);
+ metaslab_group_alloc_decrement(spa,
+ DVA_GET_VDEV(&dva[d]), zio, flags);
bzero(&dva[d], sizeof (dva_t));
}
spa_config_exit(spa, SCL_ALLOC, FTAG);
return (error);
+ } else {
+ /*
+ * Update the metaslab group's queue depth
+ * based on the newly allocated dva.
+ */
+ metaslab_group_alloc_increment(spa,
+ DVA_GET_VDEV(&dva[d]), zio, flags);
}
+
}
ASSERT(error == 0);
ASSERT(BP_GET_NDVAS(bp) == ndvas);
spa_config_exit(spa, SCL_ALLOC, FTAG);
- BP_SET_BIRTH(bp, txg, txg);
+ BP_SET_BIRTH(bp, txg, 0);
return (0);
}
@@ -2700,8 +3555,8 @@ metaslab_check_free(spa_t *spa, const blkptr_t *bp)
if (msp->ms_loaded)
range_tree_verify(msp->ms_tree, offset, size);
- for (j = 0; j < TXG_SIZE; j++)
- range_tree_verify(msp->ms_freetree[j], offset, size);
+ range_tree_verify(msp->ms_freeingtree, offset, size);
+ range_tree_verify(msp->ms_freedtree, offset, size);
for (j = 0; j < TXG_DEFER_SIZE; j++)
range_tree_verify(msp->ms_defertree[j], offset, size);
}
@@ -2709,37 +3564,52 @@ metaslab_check_free(spa_t *spa, const blkptr_t *bp)
}
#if defined(_KERNEL) && defined(HAVE_SPL)
+/* CSTYLED */
module_param(metaslab_aliquot, ulong, 0644);
-module_param(metaslab_debug_load, int, 0644);
-module_param(metaslab_debug_unload, int, 0644);
-module_param(metaslab_preload_enabled, int, 0644);
-module_param(zfs_mg_noalloc_threshold, int, 0644);
-module_param(zfs_mg_fragmentation_threshold, int, 0644);
-module_param(zfs_metaslab_fragmentation_threshold, int, 0644);
-module_param(metaslab_fragmentation_factor_enabled, int, 0644);
-module_param(metaslab_lba_weighting_enabled, int, 0644);
-module_param(metaslab_bias_enabled, int, 0644);
-
MODULE_PARM_DESC(metaslab_aliquot,
"allocation granularity (a.k.a. stripe size)");
+
+module_param(metaslab_debug_load, int, 0644);
MODULE_PARM_DESC(metaslab_debug_load,
"load all metaslabs when pool is first opened");
+
+module_param(metaslab_debug_unload, int, 0644);
MODULE_PARM_DESC(metaslab_debug_unload,
"prevent metaslabs from being unloaded");
+
+module_param(metaslab_preload_enabled, int, 0644);
MODULE_PARM_DESC(metaslab_preload_enabled,
"preload potential metaslabs during reassessment");
+module_param(zfs_mg_noalloc_threshold, int, 0644);
MODULE_PARM_DESC(zfs_mg_noalloc_threshold,
"percentage of free space for metaslab group to allow allocation");
+
+module_param(zfs_mg_fragmentation_threshold, int, 0644);
MODULE_PARM_DESC(zfs_mg_fragmentation_threshold,
"fragmentation for metaslab group to allow allocation");
+module_param(zfs_metaslab_fragmentation_threshold, int, 0644);
MODULE_PARM_DESC(zfs_metaslab_fragmentation_threshold,
"fragmentation for metaslab to allow allocation");
+
+module_param(metaslab_fragmentation_factor_enabled, int, 0644);
MODULE_PARM_DESC(metaslab_fragmentation_factor_enabled,
"use the fragmentation metric to prefer less fragmented metaslabs");
+
+module_param(metaslab_lba_weighting_enabled, int, 0644);
MODULE_PARM_DESC(metaslab_lba_weighting_enabled,
"prefer metaslabs with lower LBAs");
+
+module_param(metaslab_bias_enabled, int, 0644);
MODULE_PARM_DESC(metaslab_bias_enabled,
"enable metaslab group biasing");
+
+module_param(zfs_metaslab_segment_weight_enabled, int, 0644);
+MODULE_PARM_DESC(zfs_metaslab_segment_weight_enabled,
+ "enable segment-based metaslab selection");
+
+module_param(zfs_metaslab_switch_threshold, int, 0644);
+MODULE_PARM_DESC(zfs_metaslab_switch_threshold,
+ "segment-based metaslab selection maximum buckets before switching");
#endif /* _KERNEL && HAVE_SPL */
diff --git a/zfs/module/zfs/mmp.c b/zfs/module/zfs/mmp.c
new file mode 100644
index 000000000000..00478a39f2b3
--- /dev/null
+++ b/zfs/module/zfs/mmp.c
@@ -0,0 +1,524 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
+ */
+
+#include <sys/abd.h>
+#include <sys/mmp.h>
+#include <sys/spa.h>
+#include <sys/spa_impl.h>
+#include <sys/vdev.h>
+#include <sys/vdev_impl.h>
+#include <sys/zfs_context.h>
+#include <sys/callb.h>
+
+/*
+ * Multi-Modifier Protection (MMP) attempts to prevent a user from importing
+ * or opening a pool on more than one host at a time. In particular, it
+ * prevents "zpool import -f" on a host from succeeding while the pool is
+ * already imported on another host. There are many other ways in which a
+ * device could be used by two hosts for different purposes at the same time
+ * resulting in pool damage. This implementation does not attempt to detect
+ * those cases.
+ *
+ * MMP operates by ensuring there are frequent visible changes on disk (a
+ * "heartbeat") at all times. And by altering the import process to check
+ * for these changes and failing the import when they are detected. This
+ * functionality is enabled by setting the 'multihost' pool property to on.
+ *
+ * Uberblocks written by the txg_sync thread always go into the first
+ * (N-MMP_BLOCKS_PER_LABEL) slots, the remaining slots are reserved for MMP.
+ * They are used to hold uberblocks which are exactly the same as the last
+ * synced uberblock except that the ub_timestamp is frequently updated.
+ * Like all other uberblocks, the slot is written with an embedded checksum,
+ * and slots with invalid checksums are ignored. This provides the
+ * "heartbeat", with no risk of overwriting good uberblocks that must be
+ * preserved, e.g. previous txgs and associated block pointers.
+ *
+ * Two optional fields are added to uberblock structure: ub_mmp_magic and
+ * ub_mmp_delay. The magic field allows zfs to tell whether ub_mmp_delay is
+ * valid. The delay field is a decaying average of the amount of time between
+ * completion of successive MMP writes, in nanoseconds. It is used to predict
+ * how long the import must wait to detect activity in the pool, before
+ * concluding it is not in use.
+ *
+ * During import an activity test may now be performed to determine if
+ * the pool is in use. The activity test is typically required if the
+ * ZPOOL_CONFIG_HOSTID does not match the system hostid, the pool state is
+ * POOL_STATE_ACTIVE, and the pool is not a root pool.
+ *
+ * The activity test finds the "best" uberblock (highest txg & timestamp),
+ * waits some time, and then finds the "best" uberblock again. If the txg
+ * and timestamp in both "best" uberblocks do not match, the pool is in use
+ * by another host and the import fails. Since the granularity of the
+ * timestamp is in seconds this activity test must take a bare minimum of one
+ * second. In order to assure the accuracy of the activity test, the default
+ * values result in an activity test duration of 10x the mmp write interval.
+ *
+ * The "zpool import" activity test can be expected to take a minimum time of
+ * zfs_multihost_import_intervals * zfs_multihost_interval milliseconds. If the
+ * "best" uberblock has a valid ub_mmp_delay field, then the duration of the
+ * test may take longer if MMP writes were occurring less frequently than
+ * expected. Additionally, the duration is then extended by a random 25% to
+ * attempt to detect simultaneous imports, for example when both partner
+ * hosts are rebooted at the same time and automatically attempt to import
+ * the pool.
+ */
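/*
 * Worked example (not part of the patch) of the expected activity-test
 * duration, assuming the defaults this series ships (a 1000 ms write
 * interval and 10 import intervals). The random 25% extension applied
 * by the import code is shown as a fixed upper bound for illustration.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t interval_ms = 1000;	/* zfs_multihost_interval */
	uint64_t import_intervals = 10;	/* zfs_multihost_import_intervals */
	uint64_t base_ms = interval_ms * import_intervals;

	printf("activity test: %llu..%llu ms\n",
	    (unsigned long long)base_ms,
	    (unsigned long long)(base_ms + base_ms / 4));
	return (0);
}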
+
+/*
+ * Used to control the frequency of mmp writes which are performed when the
+ * 'multihost' pool property is on. This is one factor used to determine the
+ * length of the activity check during import.
+ *
+ * The mmp write period is zfs_multihost_interval / leaf-vdevs milliseconds.
+ * This means that on average an mmp write will be issued for each leaf vdev
+ * every zfs_multihost_interval milliseconds. In practice, the observed period
+ * can vary with the I/O load, and it is this observed delay that is
+ * stored in the uberblock. The minimum allowed value is 100 ms.
+ */
+ulong_t zfs_multihost_interval = MMP_DEFAULT_INTERVAL;
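/*
 * Worked example (not part of the patch) of the write period described
 * above: with the default 1000 ms interval and, say, 8 leaf vdevs, an
 * mmp write is issued roughly every 125 ms, and each individual leaf
 * still sees about one write per second.
 */
#include <stdio.h>

int
main(void)
{
	unsigned interval_ms = 1000;	/* zfs_multihost_interval */
	unsigned leaves = 8;		/* hypothetical leaf vdev count */

	printf("one mmp write every %u ms\n", interval_ms / leaves);
	return (0);
}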
+
+/*
+ * Used to control the duration of the activity test on import. Smaller values
+ * of zfs_multihost_import_intervals will reduce the import time but increase
+ * the risk of failing to detect an active pool. The total activity check time
+ * is never allowed to drop below one second. A value of 0 is ignored and
+ * treated as if it was set to 1.
+ */
+uint_t zfs_multihost_import_intervals = MMP_DEFAULT_IMPORT_INTERVALS;
+
+/*
+ * Controls the behavior of the pool when mmp write failures are detected.
+ *
+ * When zfs_multihost_fail_intervals = 0 then mmp write failures are ignored.
+ * The failures will still be reported to the ZED which depending on its
+ * configuration may take action such as suspending the pool or taking a
+ * device offline.
+ *
+ * When zfs_multihost_fail_intervals > 0 then sequential mmp write failures will
+ * cause the pool to be suspended. This occurs when
+ * zfs_multihost_fail_intervals * zfs_multihost_interval milliseconds have
+ * passed since the last successful mmp write. This guarantees the activity
+ * test will see mmp writes if the pool is imported.
+ */
+uint_t zfs_multihost_fail_intervals = MMP_DEFAULT_FAIL_INTERVALS;
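/*
 * Stand-alone sketch (not part of the patch) of the suspension rule
 * documented above: with fail_intervals == 0, failures are only
 * reported; otherwise the pool suspends once no mmp write has succeeded
 * for fail_intervals * interval milliseconds (5 s with the defaults
 * assumed here).
 */
#include <stdint.h>
#include <stdio.h>

static int
mmp_should_suspend(uint64_t now_ms, uint64_t last_success_ms,
    uint64_t interval_ms, uint64_t fail_intervals)
{
	if (fail_intervals == 0)
		return (0);
	return (now_ms - last_success_ms > fail_intervals * interval_ms);
}

int
main(void)
{
	printf("%d\n", mmp_should_suspend(4000, 0, 1000, 5));	/* 0 */
	printf("%d\n", mmp_should_suspend(6000, 0, 1000, 5));	/* 1 */
	return (0);
}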
+
+static void mmp_thread(spa_t *spa);
+
+void
+mmp_init(spa_t *spa)
+{
+ mmp_thread_t *mmp = &spa->spa_mmp;
+
+ mutex_init(&mmp->mmp_thread_lock, NULL, MUTEX_DEFAULT, NULL);
+ cv_init(&mmp->mmp_thread_cv, NULL, CV_DEFAULT, NULL);
+ mutex_init(&mmp->mmp_io_lock, NULL, MUTEX_DEFAULT, NULL);
+}
+
+void
+mmp_fini(spa_t *spa)
+{
+ mmp_thread_t *mmp = &spa->spa_mmp;
+
+ mutex_destroy(&mmp->mmp_thread_lock);
+ cv_destroy(&mmp->mmp_thread_cv);
+ mutex_destroy(&mmp->mmp_io_lock);
+}
+
+static void
+mmp_thread_enter(mmp_thread_t *mmp, callb_cpr_t *cpr)
+{
+ CALLB_CPR_INIT(cpr, &mmp->mmp_thread_lock, callb_generic_cpr, FTAG);
+ mutex_enter(&mmp->mmp_thread_lock);
+}
+
+static void
+mmp_thread_exit(mmp_thread_t *mmp, kthread_t **mpp, callb_cpr_t *cpr)
+{
+ ASSERT(*mpp != NULL);
+ *mpp = NULL;
+ cv_broadcast(&mmp->mmp_thread_cv);
+ CALLB_CPR_EXIT(cpr); /* drops &mmp->mmp_thread_lock */
+ thread_exit();
+}
+
+void
+mmp_thread_start(spa_t *spa)
+{
+ mmp_thread_t *mmp = &spa->spa_mmp;
+
+ if (spa_writeable(spa)) {
+ mutex_enter(&mmp->mmp_thread_lock);
+ if (!mmp->mmp_thread) {
+ dprintf("mmp_thread_start pool %s\n",
+ spa->spa_name);
+ mmp->mmp_thread = thread_create(NULL, 0, mmp_thread,
+ spa, 0, &p0, TS_RUN, defclsyspri);
+ }
+ mutex_exit(&mmp->mmp_thread_lock);
+ }
+}
+
+void
+mmp_thread_stop(spa_t *spa)
+{
+ mmp_thread_t *mmp = &spa->spa_mmp;
+
+ mutex_enter(&mmp->mmp_thread_lock);
+ mmp->mmp_thread_exiting = 1;
+ cv_broadcast(&mmp->mmp_thread_cv);
+
+ while (mmp->mmp_thread) {
+ cv_wait(&mmp->mmp_thread_cv, &mmp->mmp_thread_lock);
+ }
+ mutex_exit(&mmp->mmp_thread_lock);
+
+ ASSERT(mmp->mmp_thread == NULL);
+ mmp->mmp_thread_exiting = 0;
+}
+
+/*
+ * Randomly choose a leaf vdev to write an MMP block to. It must be
+ * writable and must not have an outstanding mmp write (if it does,
+ * there is a problem, and a new write would also block).
+ *
+ * We try 10 times to pick a random leaf without an outstanding write.
+ * If 90% of the leaves have pending writes, this gives us a >65%
+ * chance of finding one we can write to. There will be at least
+ * (zfs_multihost_fail_intervals) tries before the inability to write an MMP
+ * block causes serious problems.
+ */
+static vdev_t *
+vdev_random_leaf(spa_t *spa)
+{
+ vdev_t *vd, *child;
+ int pending_writes = 10;
+
+ ASSERT(spa);
+ ASSERT(spa_config_held(spa, SCL_STATE, RW_READER) == SCL_STATE);
+
+ /*
+ * Since we hold SCL_STATE, neither pool nor vdev state can
+ * change. Therefore, if the root is not dead, there is a
+ * child that is not dead, and so on down to a leaf.
+ */
+ if (!vdev_writeable(spa->spa_root_vdev))
+ return (NULL);
+
+ vd = spa->spa_root_vdev;
+ while (!vd->vdev_ops->vdev_op_leaf) {
+ child = vd->vdev_child[spa_get_random(vd->vdev_children)];
+
+ if (!vdev_writeable(child))
+ continue;
+
+ if (child->vdev_ops->vdev_op_leaf && child->vdev_mmp_pending) {
+ if (pending_writes-- > 0)
+ continue;
+ else
+ return (NULL);
+ }
+
+ vd = child;
+ }
+ return (vd);
+}
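The ">65%" figure in the comment above vdev_random_leaf() is simply 1 - 0.9^10; a standalone check (not part of the patch, build with -lm):

    #include <stdio.h>
    #include <math.h>

    int
    main(void)
    {
        double busy_fraction = 0.9; /* 90% of leaves have pending writes */
        int tries = 10;

        /* P(at least one of 10 random picks is idle) = 1 - 0.9^10 */
        printf("success probability: %.4f\n",
            1.0 - pow(busy_fraction, tries));
        return (0);
    }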
+
+static void
+mmp_write_done(zio_t *zio)
+{
+ spa_t *spa = zio->io_spa;
+ vdev_t *vd = zio->io_vd;
+ mmp_thread_t *mts = zio->io_private;
+
+ mutex_enter(&mts->mmp_io_lock);
+ vd->vdev_mmp_pending = 0;
+
+ if (zio->io_error)
+ goto unlock;
+
+ /*
+ * MMP writes are queued on a fixed schedule, but under many
+ * circumstances, such as a busy device or faulty hardware,
+ * the writes will complete at variable, much longer,
+ * intervals. In these cases, another node checking for
+ * activity must wait longer to account for these delays.
+ *
+ * The mmp_delay is calculated as a decaying average of the interval
+ * between completed mmp writes. This is used to predict how long
+ * the import must wait to detect activity in the pool, before
+ * concluding it is not in use.
+ *
+ * Do not set mmp_delay if the multihost property is not on,
+ * so as not to trigger an activity check on import.
+ */
+ if (spa_multihost(spa)) {
+ hrtime_t delay = gethrtime() - mts->mmp_last_write;
+
+ if (delay > mts->mmp_delay)
+ mts->mmp_delay = delay;
+ else
+ mts->mmp_delay = (delay + mts->mmp_delay * 127) /
+ 128;
+ } else {
+ mts->mmp_delay = 0;
+ }
+ mts->mmp_last_write = gethrtime();
+
+unlock:
+ mutex_exit(&mts->mmp_io_lock);
+ spa_config_exit(spa, SCL_STATE, FTAG);
+
+ abd_free(zio->io_abd);
+}
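The mmp_delay filter above is asymmetric: an unexpectedly slow write raises the estimate immediately, while fast writes pull it back down with weight 127/128 per sample. A userspace model (the 125 ms and 500 ms figures are made up):

    #include <stdio.h>

    /* Model of the mmp_delay update rule in mmp_write_done() above. */
    static long long
    update_delay(long long delay, long long observed)
    {
        if (observed > delay)
            return (observed);                   /* jumps up immediately */
        return ((observed + delay * 127) / 128); /* decays back slowly */
    }

    int
    main(void)
    {
        long long delay = 125000000; /* 125 ms in ns, arbitrary start */
        int i;

        delay = update_delay(delay, 500000000); /* one slow 500 ms write */
        printf("after slow write: %lld ns\n", delay);

        /* Subsequent 125 ms writes pull the average down gradually. */
        for (i = 0; i < 128; i++)
            delay = update_delay(delay, 125000000);
        printf("after 128 fast writes: %lld ns\n", delay);
        return (0);
    }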
+
+/*
+ * When the on-disk uberblock is updated by spa_sync,
+ * creating a new "best" uberblock, update the copy stored
+ * in the mmp thread state, which is used for mmp writes.
+ */
+void
+mmp_update_uberblock(spa_t *spa, uberblock_t *ub)
+{
+ mmp_thread_t *mmp = &spa->spa_mmp;
+
+ mutex_enter(&mmp->mmp_io_lock);
+ mmp->mmp_ub = *ub;
+ mmp->mmp_ub.ub_timestamp = gethrestime_sec();
+ mutex_exit(&mmp->mmp_io_lock);
+}
+
+/*
+ * Choose a random vdev, label, and MMP block, and write over it
+ * with a copy of the last-synced uberblock, whose timestamp
+ * has been updated to reflect that the pool is in use.
+ */
+static void
+mmp_write_uberblock(spa_t *spa)
+{
+ int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
+ mmp_thread_t *mmp = &spa->spa_mmp;
+ uberblock_t *ub;
+ vdev_t *vd;
+ int label;
+ uint64_t offset;
+
+ spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
+ vd = vdev_random_leaf(spa);
+ if (vd == NULL || !vdev_writeable(vd)) {
+ spa_config_exit(spa, SCL_STATE, FTAG);
+ return;
+ }
+
+ mutex_enter(&mmp->mmp_io_lock);
+
+ if (mmp->mmp_zio_root == NULL)
+ mmp->mmp_zio_root = zio_root(spa, NULL, NULL,
+ flags | ZIO_FLAG_GODFATHER);
+
+ ub = &mmp->mmp_ub;
+ ub->ub_timestamp = gethrestime_sec();
+ ub->ub_mmp_magic = MMP_MAGIC;
+ ub->ub_mmp_delay = mmp->mmp_delay;
+ vd->vdev_mmp_pending = gethrtime();
+
+ zio_t *zio = zio_null(mmp->mmp_zio_root, spa, NULL, NULL, NULL, flags);
+ abd_t *ub_abd = abd_alloc_for_io(VDEV_UBERBLOCK_SIZE(vd), B_TRUE);
+ abd_zero(ub_abd, VDEV_UBERBLOCK_SIZE(vd));
+ abd_copy_from_buf(ub_abd, ub, sizeof (uberblock_t));
+
+ mutex_exit(&mmp->mmp_io_lock);
+
+ offset = VDEV_UBERBLOCK_OFFSET(vd, VDEV_UBERBLOCK_COUNT(vd) -
+ MMP_BLOCKS_PER_LABEL + spa_get_random(MMP_BLOCKS_PER_LABEL));
+
+ label = spa_get_random(VDEV_LABELS);
+ vdev_label_write(zio, vd, label, ub_abd, offset,
+ VDEV_UBERBLOCK_SIZE(vd), mmp_write_done, mmp,
+ flags | ZIO_FLAG_DONT_PROPAGATE);
+
+ spa_mmp_history_add(ub->ub_txg, ub->ub_timestamp, ub->ub_mmp_delay, vd,
+ label);
+
+ zio_nowait(zio);
+}
+
+static void
+mmp_thread(spa_t *spa)
+{
+ mmp_thread_t *mmp = &spa->spa_mmp;
+ boolean_t last_spa_suspended = spa_suspended(spa);
+ boolean_t last_spa_multihost = spa_multihost(spa);
+ callb_cpr_t cpr;
+ hrtime_t max_fail_ns = zfs_multihost_fail_intervals *
+ MSEC2NSEC(MAX(zfs_multihost_interval, MMP_MIN_INTERVAL));
+
+ mmp_thread_enter(mmp, &cpr);
+
+ /*
+ * The mmp_write_done() function calculates mmp_delay based on the
+ * prior value of mmp_delay and the elapsed time since the last write.
+ * For the first mmp write, there is no "last write", so we start
+ * with fake, but reasonable, default non-zero values.
+ */
+ mmp->mmp_delay = MSEC2NSEC(MAX(zfs_multihost_interval,
+ MMP_MIN_INTERVAL)) / MAX(vdev_count_leaves(spa), 1);
+ mmp->mmp_last_write = gethrtime() - mmp->mmp_delay;
+
+ while (!mmp->mmp_thread_exiting) {
+ uint64_t mmp_fail_intervals = zfs_multihost_fail_intervals;
+ uint64_t mmp_interval = MSEC2NSEC(
+ MAX(zfs_multihost_interval, MMP_MIN_INTERVAL));
+ boolean_t suspended = spa_suspended(spa);
+ boolean_t multihost = spa_multihost(spa);
+ hrtime_t start, next_time;
+
+ start = gethrtime();
+ if (multihost) {
+ next_time = start + mmp_interval /
+ MAX(vdev_count_leaves(spa), 1);
+ } else {
+ next_time = start + MSEC2NSEC(MMP_DEFAULT_INTERVAL);
+ }
+
+ /*
+ * When MMP goes off => on, or spa goes suspended =>
+ * !suspended, we know no writes occurred recently. We
+ * update mmp_last_write to give us some time to try.
+ */
+ if ((!last_spa_multihost && multihost) ||
+ (last_spa_suspended && !suspended)) {
+ mutex_enter(&mmp->mmp_io_lock);
+ mmp->mmp_last_write = gethrtime();
+ mutex_exit(&mmp->mmp_io_lock);
+ } else if (last_spa_multihost && !multihost) {
+ mutex_enter(&mmp->mmp_io_lock);
+ mmp->mmp_delay = 0;
+ mutex_exit(&mmp->mmp_io_lock);
+ }
+ last_spa_multihost = multihost;
+ last_spa_suspended = suspended;
+
+ /*
+ * Smooth max_fail_ns when its factors are decreased, because
+ * making (max_fail_ns < mmp_interval) results in the pool being
+ * immediately suspended before writes can occur at the new
+ * higher frequency.
+ */
+ if ((mmp_interval * mmp_fail_intervals) < max_fail_ns) {
+ max_fail_ns = ((31 * max_fail_ns) + (mmp_interval *
+ mmp_fail_intervals)) / 32;
+ } else {
+ max_fail_ns = mmp_interval * mmp_fail_intervals;
+ }
+
+ /*
+ * Suspend the pool if no MMP write has succeeded in over
+ * mmp_interval * mmp_fail_intervals nanoseconds.
+ */
+ if (!suspended && mmp_fail_intervals && multihost &&
+ (start - mmp->mmp_last_write) > max_fail_ns) {
+ zio_suspend(spa, NULL);
+ }
+
+ if (multihost)
+ mmp_write_uberblock(spa);
+
+ CALLB_CPR_SAFE_BEGIN(&cpr);
+ (void) cv_timedwait_sig(&mmp->mmp_thread_cv,
+ &mmp->mmp_thread_lock, ddi_get_lbolt() +
+ ((next_time - gethrtime()) / (NANOSEC / hz)));
+ CALLB_CPR_SAFE_END(&cpr, &mmp->mmp_thread_lock);
+ }
+
+ /* Outstanding writes are allowed to complete. */
+ if (mmp->mmp_zio_root)
+ zio_wait(mmp->mmp_zio_root);
+
+ mmp->mmp_zio_root = NULL;
+ mmp_thread_exit(mmp, &mmp->mmp_thread, &cpr);
+}
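The max_fail_ns smoothing in mmp_thread() works the same way in the other direction: raising the tunables takes effect at once, while lowering them is applied with weight 31/32 per pass, so the pool is not suspended before writes can occur at the new frequency. An illustrative model:

    #include <stdio.h>

    /* Model of the max_fail_ns smoothing in mmp_thread() above. */
    static long long
    smooth_max_fail(long long max_fail_ns, long long target_ns)
    {
        if (target_ns < max_fail_ns)
            return ((31 * max_fail_ns + target_ns) / 32);
        return (target_ns);
    }

    int
    main(void)
    {
        long long max_fail = 10LL * 1000000000; /* 10 s */
        long long target = 1LL * 1000000000;    /* lowered to 1 s */
        int i;

        /* The limit approaches the new target geometrically. */
        for (i = 1; i <= 4; i++) {
            max_fail = smooth_max_fail(max_fail, target);
            printf("pass %d: %lld ns\n", i, max_fail);
        }
        return (0);
    }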
+
+/*
+ * Signal the MMP thread to wake it when it is sleeping on
+ * its cv. Used when some module parameter has changed and
+ * we want the thread to know about it.
+ * Only signal if the pool is active and the mmp thread is
+ * running; otherwise there is no thread to wake.
+ */
+static void
+mmp_signal_thread(spa_t *spa)
+{
+ mmp_thread_t *mmp = &spa->spa_mmp;
+
+ mutex_enter(&mmp->mmp_thread_lock);
+ if (mmp->mmp_thread)
+ cv_broadcast(&mmp->mmp_thread_cv);
+ mutex_exit(&mmp->mmp_thread_lock);
+}
+
+void
+mmp_signal_all_threads(void)
+{
+ spa_t *spa = NULL;
+
+ mutex_enter(&spa_namespace_lock);
+ while ((spa = spa_next(spa))) {
+ if (spa->spa_state == POOL_STATE_ACTIVE)
+ mmp_signal_thread(spa);
+ }
+ mutex_exit(&spa_namespace_lock);
+}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+#include <linux/mod_compat.h>
+
+static int
+param_set_multihost_interval(const char *val, zfs_kernel_param_t *kp)
+{
+ int ret;
+
+ ret = param_set_ulong(val, kp);
+ if (ret < 0)
+ return (ret);
+
+ mmp_signal_all_threads();
+
+ return (ret);
+}
+
+/* BEGIN CSTYLED */
+module_param(zfs_multihost_fail_intervals, uint, 0644);
+MODULE_PARM_DESC(zfs_multihost_fail_intervals,
+ "Max allowed period without a successful mmp write");
+
+module_param_call(zfs_multihost_interval, param_set_multihost_interval,
+ param_get_ulong, &zfs_multihost_interval, 0644);
+MODULE_PARM_DESC(zfs_multihost_interval,
+ "Milliseconds between mmp writes to each leaf");
+
+module_param(zfs_multihost_import_intervals, uint, 0644);
+MODULE_PARM_DESC(zfs_multihost_import_intervals,
+ "Number of zfs_multihost_interval periods to wait for activity");
+/* END CSTYLED */
+#endif
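Since these are ordinary module parameters, they can be changed at runtime through sysfs; writing zfs_multihost_interval is what invokes param_set_multihost_interval() above and wakes every MMP thread. A hedged userspace sketch (the path follows the standard /sys/module layout and requires root):

    #include <stdio.h>

    int
    main(void)
    {
        /* Standard sysfs location for ZFS module parameters. */
        const char *p = "/sys/module/zfs/parameters/zfs_multihost_interval";
        FILE *f = fopen(p, "w");

        if (f == NULL) {
            perror("fopen");
            return (1);
        }
        /* The write triggers param_set_multihost_interval() in the kernel. */
        fprintf(f, "2000\n");
        fclose(f);
        return (0);
    }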
diff --git a/zfs/module/zfs/multilist.c b/zfs/module/zfs/multilist.c
index e4446ded2208..40fa1686e1e1 100644
--- a/zfs/module/zfs/multilist.c
+++ b/zfs/module/zfs/multilist.c
@@ -13,7 +13,7 @@
* CDDL HEADER END
*/
/*
- * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -23,6 +23,12 @@
/* needed for spa_get_random() */
#include <sys/spa.h>
+/*
+ * If nonzero, this overrides the number of sublists in each multilist_t,
+ * which defaults to the number of CPUs in the system (see multilist_create()).
+ */
+int zfs_multilist_num_sublists = 0;
+
/*
* Given the object contained on the list, return a pointer to the
* object's multilist_node_t structure it contains.
@@ -62,18 +68,16 @@ multilist_d2l(multilist_t *ml, void *obj)
* requirement, but a general rule of thumb in order to garner the
* best multi-threaded performance out of the data structure.
*/
-void
-multilist_create(multilist_t *ml, size_t size, size_t offset, unsigned int num,
- multilist_sublist_index_func_t *index_func)
+static multilist_t *
+multilist_create_impl(size_t size, size_t offset,
+ unsigned int num, multilist_sublist_index_func_t *index_func)
{
- int i;
-
- ASSERT3P(ml, !=, NULL);
ASSERT3U(size, >, 0);
ASSERT3U(size, >=, offset + sizeof (multilist_node_t));
ASSERT3U(num, >, 0);
ASSERT3P(index_func, !=, NULL);
+ multilist_t *ml = kmem_alloc(sizeof (*ml), KM_SLEEP);
ml->ml_offset = offset;
ml->ml_num_sublists = num;
ml->ml_index_func = index_func;
@@ -83,11 +87,32 @@ multilist_create(multilist_t *ml, size_t size, size_t offset, unsigned int num,
ASSERT3P(ml->ml_sublists, !=, NULL);
- for (i = 0; i < ml->ml_num_sublists; i++) {
+ for (int i = 0; i < ml->ml_num_sublists; i++) {
multilist_sublist_t *mls = &ml->ml_sublists[i];
- mutex_init(&mls->mls_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&mls->mls_lock, NULL, MUTEX_NOLOCKDEP, NULL);
list_create(&mls->mls_list, size, offset);
}
+ return (ml);
+}
+
+/*
+ * Allocate a new multilist, using the default number of sublists
+ * (the number of CPUs, or at least 4, or the tunable
+ * zfs_multilist_num_sublists).
+ */
+multilist_t *
+multilist_create(size_t size, size_t offset,
+ multilist_sublist_index_func_t *index_func)
+{
+ int num_sublists;
+
+ if (zfs_multilist_num_sublists > 0) {
+ num_sublists = zfs_multilist_num_sublists;
+ } else {
+ num_sublists = MAX(boot_ncpus, 4);
+ }
+
+ return (multilist_create_impl(size, offset, num_sublists, index_func));
}
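The sublist-count selection in multilist_create() is a small pure function; a sketch (not part of the patch) with boot_ncpus replaced by a caller-supplied CPU count, since that symbol is kernel-internal:

    #include <stdio.h>

    #define MAX(a, b) ((a) > (b) ? (a) : (b))

    /* Mirrors the selection logic in multilist_create() above. */
    static int
    num_sublists(int tunable, int ncpus)
    {
        if (tunable > 0)
            return (tunable);
        return (MAX(ncpus, 4));
    }

    int
    main(void)
    {
        printf("2 CPUs, tunable unset: %d\n", num_sublists(0, 2));   /* 4 */
        printf("32 CPUs, tunable unset: %d\n", num_sublists(0, 32)); /* 32 */
        printf("tunable=8, 16 CPUs: %d\n", num_sublists(8, 16));     /* 8 */
        return (0);
    }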
/*
@@ -115,6 +140,7 @@ multilist_destroy(multilist_t *ml)
ml->ml_num_sublists = 0;
ml->ml_offset = 0;
+ kmem_free(ml, sizeof (multilist_t));
}
/*
@@ -268,6 +294,13 @@ multilist_sublist_lock(multilist_t *ml, unsigned int sublist_idx)
return (mls);
}
+/* Lock and return the sublist that would be used to store the specified obj */
+multilist_sublist_t *
+multilist_sublist_lock_obj(multilist_t *ml, void *obj)
+{
+ return (multilist_sublist_lock(ml, ml->ml_index_func(ml, obj)));
+}
+
void
multilist_sublist_unlock(multilist_sublist_t *mls)
{
@@ -373,3 +406,14 @@ multilist_link_active(multilist_node_t *link)
{
return (list_link_active(link));
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+
+/* BEGIN CSTYLED */
+
+module_param(zfs_multilist_num_sublists, int, 0644);
+MODULE_PARM_DESC(zfs_multilist_num_sublists,
+ "Number of sublists used in each multilist");
+
+/* END CSTYLED */
+#endif
diff --git a/zfs/module/zfs/pathname.c b/zfs/module/zfs/pathname.c
new file mode 100644
index 000000000000..4ec1320661f5
--- /dev/null
+++ b/zfs/module/zfs/pathname.c
@@ -0,0 +1,89 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
+/* All Rights Reserved */
+
+/*
+ * University Copyright- Copyright (c) 1982, 1986, 1988
+ * The Regents of the University of California
+ * All Rights Reserved
+ *
+ * University Acknowledgment- Portions of this document are derived from
+ * software developed by the University of California, Berkeley, and its
+ * contributors.
+ */
+
+
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/pathname.h>
+#include <sys/kmem.h>
+
+/*
+ * Pathname utilities.
+ *
+ * In translating file names we copy each argument file
+ * name into a pathname structure where we operate on it.
+ * Each pathname structure can hold "pn_bufsize" characters
+ * including a terminating null, and operations here support
+ * allocating and freeing pathname structures, fetching
+ * strings from user space, getting the next character from
+ * a pathname, combining two pathnames (used in symbolic
+ * link processing), and peeling off the first component
+ * of a pathname.
+ */
+
+/*
+ * Allocate contents of pathname structure. Structure is typically
+ * an automatic variable in calling routine for convenience.
+ *
+ * May sleep in the call to kmem_alloc() and so must not be called
+ * from interrupt level.
+ */
+void
+pn_alloc(struct pathname *pnp)
+{
+ pn_alloc_sz(pnp, MAXPATHLEN);
+}
+void
+pn_alloc_sz(struct pathname *pnp, size_t sz)
+{
+ pnp->pn_path = pnp->pn_buf = kmem_alloc(sz, KM_SLEEP);
+ pnp->pn_pathlen = 0;
+ pnp->pn_bufsize = sz;
+}
+
+/*
+ * Free pathname resources.
+ */
+void
+pn_free(struct pathname *pnp)
+{
+ /* pn_bufsize is usually MAXPATHLEN, but may not be */
+ kmem_free(pnp->pn_buf, pnp->pn_bufsize);
+ pnp->pn_path = pnp->pn_buf = NULL;
+ pnp->pn_pathlen = pnp->pn_bufsize = 0;
+}
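To make the pn_alloc()/pn_free() invariants concrete, a userspace model (malloc standing in for kmem_alloc, with fields mirroring the kernel's struct pathname):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define MAXPATHLEN 4096

    /* Userspace model of the pathname structure and its allocator pair. */
    struct pathname {
        char   *pn_buf;      /* underlying storage */
        char   *pn_path;     /* remaining pathname, points into pn_buf */
        size_t  pn_pathlen;  /* remaining length */
        size_t  pn_bufsize;  /* total size of pn_buf */
    };

    static void
    pn_alloc_model(struct pathname *pnp, size_t sz)
    {
        pnp->pn_path = pnp->pn_buf = malloc(sz);
        pnp->pn_pathlen = 0;
        pnp->pn_bufsize = sz;
    }

    static void
    pn_free_model(struct pathname *pnp)
    {
        free(pnp->pn_buf);
        pnp->pn_path = pnp->pn_buf = NULL;
        pnp->pn_pathlen = pnp->pn_bufsize = 0;
    }

    int
    main(void)
    {
        struct pathname pn;

        pn_alloc_model(&pn, MAXPATHLEN);
        snprintf(pn.pn_buf, pn.pn_bufsize, "/tank/fs/file");
        pn.pn_pathlen = strlen(pn.pn_buf);
        printf("path=%s len=%zu bufsize=%zu\n",
            pn.pn_path, pn.pn_pathlen, pn.pn_bufsize);
        pn_free_model(&pn);
        return (0);
    }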
diff --git a/zfs/module/zfs/policy.c b/zfs/module/zfs/policy.c
new file mode 100644
index 000000000000..03e8f748b746
--- /dev/null
+++ b/zfs/module/zfs/policy.c
@@ -0,0 +1,303 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, Joyent, Inc. All rights reserved.
+ * Copyright (C) 2016 Lawrence Livermore National Security, LLC.
+ *
+ * For Linux the vast majority of this enforcement is already handled via
+ * the standard Linux VFS permission checks. However certain administrative
+ * commands which bypass the standard mechanisms may need to make use of
+ * this functionality.
+ */
+
+#include <sys/policy.h>
+#include <linux/security.h>
+#include <linux/vfs_compat.h>
+
+/*
+ * The passed credentials cannot be directly verified because Linux only
+ * provides an interface to check the *current* process credentials. To
+ * handle this, the capable() test is only run when the passed credentials
+ * match the current process credentials or the kcred. In all other cases
+ * this function must fail and return the passed err.
+ */
+static int
+priv_policy(const cred_t *cr, int capability, boolean_t all, int err)
+{
+ ASSERT3S(all, ==, B_FALSE);
+
+ if (cr != CRED() && (cr != kcred))
+ return (err);
+
+ if (!capable(capability))
+ return (err);
+
+ return (0);
+}
+
+/*
+ * Checks for operations that are either client-only or are used by
+ * both clients and servers.
+ */
+int
+secpolicy_nfs(const cred_t *cr)
+{
+ return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EPERM));
+}
+
+/*
+ * Catch all system configuration.
+ */
+int
+secpolicy_sys_config(const cred_t *cr, boolean_t checkonly)
+{
+ return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EPERM));
+}
+
+/*
+ * Like secpolicy_vnode_access() but we get the actual wanted mode and the
+ * current mode of the file, not the missing bits.
+ *
+ * Enforced in the Linux VFS.
+ */
+int
+secpolicy_vnode_access2(const cred_t *cr, struct inode *ip, uid_t owner,
+ mode_t curmode, mode_t wantmode)
+{
+ return (0);
+}
+
+/*
+ * This is a special routine for ZFS; it is used to determine whether
+ * any of the privileges in effect allow any form of access to the
+ * file. There's no reason to audit this or any reason to record
+ * this. More work is needed to do the "KPLD" stuff.
+ */
+int
+secpolicy_vnode_any_access(const cred_t *cr, struct inode *ip, uid_t owner)
+{
+ if (crgetfsuid(cr) == owner)
+ return (0);
+
+ if (zpl_inode_owner_or_capable(ip))
+ return (0);
+
+ if (priv_policy(cr, CAP_DAC_OVERRIDE, B_FALSE, EPERM) == 0)
+ return (0);
+
+ if (priv_policy(cr, CAP_DAC_READ_SEARCH, B_FALSE, EPERM) == 0)
+ return (0);
+
+ return (EPERM);
+}
+
+/*
+ * Determine if subject can chown owner of a file.
+ */
+int
+secpolicy_vnode_chown(const cred_t *cr, uid_t owner)
+{
+ if (crgetfsuid(cr) == owner)
+ return (0);
+
+ return (priv_policy(cr, CAP_FOWNER, B_FALSE, EPERM));
+}
+
+/*
+ * Determine if subject can change group ownership of a file.
+ */
+int
+secpolicy_vnode_create_gid(const cred_t *cr)
+{
+ return (priv_policy(cr, CAP_SETGID, B_FALSE, EPERM));
+}
+
+/*
+ * Policy determines whether we can remove an entry from a directory,
+ * regardless of permission bits.
+ */
+int
+secpolicy_vnode_remove(const cred_t *cr)
+{
+ return (priv_policy(cr, CAP_FOWNER, B_FALSE, EPERM));
+}
+
+/*
+ * Determine that subject can modify the mode of a file. allzone privilege
+ * needed when modifying root owned object.
+ */
+int
+secpolicy_vnode_setdac(const cred_t *cr, uid_t owner)
+{
+ if (crgetfsuid(cr) == owner)
+ return (0);
+
+ return (priv_policy(cr, CAP_FOWNER, B_FALSE, EPERM));
+}
+
+/*
+ * Are we allowed to retain the set-uid/set-gid bits when
+ * changing ownership or when writing to a file?
+ * "issuid" should be true when set-uid; only in that case
+ * root ownership is checked (setgid is assumed).
+ *
+ * Enforced in the Linux VFS.
+ */
+int
+secpolicy_vnode_setid_retain(const cred_t *cr, boolean_t issuidroot)
+{
+ return (0);
+}
+
+/*
+ * Determine that subject can set the file setgid flag.
+ */
+int
+secpolicy_vnode_setids_setgids(const cred_t *cr, gid_t gid)
+{
+ if (crgetfsgid(cr) != gid && !groupmember(gid, cr))
+ return (priv_policy(cr, CAP_FSETID, B_FALSE, EPERM));
+
+ return (0);
+}
+
+/*
+ * Determine if the subject can inject faults in the ZFS fault injection
+ * framework. Requires all privileges.
+ */
+int
+secpolicy_zinject(const cred_t *cr)
+{
+ return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EACCES));
+}
+
+/*
+ * Determine if the subject has permission to manipulate ZFS datasets
+ * (not pools). Equivalent to the SYS_MOUNT privilege.
+ */
+int
+secpolicy_zfs(const cred_t *cr)
+{
+ return (priv_policy(cr, CAP_SYS_ADMIN, B_FALSE, EACCES));
+}
+
+void
+secpolicy_setid_clear(vattr_t *vap, cred_t *cr)
+{
+ if ((vap->va_mode & (S_ISUID | S_ISGID)) != 0 &&
+ secpolicy_vnode_setid_retain(cr,
+ (vap->va_mode & S_ISUID) != 0 &&
+ (vap->va_mask & AT_UID) != 0 && vap->va_uid == 0) != 0) {
+ vap->va_mask |= AT_MODE;
+ vap->va_mode &= ~(S_ISUID|S_ISGID);
+ }
+}
+
+/*
+ * Determine that subject can set the file setid flags.
+ */
+static int
+secpolicy_vnode_setid_modify(const cred_t *cr, uid_t owner)
+{
+ if (crgetfsuid(cr) == owner)
+ return (0);
+
+ return (priv_policy(cr, CAP_FSETID, B_FALSE, EPERM));
+}
+
+/*
+ * Determine that subject can make a file "sticky".
+ *
+ * Enforced in the Linux VFS.
+ */
+static int
+secpolicy_vnode_stky_modify(const cred_t *cr)
+{
+ return (0);
+}
+
+int
+secpolicy_setid_setsticky_clear(struct inode *ip, vattr_t *vap,
+ const vattr_t *ovap, cred_t *cr)
+{
+ int error;
+
+ if ((vap->va_mode & S_ISUID) != 0 &&
+ (error = secpolicy_vnode_setid_modify(cr,
+ ovap->va_uid)) != 0) {
+ return (error);
+ }
+
+ /*
+ * Check privilege if attempting to set the
+ * sticky bit on a non-directory.
+ */
+ if (!S_ISDIR(ip->i_mode) && (vap->va_mode & S_ISVTX) != 0 &&
+ secpolicy_vnode_stky_modify(cr) != 0) {
+ vap->va_mode &= ~S_ISVTX;
+ }
+
+ /*
+ * Check for privilege if attempting to set the
+ * group-id bit.
+ */
+ if ((vap->va_mode & S_ISGID) != 0 &&
+ secpolicy_vnode_setids_setgids(cr, ovap->va_gid) != 0) {
+ vap->va_mode &= ~S_ISGID;
+ }
+
+ return (0);
+}
+
+/*
+ * Check privileges for setting xvattr attributes
+ */
+int
+secpolicy_xvattr(xvattr_t *xvap, uid_t owner, cred_t *cr, vtype_t vtype)
+{
+ return (secpolicy_vnode_chown(cr, owner));
+}
+
+/*
+ * Check privileges for setattr attributes.
+ *
+ * Enforced in the Linux VFS.
+ */
+int
+secpolicy_vnode_setattr(cred_t *cr, struct inode *ip, struct vattr *vap,
+ const struct vattr *ovap, int flags,
+ int unlocked_access(void *, int, cred_t *), void *node)
+{
+ return (0);
+}
+
+/*
+ * Check privileges for links.
+ *
+ * Enforced in the Linux VFS.
+ */
+int
+secpolicy_basic_link(const cred_t *cr)
+{
+ return (0);
+}
diff --git a/zfs/module/zfs/qat_compress.c b/zfs/module/zfs/qat_compress.c
new file mode 100644
index 000000000000..99d1c9a12d6f
--- /dev/null
+++ b/zfs/module/zfs/qat_compress.c
@@ -0,0 +1,585 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#if defined(_KERNEL) && defined(HAVE_QAT)
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/pagemap.h>
+#include <linux/completion.h>
+#include <sys/zfs_context.h>
+#include "qat_compress.h"
+
+/*
+ * Timeout - no response from hardware after 0.5 seconds
+ */
+#define TIMEOUT_MS 500
+
+/*
+ * Max instances in QAT device, each instance is a channel to submit
+ * jobs to QAT hardware
+ */
+#define MAX_INSTANCES 6
+
+/*
+ * ZLIB head and foot size
+ */
+#define ZLIB_HEAD_SZ 2
+#define ZLIB_FOOT_SZ 4
+
+/*
+ * The minimal and maximal buffer sizes. They are not restricted by
+ * the QAT hardware, but the hardware provides optimal performance
+ * when the input buffer size is between 4KB and 128KB.
+ */
+#define QAT_MIN_BUF_SIZE (4*1024)
+#define QAT_MAX_BUF_SIZE (128*1024)
+
+/*
+ * Used for qat kstat.
+ */
+typedef struct qat_stats {
+ /*
+ * Number of jobs submitted to qat compression engine.
+ */
+ kstat_named_t comp_requests;
+ /*
+ * Total bytes sent to qat compression engine.
+ */
+ kstat_named_t comp_total_in_bytes;
+ /*
+ * Total bytes output from qat compression engine.
+ */
+ kstat_named_t comp_total_out_bytes;
+ /*
+ * Number of jobs submitted to qat de-compression engine.
+ */
+ kstat_named_t decomp_requests;
+ /*
+ * Total bytes sent to qat de-compression engine.
+ */
+ kstat_named_t decomp_total_in_bytes;
+ /*
+ * Total bytes output from qat de-compression engine.
+ */
+ kstat_named_t decomp_total_out_bytes;
+ /*
+ * Number of failures in the qat engine.
+ * Note: a qat failure does not necessarily indicate a critical
+ * hardware issue; often it is because the output buffer is not
+ * big enough. The compression job is then handed back to the gzip
+ * software implementation, so the functionality of ZFS is not impacted.
+ */
+ kstat_named_t dc_fails;
+} qat_stats_t;
+
+qat_stats_t qat_stats = {
+ { "comp_reqests", KSTAT_DATA_UINT64 },
+ { "comp_total_in_bytes", KSTAT_DATA_UINT64 },
+ { "comp_total_out_bytes", KSTAT_DATA_UINT64 },
+ { "decomp_reqests", KSTAT_DATA_UINT64 },
+ { "decomp_total_in_bytes", KSTAT_DATA_UINT64 },
+ { "decomp_total_out_bytes", KSTAT_DATA_UINT64 },
+ { "dc_fails", KSTAT_DATA_UINT64 },
+};
+
+static kstat_t *qat_ksp;
+static CpaInstanceHandle dc_inst_handles[MAX_INSTANCES];
+static CpaDcSessionHandle session_handles[MAX_INSTANCES];
+static CpaBufferList **buffer_array[MAX_INSTANCES];
+static Cpa32U num_inst = 0;
+static Cpa32U inst_num = 0;
+static boolean_t qat_init_done = B_FALSE;
+int zfs_qat_disable = 0;
+
+#define QAT_STAT_INCR(stat, val) \
+ atomic_add_64(&qat_stats.stat.value.ui64, (val));
+#define QAT_STAT_BUMP(stat) \
+ QAT_STAT_INCR(stat, 1);
+
+#define PHYS_CONTIG_ALLOC(pp_mem_addr, size_bytes) \
+ mem_alloc_contig((void *)(pp_mem_addr), (size_bytes))
+
+#define PHYS_CONTIG_FREE(p_mem_addr) \
+ mem_free_contig((void *)&(p_mem_addr))
+
+static inline struct page *
+mem_to_page(void *addr)
+{
+ if (!is_vmalloc_addr(addr))
+ return (virt_to_page(addr));
+
+ return (vmalloc_to_page(addr));
+}
+
+static void
+qat_dc_callback(void *p_callback, CpaStatus status)
+{
+ if (p_callback != NULL)
+ complete((struct completion *)p_callback);
+}
+
+static inline CpaStatus
+mem_alloc_contig(void **pp_mem_addr, Cpa32U size_bytes)
+{
+ *pp_mem_addr = kmalloc(size_bytes, GFP_KERNEL);
+ if (*pp_mem_addr == NULL)
+ return (CPA_STATUS_RESOURCE);
+ return (CPA_STATUS_SUCCESS);
+}
+
+static inline void
+mem_free_contig(void **pp_mem_addr)
+{
+ if (*pp_mem_addr != NULL) {
+ kfree(*pp_mem_addr);
+ *pp_mem_addr = NULL;
+ }
+}
+
+static void
+qat_clean(void)
+{
+ Cpa16U buff_num = 0;
+ Cpa16U num_inter_buff_lists = 0;
+ Cpa16U i = 0;
+
+ for (i = 0; i < num_inst; i++) {
+ cpaDcStopInstance(dc_inst_handles[i]);
+ PHYS_CONTIG_FREE(session_handles[i]);
+ /* free intermediate buffers */
+ if (buffer_array[i] != NULL) {
+ cpaDcGetNumIntermediateBuffers(
+ dc_inst_handles[i], &num_inter_buff_lists);
+ for (buff_num = 0; buff_num < num_inter_buff_lists;
+ buff_num++) {
+ CpaBufferList *buffer_inter =
+ buffer_array[i][buff_num];
+ if (buffer_inter->pBuffers) {
+ PHYS_CONTIG_FREE(
+ buffer_inter->pBuffers->pData);
+ PHYS_CONTIG_FREE(
+ buffer_inter->pBuffers);
+ }
+ PHYS_CONTIG_FREE(
+ buffer_inter->pPrivateMetaData);
+ PHYS_CONTIG_FREE(buffer_inter);
+ }
+ }
+ }
+
+ num_inst = 0;
+ qat_init_done = B_FALSE;
+}
+
+int
+qat_init(void)
+{
+ CpaStatus status = CPA_STATUS_SUCCESS;
+ Cpa32U sess_size = 0;
+ Cpa32U ctx_size = 0;
+ Cpa16U num_inter_buff_lists = 0;
+ Cpa16U buff_num = 0;
+ Cpa32U buff_meta_size = 0;
+ CpaDcSessionSetupData sd = {0};
+ Cpa16U i;
+
+ status = cpaDcGetNumInstances(&num_inst);
+ if (status != CPA_STATUS_SUCCESS || num_inst == 0)
+ return (-1);
+
+ if (num_inst > MAX_INSTANCES)
+ num_inst = MAX_INSTANCES;
+
+ status = cpaDcGetInstances(num_inst, &dc_inst_handles[0]);
+ if (status != CPA_STATUS_SUCCESS)
+ return (-1);
+
+ for (i = 0; i < num_inst; i++) {
+ cpaDcSetAddressTranslation(dc_inst_handles[i],
+ (void*)virt_to_phys);
+
+ status = cpaDcBufferListGetMetaSize(dc_inst_handles[i],
+ 1, &buff_meta_size);
+
+ if (status == CPA_STATUS_SUCCESS)
+ status = cpaDcGetNumIntermediateBuffers(
+ dc_inst_handles[i], &num_inter_buff_lists);
+
+ if (status == CPA_STATUS_SUCCESS && num_inter_buff_lists != 0)
+ status = PHYS_CONTIG_ALLOC(&buffer_array[i],
+ num_inter_buff_lists *
+ sizeof (CpaBufferList *));
+
+ for (buff_num = 0; buff_num < num_inter_buff_lists;
+ buff_num++) {
+ if (status == CPA_STATUS_SUCCESS)
+ status = PHYS_CONTIG_ALLOC(
+ &buffer_array[i][buff_num],
+ sizeof (CpaBufferList));
+
+ if (status == CPA_STATUS_SUCCESS)
+ status = PHYS_CONTIG_ALLOC(
+ &buffer_array[i][buff_num]->
+ pPrivateMetaData,
+ buff_meta_size);
+
+ if (status == CPA_STATUS_SUCCESS)
+ status = PHYS_CONTIG_ALLOC(
+ &buffer_array[i][buff_num]->pBuffers,
+ sizeof (CpaFlatBuffer));
+
+ if (status == CPA_STATUS_SUCCESS) {
+ /*
+ * The implementation requires an intermediate
+ * buffer approximately twice the size of the
+ * output buffer, which is 2x the max buffer
+ * size here.
+ */
+ status = PHYS_CONTIG_ALLOC(
+ &buffer_array[i][buff_num]->pBuffers->
+ pData, 2 * QAT_MAX_BUF_SIZE);
+ if (status != CPA_STATUS_SUCCESS)
+ goto fail;
+
+ buffer_array[i][buff_num]->numBuffers = 1;
+ buffer_array[i][buff_num]->pBuffers->
+ dataLenInBytes = 2 * QAT_MAX_BUF_SIZE;
+ }
+ }
+
+ status = cpaDcStartInstance(dc_inst_handles[i],
+ num_inter_buff_lists, buffer_array[i]);
+ if (status != CPA_STATUS_SUCCESS)
+ goto fail;
+
+ sd.compLevel = CPA_DC_L1;
+ sd.compType = CPA_DC_DEFLATE;
+ sd.huffType = CPA_DC_HT_FULL_DYNAMIC;
+ sd.sessDirection = CPA_DC_DIR_COMBINED;
+ sd.sessState = CPA_DC_STATELESS;
+ sd.deflateWindowSize = 7;
+ sd.checksum = CPA_DC_ADLER32;
+ status = cpaDcGetSessionSize(dc_inst_handles[i],
+ &sd, &sess_size, &ctx_size);
+ if (status != CPA_STATUS_SUCCESS)
+ goto fail;
+
+ PHYS_CONTIG_ALLOC(&session_handles[i], sess_size);
+ if (session_handles[i] == NULL)
+ goto fail;
+
+ status = cpaDcInitSession(dc_inst_handles[i],
+ session_handles[i],
+ &sd, NULL, qat_dc_callback);
+ if (status != CPA_STATUS_SUCCESS)
+ goto fail;
+ }
+
+ qat_ksp = kstat_create("zfs", 0, "qat", "misc",
+ KSTAT_TYPE_NAMED, sizeof (qat_stats) / sizeof (kstat_named_t),
+ KSTAT_FLAG_VIRTUAL);
+ if (qat_ksp != NULL) {
+ qat_ksp->ks_data = &qat_stats;
+ kstat_install(qat_ksp);
+ }
+
+ qat_init_done = B_TRUE;
+ return (0);
+fail:
+ qat_clean();
+ return (-1);
+}
+
+void
+qat_fini(void)
+{
+ qat_clean();
+
+ if (qat_ksp != NULL) {
+ kstat_delete(qat_ksp);
+ qat_ksp = NULL;
+ }
+}
+
+boolean_t
+qat_use_accel(size_t s_len)
+{
+ return (!zfs_qat_disable &&
+ qat_init_done &&
+ s_len >= QAT_MIN_BUF_SIZE &&
+ s_len <= QAT_MAX_BUF_SIZE);
+}
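qat_use_accel() gates hardware offload on the 4KB-128KB window defined earlier; a userspace model with concrete sizes (not part of the patch):

    #include <stdio.h>
    #include <stddef.h>

    #define QAT_MIN_BUF_SIZE (4*1024)
    #define QAT_MAX_BUF_SIZE (128*1024)

    /* Userspace model of the gating test in qat_use_accel() above. */
    static int
    use_accel(int disabled, int init_done, size_t s_len)
    {
        return (!disabled && init_done &&
            s_len >= QAT_MIN_BUF_SIZE && s_len <= QAT_MAX_BUF_SIZE);
    }

    int
    main(void)
    {
        printf("2 KB buffer: %d\n", use_accel(0, 1, 2048));     /* 0: too small */
        printf("64 KB buffer: %d\n", use_accel(0, 1, 65536));   /* 1: offload */
        printf("256 KB buffer: %d\n", use_accel(0, 1, 262144)); /* 0: too big */
        return (0);
    }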
+
+int
+qat_compress(qat_compress_dir_t dir, char *src, int src_len,
+ char *dst, int dst_len, size_t *c_len)
+{
+ CpaInstanceHandle dc_inst_handle;
+ CpaDcSessionHandle session_handle;
+ CpaBufferList *buf_list_src = NULL;
+ CpaBufferList *buf_list_dst = NULL;
+ CpaFlatBuffer *flat_buf_src = NULL;
+ CpaFlatBuffer *flat_buf_dst = NULL;
+ Cpa8U *buffer_meta_src = NULL;
+ Cpa8U *buffer_meta_dst = NULL;
+ Cpa32U buffer_meta_size = 0;
+ CpaDcRqResults dc_results;
+ CpaStatus status = CPA_STATUS_SUCCESS;
+ Cpa32U hdr_sz = 0;
+ Cpa32U compressed_sz;
+ Cpa32U num_src_buf = (src_len >> PAGE_SHIFT) + 1;
+ Cpa32U num_dst_buf = (dst_len >> PAGE_SHIFT) + 1;
+ Cpa32U bytes_left;
+ char *data;
+ struct page *in_page, *out_page;
+ struct page **in_pages = NULL;
+ struct page **out_pages = NULL;
+ struct completion complete;
+ size_t ret = -1;
+ Cpa16U page_num = 0;
+ Cpa16U i;
+
+ Cpa32U src_buffer_list_mem_size = sizeof (CpaBufferList) +
+ (num_src_buf * sizeof (CpaFlatBuffer));
+ Cpa32U dst_buffer_list_mem_size = sizeof (CpaBufferList) +
+ (num_dst_buf * sizeof (CpaFlatBuffer));
+
+ if (!is_vmalloc_addr(src) || !is_vmalloc_addr(src + src_len - 1) ||
+ !is_vmalloc_addr(dst) || !is_vmalloc_addr(dst + dst_len - 1))
+ return (-1);
+
+ if (PHYS_CONTIG_ALLOC(&in_pages,
+ num_src_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS)
+ goto fail;
+
+ if (PHYS_CONTIG_ALLOC(&out_pages,
+ num_dst_buf * sizeof (struct page *)) != CPA_STATUS_SUCCESS)
+ goto fail;
+
+ i = atomic_inc_32_nv(&inst_num) % num_inst;
+ dc_inst_handle = dc_inst_handles[i];
+ session_handle = session_handles[i];
+
+ cpaDcBufferListGetMetaSize(dc_inst_handle, num_src_buf,
+ &buffer_meta_size);
+ if (PHYS_CONTIG_ALLOC(&buffer_meta_src, buffer_meta_size) !=
+ CPA_STATUS_SUCCESS)
+ goto fail;
+
+ cpaDcBufferListGetMetaSize(dc_inst_handle, num_dst_buf,
+ &buffer_meta_size);
+ if (PHYS_CONTIG_ALLOC(&buffer_meta_dst, buffer_meta_size) !=
+ CPA_STATUS_SUCCESS)
+ goto fail;
+
+ /* build source buffer list */
+ if (PHYS_CONTIG_ALLOC(&buf_list_src, src_buffer_list_mem_size) !=
+ CPA_STATUS_SUCCESS)
+ goto fail;
+
+ flat_buf_src = (CpaFlatBuffer *)(buf_list_src + 1);
+
+ buf_list_src->pBuffers = flat_buf_src; /* always point to first one */
+
+ /* build destination buffer list */
+ if (PHYS_CONTIG_ALLOC(&buf_list_dst, dst_buffer_list_mem_size) !=
+ CPA_STATUS_SUCCESS)
+ goto fail;
+
+ flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1);
+
+ buf_list_dst->pBuffers = flat_buf_dst; /* always point to first one */
+
+ buf_list_src->numBuffers = 0;
+ buf_list_src->pPrivateMetaData = buffer_meta_src;
+ bytes_left = src_len;
+ data = src;
+ page_num = 0;
+ while (bytes_left > 0) {
+ in_page = mem_to_page(data);
+ in_pages[page_num] = in_page;
+ flat_buf_src->pData = kmap(in_page);
+ flat_buf_src->dataLenInBytes =
+ min((long)bytes_left, (long)PAGE_SIZE);
+
+ bytes_left -= flat_buf_src->dataLenInBytes;
+ data += flat_buf_src->dataLenInBytes;
+ flat_buf_src++;
+ buf_list_src->numBuffers++;
+ page_num++;
+ }
+
+ buf_list_dst->numBuffers = 0;
+ buf_list_dst->pPrivateMetaData = buffer_meta_dst;
+ bytes_left = dst_len;
+ data = dst;
+ page_num = 0;
+ while (bytes_left > 0) {
+ out_page = mem_to_page(data);
+ flat_buf_dst->pData = kmap(out_page);
+ out_pages[page_num] = out_page;
+ flat_buf_dst->dataLenInBytes =
+ min((long)bytes_left, (long)PAGE_SIZE);
+
+ bytes_left -= flat_buf_dst->dataLenInBytes;
+ data += flat_buf_dst->dataLenInBytes;
+ flat_buf_dst++;
+ buf_list_dst->numBuffers++;
+ page_num++;
+ }
+
+ init_completion(&complete);
+
+ if (dir == QAT_COMPRESS) {
+ QAT_STAT_BUMP(comp_requests);
+ QAT_STAT_INCR(comp_total_in_bytes, src_len);
+
+ cpaDcGenerateHeader(session_handle,
+ buf_list_dst->pBuffers, &hdr_sz);
+ buf_list_dst->pBuffers->pData += hdr_sz;
+ buf_list_dst->pBuffers->dataLenInBytes -= hdr_sz;
+ status = cpaDcCompressData(
+ dc_inst_handle, session_handle,
+ buf_list_src, buf_list_dst,
+ &dc_results, CPA_DC_FLUSH_FINAL,
+ &complete);
+ if (status != CPA_STATUS_SUCCESS) {
+ goto fail;
+ }
+
+ /* we now wait until the completion of the operation. */
+ if (!wait_for_completion_interruptible_timeout(&complete,
+ TIMEOUT_MS)) {
+ status = CPA_STATUS_FAIL;
+ goto fail;
+ }
+
+ if (dc_results.status != CPA_STATUS_SUCCESS) {
+ status = CPA_STATUS_FAIL;
+ goto fail;
+ }
+
+ compressed_sz = dc_results.produced;
+ if (compressed_sz + hdr_sz + ZLIB_FOOT_SZ > dst_len) {
+ goto fail;
+ }
+
+ flat_buf_dst = (CpaFlatBuffer *)(buf_list_dst + 1);
+ /* move to the last page */
+ flat_buf_dst += (compressed_sz + hdr_sz) >> PAGE_SHIFT;
+
+ /* no space for gzip foot in the last page */
+ if (((compressed_sz + hdr_sz) % PAGE_SIZE)
+ + ZLIB_FOOT_SZ > PAGE_SIZE)
+ goto fail;
+
+ flat_buf_dst->pData += (compressed_sz + hdr_sz) % PAGE_SIZE;
+ flat_buf_dst->dataLenInBytes = ZLIB_FOOT_SZ;
+
+ dc_results.produced = 0;
+ status = cpaDcGenerateFooter(session_handle,
+ flat_buf_dst, &dc_results);
+ if (status != CPA_STATUS_SUCCESS) {
+ goto fail;
+ }
+
+ *c_len = compressed_sz + dc_results.produced + hdr_sz;
+
+ if (*c_len < PAGE_SIZE)
+ *c_len = 8 * PAGE_SIZE;
+
+ QAT_STAT_INCR(comp_total_out_bytes, *c_len);
+
+ ret = 0;
+
+ } else if (dir == QAT_DECOMPRESS) {
+ QAT_STAT_BUMP(decomp_requests);
+ QAT_STAT_INCR(decomp_total_in_bytes, src_len);
+
+ buf_list_src->pBuffers->pData += ZLIB_HEAD_SZ;
+ buf_list_src->pBuffers->dataLenInBytes -= ZLIB_HEAD_SZ;
+ status = cpaDcDecompressData(dc_inst_handle,
+ session_handle,
+ buf_list_src,
+ buf_list_dst,
+ &dc_results,
+ CPA_DC_FLUSH_FINAL,
+ &complete);
+
+ if (CPA_STATUS_SUCCESS != status) {
+ status = CPA_STATUS_FAIL;
+ goto fail;
+ }
+
+ /* we now wait until the completion of the operation. */
+ if (!wait_for_completion_interruptible_timeout(&complete,
+ TIMEOUT_MS)) {
+ status = CPA_STATUS_FAIL;
+ goto fail;
+ }
+
+ if (dc_results.status != CPA_STATUS_SUCCESS) {
+ status = CPA_STATUS_FAIL;
+ goto fail;
+ }
+
+ *c_len = dc_results.produced;
+
+ QAT_STAT_INCR(decomp_total_out_bytes, *c_len);
+
+ ret = 0;
+ }
+
+fail:
+ if (status != CPA_STATUS_SUCCESS) {
+ QAT_STAT_BUMP(dc_fails);
+ }
+
+ if (in_pages) {
+ for (page_num = 0;
+ page_num < buf_list_src->numBuffers;
+ page_num++) {
+ kunmap(in_pages[page_num]);
+ }
+ PHYS_CONTIG_FREE(in_pages);
+ }
+
+ if (out_pages) {
+ for (page_num = 0;
+ page_num < buf_list_dst->numBuffers;
+ page_num++) {
+ kunmap(out_pages[page_num]);
+ }
+ PHYS_CONTIG_FREE(out_pages);
+ }
+
+ PHYS_CONTIG_FREE(buffer_meta_src);
+ PHYS_CONTIG_FREE(buffer_meta_dst);
+ PHYS_CONTIG_FREE(buf_list_src);
+ PHYS_CONTIG_FREE(buf_list_dst);
+
+ return (ret);
+}
+
+module_param(zfs_qat_disable, int, 0644);
+MODULE_PARM_DESC(zfs_qat_disable, "Disable QAT compression");
+
+#endif
diff --git a/zfs/module/zfs/qat_compress.h b/zfs/module/zfs/qat_compress.h
new file mode 100644
index 000000000000..ff074646fdeb
--- /dev/null
+++ b/zfs/module/zfs/qat_compress.h
@@ -0,0 +1,48 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+#ifndef _SYS_QAT_COMPRESS_H
+#define _SYS_QAT_COMPRESS_H
+
+#if defined(_KERNEL) && defined(HAVE_QAT)
+#include <sys/zio.h>
+#include "cpa.h"
+#include "dc/cpa_dc.h"
+
+typedef enum qat_compress_dir {
+ QAT_COMPRESS = 0,
+ QAT_DECOMPRESS = 1,
+} qat_compress_dir_t;
+
+extern int qat_init(void);
+extern void qat_fini(void);
+extern boolean_t qat_use_accel(size_t s_len);
+extern int qat_compress(qat_compress_dir_t dir, char *src, int src_len,
+ char *dst, int dst_len, size_t *c_len);
+#else
+#define CPA_STATUS_SUCCESS 0
+#define qat_init()
+#define qat_fini()
+#define qat_use_accel(s_len) 0
+#define qat_compress(dir, s, sl, d, dl, cl) 0
+#endif
+
+#endif /* _SYS_QAT_COMPRESS_H */
diff --git a/zfs/module/zfs/range_tree.c b/zfs/module/zfs/range_tree.c
index 6422fd1c1fa6..ebef7f447862 100644
--- a/zfs/module/zfs/range_tree.c
+++ b/zfs/module/zfs/range_tree.c
@@ -111,20 +111,13 @@ range_tree_stat_decr(range_tree_t *rt, range_seg_t *rs)
static int
range_tree_seg_compare(const void *x1, const void *x2)
{
- const range_seg_t *r1 = x1;
- const range_seg_t *r2 = x2;
+ const range_seg_t *r1 = (const range_seg_t *)x1;
+ const range_seg_t *r2 = (const range_seg_t *)x2;
- if (r1->rs_start < r2->rs_start) {
- if (r1->rs_end > r2->rs_start)
- return (0);
- return (-1);
- }
- if (r1->rs_start > r2->rs_start) {
- if (r1->rs_start < r2->rs_end)
- return (0);
- return (1);
- }
- return (0);
+ ASSERT3U(r1->rs_start, <=, r1->rs_end);
+ ASSERT3U(r2->rs_start, <=, r2->rs_end);
+
+ return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
}
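The rewritten comparator is branchless: for well-formed segments it returns -1 when r1 ends at or before r2's start, 1 when r1 starts at or after r2's end, and 0 when the segments overlap. A standalone check (not part of the patch):

    #include <stdio.h>
    #include <stdint.h>

    typedef struct {
        uint64_t rs_start;
        uint64_t rs_end;
    } seg_t;

    /* Same expression as range_tree_seg_compare() above. */
    static int
    seg_compare(const seg_t *r1, const seg_t *r2)
    {
        return ((r1->rs_start >= r2->rs_end) - (r1->rs_end <= r2->rs_start));
    }

    int
    main(void)
    {
        seg_t a = { 0, 10 }, b = { 10, 20 }, c = { 5, 15 };

        printf("a vs b: %d\n", seg_compare(&a, &b)); /* -1: a before b */
        printf("b vs a: %d\n", seg_compare(&b, &a)); /*  1: b after a */
        printf("a vs c: %d\n", seg_compare(&a, &c)); /*  0: overlap */
        return (0);
    }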
range_tree_t *
diff --git a/zfs/module/zfs/refcount.c b/zfs/module/zfs/refcount.c
index 25875243bad9..a151aceaecfb 100644
--- a/zfs/module/zfs/refcount.c
+++ b/zfs/module/zfs/refcount.c
@@ -20,14 +20,12 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/refcount.h>
-#ifdef ZFS_DEBUG
-
#ifdef _KERNEL
int reference_tracking_enable = FALSE; /* runs out of memory too easily */
#else
@@ -35,6 +33,7 @@ int reference_tracking_enable = TRUE;
#endif
int reference_history = 3; /* tunable */
+#ifdef ZFS_DEBUG
static kmem_cache_t *reference_cache;
static kmem_cache_t *reference_history_cache;
@@ -68,6 +67,13 @@ refcount_create(refcount_t *rc)
rc->rc_tracked = reference_tracking_enable;
}
+void
+refcount_create_tracked(refcount_t *rc)
+{
+ refcount_create(rc);
+ rc->rc_tracked = B_TRUE;
+}
+
void
refcount_create_untracked(refcount_t *rc)
{
@@ -227,4 +233,84 @@ refcount_transfer(refcount_t *dst, refcount_t *src)
list_destroy(&removed);
}
+void
+refcount_transfer_ownership(refcount_t *rc, void *current_holder,
+ void *new_holder)
+{
+ reference_t *ref;
+ boolean_t found = B_FALSE;
+
+ mutex_enter(&rc->rc_mtx);
+ if (!rc->rc_tracked) {
+ mutex_exit(&rc->rc_mtx);
+ return;
+ }
+
+ for (ref = list_head(&rc->rc_list); ref;
+ ref = list_next(&rc->rc_list, ref)) {
+ if (ref->ref_holder == current_holder) {
+ ref->ref_holder = new_holder;
+ found = B_TRUE;
+ break;
+ }
+ }
+ ASSERT(found);
+ mutex_exit(&rc->rc_mtx);
+}
+
+/*
+ * If tracking is enabled, return true if a reference exists that matches
+ * the "holder" tag. If tracking is disabled, then return true if a reference
+ * might be held.
+ */
+boolean_t
+refcount_held(refcount_t *rc, void *holder)
+{
+ reference_t *ref;
+
+ mutex_enter(&rc->rc_mtx);
+
+ if (!rc->rc_tracked) {
+ mutex_exit(&rc->rc_mtx);
+ return (rc->rc_count > 0);
+ }
+
+ for (ref = list_head(&rc->rc_list); ref;
+ ref = list_next(&rc->rc_list, ref)) {
+ if (ref->ref_holder == holder) {
+ mutex_exit(&rc->rc_mtx);
+ return (B_TRUE);
+ }
+ }
+ mutex_exit(&rc->rc_mtx);
+ return (B_FALSE);
+}
+
+/*
+ * If tracking is enabled, return true if a reference does not exist that
+ * matches the "holder" tag. If tracking is disabled, always return true
+ * since the reference might not be held.
+ */
+boolean_t
+refcount_not_held(refcount_t *rc, void *holder)
+{
+ reference_t *ref;
+
+ mutex_enter(&rc->rc_mtx);
+
+ if (!rc->rc_tracked) {
+ mutex_exit(&rc->rc_mtx);
+ return (B_TRUE);
+ }
+
+ for (ref = list_head(&rc->rc_list); ref;
+ ref = list_next(&rc->rc_list, ref)) {
+ if (ref->ref_holder == holder) {
+ mutex_exit(&rc->rc_mtx);
+ return (B_FALSE);
+ }
+ }
+ mutex_exit(&rc->rc_mtx);
+ return (B_TRUE);
+}
#endif /* ZFS_DEBUG */
diff --git a/zfs/module/zfs/rrwlock.c b/zfs/module/zfs/rrwlock.c
index 51394c01c431..704f76067bf0 100644
--- a/zfs/module/zfs/rrwlock.c
+++ b/zfs/module/zfs/rrwlock.c
@@ -313,8 +313,8 @@ rrw_tsd_destroy(void *arg)
* The idea is to split single busy lock into array of locks, so that
* each reader can lock only one of them for read, depending on result
* of simple hash function. That proportionally reduces lock congestion.
- * Writer same time has to sequentially aquire write on all the locks.
- * That makes write aquisition proportionally slower, but in places where
+ * Writer at the same time has to sequentially acquire write on all the locks.
+ * That makes write acquisition proportionally slower, but in places where
* it is used (filesystem unmount) performance is not critical.
*
* All the functions below are direct wrappers around functions above.
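A hedged userspace sketch of the striped scheme this comment describes, using POSIX rwlocks: each reader hashes onto a single slot, while a writer must take every slot in order:

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NSLOTS 4

    /* Model of the striped rrwlock array described above. */
    static pthread_rwlock_t slots[NSLOTS] = {
        PTHREAD_RWLOCK_INITIALIZER, PTHREAD_RWLOCK_INITIALIZER,
        PTHREAD_RWLOCK_INITIALIZER, PTHREAD_RWLOCK_INITIALIZER
    };

    /* Simple hash: spread readers across slots by thread identity. */
    static unsigned
    slot_of(pthread_t t)
    {
        return ((unsigned)((uintptr_t)t % NSLOTS));
    }

    static void
    read_enter(void)
    {
        pthread_rwlock_rdlock(&slots[slot_of(pthread_self())]);
    }

    static void
    read_exit(void)
    {
        pthread_rwlock_unlock(&slots[slot_of(pthread_self())]);
    }

    /* A writer takes all slots, so write acquisition is proportionally slower. */
    static void
    write_enter(void)
    {
        for (int i = 0; i < NSLOTS; i++)
            pthread_rwlock_wrlock(&slots[i]);
    }

    static void
    write_exit(void)
    {
        for (int i = 0; i < NSLOTS; i++)
            pthread_rwlock_unlock(&slots[i]);
    }

    int
    main(void)
    {
        read_enter();
        printf("reader holds slot %u\n", slot_of(pthread_self()));
        read_exit();

        write_enter();
        printf("writer holds all %d slots\n", NSLOTS);
        write_exit();
        return (0);
    }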
diff --git a/zfs/module/zfs/sa.c b/zfs/module/zfs/sa.c
index d6ac5fcc709a..8046dbde28ad 100644
--- a/zfs/module/zfs/sa.c
+++ b/zfs/module/zfs/sa.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2017 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -33,6 +33,7 @@
#include <sys/dmu.h>
#include <sys/dmu_impl.h>
#include <sys/dmu_objset.h>
+#include <sys/dmu_tx.h>
#include <sys/dbuf.h>
#include <sys/dnode.h>
#include <sys/zap.h>
@@ -129,8 +130,8 @@ typedef void (sa_iterfunc_t)(void *hdr, void *addr, sa_attr_type_t,
static int sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype);
static void sa_idx_tab_hold(objset_t *os, sa_idx_tab_t *idx_tab);
-static void *sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype,
- void *data);
+static sa_idx_tab_t *sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype,
+ sa_hdr_phys_t *hdr);
static void sa_idx_tab_rele(objset_t *os, void *arg);
static void sa_copy_data(sa_data_locator_t *func, void *start, void *target,
int buflen);
@@ -201,7 +202,7 @@ sa_attr_type_t sa_legacy_zpl_layout[] = {
*/
sa_attr_type_t sa_dummy_zpl_layout[] = { 0 };
-static int sa_legacy_attr_count = 16;
+static int sa_legacy_attr_count = ARRAY_SIZE(sa_legacy_attrs);
static kmem_cache_t *sa_cache = NULL;
/*ARGSUSED*/
@@ -240,31 +241,23 @@ sa_cache_fini(void)
static int
layout_num_compare(const void *arg1, const void *arg2)
{
- const sa_lot_t *node1 = arg1;
- const sa_lot_t *node2 = arg2;
+ const sa_lot_t *node1 = (const sa_lot_t *)arg1;
+ const sa_lot_t *node2 = (const sa_lot_t *)arg2;
- if (node1->lot_num > node2->lot_num)
- return (1);
- else if (node1->lot_num < node2->lot_num)
- return (-1);
- return (0);
+ return (AVL_CMP(node1->lot_num, node2->lot_num));
}
static int
layout_hash_compare(const void *arg1, const void *arg2)
{
- const sa_lot_t *node1 = arg1;
- const sa_lot_t *node2 = arg2;
+ const sa_lot_t *node1 = (const sa_lot_t *)arg1;
+ const sa_lot_t *node2 = (const sa_lot_t *)arg2;
- if (node1->lot_hash > node2->lot_hash)
- return (1);
- if (node1->lot_hash < node2->lot_hash)
- return (-1);
- if (node1->lot_instance > node2->lot_instance)
- return (1);
- if (node1->lot_instance < node2->lot_instance)
- return (-1);
- return (0);
+ int cmp = AVL_CMP(node1->lot_hash, node2->lot_hash);
+ if (likely(cmp))
+ return (cmp);
+
+ return (AVL_CMP(node1->lot_instance, node2->lot_instance));
}
boolean_t
@@ -553,12 +546,11 @@ sa_copy_data(sa_data_locator_t *func, void *datastart, void *target, int buflen)
*/
static int
sa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count,
- dmu_buf_t *db, sa_buf_type_t buftype, int *index, int *total,
- boolean_t *will_spill)
+ dmu_buf_t *db, sa_buf_type_t buftype, int full_space, int *index,
+ int *total, boolean_t *will_spill)
{
int var_size_count = 0;
int i;
- int full_space;
int hdrsize;
int extra_hdrsize;
@@ -577,7 +569,6 @@ sa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count,
hdrsize = (SA_BONUSTYPE_FROM_DB(db) == DMU_OT_ZNODE) ? 0 :
sizeof (sa_hdr_phys_t);
- full_space = (buftype == SA_BONUS) ? DN_MAX_BONUSLEN : db->db_size;
ASSERT(IS_P2ALIGNED(full_space, 8));
for (i = 0; i != attr_count; i++) {
@@ -668,6 +659,7 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
void *data_start;
sa_attr_type_t *attrs, *attrs_start;
int i, lot_count;
+ int dnodesize;
int spill_idx;
int hdrsize;
int spillhdrsize = 0;
@@ -676,20 +668,23 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
sa_lot_t *lot;
int len_idx;
int spill_used;
+ int bonuslen;
boolean_t spilling;
dmu_buf_will_dirty(hdl->sa_bonus, tx);
bonustype = SA_BONUSTYPE_FROM_DB(hdl->sa_bonus);
+ dmu_object_dnsize_from_db(hdl->sa_bonus, &dnodesize);
+ bonuslen = DN_BONUS_SIZE(dnodesize);
/* first determine bonus header size and sum of all attributes */
hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus,
- SA_BONUS, &spill_idx, &used, &spilling);
+ SA_BONUS, bonuslen, &spill_idx, &used, &spilling);
if (used > SPA_OLD_MAXBLOCKSIZE)
return (SET_ERROR(EFBIG));
- VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ?
- MIN(DN_MAX_BONUSLEN - sizeof (blkptr_t), used + hdrsize) :
+ VERIFY0(dmu_set_bonus(hdl->sa_bonus, spilling ?
+ MIN(bonuslen - sizeof (blkptr_t), used + hdrsize) :
used + hdrsize, tx));
ASSERT((bonustype == DMU_OT_ZNODE && spilling == 0) ||
@@ -706,8 +701,8 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
dmu_buf_will_dirty(hdl->sa_spill, tx);
spillhdrsize = sa_find_sizes(sa, &attr_desc[spill_idx],
- attr_count - spill_idx, hdl->sa_spill, SA_SPILL, &i,
- &spill_used, &dummy);
+ attr_count - spill_idx, hdl->sa_spill, SA_SPILL,
+ hdl->sa_spill->db_size, &i, &spill_used, &dummy);
if (spill_used > SPA_OLD_MAXBLOCKSIZE)
return (SET_ERROR(EFBIG));
@@ -1250,7 +1245,7 @@ sa_byteswap(sa_handle_t *hdl, sa_buf_type_t buftype)
sa_hdr_phys->sa_layout_info = BSWAP_16(sa_hdr_phys->sa_layout_info);
/*
- * Determine number of variable lenghts in header
+ * Determine number of variable lengths in header
* The standard 8 byte header has one for free and a
* 16 byte header would have 4 + 1;
*/
@@ -1302,7 +1297,7 @@ sa_build_index(sa_handle_t *hdl, sa_buf_type_t buftype)
/*ARGSUSED*/
static void
-sa_evict(void *dbu)
+sa_evict_sync(void *dbu)
{
panic("evicting sa dbuf\n");
}
@@ -1399,7 +1394,8 @@ sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp,
sa_handle_t *winner = NULL;
handle = kmem_cache_alloc(sa_cache, KM_SLEEP);
- handle->sa_dbu.dbu_evict_func = NULL;
+ handle->sa_dbu.dbu_evict_func_sync = NULL;
+ handle->sa_dbu.dbu_evict_func_async = NULL;
handle->sa_userp = userp;
handle->sa_bonus = db;
handle->sa_os = os;
@@ -1410,7 +1406,8 @@ sa_handle_get_from_db(objset_t *os, dmu_buf_t *db, void *userp,
error = sa_build_index(handle, SA_BONUS);
if (hdl_type == SA_HDL_SHARED) {
- dmu_buf_init_user(&handle->sa_dbu, sa_evict, NULL);
+ dmu_buf_init_user(&handle->sa_dbu, sa_evict_sync, NULL,
+ NULL);
winner = dmu_buf_set_user_ie(db, &handle->sa_dbu);
}
@@ -1501,11 +1498,10 @@ sa_lookup_uio(sa_handle_t *hdl, sa_attr_type_t attr, uio_t *uio)
}
#endif
-void *
-sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, void *data)
+static sa_idx_tab_t *
+sa_find_idx_tab(objset_t *os, dmu_object_type_t bonustype, sa_hdr_phys_t *hdr)
{
sa_idx_tab_t *idx_tab;
- sa_hdr_phys_t *hdr = (sa_hdr_phys_t *)data;
sa_os_t *sa = os->os_sa;
sa_lot_t *tb, search;
avl_index_t loc;
@@ -1649,8 +1645,11 @@ sa_replace_all_by_template(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc,
}
/*
- * add/remove/replace a single attribute and then rewrite the entire set
+ * Add/remove a single attribute or replace a variable-sized attribute value
+ * with a value of a different size, and then rewrite the entire set
* of attributes.
+ * Same-length attribute value replacement (including fixed-length attributes)
+ * is handled more efficiently by the upper layers.
*/
static int
sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
@@ -1667,7 +1666,7 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
int spill_data_size = 0;
int spill_attr_count = 0;
int error;
- uint16_t length;
+ uint16_t length, reg_length;
int i, j, k, length_idx;
sa_hdr_phys_t *hdr;
sa_idx_tab_t *idx_tab;
@@ -1695,7 +1694,7 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
if ((error = sa_get_spill(hdl)) == 0) {
spill_data_size = hdl->sa_spill->db_size;
- old_data[1] = zio_buf_alloc(spill_data_size);
+ old_data[1] = vmem_alloc(spill_data_size, KM_SLEEP);
bcopy(hdl->sa_spill->db_data, old_data[1],
hdl->sa_spill->db_size);
spill_attr_count =
@@ -1736,20 +1735,36 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
sa_attr_type_t attr;
attr = idx_tab->sa_layout->lot_attrs[i];
- length = SA_REGISTERED_LEN(sa, attr);
+ reg_length = SA_REGISTERED_LEN(sa, attr);
+ if (reg_length == 0) {
+ length = hdr->sa_lengths[length_idx];
+ length_idx++;
+ } else {
+ length = reg_length;
+ }
if (attr == newattr) {
- if (length == 0)
- ++length_idx;
+ /*
+ * There is nothing to do for SA_REMOVE,
+ * so it is just skipped.
+ */
if (action == SA_REMOVE)
continue;
- ASSERT(length == 0);
- ASSERT(action == SA_REPLACE);
+
+ /*
+ * Duplicate attributes are not allowed, so the
+ * action can not be SA_ADD here.
+ */
+ ASSERT3S(action, ==, SA_REPLACE);
+
+ /*
+ * Only a variable-sized attribute can be
+ * replaced here, and its size must be changing.
+ */
+ ASSERT3U(reg_length, ==, 0);
+ ASSERT3U(length, !=, buflen);
SA_ADD_BULK_ATTR(attr_desc, j, attr,
locator, datastart, buflen);
} else {
- if (length == 0)
- length = hdr->sa_lengths[length_idx++];
-
SA_ADD_BULK_ATTR(attr_desc, j, attr,
NULL, (void *)
(TOC_OFF(idx_tab->sa_idx_tab[attr]) +
@@ -1765,20 +1780,19 @@ sa_modify_attrs(sa_handle_t *hdl, sa_attr_type_t newattr,
}
}
if (action == SA_ADD) {
- length = SA_REGISTERED_LEN(sa, newattr);
- if (length == 0) {
- length = buflen;
- }
+ reg_length = SA_REGISTERED_LEN(sa, newattr);
+ IMPLY(reg_length != 0, reg_length == buflen);
SA_ADD_BULK_ATTR(attr_desc, j, newattr, locator,
- datastart, length);
+ datastart, buflen);
}
+ ASSERT3U(j, ==, attr_count);
error = sa_build_layouts(hdl, attr_desc, attr_count, tx);
if (old_data[0])
kmem_free(old_data[0], bonus_data_size);
if (old_data[1])
- zio_buf_free(old_data[1], spill_data_size);
+ vmem_free(old_data[1], spill_data_size);
kmem_free(attr_desc, sizeof (sa_bulk_attr_t) * attr_count);
return (error);
@@ -1851,26 +1865,6 @@ sa_update(sa_handle_t *hdl, sa_attr_type_t type,
return (error);
}
-int
-sa_update_from_cb(sa_handle_t *hdl, sa_attr_type_t attr,
- uint32_t buflen, sa_data_locator_t *locator, void *userdata, dmu_tx_t *tx)
-{
- int error;
- sa_bulk_attr_t bulk;
-
- VERIFY3U(buflen, <=, SA_ATTR_MAX_LEN);
-
- bulk.sa_attr = attr;
- bulk.sa_data = userdata;
- bulk.sa_data_func = locator;
- bulk.sa_length = buflen;
-
- mutex_enter(&hdl->sa_lock);
- error = sa_bulk_update_impl(hdl, &bulk, 1, tx);
- mutex_exit(&hdl->sa_lock);
- return (error);
-}
-
/*
* Return size of an attribute
*/
@@ -2049,7 +2043,6 @@ EXPORT_SYMBOL(sa_bulk_lookup);
EXPORT_SYMBOL(sa_bulk_lookup_locked);
EXPORT_SYMBOL(sa_bulk_update);
EXPORT_SYMBOL(sa_size);
-EXPORT_SYMBOL(sa_update_from_cb);
EXPORT_SYMBOL(sa_object_info);
EXPORT_SYMBOL(sa_object_size);
EXPORT_SYMBOL(sa_get_userdata);
diff --git a/zfs/module/zfs/sha256.c b/zfs/module/zfs/sha256.c
index cf9dd8fcba1a..23a97aa3de17 100644
--- a/zfs/module/zfs/sha256.c
+++ b/zfs/module/zfs/sha256.c
@@ -19,109 +19,73 @@
* CDDL HEADER END
*/
/*
- * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
-
-#include <sys/zfs_context.h>
-#include <sys/zio.h>
-#include <sys/zio_checksum.h>
-
/*
- * SHA-256 checksum, as specified in FIPS 180-3, available at:
- * http://csrc.nist.gov/publications/PubsFIPS.html
- *
- * This is a very compact implementation of SHA-256.
- * It is designed to be simple and portable, not to be fast.
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2016 by Delphix. All rights reserved.
*/
+#include <sys/zfs_context.h>
+#include <sys/zio.h>
+#include <sys/sha2.h>
+#include <sys/abd.h>
-/*
- * The literal definitions of Ch() and Maj() according to FIPS 180-3 are:
- *
- * Ch(x, y, z) (x & y) ^ (~x & z)
- * Maj(x, y, z) (x & y) ^ (x & z) ^ (y & z)
- *
- * We use equivalent logical reductions here that require one less op.
- */
-#define Ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
-#define Maj(x, y, z) (((x) & (y)) ^ ((z) & ((x) ^ (y))))
-#define Rot32(x, s) (((x) >> s) | ((x) << (32 - s)))
-#define SIGMA0(x) (Rot32(x, 2) ^ Rot32(x, 13) ^ Rot32(x, 22))
-#define SIGMA1(x) (Rot32(x, 6) ^ Rot32(x, 11) ^ Rot32(x, 25))
-#define sigma0(x) (Rot32(x, 7) ^ Rot32(x, 18) ^ ((x) >> 3))
-#define sigma1(x) (Rot32(x, 17) ^ Rot32(x, 19) ^ ((x) >> 10))
-
-static const uint32_t SHA256_K[64] = {
- 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
- 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
- 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
- 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
- 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
- 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
- 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
- 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
- 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
- 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
- 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
- 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
- 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
- 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
- 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
- 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-};
-
-static void
-SHA256Transform(uint32_t *H, const uint8_t *cp)
+static int
+sha_incremental(void *buf, size_t size, void *arg)
{
- uint32_t a, b, c, d, e, f, g, h, t, T1, T2, W[64];
-
- for (t = 0; t < 16; t++, cp += 4)
- W[t] = (cp[0] << 24) | (cp[1] << 16) | (cp[2] << 8) | cp[3];
-
- for (t = 16; t < 64; t++)
- W[t] = sigma1(W[t - 2]) + W[t - 7] +
- sigma0(W[t - 15]) + W[t - 16];
-
- a = H[0]; b = H[1]; c = H[2]; d = H[3];
- e = H[4]; f = H[5]; g = H[6]; h = H[7];
-
- for (t = 0; t < 64; t++) {
- T1 = h + SIGMA1(e) + Ch(e, f, g) + SHA256_K[t] + W[t];
- T2 = SIGMA0(a) + Maj(a, b, c);
- h = g; g = f; f = e; e = d + T1;
- d = c; c = b; b = a; a = T1 + T2;
- }
-
- H[0] += a; H[1] += b; H[2] += c; H[3] += d;
- H[4] += e; H[5] += f; H[6] += g; H[7] += h;
+ SHA2_CTX *ctx = arg;
+ SHA2Update(ctx, buf, size);
+ return (0);
}
+/*ARGSUSED*/
void
-zio_checksum_SHA256(const void *buf, uint64_t size, zio_cksum_t *zcp)
+abd_checksum_SHA256(abd_t *abd, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp)
{
- uint32_t H[8] = { 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
- 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 };
- uint8_t pad[128];
- int i, padsize;
-
- for (i = 0; i < (size & ~63ULL); i += 64)
- SHA256Transform(H, (uint8_t *)buf + i);
-
- for (padsize = 0; i < size; i++)
- pad[padsize++] = *((uint8_t *)buf + i);
+ SHA2_CTX ctx;
+ zio_cksum_t tmp;
+
+ SHA2Init(SHA256, &ctx);
+ (void) abd_iterate_func(abd, 0, size, sha_incremental, &ctx);
+ SHA2Final(&tmp, &ctx);
+
+ /*
+ * A prior implementation of this function used a private
+ * SHA256 implementation that always wrote things out in
+ * big endian, and there was no byteswap variant of it.
+ * To preserve on-disk compatibility we need to force that
+ * behavior.
+ */
+ zcp->zc_word[0] = BE_64(tmp.zc_word[0]);
+ zcp->zc_word[1] = BE_64(tmp.zc_word[1]);
+ zcp->zc_word[2] = BE_64(tmp.zc_word[2]);
+ zcp->zc_word[3] = BE_64(tmp.zc_word[3]);
+}
- for (pad[padsize++] = 0x80; (padsize & 63) != 56; padsize++)
- pad[padsize] = 0;
+/*ARGSUSED*/
+void
+abd_checksum_SHA512_native(abd_t *abd, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp)
+{
+ SHA2_CTX ctx;
- for (i = 56; i >= 0; i -= 8)
- pad[padsize++] = (size << 3) >> i;
+ SHA2Init(SHA512_256, &ctx);
+ (void) abd_iterate_func(abd, 0, size, sha_incremental, &ctx);
+ SHA2Final(zcp, &ctx);
+}
- for (i = 0; i < padsize; i += 64)
- SHA256Transform(H, pad + i);
+/*ARGSUSED*/
+void
+abd_checksum_SHA512_byteswap(abd_t *abd, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp)
+{
+ zio_cksum_t tmp;
- ZIO_SET_CHECKSUM(zcp,
- (uint64_t)H[0] << 32 | H[1],
- (uint64_t)H[2] << 32 | H[3],
- (uint64_t)H[4] << 32 | H[5],
- (uint64_t)H[6] << 32 | H[7]);
+ abd_checksum_SHA512_native(abd, size, ctx_template, &tmp);
+ zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]);
+ zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]);
+ zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]);
+ zcp->zc_word[3] = BSWAP_64(tmp.zc_word[3]);
}
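
As an aside on the change above: the byteswap variants derive directly from
the native computation, so only one hash pass is ever needed. A minimal
sketch, assuming only the entry points introduced above (example_sha512_pair
itself is hypothetical and not part of this patch):

    /*
     * Illustrative only: the byteswapped checksum is the native
     * checksum with each 64-bit word swapped, so a single hash
     * pass suffices.
     */
    static void
    example_sha512_pair(abd_t *abd, uint64_t size, zio_cksum_t *native,
        zio_cksum_t *swapped)
    {
            int i;

            abd_checksum_SHA512_native(abd, size, NULL, native);
            for (i = 0; i < 4; i++)
                    swapped->zc_word[i] = BSWAP_64(native->zc_word[i]);
    }
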
diff --git a/zfs/module/zfs/skein_zfs.c b/zfs/module/zfs/skein_zfs.c
new file mode 100644
index 000000000000..8deb84b266bd
--- /dev/null
+++ b/zfs/module/zfs/skein_zfs.c
@@ -0,0 +1,101 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2016 by Delphix. All rights reserved.
+ */
+#include <sys/zfs_context.h>
+#include <sys/zio.h>
+#include <sys/skein.h>
+
+#include <sys/abd.h>
+
+static int
+skein_incremental(void *buf, size_t size, void *arg)
+{
+ Skein_512_Ctxt_t *ctx = arg;
+ (void) Skein_512_Update(ctx, buf, size);
+ return (0);
+}
+/*
+ * Computes a native 256-bit skein MAC checksum. Please note that this
+ * function requires the presence of a ctx_template that should be allocated
+ * using abd_checksum_skein_tmpl_init.
+ */
+/*ARGSUSED*/
+void
+abd_checksum_skein_native(abd_t *abd, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp)
+{
+ Skein_512_Ctxt_t ctx;
+
+ ASSERT(ctx_template != NULL);
+ bcopy(ctx_template, &ctx, sizeof (ctx));
+ (void) abd_iterate_func(abd, 0, size, skein_incremental, &ctx);
+ (void) Skein_512_Final(&ctx, (uint8_t *)zcp);
+ bzero(&ctx, sizeof (ctx));
+}
+
+/*
+ * Byteswapped version of abd_checksum_skein_native. This just invokes
+ * the native checksum function and byteswaps the resulting checksum (since
+ * skein is internally endian-insensitive).
+ */
+void
+abd_checksum_skein_byteswap(abd_t *abd, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp)
+{
+ zio_cksum_t tmp;
+
+ abd_checksum_skein_native(abd, size, ctx_template, &tmp);
+ zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]);
+ zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]);
+ zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]);
+ zcp->zc_word[3] = BSWAP_64(tmp.zc_word[3]);
+}
+
+/*
+ * Allocates a skein MAC template suitable for using in skein MAC checksum
+ * computations and returns a pointer to it.
+ */
+void *
+abd_checksum_skein_tmpl_init(const zio_cksum_salt_t *salt)
+{
+ Skein_512_Ctxt_t *ctx;
+
+ ctx = kmem_zalloc(sizeof (*ctx), KM_SLEEP);
+ (void) Skein_512_InitExt(ctx, sizeof (zio_cksum_t) * 8, 0,
+ salt->zcs_bytes, sizeof (salt->zcs_bytes));
+ return (ctx);
+}
+
+/*
+ * Frees a skein context template previously allocated using
+ * zio_checksum_skein_tmpl_init.
+ */
+void
+abd_checksum_skein_tmpl_free(void *ctx_template)
+{
+ Skein_512_Ctxt_t *ctx = ctx_template;
+
+ bzero(ctx, sizeof (*ctx));
+ kmem_free(ctx, sizeof (*ctx));
+}
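
A minimal usage sketch of the template lifecycle above; example_skein_checksum
is hypothetical and only strings together the entry points added by this file:

    static void
    example_skein_checksum(const zio_cksum_salt_t *salt, abd_t *abd,
        uint64_t size, zio_cksum_t *zcp)
    {
            /* Allocate a salted context template once... */
            void *tmpl = abd_checksum_skein_tmpl_init(salt);

            /* ...reuse it across any number of checksum calls... */
            abd_checksum_skein_native(abd, size, tmpl, zcp);

            /* ...then zero and free it. */
            abd_checksum_skein_tmpl_free(tmpl);
    }
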
diff --git a/zfs/module/zfs/spa.c b/zfs/module/zfs/spa.c
index a6bc4e5912bf..f1f1444f1c94 100644
--- a/zfs/module/zfs/spa.c
+++ b/zfs/module/zfs/spa.c
@@ -21,10 +21,16 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013, 2014, Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2016 Toomas Soome <tsoome at me.com>
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
+ * Copyright (c) 2017 Datto Inc.
+ * Copyright 2017 Joyent, Inc.
*/
/*
@@ -49,6 +55,7 @@
#include <sys/vdev_disk.h>
#include <sys/metaslab.h>
#include <sys/metaslab_impl.h>
+#include <sys/mmp.h>
#include <sys/uberblock_impl.h>
#include <sys/txg.h>
#include <sys/avl.h>
@@ -72,6 +79,8 @@
#include <sys/zvol.h>
#ifdef _KERNEL
+#include <sys/fm/protocol.h>
+#include <sys/fm/util.h>
#include <sys/bootprops.h>
#include <sys/callb.h>
#include <sys/cpupart.h>
@@ -83,6 +92,12 @@
#include "zfs_prop.h"
#include "zfs_comutil.h"
+/*
+ * The interval, in seconds, at which failed configuration cache file writes
+ * should be retried.
+ */
+static int zfs_ccw_retry_interval = 300;
+
typedef enum zti_modes {
ZTI_MODE_FIXED, /* value is # of threads (min 1) */
ZTI_MODE_BATCH, /* cpu-intensive; value is ignored */
@@ -135,6 +150,9 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL }, /* IOCTL */
};
+static sysevent_t *spa_event_create(spa_t *spa, vdev_t *vd, nvlist_t *hist_nvl,
+ const char *name);
+static void spa_event_post(sysevent_t *ev);
static void spa_sync_version(void *arg, dmu_tx_t *tx);
static void spa_sync_props(void *arg, dmu_tx_t *tx);
static boolean_t spa_has_active_shared_spare(spa_t *spa);
@@ -193,7 +211,7 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
vdev_t *rvd = spa->spa_root_vdev;
dsl_pool_t *pool = spa->spa_dsl_pool;
uint64_t size, alloc, cap, version;
- zprop_source_t src = ZPROP_SRC_NONE;
+ const zprop_source_t src = ZPROP_SRC_NONE;
spa_config_dirent_t *dp;
metaslab_class_t *mc = spa_normal_class(spa);
@@ -225,11 +243,13 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
rvd->vdev_state, src);
version = spa_version(spa);
- if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
- src = ZPROP_SRC_DEFAULT;
- else
- src = ZPROP_SRC_LOCAL;
- spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
+ if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION)) {
+ spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL,
+ version, ZPROP_SRC_DEFAULT);
+ } else {
+ spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL,
+ version, ZPROP_SRC_LOCAL);
+ }
}
if (pool != NULL) {
@@ -275,6 +295,14 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
SPA_OLD_MAXBLOCKSIZE, ZPROP_SRC_NONE);
}
+ if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_DNODE)) {
+ spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL,
+ DNODE_MAX_SIZE, ZPROP_SRC_NONE);
+ } else {
+ spa_prop_add_list(*nvp, ZPOOL_PROP_MAXDNODESIZE, NULL,
+ DNODE_MIN_SIZE, ZPROP_SRC_NONE);
+ }
+
if ((dp = list_head(&spa->spa_config_list)) != NULL) {
if (dp->scd_path == NULL) {
spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
@@ -347,8 +375,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
break;
}
- strval = kmem_alloc(
- MAXNAMELEN + strlen(MOS_DIR_NAME) + 1,
+ strval = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN,
KM_SLEEP);
dsl_dataset_name(ds, strval);
dsl_dataset_rele(ds, FTAG);
@@ -361,8 +388,7 @@ spa_prop_get(spa_t *spa, nvlist_t **nvp)
spa_prop_add_list(*nvp, prop, strval, intval, src);
if (strval != NULL)
- kmem_free(strval,
- MAXNAMELEN + strlen(MOS_DIR_NAME) + 1);
+ kmem_free(strval, ZFS_MAX_DATASET_NAME_LEN);
break;
@@ -466,6 +492,16 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
error = SET_ERROR(EINVAL);
break;
+ case ZPOOL_PROP_MULTIHOST:
+ error = nvpair_value_uint64(elem, &intval);
+ if (!error && intval > 1)
+ error = SET_ERROR(EINVAL);
+
+ if (!error && !spa_get_hostid())
+ error = SET_ERROR(ENOTSUP);
+
+ break;
+
case ZPOOL_PROP_BOOTFS:
/*
* If the pool version is less than SPA_VERSION_BOOTFS,
@@ -506,7 +542,8 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
/*
* Must be ZPL, and its property settings
* must be supported by GRUB (compression
- * is not gzip, and large blocks are not used).
+ * is not gzip, and large blocks or large
+ * dnodes are not used).
*/
if (dmu_objset_type(os) != DMU_OST_ZFS) {
@@ -519,9 +556,9 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
error = SET_ERROR(ENOTSUP);
} else if ((error =
dsl_prop_get_int_ds(dmu_objset_ds(os),
- zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
+ zfs_prop_to_name(ZFS_PROP_DNODESIZE),
&propval)) == 0 &&
- propval > SPA_OLD_MAXBLOCKSIZE) {
+ propval != ZFS_DNSIZE_LEGACY) {
error = SET_ERROR(ENOTSUP);
} else {
objnum = dmu_objset_id(os);
@@ -532,8 +569,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
case ZPOOL_PROP_FAILUREMODE:
error = nvpair_value_uint64(elem, &intval);
- if (!error && (intval < ZIO_FAILURE_MODE_WAIT ||
- intval > ZIO_FAILURE_MODE_PANIC))
+ if (!error && intval > ZIO_FAILURE_MODE_PANIC)
error = SET_ERROR(EINVAL);
/*
@@ -583,7 +619,6 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
error = SET_ERROR(EINVAL);
break;
}
- check++;
}
if (strlen(strval) > ZPROP_MAX_COMMENT)
error = SET_ERROR(E2BIG);
@@ -783,7 +818,7 @@ spa_change_guid(spa_t *spa)
if (error == 0) {
spa_config_sync(spa, B_FALSE, B_TRUE);
- spa_event_notify(spa, NULL, FM_EREPORT_ZFS_POOL_REGUID);
+ spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_REGUID);
}
mutex_exit(&spa_namespace_lock);
@@ -801,19 +836,14 @@ spa_change_guid(spa_t *spa)
static int
spa_error_entry_compare(const void *a, const void *b)
{
- spa_error_entry_t *sa = (spa_error_entry_t *)a;
- spa_error_entry_t *sb = (spa_error_entry_t *)b;
+ const spa_error_entry_t *sa = (const spa_error_entry_t *)a;
+ const spa_error_entry_t *sb = (const spa_error_entry_t *)b;
int ret;
- ret = bcmp(&sa->se_bookmark, &sb->se_bookmark,
+ ret = memcmp(&sa->se_bookmark, &sb->se_bookmark,
sizeof (zbookmark_phys_t));
- if (ret < 0)
- return (-1);
- else if (ret > 0)
- return (1);
- else
- return (0);
+ return (AVL_ISIGN(ret));
}
/*
@@ -1124,7 +1154,7 @@ spa_activate(spa_t *spa, int mode)
list_create(&spa->spa_state_dirty_list, sizeof (vdev_t),
offsetof(vdev_t, vdev_state_dirty_node));
- txg_list_create(&spa->spa_vdev_txg_list,
+ txg_list_create(&spa->spa_vdev_txg_list, spa,
offsetof(struct vdev, vdev_txg_node));
avl_create(&spa->spa_errlist_scrub,
@@ -1151,6 +1181,13 @@ spa_activate(spa_t *spa, int mode)
*/
spa->spa_zvol_taskq = taskq_create("z_zvol", 1, defclsyspri,
1, INT_MAX, 0);
+
+ /*
+ * The taskq to upgrade datasets in this pool. Currently used by
+ * feature SPA_FEATURE_USEROBJ_ACCOUNTING.
+ */
+ spa->spa_upgrade_taskq = taskq_create("z_upgrade", boot_ncpus,
+ defclsyspri, 1, INT_MAX, TASKQ_DYNAMIC);
}
/*
@@ -1174,13 +1211,18 @@ spa_deactivate(spa_t *spa)
spa->spa_zvol_taskq = NULL;
}
+ if (spa->spa_upgrade_taskq) {
+ taskq_destroy(spa->spa_upgrade_taskq);
+ spa->spa_upgrade_taskq = NULL;
+ }
+
txg_list_destroy(&spa->spa_vdev_txg_list);
list_destroy(&spa->spa_config_dirty_list);
list_destroy(&spa->spa_evicting_os_list);
list_destroy(&spa->spa_state_dirty_list);
- taskq_cancel_id(system_taskq, spa->spa_deadman_tqid);
+ taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid);
for (t = 0; t < ZIO_TYPES; t++) {
for (q = 0; q < ZIO_TASKQ_TYPES; q++) {
@@ -1285,7 +1327,7 @@ spa_config_parse(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent,
static void
spa_unload(spa_t *spa)
{
- int i;
+ int i, c;
ASSERT(MUTEX_HELD(&spa_namespace_lock));
@@ -1302,6 +1344,22 @@ spa_unload(spa_t *spa)
spa->spa_sync_on = B_FALSE;
}
+ /*
+ * Even though vdev_free() also calls vdev_metaslab_fini, we need
+ * to call it earlier, before we wait for async i/o to complete.
+ * This ensures that there is no async metaslab prefetching in
+ * progress, since vdev_metaslab_fini() calls taskq_wait(mg_taskq).
+ */
+ if (spa->spa_root_vdev != NULL) {
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
+ for (c = 0; c < spa->spa_root_vdev->vdev_children; c++)
+ vdev_metaslab_fini(spa->spa_root_vdev->vdev_child[c]);
+ spa_config_exit(spa, SCL_ALL, FTAG);
+ }
+
+ if (spa->spa_mmp.mmp_thread)
+ mmp_thread_stop(spa);
+
/*
* Wait for any outstanding async I/O to complete.
*/
@@ -1334,7 +1392,6 @@ spa_unload(spa_t *spa)
ddt_unload(spa);
-
/*
* Drop and purge level 2 cache
*/
@@ -1513,20 +1570,21 @@ spa_load_l2cache(spa_t *spa)
ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
- if (sav->sav_config != NULL) {
- VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
- ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
- newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
- } else {
- nl2cache = 0;
- newvdevs = NULL;
- }
-
oldvdevs = sav->sav_vdevs;
oldnvdevs = sav->sav_count;
sav->sav_vdevs = NULL;
sav->sav_count = 0;
+ if (sav->sav_config == NULL) {
+ nl2cache = 0;
+ newvdevs = NULL;
+ goto out;
+ }
+
+ VERIFY(nvlist_lookup_nvlist_array(sav->sav_config,
+ ZPOOL_CONFIG_L2CACHE, &l2cache, &nl2cache) == 0);
+ newvdevs = kmem_alloc(nl2cache * sizeof (void *), KM_SLEEP);
+
/*
* Process new nvlist of vdevs.
*/
@@ -1577,6 +1635,24 @@ spa_load_l2cache(spa_t *spa)
}
}
+ sav->sav_vdevs = newvdevs;
+ sav->sav_count = (int)nl2cache;
+
+ /*
+ * Recompute the stashed list of l2cache devices, with status
+ * information this time.
+ */
+ VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
+ DATA_TYPE_NVLIST_ARRAY) == 0);
+
+ l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
+ for (i = 0; i < sav->sav_count; i++)
+ l2cache[i] = vdev_config_generate(spa,
+ sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE);
+ VERIFY(nvlist_add_nvlist_array(sav->sav_config,
+ ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
+
+out:
/*
* Purge vdevs that were dropped
*/
@@ -1598,26 +1674,6 @@ spa_load_l2cache(spa_t *spa)
if (oldvdevs)
kmem_free(oldvdevs, oldnvdevs * sizeof (void *));
- if (sav->sav_config == NULL)
- goto out;
-
- sav->sav_vdevs = newvdevs;
- sav->sav_count = (int)nl2cache;
-
- /*
- * Recompute the stashed list of l2cache devices, with status
- * information this time.
- */
- VERIFY(nvlist_remove(sav->sav_config, ZPOOL_CONFIG_L2CACHE,
- DATA_TYPE_NVLIST_ARRAY) == 0);
-
- l2cache = kmem_alloc(sav->sav_count * sizeof (void *), KM_SLEEP);
- for (i = 0; i < sav->sav_count; i++)
- l2cache[i] = vdev_config_generate(spa,
- sav->sav_vdevs[i], B_TRUE, VDEV_CONFIG_L2CACHE);
- VERIFY(nvlist_add_nvlist_array(sav->sav_config,
- ZPOOL_CONFIG_L2CACHE, l2cache, sav->sav_count) == 0);
-out:
for (i = 0; i < sav->sav_count; i++)
nvlist_free(l2cache[i]);
if (sav->sav_count)
@@ -1664,9 +1720,23 @@ spa_check_removed(vdev_t *vd)
if (vd->vdev_ops->vdev_op_leaf && vdev_is_dead(vd) &&
!vd->vdev_ishole) {
- zfs_ereport_post(FM_EREPORT_RESOURCE_AUTOREPLACE,
- vd->vdev_spa, vd, NULL, 0, 0);
- spa_event_notify(vd->vdev_spa, vd, FM_EREPORT_ZFS_DEVICE_CHECK);
+ zfs_post_autoreplace(vd->vdev_spa, vd);
+ spa_event_notify(vd->vdev_spa, vd, NULL, ESC_ZFS_VDEV_CHECK);
+ }
+}
+
+static void
+spa_config_valid_zaps(vdev_t *vd, vdev_t *mvd)
+{
+ uint64_t i;
+
+ ASSERT3U(vd->vdev_children, ==, mvd->vdev_children);
+
+ vd->vdev_top_zap = mvd->vdev_top_zap;
+ vd->vdev_leaf_zap = mvd->vdev_leaf_zap;
+
+ for (i = 0; i < vd->vdev_children; i++) {
+ spa_config_valid_zaps(vd->vdev_child[i], mvd->vdev_child[i]);
}
}
@@ -1696,7 +1766,7 @@ spa_config_valid(spa_t *spa, nvlist_t *config)
nvlist_t **child, *nv;
uint64_t idx = 0;
- child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t **),
+ child = kmem_alloc(rvd->vdev_children * sizeof (nvlist_t *),
KM_SLEEP);
VERIFY(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP) == 0);
@@ -1774,16 +1844,25 @@ spa_config_valid(spa_t *spa, nvlist_t *config)
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
vdev_reopen(rvd);
- } else if (mtvd->vdev_islog) {
+ } else {
+ if (mtvd->vdev_islog) {
+ /*
+ * Load the slog device's state from the MOS
+ * config since it's possible that the label
+ * does not contain the most up-to-date
+ * information.
+ */
+ vdev_load_log_state(tvd, mtvd);
+ vdev_reopen(tvd);
+ }
+
/*
- * Load the slog device's state from the MOS config
- * since it's possible that the label does not
- * contain the most up-to-date information.
+ * Per-vdev ZAP info is stored exclusively in the MOS.
*/
- vdev_load_log_state(tvd, mtvd);
- vdev_reopen(tvd);
+ spa_config_valid_zaps(tvd, mtvd);
}
}
+
vdev_free(mrvd);
spa_config_exit(spa, SCL_ALL, FTAG);
@@ -1914,14 +1993,14 @@ spa_load_verify_done(zio_t *zio)
int error = zio->io_error;
spa_t *spa = zio->io_spa;
+ abd_free(zio->io_abd);
if (error) {
if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) &&
type != DMU_OT_INTENT_LOG)
- atomic_add_64(&sle->sle_meta_count, 1);
+ atomic_inc_64(&sle->sle_meta_count);
else
- atomic_add_64(&sle->sle_data_count, 1);
+ atomic_inc_64(&sle->sle_data_count);
}
- zio_data_buf_free(zio->io_data, zio->io_size);
mutex_enter(&spa->spa_scrub_lock);
spa->spa_scrub_inflight--;
@@ -1944,9 +2023,8 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
{
zio_t *rio;
size_t size;
- void *data;
- if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
+ if (bp == NULL || BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
return (0);
/*
* Note: normally this routine will not be called if
@@ -1955,12 +2033,11 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
*/
if (!spa_load_verify_metadata)
return (0);
- if (BP_GET_BUFC_TYPE(bp) == ARC_BUFC_DATA && !spa_load_verify_data)
+ if (!BP_IS_METADATA(bp) && !spa_load_verify_data)
return (0);
rio = arg;
size = BP_GET_PSIZE(bp);
- data = zio_data_buf_alloc(size);
mutex_enter(&spa->spa_scrub_lock);
while (spa->spa_scrub_inflight >= spa_load_verify_maxinflight)
@@ -1968,7 +2045,7 @@ spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
spa->spa_scrub_inflight++;
mutex_exit(&spa->spa_scrub_lock);
- zio_nowait(zio_read(rio, spa, bp, data, size,
+ zio_nowait(zio_read(rio, spa, bp, abd_alloc_for_io(size, B_FALSE), size,
spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
@@ -2228,6 +2305,229 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
return (error);
}
+#ifdef ZFS_DEBUG
+/*
+ * Count the number of per-vdev ZAPs associated with all of the vdevs in the
+ * vdev tree rooted in the given vd, and ensure that each ZAP is present in the
+ * spa's per-vdev ZAP list.
+ */
+static uint64_t
+vdev_count_verify_zaps(vdev_t *vd)
+{
+ spa_t *spa = vd->vdev_spa;
+ uint64_t total = 0;
+ uint64_t i;
+
+ if (vd->vdev_top_zap != 0) {
+ total++;
+ ASSERT0(zap_lookup_int(spa->spa_meta_objset,
+ spa->spa_all_vdev_zaps, vd->vdev_top_zap));
+ }
+ if (vd->vdev_leaf_zap != 0) {
+ total++;
+ ASSERT0(zap_lookup_int(spa->spa_meta_objset,
+ spa->spa_all_vdev_zaps, vd->vdev_leaf_zap));
+ }
+
+ for (i = 0; i < vd->vdev_children; i++) {
+ total += vdev_count_verify_zaps(vd->vdev_child[i]);
+ }
+
+ return (total);
+}
+#endif
+
+/*
+ * Determine whether the activity check is required.
+ */
+static boolean_t
+spa_activity_check_required(spa_t *spa, uberblock_t *ub, nvlist_t *config)
+{
+ uint64_t state = 0;
+ uint64_t hostid = 0;
+ uint64_t tryconfig_txg = 0;
+ uint64_t tryconfig_timestamp = 0;
+ nvlist_t *nvinfo;
+
+ if (nvlist_exists(config, ZPOOL_CONFIG_LOAD_INFO)) {
+ nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
+ (void) nvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_TXG,
+ &tryconfig_txg);
+ (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
+ &tryconfig_timestamp);
+ }
+
+ (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &state);
+ (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
+
+ /*
+ * Disable the MMP activity check - this is used by zdb, which
+ * is intended to be used on potentially active pools.
+ */
+ if (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP)
+ return (B_FALSE);
+
+ /*
+ * Skip the activity check when the MMP feature is disabled.
+ */
+ if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay == 0)
+ return (B_FALSE);
+ /*
+ * If the tryconfig_* values are nonzero, they are the results of an
+ * earlier tryimport. If they match the uberblock we just found, then
+ * the pool has not changed and we return false so we do not test a
+ * second time.
+ */
+ if (tryconfig_txg && tryconfig_txg == ub->ub_txg &&
+ tryconfig_timestamp && tryconfig_timestamp == ub->ub_timestamp)
+ return (B_FALSE);
+
+ /*
+ * Allow the activity check to be skipped when importing the pool
+ * on the same host which last imported it.
+ */
+ if (hostid == spa_get_hostid())
+ return (B_FALSE);
+
+ /*
+ * Skip the activity test when the pool was cleanly exported.
+ */
+ if (state != POOL_STATE_ACTIVE)
+ return (B_FALSE);
+
+ return (B_TRUE);
+}
+
+/*
+ * Perform the import activity check. If the user canceled the import or
+ * we detected activity then fail.
+ */
+static int
+spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config)
+{
+ uint64_t import_intervals = MAX(zfs_multihost_import_intervals, 1);
+ uint64_t txg = ub->ub_txg;
+ uint64_t timestamp = ub->ub_timestamp;
+ uint64_t import_delay = NANOSEC;
+ hrtime_t import_expire;
+ nvlist_t *mmp_label = NULL;
+ vdev_t *rvd = spa->spa_root_vdev;
+ kcondvar_t cv;
+ kmutex_t mtx;
+ int error = 0;
+
+ cv_init(&cv, NULL, CV_DEFAULT, NULL);
+ mutex_init(&mtx, NULL, MUTEX_DEFAULT, NULL);
+ mutex_enter(&mtx);
+
+ /*
+ * If ZPOOL_CONFIG_MMP_TXG is present an activity check was performed
+ * during the earlier tryimport. If the txg recorded there is 0 then
+ * the pool is known to be active on another host.
+ *
+ * Otherwise, the pool might be in use on another node. Check for
+ * changes in the uberblocks on disk if necessary.
+ */
+ if (nvlist_exists(config, ZPOOL_CONFIG_LOAD_INFO)) {
+ nvlist_t *nvinfo = fnvlist_lookup_nvlist(config,
+ ZPOOL_CONFIG_LOAD_INFO);
+
+ if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_TXG) &&
+ fnvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_TXG) == 0) {
+ vdev_uberblock_load(rvd, ub, &mmp_label);
+ error = SET_ERROR(EREMOTEIO);
+ goto out;
+ }
+ }
+
+ /*
+ * Preferentially use the zfs_multihost_interval from the node which
+ * last imported the pool. This value is stored in an MMP uberblock as:
+ *
+ * ub_mmp_delay * vdev_count_leaves() == zfs_multihost_interval
+ */
+ if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay)
+ import_delay = MAX(import_delay, import_intervals *
+ ub->ub_mmp_delay * MAX(vdev_count_leaves(spa), 1));
+
+ /* Apply a floor using the local default values. */
+ import_delay = MAX(import_delay, import_intervals *
+ MSEC2NSEC(MAX(zfs_multihost_interval, MMP_MIN_INTERVAL)));
+
+ /* Add a small random factor in case of simultaneous imports (0-25%) */
+ import_expire = gethrtime() + import_delay +
+ (import_delay * spa_get_random(250) / 1000);
+
+ while (gethrtime() < import_expire) {
+ vdev_uberblock_load(rvd, ub, &mmp_label);
+
+ if (txg != ub->ub_txg || timestamp != ub->ub_timestamp) {
+ error = SET_ERROR(EREMOTEIO);
+ break;
+ }
+
+ if (mmp_label) {
+ nvlist_free(mmp_label);
+ mmp_label = NULL;
+ }
+
+ error = cv_timedwait_sig(&cv, &mtx, ddi_get_lbolt() + hz);
+ if (error != -1) {
+ error = SET_ERROR(EINTR);
+ break;
+ }
+ error = 0;
+ }
+
+out:
+ mutex_exit(&mtx);
+ mutex_destroy(&mtx);
+ cv_destroy(&cv);
+
+ /*
+ * If the pool is determined to be active store the status in the
+ * spa->spa_load_info nvlist. If the remote hostname or hostid are
+ * available from configuration read from disk store them as well.
+ * This allows 'zpool import' to generate a more useful message.
+ *
+ * ZPOOL_CONFIG_MMP_STATE - observed pool status (mandatory)
+ * ZPOOL_CONFIG_MMP_HOSTNAME - hostname from the active pool
+ * ZPOOL_CONFIG_MMP_HOSTID - hostid from the active pool
+ */
+ if (error == EREMOTEIO) {
+ char *hostname = "<unknown>";
+ uint64_t hostid = 0;
+
+ if (mmp_label) {
+ if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTNAME)) {
+ hostname = fnvlist_lookup_string(mmp_label,
+ ZPOOL_CONFIG_HOSTNAME);
+ fnvlist_add_string(spa->spa_load_info,
+ ZPOOL_CONFIG_MMP_HOSTNAME, hostname);
+ }
+
+ if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTID)) {
+ hostid = fnvlist_lookup_uint64(mmp_label,
+ ZPOOL_CONFIG_HOSTID);
+ fnvlist_add_uint64(spa->spa_load_info,
+ ZPOOL_CONFIG_MMP_HOSTID, hostid);
+ }
+ }
+
+ fnvlist_add_uint64(spa->spa_load_info,
+ ZPOOL_CONFIG_MMP_STATE, MMP_STATE_ACTIVE);
+ fnvlist_add_uint64(spa->spa_load_info,
+ ZPOOL_CONFIG_MMP_TXG, 0);
+
+ error = spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO);
+ }
+
+ if (mmp_label)
+ nvlist_free(mmp_label);
+
+ return (error);
+}
+
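
As a worked example of the delay computation above (the numbers are
illustrative): with zfs_multihost_import_intervals = 10, ub_mmp_delay =
12.5 ms (stored in nanoseconds) and 8 leaf vdevs, import_delay =
10 * 12.5 ms * 8 = 1 s; this is then bounded below by the local
zfs_multihost_interval floor and stretched by a random 0-25% so that
simultaneous imports do not race in lock-step.
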
/*
* Load an existing storage pool, using the pool's builtin spa_config as a
* source of configuration information.
@@ -2248,6 +2548,8 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
int parse, i;
uint64_t obj;
boolean_t missing_feat_write = B_FALSE;
+ boolean_t activity_check = B_FALSE;
+ nvlist_t *mos_config;
/*
* If this is an untrusted config, access the pool in read-only mode.
@@ -2344,6 +2646,33 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO));
}
+ /*
+ * For pools which have the multihost property on determine if the
+ * pool is truly inactive and can be safely imported. Prevent
+ * hosts which don't have a hostid set from importing the pool.
+ */
+ activity_check = spa_activity_check_required(spa, ub, config);
+ if (activity_check) {
+ if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay &&
+ spa_get_hostid() == 0) {
+ nvlist_free(label);
+ fnvlist_add_uint64(spa->spa_load_info,
+ ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID);
+ return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO));
+ }
+
+ error = spa_activity_check(spa, ub, config);
+ if (error) {
+ nvlist_free(label);
+ return (error);
+ }
+
+ fnvlist_add_uint64(spa->spa_load_info,
+ ZPOOL_CONFIG_MMP_STATE, MMP_STATE_INACTIVE);
+ fnvlist_add_uint64(spa->spa_load_info,
+ ZPOOL_CONFIG_MMP_TXG, ub->ub_txg);
+ }
+
/*
* If the pool has an unsupported version we can't open it.
*/
@@ -2571,24 +2900,9 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
VERIFY(nvlist_lookup_string(nvconfig,
ZPOOL_CONFIG_HOSTNAME, &hostname) == 0);
-#ifdef _KERNEL
- myhostid = zone_get_hostid(NULL);
-#else /* _KERNEL */
- /*
- * We're emulating the system's hostid in userland, so
- * we can't use zone_get_hostid().
- */
- (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid);
-#endif /* _KERNEL */
- if (hostid != 0 && myhostid != 0 &&
- hostid != myhostid) {
+ myhostid = spa_get_hostid();
+ if (hostid && myhostid && hostid != myhostid) {
nvlist_free(nvconfig);
- cmn_err(CE_WARN, "pool '%s' could not be "
- "loaded as it was last accessed by another "
- "system (host: %s hostid: 0x%lx). See: "
- "http://zfsonlinux.org/msg/ZFS-8000-EY",
- spa_name(spa), hostname,
- (unsigned long)hostid);
return (SET_ERROR(EBADF));
}
}
@@ -2605,6 +2919,19 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
return (spa_load(spa, state, SPA_IMPORT_EXISTING, B_TRUE));
}
+ /* Grab the checksum salt from the MOS. */
+ error = zap_lookup(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_CHECKSUM_SALT, 1,
+ sizeof (spa->spa_cksum_salt.zcs_bytes),
+ spa->spa_cksum_salt.zcs_bytes);
+ if (error == ENOENT) {
+ /* Generate a new salt for subsequent use */
+ (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes,
+ sizeof (spa->spa_cksum_salt.zcs_bytes));
+ } else if (error != 0) {
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ }
+
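
Note that the ENOENT path above only generates an in-core salt; it is
persisted later, in spa_sync_upgrades() (see the DMU_POOL_CHECKSUM_SALT
zap_add further down in this same patch), so existing pools pick up a salt
on their next sync.
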
if (spa_dir_prop(spa, DMU_POOL_SYNC_BPOBJ, &obj) != 0)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
error = bpobj_open(&spa->spa_deferred_bpobj, spa->spa_meta_objset, obj);
@@ -2646,6 +2973,42 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
if (error != 0 && error != ENOENT)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ /*
+ * Load the per-vdev ZAP map. If we have an older pool, this will not
+ * be present; in this case, defer its creation to a later time to
+ * avoid dirtying the MOS this early, outside of syncing context. See
+ * spa_sync_config_object.
+ */
+
+ /* The sentinel is only available in the MOS config. */
+ if (load_nvlist(spa, spa->spa_config_object, &mos_config) != 0)
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+
+ error = spa_dir_prop(spa, DMU_POOL_VDEV_ZAP_MAP,
+ &spa->spa_all_vdev_zaps);
+
+ if (error == ENOENT) {
+ VERIFY(!nvlist_exists(mos_config,
+ ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS));
+ spa->spa_avz_action = AVZ_ACTION_INITIALIZE;
+ ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev));
+ } else if (error != 0) {
+ return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
+ } else if (!nvlist_exists(mos_config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS)) {
+ /*
+ * An older version of ZFS overwrote the sentinel value, so
+ * we have orphaned per-vdev ZAPs in the MOS. Defer their
+ * destruction to later; see spa_sync_config_object.
+ */
+ spa->spa_avz_action = AVZ_ACTION_DESTROY;
+ /*
+ * We're assuming that no vdevs have had their ZAPs created
+ * before this. Better be sure of it.
+ */
+ ASSERT0(vdev_count_verify_zaps(spa->spa_root_vdev));
+ }
+ nvlist_free(mos_config);
+
/*
* If we're assembling the pool from the split-off vdevs of
* an existing pool, we don't want to attach the spares & cache
@@ -2705,12 +3068,25 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation);
spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode);
spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand);
+ spa_prop_find(spa, ZPOOL_PROP_MULTIHOST, &spa->spa_multihost);
spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO,
&spa->spa_dedup_ditto);
spa->spa_autoreplace = (autoreplace != 0);
}
+ /*
+ * If the 'multihost' property is set, then never allow a pool to
+ * be imported when the system hostid is zero. The exception to
+ * this rule is zdb which is always allowed to access pools.
+ */
+ if (spa_multihost(spa) && spa_get_hostid() == 0 &&
+ (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP) == 0) {
+ fnvlist_add_uint64(spa->spa_load_info,
+ ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID);
+ return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO));
+ }
+
/*
* If the 'autoreplace' property is set, then post a resource notifying
* the ZFS DE that it should not issue any faults for unopenable
@@ -2835,6 +3211,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
spa_set_log_state(spa, SPA_LOG_GOOD);
spa->spa_sync_on = B_TRUE;
txg_sync_start(spa->spa_dsl_pool);
+ mmp_thread_start(spa);
/*
* Wait for all claims to sync. We sync up to the highest
@@ -2989,6 +3366,8 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
if (config && (rewind_error || state != SPA_LOAD_RECOVER))
spa_config_set(spa, config);
+ else
+ nvlist_free(config);
if (state == SPA_LOAD_RECOVER) {
ASSERT3P(loadinfo, ==, NULL);
@@ -3275,6 +3654,8 @@ spa_add_l2cache(spa_t *spa, nvlist_t *config)
ZPOOL_CONFIG_VDEV_STATS, (uint64_t **)&vs, &vsc)
== 0);
vdev_get_stats(vd, vs);
+ vdev_config_generate_stats(vd, l2cache[i]);
+
}
}
}
@@ -3483,18 +3864,6 @@ spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode,
goto out;
}
- /*
- * The L2ARC currently only supports disk devices in
- * kernel context. For user-level testing, we allow it.
- */
-#ifdef _KERNEL
- if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) &&
- strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) {
- error = SET_ERROR(ENOTBLK);
- vdev_free(vd);
- goto out;
- }
-#endif
vd->vdev_top = vd;
if ((error = vdev_open(vd)) == 0 &&
@@ -3548,7 +3917,7 @@ spa_set_aux_vdevs(spa_aux_vdev_t *sav, nvlist_t **devs, int ndevs,
nvlist_t **newdevs;
/*
- * Generate new dev list by concatentating with the
+ * Generate new dev list by concatenating with the
* current dev list.
*/
VERIFY(nvlist_lookup_nvlist_array(sav->sav_config, config,
@@ -3680,6 +4049,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa->spa_uberblock.ub_txg = txg - 1;
spa->spa_uberblock.ub_version = version;
spa->spa_ubsync = spa->spa_uberblock;
+ spa->spa_load_state = SPA_LOAD_CREATE;
/*
* Create "The Godfather" zio to hold all async IOs
@@ -3823,6 +4193,12 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
if (version >= SPA_VERSION_ZPOOL_HISTORY)
spa_history_create_obj(spa, tx);
+ /*
+ * Generate some random noise for salted checksums to operate on.
+ */
+ (void) random_get_pseudo_bytes(spa->spa_cksum_salt.zcs_bytes,
+ sizeof (spa->spa_cksum_salt.zcs_bytes));
+
/*
* Set pool properties.
*/
@@ -3830,6 +4206,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE);
spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);
+ spa->spa_multihost = zpool_prop_default_numeric(ZPOOL_PROP_MULTIHOST);
if (props != NULL) {
spa_configfile_set(spa, props, B_FALSE);
@@ -3840,6 +4217,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa->spa_sync_on = B_TRUE;
txg_sync_start(spa->spa_dsl_pool);
+ mmp_thread_start(spa);
/*
* We explicitly wait for the first transaction to complete so that our
@@ -3848,6 +4226,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
txg_wait_synced(spa->spa_dsl_pool, txg);
spa_config_sync(spa, B_FALSE, B_TRUE);
+ spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_CREATE);
spa_history_log_version(spa, "create");
@@ -3857,6 +4236,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
*/
spa_evicting_os_wait(spa);
spa->spa_minref = refcount_count(&spa->spa_refcount);
+ spa->spa_load_state = SPA_LOAD_NONE;
mutex_exit(&spa_namespace_lock);
@@ -3910,6 +4290,7 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
spa_configfile_set(spa, props, B_FALSE);
spa_config_sync(spa, B_FALSE, B_TRUE);
+ spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT);
mutex_exit(&spa_namespace_lock);
return (0);
@@ -4034,10 +4415,14 @@ spa_import(char *pool, nvlist_t *config, nvlist_t *props, uint64_t flags)
*/
spa_async_request(spa, SPA_ASYNC_AUTOEXPAND);
- mutex_exit(&spa_namespace_lock);
spa_history_log_version(spa, "import");
+
+ spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_IMPORT);
+
zvol_create_minors(spa, pool, B_TRUE);
+ mutex_exit(&spa_namespace_lock);
+
return (0);
}
@@ -4233,7 +4618,7 @@ spa_export_common(char *pool, int new_state, nvlist_t **oldconfig,
}
export_spa:
- spa_event_notify(spa, NULL, FM_EREPORT_ZFS_POOL_DESTROY);
+ spa_event_notify(spa, NULL, NULL, ESC_ZFS_POOL_DESTROY);
if (spa->spa_state != POOL_STATE_UNINITIALIZED) {
spa_unload(spa);
@@ -4389,6 +4774,7 @@ spa_vdev_add(spa_t *spa, nvlist_t *nvroot)
mutex_enter(&spa_namespace_lock);
spa_config_update(spa, SPA_CONFIG_UPDATE_POOL);
+ spa_event_notify(spa, NULL, NULL, ESC_ZFS_VDEV_ADD);
mutex_exit(&spa_namespace_lock);
return (0);
@@ -4546,6 +4932,11 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
newvd->vdev_crtxg = oldvd->vdev_crtxg;
vdev_add_child(pvd, newvd);
+ /*
+ * Reevaluate the parent vdev state.
+ */
+ vdev_propagate_state(pvd);
+
tvd = newvd->vdev_top;
ASSERT(pvd->vdev_top == tvd);
ASSERT(tvd->vdev_parent == rvd);
@@ -4564,7 +4955,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
if (newvd->vdev_isspare) {
spa_spare_activate(newvd);
- spa_event_notify(spa, newvd, FM_EREPORT_ZFS_DEVICE_SPARE);
+ spa_event_notify(spa, newvd, NULL, ESC_ZFS_VDEV_SPARE);
}
oldvdpath = spa_strdup(oldvd->vdev_path);
@@ -4583,6 +4974,11 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
*/
dsl_resilver_restart(spa->spa_dsl_pool, dtl_max_txg);
+ if (spa->spa_bootfs)
+ spa_event_notify(spa, newvd, NULL, ESC_ZFS_BOOTFS_VDEV_ATTACH);
+
+ spa_event_notify(spa, newvd, NULL, ESC_ZFS_VDEV_ATTACH);
+
/*
* Commit the config
*/
@@ -4597,9 +4993,6 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
spa_strfree(oldvdpath);
spa_strfree(newvdpath);
- if (spa->spa_bootfs)
- spa_event_notify(spa, newvd, FM_EREPORT_ZFS_BOOTFS_VDEV_ATTACH);
-
return (0);
}
@@ -4792,13 +5185,13 @@ spa_vdev_detach(spa_t *spa, uint64_t guid, uint64_t pguid, int replace_done)
* But first make sure we're not on any *other* txg's DTL list, to
* prevent vd from being accessed after it's freed.
*/
- vdpath = spa_strdup(vd->vdev_path);
+ vdpath = spa_strdup(vd->vdev_path ? vd->vdev_path : "none");
for (t = 0; t < TXG_SIZE; t++)
(void) txg_list_remove_this(&tvd->vdev_dtl_list, vd, t);
vd->vdev_detached = B_TRUE;
vdev_dirty(tvd, VDD_DTL, vd, txg);
- spa_event_notify(spa, vd, FM_EREPORT_ZFS_DEVICE_REMOVE);
+ spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_REMOVE);
/* hang on to the spa before we release the lock */
spa_open_ref(spa, FTAG);
@@ -4973,6 +5366,16 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
vml[c]->vdev_top->vdev_asize) == 0);
VERIFY(nvlist_add_uint64(child[c], ZPOOL_CONFIG_ASHIFT,
vml[c]->vdev_top->vdev_ashift) == 0);
+
+ /* transfer per-vdev ZAPs */
+ ASSERT3U(vml[c]->vdev_leaf_zap, !=, 0);
+ VERIFY0(nvlist_add_uint64(child[c],
+ ZPOOL_CONFIG_VDEV_LEAF_ZAP, vml[c]->vdev_leaf_zap));
+
+ ASSERT3U(vml[c]->vdev_top->vdev_top_zap, !=, 0);
+ VERIFY0(nvlist_add_uint64(child[c],
+ ZPOOL_CONFIG_VDEV_TOP_ZAP,
+ vml[c]->vdev_parent->vdev_top_zap));
}
if (error != 0) {
@@ -5014,11 +5417,13 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
spa->spa_config_txg) == 0);
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
spa_generate_guid(NULL)) == 0);
+ VERIFY0(nvlist_add_boolean(config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS));
(void) nvlist_lookup_string(props,
zpool_prop_to_name(ZPOOL_PROP_ALTROOT), &altroot);
/* add the new pool to the namespace */
newspa = spa_add(newname, config, altroot);
+ newspa->spa_avz_action = AVZ_ACTION_REBUILD;
newspa->spa_config_txg = spa->spa_config_txg;
spa_set_log_state(newspa, SPA_LOG_CLEAR);
@@ -5076,9 +5481,11 @@ spa_vdev_split_mirror(spa_t *spa, char *newname, nvlist_t *config,
if (error == 0)
spa_history_log_internal(spa, "detach", tx,
"vdev=%s", vml[c]->vdev_path);
+
vdev_free(vml[c]);
}
}
+ spa->spa_avz_action = AVZ_ACTION_REBUILD;
vdev_config_dirty(spa->spa_root_vdev);
spa->spa_config_splitting = NULL;
nvlist_free(nvl);
@@ -5144,7 +5551,7 @@ spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid)
static void
spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count,
- nvlist_t *dev_to_remove)
+ nvlist_t *dev_to_remove)
{
nvlist_t **newdev = NULL;
int i, j;
@@ -5270,6 +5677,7 @@ int
spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
{
vdev_t *vd;
+ sysevent_t *ev = NULL;
metaslab_group_t *mg;
nvlist_t **spares, **l2cache, *nv;
uint64_t txg = 0;
@@ -5293,6 +5701,10 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
* in this pool.
*/
if (vd == NULL || unspare) {
+ if (vd == NULL)
+ vd = spa_lookup_by_guid(spa, guid, B_TRUE);
+ ev = spa_event_create(spa, vd, NULL,
+ ESC_ZFS_VDEV_REMOVE_AUX);
spa_vdev_remove_aux(spa->spa_spares.sav_config,
ZPOOL_CONFIG_SPARES, spares, nspares, nv);
spa_load_spares(spa);
@@ -5307,6 +5719,8 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
/*
* Cache devices can always be removed.
*/
+ vd = spa_lookup_by_guid(spa, guid, B_TRUE);
+ ev = spa_event_create(spa, vd, NULL, ESC_ZFS_VDEV_REMOVE_AUX);
spa_vdev_remove_aux(spa->spa_l2cache.sav_config,
ZPOOL_CONFIG_L2CACHE, l2cache, nl2cache, nv);
spa_load_l2cache(spa);
@@ -5347,6 +5761,7 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
/*
* Clean up the vdev namespace.
*/
+ ev = spa_event_create(spa, vd, NULL, ESC_ZFS_VDEV_REMOVE_DEV);
spa_vdev_remove_from_namespace(spa, vd);
} else if (vd != NULL) {
@@ -5362,7 +5777,10 @@ spa_vdev_remove(spa_t *spa, uint64_t guid, boolean_t unspare)
}
if (!locked)
- return (spa_vdev_exit(spa, NULL, txg, error));
+ error = spa_vdev_exit(spa, NULL, txg, error);
+
+ if (ev)
+ spa_event_post(ev);
return (error);
}
@@ -5542,6 +5960,16 @@ spa_vdev_setfru(spa_t *spa, uint64_t guid, const char *newfru)
* SPA Scanning
* ==========================================================================
*/
+int
+spa_scrub_pause_resume(spa_t *spa, pool_scrub_cmd_t cmd)
+{
+ ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == 0);
+
+ if (dsl_scan_resilvering(spa->spa_dsl_pool))
+ return (SET_ERROR(EBUSY));
+
+ return (dsl_scrub_set_pause_resume(spa->spa_dsl_pool, cmd));
+}
int
spa_scan_stop(spa_t *spa)
@@ -5636,7 +6064,7 @@ spa_async_autoexpand(spa_t *spa, vdev_t *vd)
if (!vd->vdev_ops->vdev_op_leaf || vd->vdev_physpath == NULL)
return;
- spa_event_notify(vd->vdev_spa, vd, FM_EREPORT_ZFS_DEVICE_AUTOEXPAND);
+ spa_event_notify(vd->vdev_spa, vd, NULL, ESC_ZFS_VDEV_AUTOEXPAND);
}
static void
@@ -5743,13 +6171,34 @@ spa_async_resume(spa_t *spa)
mutex_exit(&spa->spa_async_lock);
}
+static boolean_t
+spa_async_tasks_pending(spa_t *spa)
+{
+ uint_t non_config_tasks;
+ uint_t config_task;
+ boolean_t config_task_suspended;
+
+ non_config_tasks = spa->spa_async_tasks & ~SPA_ASYNC_CONFIG_UPDATE;
+ config_task = spa->spa_async_tasks & SPA_ASYNC_CONFIG_UPDATE;
+ if (spa->spa_ccw_fail_time == 0) {
+ config_task_suspended = B_FALSE;
+ } else {
+ config_task_suspended =
+ (gethrtime() - spa->spa_ccw_fail_time) <
+ ((hrtime_t)zfs_ccw_retry_interval * NANOSEC);
+ }
+
+ return (non_config_tasks || (config_task && !config_task_suspended));
+}
+
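
In other words, with the default zfs_ccw_retry_interval of 300 seconds
introduced above, a failed configuration cache write (spa_ccw_fail_time != 0)
keeps SPA_ASYNC_CONFIG_UPDATE alone from waking the async thread for up to
five minutes, while any other pending async task still dispatches immediately.
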
static void
spa_async_dispatch(spa_t *spa)
{
mutex_enter(&spa->spa_async_lock);
- if (spa->spa_async_tasks && !spa->spa_async_suspended &&
+ if (spa_async_tasks_pending(spa) &&
+ !spa->spa_async_suspended &&
spa->spa_async_thread == NULL &&
- rootdir != NULL && !vn_is_readonly(rootdir))
+ rootdir != NULL)
spa->spa_async_thread = thread_create(NULL, 0,
spa_async_thread, spa, 0, &p0, TS_RUN, maxclsyspri);
mutex_exit(&spa->spa_async_lock);
@@ -5891,16 +6340,121 @@ spa_sync_aux_dev(spa_t *spa, spa_aux_vdev_t *sav, dmu_tx_t *tx,
sav->sav_sync = B_FALSE;
}
+/*
+ * Rebuild spa's all-vdev ZAP from the vdev ZAPs indicated in each vdev_t.
+ * The all-vdev ZAP must be empty.
+ */
+static void
+spa_avz_build(vdev_t *vd, uint64_t avz, dmu_tx_t *tx)
+{
+ spa_t *spa = vd->vdev_spa;
+ uint64_t i;
+
+ if (vd->vdev_top_zap != 0) {
+ VERIFY0(zap_add_int(spa->spa_meta_objset, avz,
+ vd->vdev_top_zap, tx));
+ }
+ if (vd->vdev_leaf_zap != 0) {
+ VERIFY0(zap_add_int(spa->spa_meta_objset, avz,
+ vd->vdev_leaf_zap, tx));
+ }
+ for (i = 0; i < vd->vdev_children; i++) {
+ spa_avz_build(vd->vdev_child[i], avz, tx);
+ }
+}
+
static void
spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
{
nvlist_t *config;
- if (list_is_empty(&spa->spa_config_dirty_list))
+ /*
+ * If the pool is being imported from a pre-per-vdev-ZAP version of ZFS,
+ * its config may not be dirty but we still need to build per-vdev ZAPs.
+ * Similarly, if the pool is being assembled (e.g. after a split), we
+ * need to rebuild the AVZ although the config may not be dirty.
+ */
+ if (list_is_empty(&spa->spa_config_dirty_list) &&
+ spa->spa_avz_action == AVZ_ACTION_NONE)
return;
spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
+ ASSERT(spa->spa_avz_action == AVZ_ACTION_NONE ||
+ spa->spa_avz_action == AVZ_ACTION_INITIALIZE ||
+ spa->spa_all_vdev_zaps != 0);
+
+ if (spa->spa_avz_action == AVZ_ACTION_REBUILD) {
+ zap_cursor_t zc;
+ zap_attribute_t za;
+
+ /* Make and build the new AVZ */
+ uint64_t new_avz = zap_create(spa->spa_meta_objset,
+ DMU_OTN_ZAP_METADATA, DMU_OT_NONE, 0, tx);
+ spa_avz_build(spa->spa_root_vdev, new_avz, tx);
+
+ /* Diff old AVZ with new one */
+ for (zap_cursor_init(&zc, spa->spa_meta_objset,
+ spa->spa_all_vdev_zaps);
+ zap_cursor_retrieve(&zc, &za) == 0;
+ zap_cursor_advance(&zc)) {
+ uint64_t vdzap = za.za_first_integer;
+ if (zap_lookup_int(spa->spa_meta_objset, new_avz,
+ vdzap) == ENOENT) {
+ /*
+ * ZAP is listed in old AVZ but not in new one;
+ * destroy it
+ */
+ VERIFY0(zap_destroy(spa->spa_meta_objset, vdzap,
+ tx));
+ }
+ }
+
+ zap_cursor_fini(&zc);
+
+ /* Destroy the old AVZ */
+ VERIFY0(zap_destroy(spa->spa_meta_objset,
+ spa->spa_all_vdev_zaps, tx));
+
+ /* Replace the old AVZ in the dir obj with the new one */
+ VERIFY0(zap_update(spa->spa_meta_objset,
+ DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_VDEV_ZAP_MAP,
+ sizeof (new_avz), 1, &new_avz, tx));
+
+ spa->spa_all_vdev_zaps = new_avz;
+ } else if (spa->spa_avz_action == AVZ_ACTION_DESTROY) {
+ zap_cursor_t zc;
+ zap_attribute_t za;
+
+ /* Walk through the AVZ and destroy all listed ZAPs */
+ for (zap_cursor_init(&zc, spa->spa_meta_objset,
+ spa->spa_all_vdev_zaps);
+ zap_cursor_retrieve(&zc, &za) == 0;
+ zap_cursor_advance(&zc)) {
+ uint64_t zap = za.za_first_integer;
+ VERIFY0(zap_destroy(spa->spa_meta_objset, zap, tx));
+ }
+
+ zap_cursor_fini(&zc);
+
+ /* Destroy and unlink the AVZ itself */
+ VERIFY0(zap_destroy(spa->spa_meta_objset,
+ spa->spa_all_vdev_zaps, tx));
+ VERIFY0(zap_remove(spa->spa_meta_objset,
+ DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_VDEV_ZAP_MAP, tx));
+ spa->spa_all_vdev_zaps = 0;
+ }
+
+ if (spa->spa_all_vdev_zaps == 0) {
+ spa->spa_all_vdev_zaps = zap_create_link(spa->spa_meta_objset,
+ DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_VDEV_ZAP_MAP, tx);
+ }
+ spa->spa_avz_action = AVZ_ACTION_NONE;
+
+ /* Create ZAPs for vdevs that don't have them. */
+ vdev_construct_zaps(spa->spa_root_vdev, tx);
+
config = spa_config_generate(spa, spa->spa_root_vdev,
dmu_tx_get_txg(tx), B_FALSE);
@@ -5914,8 +6468,7 @@ spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
spa_config_exit(spa, SCL_STATE, FTAG);
- if (spa->spa_config_syncing)
- nvlist_free(spa->spa_config_syncing);
+ nvlist_free(spa->spa_config_syncing);
spa->spa_config_syncing = config;
spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
@@ -5981,7 +6534,7 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
case ZPOOL_PROP_VERSION:
intval = fnvpair_value_uint64(elem);
/*
- * The version is synced seperatly before other
+ * The version is synced separately before other
* properties and should be correct by now.
*/
ASSERT3U(spa_version(spa), >=, intval);
@@ -6011,7 +6564,7 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
* We need to dirty the configuration on all the vdevs
* so that their labels get updated. It's unnecessary
* to do this for pool creation since the vdev's
- * configuratoin has already been dirtied.
+ * configuration has already been dirtied.
*/
if (tx->tx_txg != TXG_INITIAL)
vdev_config_dirty(spa->spa_root_vdev);
@@ -6074,6 +6627,9 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
spa_async_request(spa,
SPA_ASYNC_AUTOEXPAND);
break;
+ case ZPOOL_PROP_MULTIHOST:
+ spa->spa_multihost = intval;
+ break;
case ZPOOL_PROP_DEDUPDITTO:
spa->spa_dedup_ditto = intval;
break;
@@ -6144,6 +6700,20 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
if (lz4_en && !lz4_ac)
spa_feature_incr(spa, SPA_FEATURE_LZ4_COMPRESS, tx);
}
+
+ /*
+ * If we haven't written the salt, do so now. Note that the
+ * feature may not be activated yet, but that's fine since
+ * the presence of this ZAP entry is backwards compatible.
+ */
+ if (zap_contains(spa->spa_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
+ DMU_POOL_CHECKSUM_SALT) == ENOENT) {
+ VERIFY0(zap_add(spa->spa_meta_objset,
+ DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CHECKSUM_SALT, 1,
+ sizeof (spa->spa_cksum_salt.zcs_bytes),
+ spa->spa_cksum_salt.zcs_bytes, tx));
+ }
+
rrw_exit(&dp->dp_config_rwlock, FTAG);
}
@@ -6157,10 +6727,14 @@ spa_sync(spa_t *spa, uint64_t txg)
dsl_pool_t *dp = spa->spa_dsl_pool;
objset_t *mos = spa->spa_meta_objset;
bplist_t *free_bpl = &spa->spa_free_bplist[txg & TXG_MASK];
+ metaslab_class_t *mc;
vdev_t *rvd = spa->spa_root_vdev;
vdev_t *vd;
dmu_tx_t *tx;
int error;
+ uint32_t max_queue_depth = zfs_vdev_async_write_max_active *
+ zfs_vdev_queue_depth_pct / 100;
+ uint64_t queue_depth_total;
int c;
VERIFY(spa_writeable(spa));
@@ -6173,6 +6747,10 @@ spa_sync(spa_t *spa, uint64_t txg)
spa->spa_syncing_txg = txg;
spa->spa_sync_pass = 0;
+ mutex_enter(&spa->spa_alloc_lock);
+ VERIFY0(avl_numnodes(&spa->spa_alloc_tree));
+ mutex_exit(&spa->spa_alloc_lock);
+
/*
* If there are any pending vdev state changes, convert them
* into config changes that go out with this transaction group.
@@ -6200,8 +6778,8 @@ spa_sync(spa_t *spa, uint64_t txg)
tx = dmu_tx_create_assigned(dp, txg);
spa->spa_sync_starttime = gethrtime();
- taskq_cancel_id(system_taskq, spa->spa_deadman_tqid);
- spa->spa_deadman_tqid = taskq_dispatch_delay(system_taskq,
+ taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid);
+ spa->spa_deadman_tqid = taskq_dispatch_delay(system_delay_taskq,
spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() +
NSEC_TO_TICK(spa->spa_deadman_synctime));
@@ -6226,6 +6804,38 @@ spa_sync(spa_t *spa, uint64_t txg)
}
}
+ /*
+ * Set the top-level vdev's max queue depth. Evaluate each
+ * top-level's async write queue depth in case it changed.
+ * The max queue depth will not change in the middle of syncing
+ * out this txg.
+ */
+ queue_depth_total = 0;
+ for (c = 0; c < rvd->vdev_children; c++) {
+ vdev_t *tvd = rvd->vdev_child[c];
+ metaslab_group_t *mg = tvd->vdev_mg;
+
+ if (mg == NULL || mg->mg_class != spa_normal_class(spa) ||
+ !metaslab_group_initialized(mg))
+ continue;
+
+ /*
+ * It is safe to do a lock-free check here because only async
+ * allocations look at mg_max_alloc_queue_depth, and async
+ * allocations all happen from spa_sync().
+ */
+ ASSERT0(refcount_count(&mg->mg_alloc_queue_depth));
+ mg->mg_max_alloc_queue_depth = max_queue_depth;
+ queue_depth_total += mg->mg_max_alloc_queue_depth;
+ }
+ mc = spa_normal_class(spa);
+ ASSERT0(refcount_count(&mc->mc_alloc_slots));
+ mc->mc_alloc_max_slots = queue_depth_total;
+ mc->mc_alloc_throttle_enabled = zio_dva_throttle_enabled;
+
+ ASSERT3U(mc->mc_alloc_max_slots, <=,
+ max_queue_depth * rvd->vdev_children);
+
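
For a concrete sense of scale (the defaults here are illustrative): with
zfs_vdev_async_write_max_active = 10 and zfs_vdev_queue_depth_pct = 1000,
max_queue_depth = 10 * 1000 / 100 = 100 per top-level vdev, so a pool with
4 eligible top-level vdevs gets mc_alloc_max_slots = 400.
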
/*
* Iterate to convergence.
*/
@@ -6292,6 +6902,23 @@ spa_sync(spa_t *spa, uint64_t txg)
} while (dmu_objset_is_dirty(mos, txg));
+#ifdef ZFS_DEBUG
+ if (!list_is_empty(&spa->spa_config_dirty_list)) {
+ /*
+ * Make sure that the number of ZAPs for all the vdevs matches
+ * the number of ZAPs in the per-vdev ZAP list. This only gets
+ * called if the config is dirty; otherwise there may be
+ * outstanding AVZ operations that weren't completed in
+ * spa_sync_config_object.
+ */
+ uint64_t all_vdev_zap_entry_count;
+ ASSERT0(zap_count(spa->spa_meta_objset,
+ spa->spa_all_vdev_zaps, &all_vdev_zap_entry_count));
+ ASSERT3U(vdev_count_verify_zaps(spa->spa_root_vdev), ==,
+ all_vdev_zap_entry_count);
+ }
+#endif
+
/*
* Rewrite the vdev configuration (which includes the uberblock)
* to commit the transaction group.
@@ -6322,16 +6949,10 @@ spa_sync(spa_t *spa, uint64_t txg)
if (svdcount == SPA_DVAS_PER_BP)
break;
}
- error = vdev_config_sync(svd, svdcount, txg, B_FALSE);
- if (error != 0)
- error = vdev_config_sync(svd, svdcount, txg,
- B_TRUE);
+ error = vdev_config_sync(svd, svdcount, txg);
} else {
error = vdev_config_sync(rvd->vdev_child,
- rvd->vdev_children, txg, B_FALSE);
- if (error != 0)
- error = vdev_config_sync(rvd->vdev_child,
- rvd->vdev_children, txg, B_TRUE);
+ rvd->vdev_children, txg);
}
if (error == 0)
@@ -6346,7 +6967,7 @@ spa_sync(spa_t *spa, uint64_t txg)
}
dmu_tx_commit(tx);
- taskq_cancel_id(system_taskq, spa->spa_deadman_tqid);
+ taskq_cancel_id(system_delay_taskq, spa->spa_deadman_tqid);
spa->spa_deadman_tqid = 0;
/*
@@ -6365,10 +6986,12 @@ spa_sync(spa_t *spa, uint64_t txg)
spa->spa_config_syncing = NULL;
}
- spa->spa_ubsync = spa->spa_uberblock;
-
dsl_pool_sync_done(dp, txg);
+ mutex_enter(&spa->spa_alloc_lock);
+ VERIFY0(avl_numnodes(&spa->spa_alloc_tree));
+ mutex_exit(&spa->spa_alloc_lock);
+
/*
* Update usable space statistics.
*/
@@ -6387,6 +7010,13 @@ spa_sync(spa_t *spa, uint64_t txg)
spa->spa_sync_pass = 0;
+ /*
+ * Update the last synced uberblock here. We want to do this at
+ * the end of spa_sync() so that consumers of spa_last_synced_txg()
+ * will be guaranteed that all the processing associated with
+ * that txg has been completed.
+ */
+ spa->spa_ubsync = spa->spa_uberblock;
spa_config_exit(spa, SCL_CONFIG, FTAG);
spa_handle_ignored_writes(spa);
@@ -6550,18 +7180,44 @@ spa_has_active_shared_spare(spa_t *spa)
return (B_FALSE);
}
+static sysevent_t *
+spa_event_create(spa_t *spa, vdev_t *vd, nvlist_t *hist_nvl, const char *name)
+{
+ sysevent_t *ev = NULL;
+#ifdef _KERNEL
+ nvlist_t *resource;
+
+ resource = zfs_event_create(spa, vd, FM_SYSEVENT_CLASS, name, hist_nvl);
+ if (resource) {
+ ev = kmem_alloc(sizeof (sysevent_t), KM_SLEEP);
+ ev->resource = resource;
+ }
+#endif
+ return (ev);
+}
+
+static void
+spa_event_post(sysevent_t *ev)
+{
+#ifdef _KERNEL
+ if (ev) {
+ zfs_zevent_post(ev->resource, NULL, zfs_zevent_post_cb);
+ kmem_free(ev, sizeof (*ev));
+ }
+#endif
+}
+
/*
- * Post a FM_EREPORT_ZFS_* event from sys/fm/fs/zfs.h. The payload will be
+ * Post a zevent corresponding to the given sysevent. The 'name' must be one
+ * of the event definitions in sys/sysevent/eventdefs.h. The payload will be
* filled in from the spa and (optionally) the vdev. This doesn't do anything
* in the userland libzpool, as we don't want consumers to misinterpret ztest
* or zdb as real changes.
*/
void
-spa_event_notify(spa_t *spa, vdev_t *vd, const char *name)
+spa_event_notify(spa_t *spa, vdev_t *vd, nvlist_t *hist_nvl, const char *name)
{
-#ifdef _KERNEL
- zfs_ereport_post(name, spa, vd, NULL, 0, 0);
-#endif
+ spa_event_post(spa_event_create(spa, vd, hist_nvl, name));
}
#if defined(_KERNEL) && defined(HAVE_SPL)
@@ -6635,6 +7291,7 @@ module_param(spa_load_verify_data, int, 0644);
MODULE_PARM_DESC(spa_load_verify_data,
"Set to traverse data on pool import");
+/* CSTYLED */
module_param(zio_taskq_batch_pct, uint, 0444);
MODULE_PARM_DESC(zio_taskq_batch_pct,
"Percentage of CPUs to run an IO worker thread");
diff --git a/zfs/module/zfs/spa_config.c b/zfs/module/zfs/spa_config.c
index 19432e0a024e..5b792b868455 100644
--- a/zfs/module/zfs/spa_config.c
+++ b/zfs/module/zfs/spa_config.c
@@ -22,10 +22,12 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright 2017 Joyent, Inc.
*/
#include <sys/spa.h>
+#include <sys/fm/fs/zfs.h>
#include <sys/spa_impl.h>
#include <sys/nvpair.h>
#include <sys/uio.h>
@@ -128,7 +130,7 @@ spa_config_load(void)
if (nvpair_type(nvpair) != DATA_TYPE_NVLIST)
continue;
- VERIFY(nvpair_value_nvlist(nvpair, &child) == 0);
+ child = fnvpair_value_nvlist(nvpair);
if (spa_lookup(nvpair_name(nvpair)) != NULL)
continue;
@@ -145,35 +147,30 @@ spa_config_load(void)
kobj_close_file(file);
}
-static void
+static int
spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
{
size_t buflen;
char *buf;
vnode_t *vp;
int oflags = FWRITE | FTRUNC | FCREAT | FOFFMAX;
- int error;
char *temp;
+ int err;
/*
* If the nvlist is empty (NULL), then remove the old cachefile.
*/
if (nvl == NULL) {
- (void) vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE);
- return;
+ err = vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE);
+ return (err);
}
/*
* Pack the configuration into a buffer.
*/
- VERIFY(nvlist_size(nvl, &buflen, NV_ENCODE_XDR) == 0);
-
- buf = vmem_alloc(buflen, KM_SLEEP);
+ buf = fnvlist_pack(nvl, &buflen);
temp = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
- VERIFY(nvlist_pack(nvl, &buf, &buflen, NV_ENCODE_XDR,
- KM_SLEEP) == 0);
-
#if defined(__linux__) && defined(_KERNEL)
/*
* Write the configuration to disk. Due to the complexity involved
@@ -181,16 +178,16 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
* and overwritten in place. In the event of an error the file is
* unlinked to make sure we always have a consistent view of the data.
*/
- error = vn_open(dp->scd_path, UIO_SYSSPACE, oflags, 0644, &vp, 0, 0);
- if (error == 0) {
- error = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0,
+ err = vn_open(dp->scd_path, UIO_SYSSPACE, oflags, 0644, &vp, 0, 0);
+ if (err == 0) {
+ err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0,
UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, NULL);
- if (error == 0)
- error = VOP_FSYNC(vp, FSYNC, kcred, NULL);
+ if (err == 0)
+ err = VOP_FSYNC(vp, FSYNC, kcred, NULL);
(void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL);
- if (error)
+ if (err)
(void) vn_remove(dp->scd_path, UIO_SYSSPACE, RMFILE);
}
#else
@@ -201,21 +198,23 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
*/
(void) snprintf(temp, MAXPATHLEN, "%s.tmp", dp->scd_path);
- error = vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0);
- if (error == 0) {
- if (vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE,
- 0, RLIM64_INFINITY, kcred, NULL) == 0 &&
- VOP_FSYNC(vp, FSYNC, kcred, NULL) == 0) {
- (void) vn_rename(temp, dp->scd_path, UIO_SYSSPACE);
- }
+ err = vn_open(temp, UIO_SYSSPACE, oflags, 0644, &vp, CRCREAT, 0);
+ if (err == 0) {
+ err = vn_rdwr(UIO_WRITE, vp, buf, buflen, 0, UIO_SYSSPACE,
+ 0, RLIM64_INFINITY, kcred, NULL);
+ if (err == 0)
+ err = VOP_FSYNC(vp, FSYNC, kcred, NULL);
+ if (err == 0)
+ err = vn_rename(temp, dp->scd_path, UIO_SYSSPACE);
(void) VOP_CLOSE(vp, oflags, 1, 0, kcred, NULL);
}
(void) vn_remove(temp, UIO_SYSSPACE, RMFILE);
#endif
- vmem_free(buf, buflen);
+ fnvlist_pack_free(buf, buflen);
kmem_free(temp, MAXPATHLEN);
+ return (err);
}
/*
@@ -224,7 +223,7 @@ spa_config_write(spa_config_dirent_t *dp, nvlist_t *nvl)
* the configuration has been synced to the MOS. This exposes a window where
* the MOS config will have been updated but the cache file has not. If
* the system were to crash at that instant then the cached config may not
- * contain the correct information to open the pool and an explicity import
+ * contain the correct information to open the pool and an explicit import
* would be required.
*/
void
@@ -233,6 +232,8 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
spa_config_dirent_t *dp, *tdp;
nvlist_t *nvl;
char *pool_name;
+ boolean_t ccw_failure;
+ int error = 0;
ASSERT(MUTEX_HELD(&spa_namespace_lock));
@@ -244,6 +245,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
* cachefile is changed, the new one is pushed onto this list, allowing
* us to update previous cachefiles that no longer contain this pool.
*/
+ ccw_failure = B_FALSE;
for (dp = list_head(&target->spa_config_list); dp != NULL;
dp = list_next(&target->spa_config_list, dp)) {
spa_t *spa = NULL;
@@ -269,6 +271,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
mutex_enter(&spa->spa_props_lock);
tdp = list_head(&spa->spa_config_list);
if (spa->spa_config == NULL ||
+ tdp == NULL ||
tdp->scd_path == NULL ||
strcmp(tdp->scd_path, dp->scd_path) != 0) {
mutex_exit(&spa->spa_props_lock);
@@ -276,24 +279,44 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
}
if (nvl == NULL)
- VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME,
- KM_SLEEP) == 0);
+ nvl = fnvlist_alloc();
- if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME) {
- VERIFY0(nvlist_lookup_string(spa->spa_config,
- ZPOOL_CONFIG_POOL_NAME, &pool_name));
- } else
+ if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME)
+ pool_name = fnvlist_lookup_string(
+ spa->spa_config, ZPOOL_CONFIG_POOL_NAME);
+ else
pool_name = spa_name(spa);
- VERIFY(nvlist_add_nvlist(nvl, pool_name,
- spa->spa_config) == 0);
+ fnvlist_add_nvlist(nvl, pool_name, spa->spa_config);
mutex_exit(&spa->spa_props_lock);
}
- spa_config_write(dp, nvl);
+ error = spa_config_write(dp, nvl);
+ if (error != 0)
+ ccw_failure = B_TRUE;
nvlist_free(nvl);
}
+ if (ccw_failure) {
+ /*
+ * Keep trying so that configuration data is
+ * written if/when any temporary filesystem
+ * resource issues are resolved.
+ */
+ if (target->spa_ccw_fail_time == 0) {
+ zfs_ereport_post(FM_EREPORT_ZFS_CONFIG_CACHE_WRITE,
+ target, NULL, NULL, 0, 0);
+ }
+ target->spa_ccw_fail_time = gethrtime();
+ spa_async_request(target, SPA_ASYNC_CONFIG_UPDATE);
+ } else {
+ /*
+ * Do not rate limit future attempts to update
+ * the config cache.
+ */
+ target->spa_ccw_fail_time = 0;
+ }
+
/*
* Remove any config entries older than the current one.
*/
@@ -308,7 +331,7 @@ spa_config_sync(spa_t *target, boolean_t removing, boolean_t postsysevent)
spa_config_generation++;
if (postsysevent)
- spa_event_notify(target, NULL, FM_EREPORT_ZFS_CONFIG_SYNC);
+ spa_event_notify(target, NULL, NULL, ESC_ZFS_CONFIG_SYNC);
}
/*
@@ -326,15 +349,15 @@ spa_all_configs(uint64_t *generation)
if (*generation == spa_config_generation)
return (NULL);
- VERIFY(nvlist_alloc(&pools, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+ pools = fnvlist_alloc();
mutex_enter(&spa_namespace_lock);
while ((spa = spa_next(spa)) != NULL) {
if (INGLOBALZONE(curproc) ||
zone_dataset_visible(spa_name(spa), NULL)) {
mutex_enter(&spa->spa_props_lock);
- VERIFY(nvlist_add_nvlist(pools, spa_name(spa),
- spa->spa_config) == 0);
+ fnvlist_add_nvlist(pools, spa_name(spa),
+ spa->spa_config);
mutex_exit(&spa->spa_props_lock);
}
}
@@ -348,8 +371,7 @@ void
spa_config_set(spa_t *spa, nvlist_t *config)
{
mutex_enter(&spa->spa_props_lock);
- if (spa->spa_config != NULL)
- nvlist_free(spa->spa_config);
+ nvlist_free(spa->spa_config);
spa->spa_config = config;
mutex_exit(&spa->spa_props_lock);
}
@@ -369,6 +391,7 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
boolean_t locked = B_FALSE;
uint64_t split_guid;
char *pool_name;
+ int config_gen_flags = 0;
if (vd == NULL) {
vd = rvd;
@@ -397,55 +420,38 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
*/
if (spa->spa_import_flags & ZFS_IMPORT_TEMP_NAME) {
VERIFY0(nvlist_lookup_string(spa->spa_config,
- ZPOOL_CONFIG_POOL_NAME, &pool_name));
+ ZPOOL_CONFIG_POOL_NAME, &pool_name));
} else
pool_name = spa_name(spa);
- VERIFY(nvlist_alloc(&config, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_VERSION,
- spa_version(spa)) == 0);
- VERIFY(nvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME,
- pool_name) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE,
- spa_state(spa)) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG,
- txg) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
- spa_guid(spa)) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA,
- spa->spa_errata) == 0);
- VERIFY(spa->spa_comment == NULL || nvlist_add_string(config,
- ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0);
-
-
-#ifdef _KERNEL
- hostid = zone_get_hostid(NULL);
-#else /* _KERNEL */
- /*
- * We're emulating the system's hostid in userland, so we can't use
- * zone_get_hostid().
- */
- (void) ddi_strtoul(hw_serial, NULL, 10, &hostid);
-#endif /* _KERNEL */
- if (hostid != 0) {
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID,
- hostid) == 0);
- }
- VERIFY0(nvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME,
- utsname()->nodename));
+ config = fnvlist_alloc();
+
+ fnvlist_add_uint64(config, ZPOOL_CONFIG_VERSION, spa_version(spa));
+ fnvlist_add_string(config, ZPOOL_CONFIG_POOL_NAME, pool_name);
+ fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_STATE, spa_state(spa));
+ fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_TXG, txg);
+ fnvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID, spa_guid(spa));
+ fnvlist_add_uint64(config, ZPOOL_CONFIG_ERRATA, spa->spa_errata);
+ if (spa->spa_comment != NULL)
+ fnvlist_add_string(config, ZPOOL_CONFIG_COMMENT,
+ spa->spa_comment);
+
+ hostid = spa_get_hostid();
+ if (hostid != 0)
+ fnvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, hostid);
+ fnvlist_add_string(config, ZPOOL_CONFIG_HOSTNAME, utsname()->nodename);
if (vd != rvd) {
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
- vd->vdev_top->vdev_guid) == 0);
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
- vd->vdev_guid) == 0);
+ fnvlist_add_uint64(config, ZPOOL_CONFIG_TOP_GUID,
+ vd->vdev_top->vdev_guid);
+ fnvlist_add_uint64(config, ZPOOL_CONFIG_GUID,
+ vd->vdev_guid);
if (vd->vdev_isspare)
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_SPARE,
- 1ULL) == 0);
+ fnvlist_add_uint64(config,
+ ZPOOL_CONFIG_IS_SPARE, 1ULL);
if (vd->vdev_islog)
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_IS_LOG,
- 1ULL) == 0);
+ fnvlist_add_uint64(config,
+ ZPOOL_CONFIG_IS_LOG, 1ULL);
vd = vd->vdev_top; /* label contains top config */
} else {
/*
@@ -453,8 +459,12 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
* in the mos config, and not in the vdev labels
*/
if (spa->spa_config_splitting != NULL)
- VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT,
- spa->spa_config_splitting) == 0);
+ fnvlist_add_nvlist(config, ZPOOL_CONFIG_SPLIT,
+ spa->spa_config_splitting);
+
+ fnvlist_add_boolean(config, ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS);
+
+ config_gen_flags |= VDEV_CONFIG_MOS;
}
/*
@@ -469,19 +479,18 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
if (spa->spa_config_splitting != NULL &&
nvlist_lookup_uint64(spa->spa_config_splitting,
ZPOOL_CONFIG_SPLIT_GUID, &split_guid) == 0) {
- VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID,
- split_guid) == 0);
+ fnvlist_add_uint64(config, ZPOOL_CONFIG_SPLIT_GUID, split_guid);
}
- nvroot = vdev_config_generate(spa, vd, getstats, 0);
- VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
+ nvroot = vdev_config_generate(spa, vd, getstats, config_gen_flags);
+ fnvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot);
nvlist_free(nvroot);
/*
* Store what's necessary for reading the MOS in the label.
*/
- VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
- spa->spa_label_features) == 0);
+ fnvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
+ spa->spa_label_features);
if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
ddt_histogram_t *ddh;
@@ -490,23 +499,23 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
ddh = kmem_zalloc(sizeof (ddt_histogram_t), KM_SLEEP);
ddt_get_dedup_histogram(spa, ddh);
- VERIFY(nvlist_add_uint64_array(config,
+ fnvlist_add_uint64_array(config,
ZPOOL_CONFIG_DDT_HISTOGRAM,
- (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t)) == 0);
+ (uint64_t *)ddh, sizeof (*ddh) / sizeof (uint64_t));
kmem_free(ddh, sizeof (ddt_histogram_t));
ddo = kmem_zalloc(sizeof (ddt_object_t), KM_SLEEP);
ddt_get_dedup_object_stats(spa, ddo);
- VERIFY(nvlist_add_uint64_array(config,
+ fnvlist_add_uint64_array(config,
ZPOOL_CONFIG_DDT_OBJ_STATS,
- (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t)) == 0);
+ (uint64_t *)ddo, sizeof (*ddo) / sizeof (uint64_t));
kmem_free(ddo, sizeof (ddt_object_t));
dds = kmem_zalloc(sizeof (ddt_stat_t), KM_SLEEP);
ddt_get_dedup_stats(spa, dds);
- VERIFY(nvlist_add_uint64_array(config,
+ fnvlist_add_uint64_array(config,
ZPOOL_CONFIG_DDT_STATS,
- (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t)) == 0);
+ (uint64_t *)dds, sizeof (*dds) / sizeof (uint64_t));
kmem_free(dds, sizeof (ddt_stat_t));
}
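Most of the churn in spa_config.c is mechanical: VERIFY(nvlist_op(...) == 0) call sites become fnvlist_op(...), the fail-fast variants that verify success internally and return the value directly. A sketch of that wrapper pattern in plain C; lookup_int()/flookup_int() are hypothetical stand-ins for the libnvpair pairs:

#include <assert.h>

static int
lookup_int(const char *key, int *valp)
{
    (void) key;         /* pretend lookup; real code would search a list */
    *valp = 42;
    return (0);         /* 0 == success, like nvlist_lookup_*() */
}

static int
flookup_int(const char *key)
{
    int val, err = lookup_int(key, &val);

    assert(err == 0);   /* the kernel uses VERIFY, which never compiles out */
    return (val);       /* return the value, not an error code */
}

int
main(void)
{
    return (flookup_int("version") == 42 ? 0 : 1);
}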
diff --git a/zfs/module/zfs/spa_errlog.c b/zfs/module/zfs/spa_errlog.c
index 925e2af60402..3c8aa543bae2 100644
--- a/zfs/module/zfs/spa_errlog.c
+++ b/zfs/module/zfs/spa_errlog.c
@@ -73,13 +73,13 @@ bookmark_to_name(zbookmark_phys_t *zb, char *buf, size_t len)
static void
name_to_bookmark(char *buf, zbookmark_phys_t *zb)
{
- zb->zb_objset = strtonum(buf, &buf);
+ zb->zb_objset = zfs_strtonum(buf, &buf);
ASSERT(*buf == ':');
- zb->zb_object = strtonum(buf + 1, &buf);
+ zb->zb_object = zfs_strtonum(buf + 1, &buf);
ASSERT(*buf == ':');
- zb->zb_level = (int)strtonum(buf + 1, &buf);
+ zb->zb_level = (int)zfs_strtonum(buf + 1, &buf);
ASSERT(*buf == ':');
- zb->zb_blkid = strtonum(buf + 1, &buf);
+ zb->zb_blkid = zfs_strtonum(buf + 1, &buf);
ASSERT(*buf == '\0');
}
#endif
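The rename to zfs_strtonum() avoids clashing with the libc/BSD strtonum(); the function still walks a "<objset>:<object>:<level>:<blkid>" bookmark name, advancing the cursor past each lowercase-hex field. A userland equivalent of name_to_bookmark() using strtoull(); the sample name is made up:

#include <stdio.h>
#include <stdlib.h>

int
main(void)
{
    const char *name = "36:1005:0:8c";      /* made-up bookmark name */
    char *cur;
    unsigned long long objset, object, level, blkid;

    objset = strtoull(name, &cur, 16);      /* cur now points at ':' */
    object = strtoull(cur + 1, &cur, 16);
    level = strtoull(cur + 1, &cur, 16);
    blkid = strtoull(cur + 1, &cur, 16);
    (void) printf("objset=%llu object=%llu level=%llu blkid=%llu\n",
        objset, object, level, blkid);
    return (0);
}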
diff --git a/zfs/module/zfs/spa_history.c b/zfs/module/zfs/spa_history.c
index 01aa4641e63f..73571c032598 100644
--- a/zfs/module/zfs/spa_history.c
+++ b/zfs/module/zfs/spa_history.c
@@ -21,7 +21,9 @@
/*
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2017 Joyent, Inc.
*/
#include <sys/spa.h>
@@ -46,7 +48,7 @@
* The history log is stored as a dmu object containing
* <packed record length, record nvlist> tuples.
*
- * Where "record nvlist" is a nvlist containing uint64_ts and strings, and
+ * Where "record nvlist" is an nvlist containing uint64_ts and strings, and
* "packed record length" is the packed length of the "record nvlist" stored
* as a little endian uint64_t.
*
@@ -191,6 +193,71 @@ spa_history_zone(void)
#endif
}
+/*
+ * Post a history sysevent.
+ *
+ * The nvlist_t* passed into this function will be transformed into a new
+ * nvlist where:
+ *
+ * 1. Nested nvlists will be flattened to a single level
+ * 2. Keys will have their names normalized (to remove any problematic
+ * characters, such as whitespace)
+ *
+ * The nvlist_t passed into this function will be duplicated and should
+ * be freed by the caller.
+ *
+ */
+static void
+spa_history_log_notify(spa_t *spa, nvlist_t *nvl)
+{
+ nvlist_t *hist_nvl = fnvlist_alloc();
+ uint64_t uint64;
+ char *string;
+
+ if (nvlist_lookup_string(nvl, ZPOOL_HIST_CMD, &string) == 0)
+ fnvlist_add_string(hist_nvl, ZFS_EV_HIST_CMD, string);
+
+ if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME, &string) == 0)
+ fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_NAME, string);
+
+ if (nvlist_lookup_string(nvl, ZPOOL_HIST_ZONE, &string) == 0)
+ fnvlist_add_string(hist_nvl, ZFS_EV_HIST_ZONE, string);
+
+ if (nvlist_lookup_string(nvl, ZPOOL_HIST_HOST, &string) == 0)
+ fnvlist_add_string(hist_nvl, ZFS_EV_HIST_HOST, string);
+
+ if (nvlist_lookup_string(nvl, ZPOOL_HIST_DSNAME, &string) == 0)
+ fnvlist_add_string(hist_nvl, ZFS_EV_HIST_DSNAME, string);
+
+ if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR, &string) == 0)
+ fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_STR, string);
+
+ if (nvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL, &string) == 0)
+ fnvlist_add_string(hist_nvl, ZFS_EV_HIST_IOCTL, string);
+
+ if (nvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME, &string) == 0)
+ fnvlist_add_string(hist_nvl, ZFS_EV_HIST_INT_NAME, string);
+
+ if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_DSID, &uint64) == 0)
+ fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_DSID, uint64);
+
+ if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_TXG, &uint64) == 0)
+ fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_TXG, uint64);
+
+ if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_TIME, &uint64) == 0)
+ fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_TIME, uint64);
+
+ if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_WHO, &uint64) == 0)
+ fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_WHO, uint64);
+
+ if (nvlist_lookup_uint64(nvl, ZPOOL_HIST_INT_EVENT, &uint64) == 0)
+ fnvlist_add_uint64(hist_nvl, ZFS_EV_HIST_INT_EVENT, uint64);
+
+ spa_event_notify(spa, NULL, hist_nvl, ESC_ZFS_HISTORY_EVENT);
+
+ nvlist_free(hist_nvl);
+}
+
/*
* Write out a history event.
*/
@@ -254,6 +321,22 @@ spa_history_log_sync(void *arg, dmu_tx_t *tx)
fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_NAME),
fnvlist_lookup_string(nvl, ZPOOL_HIST_INT_STR));
}
+ /*
+ * The history sysevent is posted only for internal history
+ * messages to show what has happened, not how it happened. For
+ * example, the following command:
+ *
+ * # zfs destroy -r tank/foo
+ *
+ * will result in one sysevent posted per dataset that is
+ * destroyed as a result of the command - which could be more
+ * than one event in total. By contrast, if the sysevent was
+ * posted as a result of the ZPOOL_HIST_CMD key being present
+ * it would result in only one sysevent being posted with the
+ * full command line arguments, requiring the consumer to know
+ * how to parse and understand zfs(1M) command invocations.
+ */
+ spa_history_log_notify(spa, nvl);
} else if (nvlist_exists(nvl, ZPOOL_HIST_IOCTL)) {
zfs_dbgmsg("ioctl %s",
fnvlist_lookup_string(nvl, ZPOOL_HIST_IOCTL));
@@ -493,7 +576,7 @@ spa_history_log_internal_ds(dsl_dataset_t *ds, const char *operation,
dmu_tx_t *tx, const char *fmt, ...)
{
va_list adx;
- char namebuf[MAXNAMELEN];
+ char namebuf[ZFS_MAX_DATASET_NAME_LEN];
nvlist_t *nvl = fnvlist_alloc();
ASSERT(tx != NULL);
@@ -512,7 +595,7 @@ spa_history_log_internal_dd(dsl_dir_t *dd, const char *operation,
dmu_tx_t *tx, const char *fmt, ...)
{
va_list adx;
- char namebuf[MAXNAMELEN];
+ char namebuf[ZFS_MAX_DATASET_NAME_LEN];
nvlist_t *nvl = fnvlist_alloc();
ASSERT(tx != NULL);
@@ -533,7 +616,7 @@ spa_history_log_version(spa_t *spa, const char *operation)
utsname_t *u = utsname();
spa_history_log_internal(spa, operation, NULL,
- "pool version %llu; software version %llu/%d; uts %s %s %s %s",
+ "pool version %llu; software version %llu/%llu; uts %s %s %s %s",
(u_longlong_t)spa_version(spa), SPA_VERSION, ZPL_VERSION,
u->nodename, u->release, u->version, u->machine);
}
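spa_history_log_notify() above copies a fixed set of history-record keys into a fresh single-level nvlist under normalized ZFS_EV_HIST_* names before posting the event. The shape of that key translation in plain C; the table entries are illustrative, not the actual ZPOOL_HIST_*/ZFS_EV_HIST_* values:

#include <stdio.h>

struct keymap {
    const char *hist_key;       /* key in the history record */
    const char *event_key;      /* normalized key for the flat event */
};

static const struct keymap map[] = {
    { "internal_name",  "history_internal_name" },
    { "dsname",         "history_dsname" },
    { "txg",            "history_txg" },
};

int
main(void)
{
    size_t i;

    /* Copy only the keys we recognize; absent keys are simply skipped. */
    for (i = 0; i < sizeof (map) / sizeof (map[0]); i++)
        (void) printf("%s -> %s\n", map[i].hist_key, map[i].event_key);
    return (0);
}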
diff --git a/zfs/module/zfs/spa_misc.c b/zfs/module/zfs/spa_misc.c
index 409dce121212..3787e010f60a 100644
--- a/zfs/module/zfs/spa_misc.c
+++ b/zfs/module/zfs/spa_misc.c
@@ -20,9 +20,11 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
+ * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2017 Datto Inc.
*/
#include <sys/zfs_context.h>
@@ -36,6 +38,7 @@
#include <sys/zil.h>
#include <sys/vdev_impl.h>
#include <sys/vdev_file.h>
+#include <sys/vdev_raidz.h>
#include <sys/metaslab.h>
#include <sys/uberblock_impl.h>
#include <sys/txg.h>
@@ -52,7 +55,8 @@
#include <sys/ddt.h>
#include <sys/kstat.h>
#include "zfs_prop.h"
-#include "zfeature_common.h"
+#include <sys/zfeature.h>
+#include "qat_compress.h"
/*
* SPA locking
@@ -295,6 +299,12 @@ int zfs_free_leak_on_eio = B_FALSE;
*/
unsigned long zfs_deadman_synctime_ms = 1000000ULL;
+/*
+ * Check time in milliseconds. This defines the frequency at which we check
+ * for hung I/O.
+ */
+unsigned long zfs_deadman_checktime_ms = 5000ULL;
+
/*
* By default the deadman is enabled.
*/
@@ -333,9 +343,14 @@ int spa_asize_inflation = 24;
* it is possible to run the pool completely out of space, causing it to
* be permanently read-only.
*
+ * Note that on very small pools, the slop space will be larger than
+ * 3.2%, in an effort to have it be at least spa_min_slop (128MB),
+ * but we never allow it to be more than half the pool size.
+ *
* See also the comments in zfs_space_check_t.
*/
int spa_slop_shift = 5;
+uint64_t spa_min_slop = 128 * 1024 * 1024;
/*
* ==========================================================================
@@ -385,14 +400,16 @@ spa_config_tryenter(spa_t *spa, int locks, void *tag, krw_t rw)
if (rw == RW_READER) {
if (scl->scl_writer || scl->scl_write_wanted) {
mutex_exit(&scl->scl_lock);
- spa_config_exit(spa, locks ^ (1 << i), tag);
+ spa_config_exit(spa, locks & ((1 << i) - 1),
+ tag);
return (0);
}
} else {
ASSERT(scl->scl_writer != curthread);
if (!refcount_is_zero(&scl->scl_count)) {
mutex_exit(&scl->scl_lock);
- spa_config_exit(spa, locks ^ (1 << i), tag);
+ spa_config_exit(spa, locks & ((1 << i) - 1),
+ tag);
return (0);
}
scl->scl_writer = curthread;
@@ -520,15 +537,19 @@ spa_deadman(void *arg)
{
spa_t *spa = arg;
+ /* Disable the deadman if the pool is suspended. */
+ if (spa_suspended(spa))
+ return;
+
zfs_dbgmsg("slow spa_sync: started %llu seconds ago, calls %llu",
(gethrtime() - spa->spa_sync_starttime) / NANOSEC,
++spa->spa_deadman_calls);
if (zfs_deadman_enabled)
vdev_deadman(spa->spa_root_vdev);
- spa->spa_deadman_tqid = taskq_dispatch_delay(system_taskq,
- spa_deadman, spa, KM_SLEEP, ddi_get_lbolt() +
- NSEC_TO_TICK(spa->spa_deadman_synctime));
+ spa->spa_deadman_tqid = taskq_dispatch_delay(system_delay_taskq,
+ spa_deadman, spa, TQ_SLEEP, ddi_get_lbolt() +
+ MSEC_TO_TICK(zfs_deadman_checktime_ms));
}
/*
@@ -555,10 +576,12 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
mutex_init(&spa->spa_history_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_proc_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_props_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&spa->spa_cksum_tmpls_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&spa->spa_feat_stats_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&spa->spa_alloc_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
cv_init(&spa->spa_evicting_os_cv, NULL, CV_DEFAULT, NULL);
@@ -591,6 +614,9 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
if (altroot)
spa->spa_root = spa_strdup(altroot);
+ avl_create(&spa->spa_alloc_tree, zio_bookmark_compare,
+ sizeof (zio_t), offsetof(zio_t, io_alloc_node));
+
/*
* Every pool starts with the default cachefile
*/
@@ -626,6 +652,9 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
spa->spa_min_ashift = INT_MAX;
spa->spa_max_ashift = 0;
+ /* Reset cached value */
+ spa->spa_dedup_dspace = ~0ULL;
+
/*
* As a pool is being created, treat all features as disabled by
* setting SPA_FEATURE_DISABLED for all entries in the feature
@@ -668,6 +697,7 @@ spa_remove(spa_t *spa)
kmem_free(dp, sizeof (spa_config_dirent_t));
}
+ avl_destroy(&spa->spa_alloc_tree);
list_destroy(&spa->spa_config_list);
nvlist_free(spa->spa_label_features);
@@ -683,12 +713,15 @@ spa_remove(spa_t *spa)
for (t = 0; t < TXG_SIZE; t++)
bplist_destroy(&spa->spa_free_bplist[t]);
+ zio_checksum_templates_free(spa);
+
cv_destroy(&spa->spa_async_cv);
cv_destroy(&spa->spa_evicting_os_cv);
cv_destroy(&spa->spa_proc_cv);
cv_destroy(&spa->spa_scrub_io_cv);
cv_destroy(&spa->spa_suspend_cv);
+ mutex_destroy(&spa->spa_alloc_lock);
mutex_destroy(&spa->spa_async_lock);
mutex_destroy(&spa->spa_errlist_lock);
mutex_destroy(&spa->spa_errlog_lock);
@@ -696,6 +729,7 @@ spa_remove(spa_t *spa)
mutex_destroy(&spa->spa_history_lock);
mutex_destroy(&spa->spa_proc_lock);
mutex_destroy(&spa->spa_props_lock);
+ mutex_destroy(&spa->spa_cksum_tmpls_lock);
mutex_destroy(&spa->spa_scrub_lock);
mutex_destroy(&spa->spa_suspend_lock);
mutex_destroy(&spa->spa_vdev_top_lock);
@@ -794,18 +828,13 @@ typedef struct spa_aux {
int aux_count;
} spa_aux_t;
-static int
+static inline int
spa_aux_compare(const void *a, const void *b)
{
- const spa_aux_t *sa = a;
- const spa_aux_t *sb = b;
+ const spa_aux_t *sa = (const spa_aux_t *)a;
+ const spa_aux_t *sb = (const spa_aux_t *)b;
- if (sa->aux_guid < sb->aux_guid)
- return (-1);
- else if (sa->aux_guid > sb->aux_guid)
- return (1);
- else
- return (0);
+ return (AVL_CMP(sa->aux_guid, sb->aux_guid));
}
void
@@ -1153,13 +1182,21 @@ int
spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
{
boolean_t config_changed = B_FALSE;
+ vdev_t *vdev_top;
+
+ if (vd == NULL || vd == spa->spa_root_vdev) {
+ vdev_top = spa->spa_root_vdev;
+ } else {
+ vdev_top = vd->vdev_top;
+ }
if (vd != NULL || error == 0)
- vdev_dtl_reassess(vd ? vd->vdev_top : spa->spa_root_vdev,
- 0, 0, B_FALSE);
+ vdev_dtl_reassess(vdev_top, 0, 0, B_FALSE);
if (vd != NULL) {
- vdev_state_dirty(vd->vdev_top);
+ if (vd != spa->spa_root_vdev)
+ vdev_state_dirty(vdev_top);
+
config_changed = B_TRUE;
spa->spa_config_generation++;
}
@@ -1347,6 +1384,9 @@ spa_get_random(uint64_t range)
ASSERT(range != 0);
+ if (range == 1)
+ return (0);
+
(void) random_get_pseudo_bytes((void *)&r, sizeof (uint64_t));
return (r % range);
@@ -1428,7 +1468,7 @@ zfs_panic_recover(const char *fmt, ...)
* lowercase hexadecimal numbers that don't overflow.
*/
uint64_t
-strtonum(const char *str, char **nptr)
+zfs_strtonum(const char *str, char **nptr)
{
uint64_t val = 0;
char c;
@@ -1568,6 +1608,16 @@ spa_syncing_txg(spa_t *spa)
return (spa->spa_syncing_txg);
}
+/*
+ * Return the last txg where data can be dirtied. The final txgs
+ * will be used to just clear out any deferred frees that remain.
+ */
+uint64_t
+spa_final_dirty_txg(spa_t *spa)
+{
+ return (spa->spa_final_txg - TXG_DEFER_SIZE);
+}
+
pool_state_t
spa_state(spa_t *spa)
{
@@ -1586,23 +1636,33 @@ spa_freeze_txg(spa_t *spa)
return (spa->spa_freeze_txg);
}
-/* ARGSUSED */
+/*
+ * Return the inflated asize for a logical write in bytes. This is used by the
+ * DMU to calculate the space a logical write will require on disk.
+ * If lsize is smaller than the largest physical block size allocatable on this
+ * pool we use its value instead, since the write will end up using the whole
+ * block anyway.
+ */
uint64_t
-spa_get_asize(spa_t *spa, uint64_t lsize)
+spa_get_worst_case_asize(spa_t *spa, uint64_t lsize)
{
- return (lsize * spa_asize_inflation);
+ if (lsize == 0)
+ return (0); /* No inflation needed */
+ return (MAX(lsize, 1 << spa->spa_max_ashift) * spa_asize_inflation);
}
/*
* Return the amount of slop space in bytes. It is 1/32 of the pool (3.2%),
- * or at least 32MB.
+ * or at least 128MB, unless that would cause it to be more than half the
+ * pool size.
*
* See the comment above spa_slop_shift for details.
*/
uint64_t
-spa_get_slop_space(spa_t *spa) {
+spa_get_slop_space(spa_t *spa)
+{
uint64_t space = spa_get_dspace(spa);
- return (MAX(space >> spa_slop_shift, SPA_MINDEVSIZE >> 1));
+ return (MAX(space >> spa_slop_shift, MIN(space >> 1, spa_min_slop)));
}
uint64_t
@@ -1771,11 +1831,8 @@ spa_name_compare(const void *a1, const void *a2)
int s;
s = strcmp(s1->spa_name, s2->spa_name);
- if (s > 0)
- return (1);
- if (s < 0)
- return (-1);
- return (0);
+
+ return (AVL_ISIGN(s));
}
void
@@ -1824,16 +1881,20 @@ spa_init(int mode)
refcount_init();
unique_init();
range_tree_init();
+ metaslab_alloc_trace_init();
ddt_init();
zio_init();
dmu_init();
zil_init();
vdev_cache_stat_init();
+ vdev_raidz_math_init();
+ vdev_file_init();
zfs_prop_init();
zpool_prop_init();
zpool_feature_init();
spa_config_load();
l2arc_start();
+ qat_init();
}
void
@@ -1843,15 +1904,19 @@ spa_fini(void)
spa_evict_all();
+ vdev_file_fini();
vdev_cache_stat_fini();
+ vdev_raidz_math_fini();
zil_fini();
dmu_fini();
zio_fini();
ddt_fini();
+ metaslab_alloc_trace_fini();
range_tree_fini();
unique_fini();
refcount_fini();
fm_fini();
+ qat_fini();
avl_destroy(&spa_namespace_avl);
avl_destroy(&spa_spare_avl);
@@ -1946,6 +2011,11 @@ spa_scan_stat_init(spa_t *spa)
{
/* data not stored on disk */
spa->spa_scan_pass_start = gethrestime_sec();
+ if (dsl_scan_is_paused_scrub(spa->spa_dsl_pool->dp_scan))
+ spa->spa_scan_pass_scrub_pause = spa->spa_scan_pass_start;
+ else
+ spa->spa_scan_pass_scrub_pause = 0;
+ spa->spa_scan_pass_scrub_spent_paused = 0;
spa->spa_scan_pass_exam = 0;
vdev_scan_stat_init(spa->spa_root_vdev);
}
@@ -1976,6 +2046,8 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps)
/* data not stored on disk */
ps->pss_pass_start = spa->spa_scan_pass_start;
ps->pss_pass_exam = spa->spa_scan_pass_exam;
+ ps->pss_pass_scrub_pause = spa->spa_scan_pass_scrub_pause;
+ ps->pss_pass_scrub_spent_paused = spa->spa_scan_pass_scrub_spent_paused;
return (0);
}
@@ -1995,6 +2067,39 @@ spa_maxblocksize(spa_t *spa)
return (SPA_OLD_MAXBLOCKSIZE);
}
+int
+spa_maxdnodesize(spa_t *spa)
+{
+ if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_DNODE))
+ return (DNODE_MAX_SIZE);
+ else
+ return (DNODE_MIN_SIZE);
+}
+
+boolean_t
+spa_multihost(spa_t *spa)
+{
+ return (spa->spa_multihost ? B_TRUE : B_FALSE);
+}
+
+unsigned long
+spa_get_hostid(void)
+{
+ unsigned long myhostid;
+
+#ifdef _KERNEL
+ myhostid = zone_get_hostid(NULL);
+#else /* _KERNEL */
+ /*
+ * We're emulating the system's hostid in userland, so
+ * we can't use zone_get_hostid().
+ */
+ (void) ddi_strtoul(hw_serial, NULL, 10, &myhostid);
+#endif /* _KERNEL */
+
+ return (myhostid);
+}
+
#if defined(_KERNEL) && defined(HAVE_SPL)
/* Namespace manipulation */
EXPORT_SYMBOL(spa_lookup);
@@ -2037,7 +2142,6 @@ EXPORT_SYMBOL(spa_version);
EXPORT_SYMBOL(spa_state);
EXPORT_SYMBOL(spa_load_state);
EXPORT_SYMBOL(spa_freeze_txg);
-EXPORT_SYMBOL(spa_get_asize);
EXPORT_SYMBOL(spa_get_dspace);
EXPORT_SYMBOL(spa_update_dspace);
EXPORT_SYMBOL(spa_deflate);
@@ -2051,6 +2155,7 @@ EXPORT_SYMBOL(spa_bootfs);
EXPORT_SYMBOL(spa_delegation);
EXPORT_SYMBOL(spa_meta_objset);
EXPORT_SYMBOL(spa_maxblocksize);
+EXPORT_SYMBOL(spa_maxdnodesize);
/* Miscellaneous support routines */
EXPORT_SYMBOL(spa_rename);
@@ -2072,9 +2177,9 @@ EXPORT_SYMBOL(spa_has_slogs);
EXPORT_SYMBOL(spa_is_root);
EXPORT_SYMBOL(spa_writeable);
EXPORT_SYMBOL(spa_mode);
-
EXPORT_SYMBOL(spa_namespace_lock);
+/* BEGIN CSTYLED */
module_param(zfs_flags, uint, 0644);
MODULE_PARM_DESC(zfs_flags, "Set additional debugging flags");
@@ -2088,6 +2193,10 @@ MODULE_PARM_DESC(zfs_free_leak_on_eio,
module_param(zfs_deadman_synctime_ms, ulong, 0644);
MODULE_PARM_DESC(zfs_deadman_synctime_ms, "Expiration time in milliseconds");
+module_param(zfs_deadman_checktime_ms, ulong, 0644);
+MODULE_PARM_DESC(zfs_deadman_checktime_ms,
+ "Dead I/O check interval in milliseconds");
+
module_param(zfs_deadman_enabled, int, 0644);
MODULE_PARM_DESC(zfs_deadman_enabled, "Enable deadman timer");
@@ -2097,4 +2206,5 @@ MODULE_PARM_DESC(spa_asize_inflation,
module_param(spa_slop_shift, int, 0644);
MODULE_PARM_DESC(spa_slop_shift, "Reserved free space in pool");
+/* END CSTYLED */
#endif
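The new spa_get_slop_space() computes MAX(space >> spa_slop_shift, MIN(space >> 1, spa_min_slop)): 1/32 of the pool, raised to at least 128MB on small pools but never more than half the pool. A standalone check of that arithmetic; the pool sizes are arbitrary examples:

#include <stdio.h>
#include <stdint.h>

#define MIN(a, b)   ((a) < (b) ? (a) : (b))
#define MAX(a, b)   ((a) > (b) ? (a) : (b))

static uint64_t
slop_space(uint64_t space, int slop_shift, uint64_t min_slop)
{
    return (MAX(space >> slop_shift, MIN(space >> 1, min_slop)));
}

int
main(void)
{
    uint64_t min_slop = 128ULL << 20;       /* 128MB floor */

    /* 100GB pool: 1/32 is ~3.1GB, well above the floor. */
    (void) printf("%llu\n", (unsigned long long)
        slop_space(100ULL << 30, 5, min_slop));
    /* 100MB pool: the floor would exceed half the pool, so use half. */
    (void) printf("%llu\n", (unsigned long long)
        slop_space(100ULL << 20, 5, min_slop));
    return (0);
}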
diff --git a/zfs/module/zfs/spa_stats.c b/zfs/module/zfs/spa_stats.c
index 2b8559b5d276..7ca359806174 100644
--- a/zfs/module/zfs/spa_stats.c
+++ b/zfs/module/zfs/spa_stats.c
@@ -21,6 +21,7 @@
#include <sys/zfs_context.h>
#include <sys/spa_impl.h>
+#include <sys/vdev_impl.h>
/*
* Keeps stats on last N reads per spa_t, disabled by default.
@@ -37,6 +38,11 @@ int zfs_read_history_hits = 0;
*/
int zfs_txg_history = 0;
+/*
+ * Keeps stats on the last N MMP updates, disabled by default.
+ */
+int zfs_multihost_history = 0;
+
/*
* ==========================================================================
* SPA Read History Routines
@@ -106,7 +112,7 @@ spa_read_history_addr(kstat_t *ksp, loff_t n)
}
/*
- * When the kstat is written discard all spa_read_history_t entires. The
+ * When the kstat is written discard all spa_read_history_t entries. The
* ssh->lock will be held until ksp->ks_ndata entries are processed.
*/
static int
@@ -327,7 +333,7 @@ spa_txg_history_addr(kstat_t *ksp, loff_t n)
}
/*
- * When the kstat is written discard all spa_txg_history_t entires. The
+ * When the kstat is written discard all spa_txg_history_t entries. The
* ssh->lock will be held until ksp->ks_ndata entries are processed.
*/
static int
@@ -474,7 +480,7 @@ spa_txg_history_set(spa_t *spa, uint64_t txg, txg_state_t completed_state,
/*
* Set txg IO stats.
*/
-int
+static int
spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
uint64_t nwritten, uint64_t reads, uint64_t writes, uint64_t ndirty)
{
@@ -503,6 +509,54 @@ spa_txg_history_set_io(spa_t *spa, uint64_t txg, uint64_t nread,
return (error);
}
+txg_stat_t *
+spa_txg_history_init_io(spa_t *spa, uint64_t txg, dsl_pool_t *dp)
+{
+ txg_stat_t *ts;
+
+ if (zfs_txg_history == 0)
+ return (NULL);
+
+ ts = kmem_alloc(sizeof (txg_stat_t), KM_SLEEP);
+
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
+ vdev_get_stats(spa->spa_root_vdev, &ts->vs1);
+ spa_config_exit(spa, SCL_ALL, FTAG);
+
+ ts->txg = txg;
+ ts->ndirty = dp->dp_dirty_pertxg[txg & TXG_MASK];
+
+ spa_txg_history_set(spa, txg, TXG_STATE_WAIT_FOR_SYNC, gethrtime());
+
+ return (ts);
+}
+
+void
+spa_txg_history_fini_io(spa_t *spa, txg_stat_t *ts)
+{
+ if (ts == NULL)
+ return;
+
+ if (zfs_txg_history == 0) {
+ kmem_free(ts, sizeof (txg_stat_t));
+ return;
+ }
+
+ spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
+ vdev_get_stats(spa->spa_root_vdev, &ts->vs2);
+ spa_config_exit(spa, SCL_ALL, FTAG);
+
+ spa_txg_history_set(spa, ts->txg, TXG_STATE_SYNCED, gethrtime());
+ spa_txg_history_set_io(spa, ts->txg,
+ ts->vs2.vs_bytes[ZIO_TYPE_READ] - ts->vs1.vs_bytes[ZIO_TYPE_READ],
+ ts->vs2.vs_bytes[ZIO_TYPE_WRITE] - ts->vs1.vs_bytes[ZIO_TYPE_WRITE],
+ ts->vs2.vs_ops[ZIO_TYPE_READ] - ts->vs1.vs_ops[ZIO_TYPE_READ],
+ ts->vs2.vs_ops[ZIO_TYPE_WRITE] - ts->vs1.vs_ops[ZIO_TYPE_WRITE],
+ ts->ndirty);
+
+ kmem_free(ts, sizeof (txg_stat_t));
+}
+
/*
* ==========================================================================
* SPA TX Assign Histogram Routines
@@ -600,7 +654,7 @@ spa_tx_assign_add_nsecs(spa_t *spa, uint64_t nsecs)
spa_stats_history_t *ssh = &spa->spa_stats.tx_assign_histogram;
uint64_t idx = 0;
- while (((1 << idx) < nsecs) && (idx < ssh->size - 1))
+ while (((1ULL << idx) < nsecs) && (idx < ssh->size - 1))
idx++;
atomic_inc_64(&((kstat_named_t *)ssh->private)[idx].value.ui64);
@@ -653,6 +707,198 @@ spa_io_history_destroy(spa_t *spa)
mutex_destroy(&ssh->lock);
}
+/*
+ * ==========================================================================
+ * SPA MMP History Routines
+ * ==========================================================================
+ */
+
+/*
+ * MMP statistics - Information exported regarding each MMP update
+ */
+
+typedef struct spa_mmp_history {
+ uint64_t txg; /* txg of last sync */
+ uint64_t timestamp; /* UTC time of last sync */
+ uint64_t mmp_delay; /* nanosec since last MMP write */
+ uint64_t vdev_guid; /* unique ID of leaf vdev */
+ char *vdev_path;
+ uint64_t vdev_label; /* vdev label */
+ list_node_t smh_link;
+} spa_mmp_history_t;
+
+static int
+spa_mmp_history_headers(char *buf, size_t size)
+{
+ (void) snprintf(buf, size, "%-10s %-10s %-12s %-24s %-10s %s\n",
+ "txg", "timestamp", "mmp_delay", "vdev_guid", "vdev_label",
+ "vdev_path");
+ return (0);
+}
+
+static int
+spa_mmp_history_data(char *buf, size_t size, void *data)
+{
+ spa_mmp_history_t *smh = (spa_mmp_history_t *)data;
+
+ (void) snprintf(buf, size, "%-10llu %-10llu %-12llu %-24llu %-10llu "
+ "%s\n",
+ (u_longlong_t)smh->txg, (u_longlong_t)smh->timestamp,
+ (u_longlong_t)smh->mmp_delay, (u_longlong_t)smh->vdev_guid,
+ (u_longlong_t)smh->vdev_label,
+ (smh->vdev_path ? smh->vdev_path : "-"));
+
+ return (0);
+}
+
+/*
+ * Calculate the address for the next spa_stats_history_t entry. The
+ * ssh->lock will be held until ksp->ks_ndata entries are processed.
+ */
+static void *
+spa_mmp_history_addr(kstat_t *ksp, loff_t n)
+{
+ spa_t *spa = ksp->ks_private;
+ spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
+
+ ASSERT(MUTEX_HELD(&ssh->lock));
+
+ if (n == 0)
+ ssh->private = list_tail(&ssh->list);
+ else if (ssh->private)
+ ssh->private = list_prev(&ssh->list, ssh->private);
+
+ return (ssh->private);
+}
+
+/*
+ * When the kstat is written discard all spa_mmp_history_t entries. The
+ * ssh->lock will be held until ksp->ks_ndata entries are processed.
+ */
+static int
+spa_mmp_history_update(kstat_t *ksp, int rw)
+{
+ spa_t *spa = ksp->ks_private;
+ spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
+
+ ASSERT(MUTEX_HELD(&ssh->lock));
+
+ if (rw == KSTAT_WRITE) {
+ spa_mmp_history_t *smh;
+
+ while ((smh = list_remove_head(&ssh->list))) {
+ ssh->size--;
+ if (smh->vdev_path)
+ strfree(smh->vdev_path);
+ kmem_free(smh, sizeof (spa_mmp_history_t));
+ }
+
+ ASSERT3U(ssh->size, ==, 0);
+ }
+
+ ksp->ks_ndata = ssh->size;
+ ksp->ks_data_size = ssh->size * sizeof (spa_mmp_history_t);
+
+ return (0);
+}
+
+static void
+spa_mmp_history_init(spa_t *spa)
+{
+ spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
+ char name[KSTAT_STRLEN];
+ kstat_t *ksp;
+
+ mutex_init(&ssh->lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&ssh->list, sizeof (spa_mmp_history_t),
+ offsetof(spa_mmp_history_t, smh_link));
+
+ ssh->count = 0;
+ ssh->size = 0;
+ ssh->private = NULL;
+
+ (void) snprintf(name, KSTAT_STRLEN, "zfs/%s", spa_name(spa));
+
+ ksp = kstat_create(name, 0, "multihost", "misc",
+ KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
+ ssh->kstat = ksp;
+
+ if (ksp) {
+ ksp->ks_lock = &ssh->lock;
+ ksp->ks_data = NULL;
+ ksp->ks_private = spa;
+ ksp->ks_update = spa_mmp_history_update;
+ kstat_set_raw_ops(ksp, spa_mmp_history_headers,
+ spa_mmp_history_data, spa_mmp_history_addr);
+ kstat_install(ksp);
+ }
+}
+
+static void
+spa_mmp_history_destroy(spa_t *spa)
+{
+ spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
+ spa_mmp_history_t *smh;
+ kstat_t *ksp;
+
+ ksp = ssh->kstat;
+ if (ksp)
+ kstat_delete(ksp);
+
+ mutex_enter(&ssh->lock);
+ while ((smh = list_remove_head(&ssh->list))) {
+ ssh->size--;
+ if (smh->vdev_path)
+ strfree(smh->vdev_path);
+ kmem_free(smh, sizeof (spa_mmp_history_t));
+ }
+
+ ASSERT3U(ssh->size, ==, 0);
+ list_destroy(&ssh->list);
+ mutex_exit(&ssh->lock);
+
+ mutex_destroy(&ssh->lock);
+}
+
+/*
+ * Add a new MMP update to historical record.
+ */
+void
+spa_mmp_history_add(uint64_t txg, uint64_t timestamp, uint64_t mmp_delay,
+ vdev_t *vd, int label)
+{
+ spa_t *spa = vd->vdev_spa;
+ spa_stats_history_t *ssh = &spa->spa_stats.mmp_history;
+ spa_mmp_history_t *smh, *rm;
+
+ if (zfs_multihost_history == 0 && ssh->size == 0)
+ return;
+
+ smh = kmem_zalloc(sizeof (spa_mmp_history_t), KM_SLEEP);
+ smh->txg = txg;
+ smh->timestamp = timestamp;
+ smh->mmp_delay = mmp_delay;
+ smh->vdev_guid = vd->vdev_guid;
+ if (vd->vdev_path)
+ smh->vdev_path = strdup(vd->vdev_path);
+ smh->vdev_label = label;
+
+ mutex_enter(&ssh->lock);
+
+ list_insert_head(&ssh->list, smh);
+ ssh->size++;
+
+ while (ssh->size > zfs_multihost_history) {
+ ssh->size--;
+ rm = list_remove_tail(&ssh->list);
+ if (rm->vdev_path)
+ strfree(rm->vdev_path);
+ kmem_free(rm, sizeof (spa_mmp_history_t));
+ }
+
+ mutex_exit(&ssh->lock);
+}
+
void
spa_stats_init(spa_t *spa)
{
@@ -660,6 +906,7 @@ spa_stats_init(spa_t *spa)
spa_txg_history_init(spa);
spa_tx_assign_init(spa);
spa_io_history_init(spa);
+ spa_mmp_history_init(spa);
}
void
@@ -669,15 +916,25 @@ spa_stats_destroy(spa_t *spa)
spa_txg_history_destroy(spa);
spa_read_history_destroy(spa);
spa_io_history_destroy(spa);
+ spa_mmp_history_destroy(spa);
}
#if defined(_KERNEL) && defined(HAVE_SPL)
+/* CSTYLED */
module_param(zfs_read_history, int, 0644);
-MODULE_PARM_DESC(zfs_read_history, "Historic statistics for the last N reads");
+MODULE_PARM_DESC(zfs_read_history,
+ "Historical statistics for the last N reads");
module_param(zfs_read_history_hits, int, 0644);
-MODULE_PARM_DESC(zfs_read_history_hits, "Include cache hits in read history");
+MODULE_PARM_DESC(zfs_read_history_hits,
+ "Include cache hits in read history");
module_param(zfs_txg_history, int, 0644);
-MODULE_PARM_DESC(zfs_txg_history, "Historic statistics for the last N txgs");
+MODULE_PARM_DESC(zfs_txg_history,
+ "Historical statistics for the last N txgs");
+
+module_param(zfs_multihost_history, int, 0644);
+MODULE_PARM_DESC(zfs_multihost_history,
+ "Historical statistics for last N multihost writes");
+/* END CSTYLED */
#endif
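spa_mmp_history_add() above maintains a bounded history: new records are inserted at the head and the oldest are trimmed from the tail while the list exceeds zfs_multihost_history. The same bounded-list discipline on a trivial singly linked list; node, add() and limit are stand-ins for the kstat plumbing:

#include <stdio.h>
#include <stdlib.h>

struct node {
    int val;
    struct node *next;
};

static struct node *head;
static int size;
static const int limit = 3;     /* stand-in for zfs_multihost_history */

static void
add(int val)
{
    struct node *n = malloc(sizeof (*n));

    n->val = val;
    n->next = head;             /* newest entry goes on the head */
    head = n;
    size++;

    while (size > limit) {      /* trim the oldest from the tail */
        struct node **tp = &head;

        while ((*tp)->next != NULL)
            tp = &(*tp)->next;
        free(*tp);
        *tp = NULL;
        size--;
    }
}

int
main(void)
{
    int i;
    struct node *n;

    for (i = 0; i < 5; i++)
        add(i);
    for (n = head; n != NULL; n = n->next)
        (void) printf("%d ", n->val);       /* prints "4 3 2" */
    (void) printf("\n");
    return (0);
}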
diff --git a/zfs/module/zfs/space_map.c b/zfs/module/zfs/space_map.c
index b3aa469bf45b..a8f88b6710fe 100644
--- a/zfs/module/zfs/space_map.c
+++ b/zfs/module/zfs/space_map.c
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -72,12 +72,12 @@ space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype)
}
bufsize = MAX(sm->sm_blksz, SPA_MINBLOCKSIZE);
- entry_map = zio_buf_alloc(bufsize);
+ entry_map = vmem_alloc(bufsize, KM_SLEEP);
mutex_exit(sm->sm_lock);
if (end > bufsize) {
- dmu_prefetch(sm->sm_os, space_map_object(sm), bufsize,
- end - bufsize);
+ dmu_prefetch(sm->sm_os, space_map_object(sm), 0, bufsize,
+ end - bufsize, ZIO_PRIORITY_SYNC_READ);
}
mutex_enter(sm->sm_lock);
@@ -128,7 +128,7 @@ space_map_load(space_map_t *sm, range_tree_t *rt, maptype_t maptype)
else
range_tree_vacate(rt, NULL, NULL);
- zio_buf_free(entry_map, bufsize);
+ vmem_free(entry_map, bufsize);
return (error);
}
@@ -173,7 +173,6 @@ space_map_histogram_add(space_map_t *sm, range_tree_t *rt, dmu_tx_t *tx)
dmu_buf_will_dirty(sm->sm_dbuf, tx);
ASSERT(space_map_histogram_verify(sm, rt));
-
/*
* Transfer the content of the range tree histogram to the space
* map histogram. The space map histogram contains 32 buckets ranging
@@ -272,7 +271,7 @@ space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
expected_entries = space_map_entries(sm, rt);
- entry_map = zio_buf_alloc(sm->sm_blksz);
+ entry_map = vmem_alloc(sm->sm_blksz, KM_SLEEP);
entry_map_end = entry_map + (sm->sm_blksz / sizeof (uint64_t));
entry = entry_map;
@@ -335,7 +334,7 @@ space_map_write(space_map_t *sm, range_tree_t *rt, maptype_t maptype,
VERIFY3U(range_tree_space(rt), ==, rt_space);
VERIFY3U(range_tree_space(rt), ==, total);
- zio_buf_free(entry_map, sm->sm_blksz);
+ vmem_free(entry_map, sm->sm_blksz);
}
static int
@@ -412,6 +411,7 @@ space_map_truncate(space_map_t *sm, dmu_tx_t *tx)
ASSERT(dsl_pool_sync_context(dmu_objset_pool(os)));
ASSERT(dmu_tx_is_syncing(tx));
+ VERIFY3U(dmu_tx_get_txg(tx), <=, spa_final_dirty_txg(spa));
dmu_object_info_from_db(sm->sm_dbuf, &doi);
@@ -426,9 +426,10 @@ space_map_truncate(space_map_t *sm, dmu_tx_t *tx)
if ((spa_feature_is_enabled(spa, SPA_FEATURE_SPACEMAP_HISTOGRAM) &&
doi.doi_bonus_size != sizeof (space_map_phys_t)) ||
doi.doi_data_block_size != space_map_blksz) {
- zfs_dbgmsg("txg %llu, spa %s, reallocating: "
- "old bonus %llu, old blocksz %u", dmu_tx_get_txg(tx),
- spa_name(spa), doi.doi_bonus_size, doi.doi_data_block_size);
+ zfs_dbgmsg("txg %llu, spa %s, sm %p, reallocating "
+ "object[%llu]: old bonus %u, old blocksz %u",
+ dmu_tx_get_txg(tx), spa_name(spa), sm, sm->sm_object,
+ doi.doi_bonus_size, doi.doi_data_block_size);
space_map_free(sm, tx);
dmu_buf_rele(sm->sm_dbuf, sm);
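space_map_load() reads the on-disk map in bufsize chunks and, when the map is larger than one buffer, issues a single prefetch for the remainder (offset bufsize through end) before the read loop begins. The access pattern only, with read_chunk()/prefetch() as stand-ins for the DMU calls:

#include <stdio.h>
#include <stdint.h>

static void
prefetch(uint64_t off, uint64_t len)
{
    (void) printf("prefetch [%llu, %llu)\n", (unsigned long long)off,
        (unsigned long long)(off + len));
}

static void
read_chunk(uint64_t off, uint64_t len)
{
    (void) printf("read     [%llu, %llu)\n", (unsigned long long)off,
        (unsigned long long)(off + len));
}

int
main(void)
{
    uint64_t end = 40960, bufsize = 4096, off;

    if (end > bufsize)                      /* warm everything past buf 0 */
        prefetch(bufsize, end - bufsize);
    for (off = 0; off < end; off += bufsize)
        read_chunk(off, end - off < bufsize ? end - off : bufsize);
    return (0);
}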
diff --git a/zfs/module/zfs/space_reftree.c b/zfs/module/zfs/space_reftree.c
index a508092c530e..038572b08873 100644
--- a/zfs/module/zfs/space_reftree.c
+++ b/zfs/module/zfs/space_reftree.c
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -54,20 +54,14 @@
static int
space_reftree_compare(const void *x1, const void *x2)
{
- const space_ref_t *sr1 = x1;
- const space_ref_t *sr2 = x2;
+ const space_ref_t *sr1 = (const space_ref_t *)x1;
+ const space_ref_t *sr2 = (const space_ref_t *)x2;
- if (sr1->sr_offset < sr2->sr_offset)
- return (-1);
- if (sr1->sr_offset > sr2->sr_offset)
- return (1);
+ int cmp = AVL_CMP(sr1->sr_offset, sr2->sr_offset);
+ if (likely(cmp))
+ return (cmp);
- if (sr1 < sr2)
- return (-1);
- if (sr1 > sr2)
- return (1);
-
- return (0);
+ return (AVL_PCMP(sr1, sr2));
}
void
@@ -103,7 +97,7 @@ space_reftree_add_node(avl_tree_t *t, uint64_t offset, int64_t refcnt)
void
space_reftree_add_seg(avl_tree_t *t, uint64_t start, uint64_t end,
- int64_t refcnt)
+ int64_t refcnt)
{
space_reftree_add_node(t, start, refcnt);
space_reftree_add_node(t, end, -refcnt);
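This comparator, like several others in the patch, collapses to the AVL_CMP()/AVL_PCMP() helpers: a branch-light three-way compare, with AVL_PCMP() breaking ties on object address so equal keys still get a total order in the tree. Roughly what those macros reduce to; CMP here is a local stand-in:

#include <stdio.h>
#include <stdint.h>

#define CMP(a, b)   (((a) > (b)) - ((a) < (b)))     /* three-way compare */

struct ref {
    uint64_t offset;
};

static int
ref_compare(const void *x1, const void *x2)
{
    const struct ref *r1 = x1;
    const struct ref *r2 = x2;
    int cmp = CMP(r1->offset, r2->offset);

    if (cmp != 0)
        return (cmp);
    /* Tie-break on address, like AVL_PCMP(), for a total order. */
    return (CMP((uintptr_t)r1, (uintptr_t)r2));
}

int
main(void)
{
    struct ref a = { 10 }, b = { 20 };

    (void) printf("%d\n", ref_compare(&a, &b));     /* prints -1 */
    return (0);
}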
diff --git a/zfs/module/zfs/trace.c b/zfs/module/zfs/trace.c
index 0c9990e8547b..e4ebf31b3fbe 100644
--- a/zfs/module/zfs/trace.c
+++ b/zfs/module/zfs/trace.c
@@ -47,4 +47,5 @@
#include <sys/trace_multilist.h>
#include <sys/trace_txg.h>
#include <sys/trace_zil.h>
+#include <sys/trace_zio.h>
#include <sys/trace_zrlock.h>
diff --git a/zfs/module/zfs/txg.c b/zfs/module/zfs/txg.c
index 1d5ee97b1368..65bd7f93acdf 100644
--- a/zfs/module/zfs/txg.c
+++ b/zfs/module/zfs/txg.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright 2011 Martin Matuska
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -31,6 +31,7 @@
#include <sys/dmu_tx.h>
#include <sys/dsl_pool.h>
#include <sys/dsl_scan.h>
+#include <sys/zil.h>
#include <sys/callb.h>
#include <sys/trace_txg.h>
@@ -128,7 +129,7 @@ txg_init(dsl_pool_t *dp, uint64_t txg)
int i;
mutex_init(&tx->tx_cpu[c].tc_lock, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&tx->tx_cpu[c].tc_open_lock, NULL, MUTEX_DEFAULT,
+ mutex_init(&tx->tx_cpu[c].tc_open_lock, NULL, MUTEX_NOLOCKDEP,
NULL);
for (i = 0; i < TXG_SIZE; i++) {
cv_init(&tx->tx_cpu[c].tc_cv[i], NULL, CV_DEFAULT,
@@ -212,7 +213,7 @@ txg_sync_start(dsl_pool_t *dp)
* 32-bit x86. This is due in part to nested pools and
* scrub_visitbp() recursion.
*/
- tx->tx_sync_thread = thread_create(NULL, 32<<10, txg_sync_thread,
+ tx->tx_sync_thread = thread_create(NULL, 0, txg_sync_thread,
dp, 0, &p0, TS_RUN, defclsyspri);
mutex_exit(&tx->tx_sync_lock);
@@ -365,6 +366,7 @@ static void
txg_quiesce(dsl_pool_t *dp, uint64_t txg)
{
tx_state_t *tx = &dp->dp_tx;
+ uint64_t tx_open_time;
int g = txg & TXG_MASK;
int c;
@@ -376,10 +378,7 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg)
ASSERT(txg == tx->tx_open_txg);
tx->tx_open_txg++;
- tx->tx_open_time = gethrtime();
-
- spa_txg_history_set(dp->dp_spa, txg, TXG_STATE_OPEN, tx->tx_open_time);
- spa_txg_history_add(dp->dp_spa, tx->tx_open_txg, tx->tx_open_time);
+ tx->tx_open_time = tx_open_time = gethrtime();
DTRACE_PROBE2(txg__quiescing, dsl_pool_t *, dp, uint64_t, txg);
DTRACE_PROBE2(txg__opened, dsl_pool_t *, dp, uint64_t, tx->tx_open_txg);
@@ -391,6 +390,9 @@ txg_quiesce(dsl_pool_t *dp, uint64_t txg)
for (c = 0; c < max_ncpus; c++)
mutex_exit(&tx->tx_cpu[c].tc_open_lock);
+ spa_txg_history_set(dp->dp_spa, txg, TXG_STATE_OPEN, tx_open_time);
+ spa_txg_history_add(dp->dp_spa, txg + 1, tx_open_time);
+
/*
* Quiesce the transaction group by waiting for everyone to txg_exit().
*/
@@ -480,22 +482,17 @@ txg_sync_thread(dsl_pool_t *dp)
spa_t *spa = dp->dp_spa;
tx_state_t *tx = &dp->dp_tx;
callb_cpr_t cpr;
- vdev_stat_t *vs1, *vs2;
clock_t start, delta;
(void) spl_fstrans_mark();
txg_thread_enter(tx, &cpr);
- vs1 = kmem_alloc(sizeof (vdev_stat_t), KM_SLEEP);
- vs2 = kmem_alloc(sizeof (vdev_stat_t), KM_SLEEP);
-
start = delta = 0;
for (;;) {
- clock_t timer, timeout;
+ clock_t timeout = zfs_txg_timeout * hz;
+ clock_t timer;
uint64_t txg;
- uint64_t ndirty;
-
- timeout = zfs_txg_timeout * hz;
+ txg_stat_t *ts;
/*
* We sync when we're scanning, there's someone waiting
@@ -526,15 +523,8 @@ txg_sync_thread(dsl_pool_t *dp)
txg_thread_wait(tx, &cpr, &tx->tx_quiesce_done_cv, 0);
}
- if (tx->tx_exiting) {
- kmem_free(vs2, sizeof (vdev_stat_t));
- kmem_free(vs1, sizeof (vdev_stat_t));
+ if (tx->tx_exiting)
txg_thread_exit(tx, &cpr, &tx->tx_sync_thread);
- }
-
- spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
- vdev_get_stats(spa->spa_root_vdev, vs1);
- spa_config_exit(spa, SCL_ALL, FTAG);
/*
* Consume the quiesced txg which has been handed off to
@@ -545,16 +535,13 @@ txg_sync_thread(dsl_pool_t *dp)
tx->tx_quiesced_txg = 0;
tx->tx_syncing_txg = txg;
DTRACE_PROBE2(txg__syncing, dsl_pool_t *, dp, uint64_t, txg);
+ ts = spa_txg_history_init_io(spa, txg, dp);
cv_broadcast(&tx->tx_quiesce_more_cv);
dprintf("txg=%llu quiesce_txg=%llu sync_txg=%llu\n",
txg, tx->tx_quiesce_txg_waiting, tx->tx_sync_txg_waiting);
mutex_exit(&tx->tx_sync_lock);
- spa_txg_history_set(spa, txg, TXG_STATE_WAIT_FOR_SYNC,
- gethrtime());
- ndirty = dp->dp_dirty_pertxg[txg & TXG_MASK];
-
start = ddi_get_lbolt();
spa_sync(spa, txg);
delta = ddi_get_lbolt() - start;
@@ -563,23 +550,13 @@ txg_sync_thread(dsl_pool_t *dp)
tx->tx_synced_txg = txg;
tx->tx_syncing_txg = 0;
DTRACE_PROBE2(txg__synced, dsl_pool_t *, dp, uint64_t, txg);
+ spa_txg_history_fini_io(spa, ts);
cv_broadcast(&tx->tx_sync_done_cv);
/*
* Dispatch commit callbacks to worker threads.
*/
txg_dispatch_callbacks(dp, txg);
-
- spa_config_enter(spa, SCL_ALL, FTAG, RW_READER);
- vdev_get_stats(spa->spa_root_vdev, vs2);
- spa_config_exit(spa, SCL_ALL, FTAG);
- spa_txg_history_set_io(spa, txg,
- vs2->vs_bytes[ZIO_TYPE_READ]-vs1->vs_bytes[ZIO_TYPE_READ],
- vs2->vs_bytes[ZIO_TYPE_WRITE]-vs1->vs_bytes[ZIO_TYPE_WRITE],
- vs2->vs_ops[ZIO_TYPE_READ]-vs1->vs_ops[ZIO_TYPE_READ],
- vs2->vs_ops[ZIO_TYPE_WRITE]-vs1->vs_ops[ZIO_TYPE_WRITE],
- ndirty);
- spa_txg_history_set(spa, txg, TXG_STATE_SYNCED, gethrtime());
}
}
@@ -746,17 +723,33 @@ txg_sync_waiting(dsl_pool_t *dp)
tx->tx_quiesced_txg != 0);
}
+/*
+ * Verify that this txg is active (open, quiescing, syncing). Non-active
+ * txgs should not be manipulated.
+ */
+void
+txg_verify(spa_t *spa, uint64_t txg)
+{
+ ASSERTV(dsl_pool_t *dp = spa_get_dsl(spa));
+ if (txg <= TXG_INITIAL || txg == ZILTEST_TXG)
+ return;
+ ASSERT3U(txg, <=, dp->dp_tx.tx_open_txg);
+ ASSERT3U(txg, >=, dp->dp_tx.tx_synced_txg);
+ ASSERT3U(txg, >=, dp->dp_tx.tx_open_txg - TXG_CONCURRENT_STATES);
+}
+
/*
* Per-txg object lists.
*/
void
-txg_list_create(txg_list_t *tl, size_t offset)
+txg_list_create(txg_list_t *tl, spa_t *spa, size_t offset)
{
int t;
mutex_init(&tl->tl_lock, NULL, MUTEX_DEFAULT, NULL);
tl->tl_offset = offset;
+ tl->tl_spa = spa;
for (t = 0; t < TXG_SIZE; t++)
tl->tl_head[t] = NULL;
@@ -776,6 +769,7 @@ txg_list_destroy(txg_list_t *tl)
boolean_t
txg_list_empty(txg_list_t *tl, uint64_t txg)
{
+ txg_verify(tl->tl_spa, txg);
return (tl->tl_head[txg & TXG_MASK] == NULL);
}
@@ -810,6 +804,7 @@ txg_list_add(txg_list_t *tl, void *p, uint64_t txg)
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
boolean_t add;
+ txg_verify(tl->tl_spa, txg);
mutex_enter(&tl->tl_lock);
add = (tn->tn_member[t] == 0);
if (add) {
@@ -834,6 +829,7 @@ txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg)
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
boolean_t add;
+ txg_verify(tl->tl_spa, txg);
mutex_enter(&tl->tl_lock);
add = (tn->tn_member[t] == 0);
if (add) {
@@ -861,6 +857,7 @@ txg_list_remove(txg_list_t *tl, uint64_t txg)
txg_node_t *tn;
void *p = NULL;
+ txg_verify(tl->tl_spa, txg);
mutex_enter(&tl->tl_lock);
if ((tn = tl->tl_head[t]) != NULL) {
p = (char *)tn - tl->tl_offset;
@@ -882,6 +879,7 @@ txg_list_remove_this(txg_list_t *tl, void *p, uint64_t txg)
int t = txg & TXG_MASK;
txg_node_t *tn, **tp;
+ txg_verify(tl->tl_spa, txg);
mutex_enter(&tl->tl_lock);
for (tp = &tl->tl_head[t]; (tn = *tp) != NULL; tp = &tn->tn_next[t]) {
@@ -905,6 +903,7 @@ txg_list_member(txg_list_t *tl, void *p, uint64_t txg)
int t = txg & TXG_MASK;
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
+ txg_verify(tl->tl_spa, txg);
return (tn->tn_member[t] != 0);
}
@@ -917,6 +916,7 @@ txg_list_head(txg_list_t *tl, uint64_t txg)
int t = txg & TXG_MASK;
txg_node_t *tn = tl->tl_head[t];
+ txg_verify(tl->tl_spa, txg);
return (tn == NULL ? NULL : (char *)tn - tl->tl_offset);
}
@@ -926,6 +926,7 @@ txg_list_next(txg_list_t *tl, void *p, uint64_t txg)
int t = txg & TXG_MASK;
txg_node_t *tn = (txg_node_t *)((char *)p + tl->tl_offset);
+ txg_verify(tl->tl_spa, txg);
tn = tn->tn_next[t];
return (tn == NULL ? NULL : (char *)tn - tl->tl_offset);
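txg_verify() pins any txg handed to the txg_list functions to the active window: no newer than the open txg, no older than the last synced txg, and within TXG_CONCURRENT_STATES (open/quiescing/syncing) of the open txg. The invariant restated as a standalone check; verify_active() and its arguments are stand-ins for the tx_state_t fields:

#include <assert.h>
#include <stdint.h>

#define TXG_CONCURRENT_STATES   3   /* open, quiescing, syncing */

static void
verify_active(uint64_t txg, uint64_t tx_open, uint64_t tx_synced)
{
    assert(txg <= tx_open);                         /* not from the future */
    assert(txg >= tx_synced);                       /* not older than synced */
    assert(txg >= tx_open - TXG_CONCURRENT_STATES); /* inside the window */
}

int
main(void)
{
    verify_active(100, 101, 99);    /* e.g. the syncing txg */
    return (0);
}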
diff --git a/zfs/module/zfs/uberblock.c b/zfs/module/zfs/uberblock.c
index f8bdecdf5749..c1e85bdce760 100644
--- a/zfs/module/zfs/uberblock.c
+++ b/zfs/module/zfs/uberblock.c
@@ -44,7 +44,7 @@ uberblock_verify(uberblock_t *ub)
* transaction group.
*/
boolean_t
-uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg)
+uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg, uint64_t mmp_delay)
{
ASSERT(ub->ub_txg < txg);
@@ -57,6 +57,9 @@ uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg)
ub->ub_guid_sum = rvd->vdev_guid_sum;
ub->ub_timestamp = gethrestime_sec();
ub->ub_software_version = SPA_VERSION;
+ ub->ub_mmp_magic = MMP_MAGIC;
+ ub->ub_mmp_delay = spa_multihost(rvd->vdev_spa) ? mmp_delay : 0;
+ ub->ub_mmp_seq = 0;
return (ub->ub_rootbp.blk_birth == txg);
}
diff --git a/zfs/module/zfs/unique.c b/zfs/module/zfs/unique.c
index 8c1d2e2f9856..5cdd025f49bc 100644
--- a/zfs/module/zfs/unique.c
+++ b/zfs/module/zfs/unique.c
@@ -42,14 +42,10 @@ typedef struct unique {
static int
unique_compare(const void *a, const void *b)
{
- const unique_t *una = a;
- const unique_t *unb = b;
-
- if (una->un_value < unb->un_value)
- return (-1);
- if (una->un_value > unb->un_value)
- return (+1);
- return (0);
+ const unique_t *una = (const unique_t *)a;
+ const unique_t *unb = (const unique_t *)b;
+
+ return (AVL_CMP(una->un_value, unb->un_value));
}
void
diff --git a/zfs/module/zfs/vdev.c b/zfs/module/zfs/vdev.c
index 8095b6177b27..df07d893dba2 100644
--- a/zfs/module/zfs/vdev.c
+++ b/zfs/module/zfs/vdev.c
@@ -21,8 +21,11 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2016 Toomas Soome <tsoome at me.com>
+ * Copyright 2017 Joyent, Inc.
*/
#include <sys/zfs_context.h>
@@ -43,7 +46,9 @@
#include <sys/arc.h>
#include <sys/zil.h>
#include <sys/dsl_scan.h>
+#include <sys/abd.h>
#include <sys/zvol.h>
+#include <sys/zfs_ratelimit.h>
/*
* When a vdev is added, it will be divided into approximately (but no
@@ -132,7 +137,8 @@ vdev_get_min_asize(vdev_t *vd)
* so each child must provide at least 1/Nth of its asize.
*/
if (pvd->vdev_ops == &vdev_raidz_ops)
- return (pvd->vdev_min_asize / pvd->vdev_children);
+ return ((pvd->vdev_min_asize + pvd->vdev_children - 1) /
+ pvd->vdev_children);
return (pvd->vdev_min_asize);
}
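The raidz branch switches from floor to ceiling division: with floor, N children could each satisfy their individual minimum while their sum still fell short of the parent's min_asize. A two-line arithmetic check of the fix:

#include <assert.h>
#include <stdint.h>

static uint64_t
child_min_asize(uint64_t parent_min_asize, uint64_t children)
{
	/* Round-up division, as in the hunk above. */
	return ((parent_min_asize + children - 1) / children);
}

int
main(void)
{
	assert(10 / 3 * 3 < 10);			/* old floor: 3+3+3 < 10 */
	assert(child_min_asize(10, 3) * 3 >= 10);	/* new ceiling: 4+4+4 */
	return (0);
}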
@@ -346,18 +352,28 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
vd->vdev_state = VDEV_STATE_CLOSED;
vd->vdev_ishole = (ops == &vdev_hole_ops);
+ /*
+ * Initialize rate limit structs for events. We rate limit ZIO delay
+ * and checksum events so that we don't overwhelm ZED with thousands
+ * of events when a disk is acting up.
+ */
+ zfs_ratelimit_init(&vd->vdev_delay_rl, DELAYS_PER_SECOND, 1);
+ zfs_ratelimit_init(&vd->vdev_checksum_rl, CHECKSUMS_PER_SECOND, 1);
+
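zfs_ratelimit_init() arms a burst-per-interval limiter for each event class (here DELAYS_PER_SECOND and CHECKSUMS_PER_SECOND events per one-second window). A userspace sketch of that shape; the type, fields, and policy below are illustrative, not the kernel implementation:

#include <stdio.h>
#include <time.h>

typedef struct demo_ratelimit {
	unsigned int burst;	/* events allowed per interval */
	unsigned int count;	/* events seen this interval */
	time_t interval;	/* interval length in seconds */
	time_t start;		/* start of the current interval */
} demo_ratelimit_t;

static int
demo_ratelimit_allow(demo_ratelimit_t *rl)
{
	time_t now = time(NULL);

	if (now - rl->start >= rl->interval) {
		rl->start = now;	/* new interval: reset the budget */
		rl->count = 0;
	}
	return (rl->count++ < rl->burst);
}

int
main(void)
{
	demo_ratelimit_t rl = {
		.burst = 5, .interval = 1, .start = time(NULL)
	};
	int i, sent = 0;

	for (i = 0; i < 1000; i++)	/* a misbehaving disk's event storm */
		sent += demo_ratelimit_allow(&rl);
	printf("delivered %d of 1000 events\n", sent);
	return (0);
}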
list_link_init(&vd->vdev_config_dirty_node);
list_link_init(&vd->vdev_state_dirty_node);
- mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_NOLOCKDEP, NULL);
mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&vd->vdev_probe_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&vd->vdev_queue_lock, NULL, MUTEX_DEFAULT, NULL);
+
for (t = 0; t < DTL_TYPES; t++) {
vd->vdev_dtl[t] = range_tree_create(NULL, NULL,
&vd->vdev_dtl_lock);
}
- txg_list_create(&vd->vdev_ms_list,
+ txg_list_create(&vd->vdev_ms_list, spa,
offsetof(struct metaslab, ms_txg_node));
- txg_list_create(&vd->vdev_dtl_list,
+ txg_list_create(&vd->vdev_dtl_list, spa,
offsetof(struct vdev, vdev_dtl_node));
vd->vdev_stat.vs_timestamp = gethrtime();
vdev_queue_init(vd);
@@ -379,6 +395,8 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
char *type;
uint64_t guid = 0, islog, nparity;
vdev_t *vd;
+ char *tmp = NULL;
+ int rc;
ASSERT(spa_config_held(spa, SCL_ALL, RW_WRITER) == SCL_ALL);
@@ -472,11 +490,29 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vd->vdev_path) == 0)
vd->vdev_path = spa_strdup(vd->vdev_path);
+
+ /*
+ * ZPOOL_CONFIG_AUX_STATE = "external" means we previously forced a
+ * fault on a vdev and want it to persist across imports (like with
+ * zpool offline -f).
+ */
+ rc = nvlist_lookup_string(nv, ZPOOL_CONFIG_AUX_STATE, &tmp);
+ if (rc == 0 && tmp != NULL && strcmp(tmp, "external") == 0) {
+ vd->vdev_stat.vs_aux = VDEV_AUX_EXTERNAL;
+ vd->vdev_faulted = 1;
+ vd->vdev_label_aux = VDEV_AUX_EXTERNAL;
+ }
+
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_DEVID, &vd->vdev_devid) == 0)
vd->vdev_devid = spa_strdup(vd->vdev_devid);
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PHYS_PATH,
&vd->vdev_physpath) == 0)
vd->vdev_physpath = spa_strdup(vd->vdev_physpath);
+
+ if (nvlist_lookup_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
+ &vd->vdev_enc_sysfs_path) == 0)
+ vd->vdev_enc_sysfs_path = spa_strdup(vd->vdev_enc_sysfs_path);
+
if (nvlist_lookup_string(nv, ZPOOL_CONFIG_FRU, &vd->vdev_fru) == 0)
vd->vdev_fru = spa_strdup(vd->vdev_fru);
@@ -519,6 +555,10 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
&vd->vdev_asize);
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_REMOVING,
&vd->vdev_removing);
+ (void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_VDEV_TOP_ZAP,
+ &vd->vdev_top_zap);
+ } else {
+ ASSERT0(vd->vdev_top_zap);
}
if (parent && !parent->vdev_parent && alloctype != VDEV_ALLOC_ATTACH) {
@@ -530,9 +570,18 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
spa_log_class(spa) : spa_normal_class(spa), vd);
}
+ if (vd->vdev_ops->vdev_op_leaf &&
+ (alloctype == VDEV_ALLOC_LOAD || alloctype == VDEV_ALLOC_SPLIT)) {
+ (void) nvlist_lookup_uint64(nv,
+ ZPOOL_CONFIG_VDEV_LEAF_ZAP, &vd->vdev_leaf_zap);
+ } else {
+ ASSERT0(vd->vdev_leaf_zap);
+ }
+
/*
* If we're a leaf vdev, try to load the DTL object and other state.
*/
+
if (vd->vdev_ops->vdev_op_leaf &&
(alloctype == VDEV_ALLOC_LOAD || alloctype == VDEV_ALLOC_L2CACHE ||
alloctype == VDEV_ALLOC_ROOTPOOL)) {
@@ -558,12 +607,17 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
&vd->vdev_resilver_txg);
/*
- * When importing a pool, we want to ignore the persistent fault
- * state, as the diagnosis made on another system may not be
- * valid in the current context. Local vdevs will
- * remain in the faulted state.
+ * In general, when importing a pool we want to ignore the
+ * persistent fault state, as the diagnosis made on another
+ * system may not be valid in the current context. The only
+ * exception is if we forced a vdev to a persistently faulted
+ * state with 'zpool offline -f'. The persistent fault will
+ * remain across imports until cleared.
+ *
+ * Local vdevs will remain in the faulted state.
*/
- if (spa_load_state(spa) == SPA_LOAD_OPEN) {
+ if (spa_load_state(spa) == SPA_LOAD_OPEN ||
+ spa_load_state(spa) == SPA_LOAD_IMPORT) {
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_FAULTED,
&vd->vdev_faulted);
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_DEGRADED,
@@ -649,6 +703,10 @@ vdev_free(vdev_t *vd)
spa_strfree(vd->vdev_devid);
if (vd->vdev_physpath)
spa_strfree(vd->vdev_physpath);
+
+ if (vd->vdev_enc_sysfs_path)
+ spa_strfree(vd->vdev_enc_sysfs_path);
+
if (vd->vdev_fru)
spa_strfree(vd->vdev_fru);
@@ -668,10 +726,14 @@ vdev_free(vdev_t *vd)
}
mutex_exit(&vd->vdev_dtl_lock);
+ mutex_destroy(&vd->vdev_queue_lock);
mutex_destroy(&vd->vdev_dtl_lock);
mutex_destroy(&vd->vdev_stat_lock);
mutex_destroy(&vd->vdev_probe_lock);
+ zfs_ratelimit_fini(&vd->vdev_delay_rl);
+ zfs_ratelimit_fini(&vd->vdev_checksum_rl);
+
if (vd == spa->spa_root_vdev)
spa->spa_root_vdev = NULL;
@@ -691,13 +753,16 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
ASSERT(tvd == tvd->vdev_top);
+ tvd->vdev_pending_fastwrite = svd->vdev_pending_fastwrite;
tvd->vdev_ms_array = svd->vdev_ms_array;
tvd->vdev_ms_shift = svd->vdev_ms_shift;
tvd->vdev_ms_count = svd->vdev_ms_count;
+ tvd->vdev_top_zap = svd->vdev_top_zap;
svd->vdev_ms_array = 0;
svd->vdev_ms_shift = 0;
svd->vdev_ms_count = 0;
+ svd->vdev_top_zap = 0;
if (tvd->vdev_mg)
ASSERT3P(tvd->vdev_mg, ==, svd->vdev_mg);
@@ -962,18 +1027,19 @@ vdev_probe_done(zio_t *zio)
vps->vps_readable = 1;
if (zio->io_error == 0 && spa_writeable(spa)) {
zio_nowait(zio_write_phys(vd->vdev_probe_zio, vd,
- zio->io_offset, zio->io_size, zio->io_data,
+ zio->io_offset, zio->io_size, zio->io_abd,
ZIO_CHECKSUM_OFF, vdev_probe_done, vps,
ZIO_PRIORITY_SYNC_WRITE, vps->vps_flags, B_TRUE));
} else {
- zio_buf_free(zio->io_data, zio->io_size);
+ abd_free(zio->io_abd);
}
} else if (zio->io_type == ZIO_TYPE_WRITE) {
if (zio->io_error == 0)
vps->vps_writeable = 1;
- zio_buf_free(zio->io_data, zio->io_size);
+ abd_free(zio->io_abd);
} else if (zio->io_type == ZIO_TYPE_NULL) {
zio_t *pio;
+ zio_link_t *zl;
vd->vdev_cant_read |= !vps->vps_readable;
vd->vdev_cant_write |= !vps->vps_writeable;
@@ -993,7 +1059,8 @@ vdev_probe_done(zio_t *zio)
vd->vdev_probe_zio = NULL;
mutex_exit(&vd->vdev_probe_lock);
- while ((pio = zio_walk_parents(zio)) != NULL)
+ zl = NULL;
+ while ((pio = zio_walk_parents(zio, &zl)) != NULL)
if (!vdev_accessible(vd, pio))
pio->io_error = SET_ERROR(ENXIO);
@@ -1087,8 +1154,8 @@ vdev_probe(vdev_t *vd, zio_t *zio)
for (l = 1; l < VDEV_LABELS; l++) {
zio_nowait(zio_read_phys(pio, vd,
vdev_label_offset(vd->vdev_psize, l,
- offsetof(vdev_label_t, vl_pad2)),
- VDEV_PAD_SIZE, zio_buf_alloc(VDEV_PAD_SIZE),
+ offsetof(vdev_label_t, vl_pad2)), VDEV_PAD_SIZE,
+ abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE),
ZIO_CHECKSUM_OFF, vdev_probe_done, vps,
ZIO_PRIORITY_SYNC_READ, vps->vps_flags, B_TRUE));
}
@@ -1108,7 +1175,6 @@ vdev_open_child(void *arg)
vd->vdev_open_thread = curthread;
vd->vdev_open_error = vdev_open(vd);
vd->vdev_open_thread = NULL;
- vd->vdev_parent->vdev_nonrot &= vd->vdev_nonrot;
}
static boolean_t
@@ -1135,29 +1201,30 @@ vdev_open_children(vdev_t *vd)
int children = vd->vdev_children;
int c;
- vd->vdev_nonrot = B_TRUE;
-
/*
* in order to handle pools on top of zvols, do the opens
* in a single thread so that the same thread holds the
* spa_namespace_lock
*/
if (vdev_uses_zvols(vd)) {
- for (c = 0; c < children; c++) {
+retry_sync:
+ for (c = 0; c < children; c++)
vd->vdev_child[c]->vdev_open_error =
vdev_open(vd->vdev_child[c]);
- vd->vdev_nonrot &= vd->vdev_child[c]->vdev_nonrot;
- }
- return;
- }
- tq = taskq_create("vdev_open", children, minclsyspri,
- children, children, TASKQ_PREPOPULATE);
+ } else {
+ tq = taskq_create("vdev_open", children, minclsyspri,
+ children, children, TASKQ_PREPOPULATE);
+ if (tq == NULL)
+ goto retry_sync;
- for (c = 0; c < children; c++)
- VERIFY(taskq_dispatch(tq, vdev_open_child, vd->vdev_child[c],
- TQ_SLEEP) != 0);
+ for (c = 0; c < children; c++)
+ VERIFY(taskq_dispatch(tq, vdev_open_child,
+ vd->vdev_child[c], TQ_SLEEP) != TASKQID_INVALID);
- taskq_destroy(tq);
+ taskq_destroy(tq);
+ }
+
+ vd->vdev_nonrot = B_TRUE;
for (c = 0; c < children; c++)
vd->vdev_nonrot &= vd->vdev_child[c]->vdev_nonrot;
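The restructured vdev_open_children() stops assuming taskq_create() succeeds: a NULL taskq now falls back to the synchronous retry_sync path, and the vdev_nonrot roll-up runs once, after either path, so it always sees opened children. A pthread sketch of the dispatch-or-fall-back shape (compile with -lpthread):

#include <pthread.h>
#include <stdio.h>

#define CHILDREN 4

static void *
open_child(void *arg)
{
	printf("opened child %d\n", *(int *)arg);
	return (NULL);
}

int
main(void)
{
	pthread_t tids[CHILDREN];
	int ids[CHILDREN], created[CHILDREN];
	int c;

	for (c = 0; c < CHILDREN; c++) {
		ids[c] = c;
		created[c] = (pthread_create(&tids[c], NULL,
		    open_child, &ids[c]) == 0);
		if (!created[c])	/* like tq == NULL: open in line */
			(void) open_child(&ids[c]);
	}
	for (c = 0; c < CHILDREN; c++) {
		if (created[c])
			pthread_join(tids[c], NULL);
	}
	return (0);
}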
@@ -1287,10 +1354,17 @@ vdev_open(vdev_t *vd)
max_asize = max_osize;
}
+ /*
+ * If the vdev was expanded, record this so that we can re-create the
+ * uberblock rings in labels {2,3} during the next sync.
+ */
+ if ((psize > vd->vdev_psize) && (vd->vdev_psize != 0))
+ vd->vdev_copy_uberblocks = B_TRUE;
+
vd->vdev_psize = psize;
/*
- * Make sure the allocatable size hasn't shrunk.
+ * Make sure the allocatable size hasn't shrunk too much.
*/
if (asize < vd->vdev_min_asize) {
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
@@ -1305,8 +1379,15 @@ vdev_open(vdev_t *vd)
*/
vd->vdev_asize = asize;
vd->vdev_max_asize = max_asize;
- if (vd->vdev_ashift == 0)
- vd->vdev_ashift = ashift;
+ if (vd->vdev_ashift == 0) {
+ vd->vdev_ashift = ashift; /* use detected value */
+ }
+ if (vd->vdev_ashift != 0 && (vd->vdev_ashift < ASHIFT_MIN ||
+ vd->vdev_ashift > ASHIFT_MAX)) {
+ vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
+ VDEV_AUX_BAD_ASHIFT);
+ return (SET_ERROR(EDOM));
+ }
} else {
/*
* Detect if the alignment requirement has increased.
@@ -1323,12 +1404,21 @@ vdev_open(vdev_t *vd)
}
/*
- * If all children are healthy and the asize has increased,
- * then we've experienced dynamic LUN growth. If automatic
- * expansion is enabled then use the additional space.
+ * If all children are healthy, we update asize if either:
+ * The asize has increased, due to a device expansion caused by dynamic
+ * LUN growth or vdev replacement, and automatic expansion is enabled,
+ * making the additional space available.
+ *
+ * The asize has decreased, due to a device shrink usually caused by a
+ * vdev replace with a smaller device. This ensures that calculations
+ * based on max_asize and asize (e.g. esize) are always valid. It's safe
+ * to do this as we've already validated that asize is greater than
+ * vdev_min_asize.
*/
- if (vd->vdev_state == VDEV_STATE_HEALTHY && asize > vd->vdev_asize &&
- (vd->vdev_expanding || spa->spa_autoexpand))
+ if (vd->vdev_state == VDEV_STATE_HEALTHY &&
+ ((asize > vd->vdev_asize &&
+ (vd->vdev_expanding || spa->spa_autoexpand)) ||
+ (asize < vd->vdev_asize)))
vd->vdev_asize = asize;
vdev_set_min_asize(vd);
@@ -1750,6 +1840,21 @@ vdev_dtl_empty(vdev_t *vd, vdev_dtl_type_t t)
return (empty);
}
+/*
+ * Returns B_TRUE if vdev determines offset needs to be resilvered.
+ */
+boolean_t
+vdev_dtl_need_resilver(vdev_t *vd, uint64_t offset, size_t psize)
+{
+ ASSERT(vd != vd->vdev_spa->spa_root_vdev);
+
+ if (vd->vdev_ops->vdev_op_need_resilver == NULL ||
+ vd->vdev_ops->vdev_op_leaf)
+ return (B_TRUE);
+
+ return (vd->vdev_ops->vdev_op_need_resilver(vd, offset, psize));
+}
+
/*
* Returns the lowest txg in the DTL range.
*/
@@ -1982,6 +2087,51 @@ vdev_dtl_load(vdev_t *vd)
return (error);
}
+void
+vdev_destroy_unlink_zap(vdev_t *vd, uint64_t zapobj, dmu_tx_t *tx)
+{
+ spa_t *spa = vd->vdev_spa;
+
+ VERIFY0(zap_destroy(spa->spa_meta_objset, zapobj, tx));
+ VERIFY0(zap_remove_int(spa->spa_meta_objset, spa->spa_all_vdev_zaps,
+ zapobj, tx));
+}
+
+uint64_t
+vdev_create_link_zap(vdev_t *vd, dmu_tx_t *tx)
+{
+ spa_t *spa = vd->vdev_spa;
+ uint64_t zap = zap_create(spa->spa_meta_objset, DMU_OTN_ZAP_METADATA,
+ DMU_OT_NONE, 0, tx);
+
+ ASSERT(zap != 0);
+ VERIFY0(zap_add_int(spa->spa_meta_objset, spa->spa_all_vdev_zaps,
+ zap, tx));
+
+ return (zap);
+}
+
+void
+vdev_construct_zaps(vdev_t *vd, dmu_tx_t *tx)
+{
+ uint64_t i;
+
+ if (vd->vdev_ops != &vdev_hole_ops &&
+ vd->vdev_ops != &vdev_missing_ops &&
+ vd->vdev_ops != &vdev_root_ops &&
+ !vd->vdev_top->vdev_removing) {
+ if (vd->vdev_ops->vdev_op_leaf && vd->vdev_leaf_zap == 0) {
+ vd->vdev_leaf_zap = vdev_create_link_zap(vd, tx);
+ }
+ if (vd == vd->vdev_top && vd->vdev_top_zap == 0) {
+ vd->vdev_top_zap = vdev_create_link_zap(vd, tx);
+ }
+ }
+ for (i = 0; i < vd->vdev_children; i++) {
+ vdev_construct_zaps(vd->vdev_child[i], tx);
+ }
+}
+
void
vdev_dtl_sync(vdev_t *vd, uint64_t txg)
{
@@ -2004,6 +2154,18 @@ vdev_dtl_sync(vdev_t *vd, uint64_t txg)
space_map_close(vd->vdev_dtl_sm);
vd->vdev_dtl_sm = NULL;
mutex_exit(&vd->vdev_dtl_lock);
+
+ /*
+ * We only destroy the leaf ZAP for detached leaves or for
+ * removed log devices. Removed data devices handle leaf ZAP
+ * cleanup later, once cancellation is no longer possible.
+ */
+ if (vd->vdev_leaf_zap != 0 && (vd->vdev_detached ||
+ vd->vdev_top->vdev_islog)) {
+ vdev_destroy_unlink_zap(vd, vd->vdev_leaf_zap, tx);
+ vd->vdev_leaf_zap = 0;
+ }
+
dmu_tx_commit(tx);
return;
}
@@ -2150,7 +2312,6 @@ vdev_load(vdev_t *vd)
vdev_metaslab_init(vd, 0) != 0))
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_CORRUPT_DATA);
-
/*
* If this is a leaf vdev, load its DTL.
*/
@@ -2210,6 +2371,8 @@ vdev_remove(vdev_t *vd, uint64_t txg)
int m, i;
tx = dmu_tx_create_assigned(spa_get_dsl(spa), txg);
+ ASSERT(vd == vd->vdev_top);
+ ASSERT3U(txg, ==, spa_syncing_txg(spa));
if (vd->vdev_ms != NULL) {
metaslab_group_t *mg = vd->vdev_mg;
@@ -2251,6 +2414,11 @@ vdev_remove(vdev_t *vd, uint64_t txg)
(void) dmu_object_free(mos, vd->vdev_ms_array, tx);
vd->vdev_ms_array = 0;
}
+
+ if (vd->vdev_islog && vd->vdev_top_zap != 0) {
+ vdev_destroy_unlink_zap(vd, vd->vdev_top_zap, tx);
+ vd->vdev_top_zap = 0;
+ }
dmu_tx_commit(tx);
}
@@ -2331,6 +2499,32 @@ vdev_fault(spa_t *spa, uint64_t guid, vdev_aux_t aux)
tvd = vd->vdev_top;
+ /*
+ * If user did a 'zpool offline -f' then make the fault persist across
+ * reboots.
+ */
+ if (aux == VDEV_AUX_EXTERNAL_PERSIST) {
+ /*
+ * There are two kinds of forced faults: temporary and
+ * persistent. Temporary faults go away at pool import, while
+ * persistent faults stay set. Both types of faults can be
+ * cleared with a zpool clear.
+ *
+ * We tell if a vdev is persistently faulted by looking at the
+ * ZPOOL_CONFIG_AUX_STATE nvpair. If it's set to "external" at
+ * import then it's a persistent fault. Otherwise, it's
+ * temporary. We get ZPOOL_CONFIG_AUX_STATE set to "external"
+ * by setting vd.vdev_stat.vs_aux to VDEV_AUX_EXTERNAL. This
+ * tells vdev_config_generate() (which gets run later) to set
+ * ZPOOL_CONFIG_AUX_STATE to "external" in the nvlist.
+ */
+ vd->vdev_stat.vs_aux = VDEV_AUX_EXTERNAL;
+ vd->vdev_tmpoffline = B_FALSE;
+ aux = VDEV_AUX_EXTERNAL;
+ } else {
+ vd->vdev_tmpoffline = B_TRUE;
+ }
+
/*
* We don't directly use the aux state here, but if we do a
* vdev_reopen(), we need this value to be present to remember why we
@@ -2411,6 +2605,8 @@ int
vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
{
vdev_t *vd, *tvd, *pvd, *rvd = spa->spa_root_vdev;
+ boolean_t wasoffline;
+ vdev_state_t oldstate;
spa_vdev_state_enter(spa, SCL_NONE);
@@ -2420,6 +2616,9 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
if (!vd->vdev_ops->vdev_op_leaf)
return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
+ wasoffline = (vd->vdev_offline || vd->vdev_tmpoffline);
+ oldstate = vd->vdev_state;
+
tvd = vd->vdev_top;
vd->vdev_offline = B_FALSE;
vd->vdev_tmpoffline = B_FALSE;
@@ -2455,6 +2654,12 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
return (spa_vdev_state_exit(spa, vd, ENOTSUP));
spa_async_request(spa, SPA_ASYNC_CONFIG_UPDATE);
}
+
+ if (wasoffline ||
+ (oldstate < VDEV_STATE_DEGRADED &&
+ vd->vdev_state >= VDEV_STATE_DEGRADED))
+ spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_ONLINE);
+
return (spa_vdev_state_exit(spa, vd, 0));
}
@@ -2595,9 +2800,8 @@ vdev_clear(spa_t *spa, vdev_t *vd)
*/
if (vd->vdev_faulted || vd->vdev_degraded ||
!vdev_readable(vd) || !vdev_writeable(vd)) {
-
/*
- * When reopening in reponse to a clear event, it may be due to
+ * When reopening in response to a clear event, it may be due to
* a fmadm repair request. In this case, if the device is
* still broken, we want to still post the ereport again.
*/
@@ -2606,6 +2810,7 @@ vdev_clear(spa_t *spa, vdev_t *vd)
vd->vdev_faulted = vd->vdev_degraded = 0ULL;
vd->vdev_cant_read = B_FALSE;
vd->vdev_cant_write = B_FALSE;
+ vd->vdev_stat.vs_aux = 0;
vdev_reopen(vd == rvd ? rvd : vd->vdev_top);
@@ -2617,7 +2822,7 @@ vdev_clear(spa_t *spa, vdev_t *vd)
if (vd->vdev_aux == NULL && !vdev_is_dead(vd))
spa_async_request(spa, SPA_ASYNC_RESILVER);
- spa_event_notify(spa, vd, FM_EREPORT_ZFS_DEVICE_CLEAR);
+ spa_event_notify(spa, vd, NULL, ESC_ZFS_VDEV_CLEAR);
}
/*
@@ -2671,7 +2876,8 @@ vdev_allocatable(vdev_t *vd)
* we're asking two separate questions about it.
*/
return (!(state < VDEV_STATE_DEGRADED && state != VDEV_STATE_CLOSED) &&
- !vd->vdev_cant_write && !vd->vdev_ishole);
+ !vd->vdev_cant_write && !vd->vdev_ishole &&
+ vd->vdev_mg->mg_initialized);
}
boolean_t
@@ -2691,49 +2897,144 @@ vdev_accessible(vdev_t *vd, zio_t *zio)
return (B_TRUE);
}
+static void
+vdev_get_child_stat(vdev_t *cvd, vdev_stat_t *vs, vdev_stat_t *cvs)
+{
+ int t;
+ for (t = 0; t < ZIO_TYPES; t++) {
+ vs->vs_ops[t] += cvs->vs_ops[t];
+ vs->vs_bytes[t] += cvs->vs_bytes[t];
+ }
+
+ cvs->vs_scan_removing = cvd->vdev_removing;
+}
+
/*
- * Get statistics for the given vdev.
+ * Get extended stats
*/
-void
-vdev_get_stats(vdev_t *vd, vdev_stat_t *vs)
+static void
+vdev_get_child_stat_ex(vdev_t *cvd, vdev_stat_ex_t *vsx, vdev_stat_ex_t *cvsx)
{
- spa_t *spa = vd->vdev_spa;
- vdev_t *rvd = spa->spa_root_vdev;
- int c, t;
+ int t, b;
+ for (t = 0; t < ZIO_TYPES; t++) {
+ for (b = 0; b < ARRAY_SIZE(vsx->vsx_disk_histo[0]); b++)
+ vsx->vsx_disk_histo[t][b] += cvsx->vsx_disk_histo[t][b];
- ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
+ for (b = 0; b < ARRAY_SIZE(vsx->vsx_total_histo[0]); b++) {
+ vsx->vsx_total_histo[t][b] +=
+ cvsx->vsx_total_histo[t][b];
+ }
+ }
- mutex_enter(&vd->vdev_stat_lock);
- bcopy(&vd->vdev_stat, vs, sizeof (*vs));
- vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
- vs->vs_state = vd->vdev_state;
- vs->vs_rsize = vdev_get_min_asize(vd);
- if (vd->vdev_ops->vdev_op_leaf)
- vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
- vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize;
- if (vd->vdev_aux == NULL && vd == vd->vdev_top && !vd->vdev_ishole) {
- vs->vs_fragmentation = vd->vdev_mg->mg_fragmentation;
+ for (t = 0; t < ZIO_PRIORITY_NUM_QUEUEABLE; t++) {
+ for (b = 0; b < ARRAY_SIZE(vsx->vsx_queue_histo[0]); b++) {
+ vsx->vsx_queue_histo[t][b] +=
+ cvsx->vsx_queue_histo[t][b];
+ }
+ vsx->vsx_active_queue[t] += cvsx->vsx_active_queue[t];
+ vsx->vsx_pend_queue[t] += cvsx->vsx_pend_queue[t];
+
+ for (b = 0; b < ARRAY_SIZE(vsx->vsx_ind_histo[0]); b++)
+ vsx->vsx_ind_histo[t][b] += cvsx->vsx_ind_histo[t][b];
+
+ for (b = 0; b < ARRAY_SIZE(vsx->vsx_agg_histo[0]); b++)
+ vsx->vsx_agg_histo[t][b] += cvsx->vsx_agg_histo[t][b];
}
+}
+
+/*
+ * Get statistics for the given vdev.
+ */
+static void
+vdev_get_stats_ex_impl(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
+{
+ int c, t;
/*
* If we're getting stats on the root vdev, aggregate the I/O counts
* over all top-level vdevs (i.e. the direct children of the root).
*/
- if (vd == rvd) {
- for (c = 0; c < rvd->vdev_children; c++) {
- vdev_t *cvd = rvd->vdev_child[c];
+ if (!vd->vdev_ops->vdev_op_leaf) {
+ if (vs) {
+ memset(vs->vs_ops, 0, sizeof (vs->vs_ops));
+ memset(vs->vs_bytes, 0, sizeof (vs->vs_bytes));
+ }
+ if (vsx)
+ memset(vsx, 0, sizeof (*vsx));
+
+ for (c = 0; c < vd->vdev_children; c++) {
+ vdev_t *cvd = vd->vdev_child[c];
vdev_stat_t *cvs = &cvd->vdev_stat;
+ vdev_stat_ex_t *cvsx = &cvd->vdev_stat_ex;
- for (t = 0; t < ZIO_TYPES; t++) {
- vs->vs_ops[t] += cvs->vs_ops[t];
- vs->vs_bytes[t] += cvs->vs_bytes[t];
- }
- cvs->vs_scan_removing = cvd->vdev_removing;
+ vdev_get_stats_ex_impl(cvd, cvs, cvsx);
+ if (vs)
+ vdev_get_child_stat(cvd, vs, cvs);
+ if (vsx)
+ vdev_get_child_stat_ex(cvd, vsx, cvsx);
+
+ }
+ } else {
+ /*
+ * We're a leaf. Just copy our ZIO active queue stats in. The
+ * other leaf stats are updated in vdev_stat_update().
+ */
+ if (!vsx)
+ return;
+
+ memcpy(vsx, &vd->vdev_stat_ex, sizeof (vd->vdev_stat_ex));
+
+ for (t = 0; t < ARRAY_SIZE(vd->vdev_queue.vq_class); t++) {
+ vsx->vsx_active_queue[t] =
+ vd->vdev_queue.vq_class[t].vqc_active;
+ vsx->vsx_pend_queue[t] = avl_numnodes(
+ &vd->vdev_queue.vq_class[t].vqc_queued_tree);
}
}
+}
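vdev_get_stats_ex_impl() aggregates bottom-up: interior vdevs zero their counters and sum over their children, while leaves report what vdev_stat_update() recorded. A toy roll-up over an illustrative two-leaf tree:

#include <stdio.h>

typedef struct demo_vdev {
	int nchildren;			/* 0 => leaf */
	struct demo_vdev *child[4];
	unsigned long ops;		/* leaf-recorded counter */
} demo_vdev_t;

static unsigned long
collect_ops(const demo_vdev_t *vd)
{
	unsigned long sum = 0;
	int c;

	if (vd->nchildren == 0)		/* leaf: report own counters */
		return (vd->ops);
	for (c = 0; c < vd->nchildren; c++)	/* interior: sum children */
		sum += collect_ops(vd->child[c]);
	return (sum);
}

int
main(void)
{
	demo_vdev_t leaf1 = { 0, { NULL }, 10 };
	demo_vdev_t leaf2 = { 0, { NULL }, 32 };
	demo_vdev_t root = { 2, { &leaf1, &leaf2 }, 0 };

	printf("root ops = %lu\n", collect_ops(&root));	/* 42 */
	return (0);
}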
+
+void
+vdev_get_stats_ex(vdev_t *vd, vdev_stat_t *vs, vdev_stat_ex_t *vsx)
+{
+ vdev_t *tvd = vd->vdev_top;
+ mutex_enter(&vd->vdev_stat_lock);
+ if (vs) {
+ bcopy(&vd->vdev_stat, vs, sizeof (*vs));
+ vs->vs_timestamp = gethrtime() - vs->vs_timestamp;
+ vs->vs_state = vd->vdev_state;
+ vs->vs_rsize = vdev_get_min_asize(vd);
+ if (vd->vdev_ops->vdev_op_leaf)
+ vs->vs_rsize += VDEV_LABEL_START_SIZE +
+ VDEV_LABEL_END_SIZE;
+ /*
+ * Report expandable space on top-level, non-auxiliary devices
+ * only. The expandable space is reported in terms of metaslab
+ * sized units since that determines how much space the pool
+ * can expand.
+ */
+ if (vd->vdev_aux == NULL && tvd != NULL) {
+ vs->vs_esize = P2ALIGN(
+ vd->vdev_max_asize - vd->vdev_asize,
+ 1ULL << tvd->vdev_ms_shift);
+ }
+ vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize;
+ if (vd->vdev_aux == NULL && vd == vd->vdev_top &&
+ !vd->vdev_ishole) {
+ vs->vs_fragmentation = vd->vdev_mg->mg_fragmentation;
+ }
+ }
+
+ ASSERT(spa_config_held(vd->vdev_spa, SCL_ALL, RW_READER) != 0);
+ vdev_get_stats_ex_impl(vd, vs, vsx);
mutex_exit(&vd->vdev_stat_lock);
}
+void
+vdev_get_stats(vdev_t *vd, vdev_stat_t *vs)
+{
+ return (vdev_get_stats_ex(vd, vs, NULL));
+}
+
void
vdev_clear_stats(vdev_t *vd)
{
@@ -2767,6 +3068,7 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
vdev_t *pvd;
uint64_t txg = zio->io_txg;
vdev_stat_t *vs = &vd->vdev_stat;
+ vdev_stat_ex_t *vsx = &vd->vdev_stat_ex;
zio_type_t type = zio->io_type;
int flags = zio->io_flags;
@@ -2817,8 +3119,33 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
vs->vs_self_healed += psize;
}
- vs->vs_ops[type]++;
- vs->vs_bytes[type] += psize;
+ /*
+ * The bytes/ops/histograms are recorded at the leaf level and
+ * aggregated into the higher level vdevs in vdev_get_stats().
+ */
+ if (vd->vdev_ops->vdev_op_leaf &&
+ (zio->io_priority < ZIO_PRIORITY_NUM_QUEUEABLE)) {
+
+ vs->vs_ops[type]++;
+ vs->vs_bytes[type] += psize;
+
+ if (flags & ZIO_FLAG_DELEGATED) {
+ vsx->vsx_agg_histo[zio->io_priority]
+ [RQ_HISTO(zio->io_size)]++;
+ } else {
+ vsx->vsx_ind_histo[zio->io_priority]
+ [RQ_HISTO(zio->io_size)]++;
+ }
+
+ if (zio->io_delta && zio->io_delay) {
+ vsx->vsx_queue_histo[zio->io_priority]
+ [L_HISTO(zio->io_delta - zio->io_delay)]++;
+ vsx->vsx_disk_histo[type]
+ [L_HISTO(zio->io_delay)]++;
+ vsx->vsx_total_histo[type]
+ [L_HISTO(zio->io_delta)]++;
+ }
+ }
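For queued leaf I/O, io_delay is roughly the time the request spent on the device and io_delta the total time in the pipeline, so io_delta - io_delay approximates queue wait; each sample lands in a log2-spaced bucket. A sketch assuming the L_HISTO()-style index is the highest set bit, clamped to the array size:

#include <stdint.h>
#include <stdio.h>

#define BUCKETS 37	/* illustrative histogram width */

static int
highbit64(uint64_t v)	/* 1-based index of highest set bit, 0 for 0 */
{
	int b = 0;

	while (v != 0) {
		b++;
		v >>= 1;
	}
	return (b);
}

static int
l_histo(uint64_t ns)	/* log2 bucket, clamped */
{
	int b = highbit64(ns);

	return (b < BUCKETS - 1 ? b : BUCKETS - 1);
}

int
main(void)
{
	uint64_t histo[BUCKETS] = { 0 };
	uint64_t lat_ns[] = { 900, 15000, 15500, 2000000 };
	size_t i;

	for (i = 0; i < sizeof (lat_ns) / sizeof (lat_ns[0]); i++)
		histo[l_histo(lat_ns[i])]++;
	for (i = 0; i < BUCKETS; i++) {
		if (histo[i] != 0)
			printf("bucket %zu: %llu\n", i,
			    (unsigned long long)histo[i]);
	}
	return (0);
}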
mutex_exit(&vd->vdev_stat_lock);
return;
@@ -3155,6 +3482,17 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
spa_t *spa = vd->vdev_spa;
if (state == vd->vdev_state) {
+ /*
+ * Since vdev_offline() code path is already in an offline
+ * state we can miss a statechange event to OFFLINE. Check
+ * the previous state to catch this condition.
+ */
+ if (vd->vdev_ops->vdev_op_leaf &&
+ (state == VDEV_STATE_OFFLINE) &&
+ (vd->vdev_prevstate >= VDEV_STATE_FAULTED)) {
+ /* post an offline state change */
+ zfs_post_state_change(spa, vd, vd->vdev_prevstate);
+ }
vd->vdev_stat.vs_aux = aux;
return;
}
@@ -3178,19 +3516,6 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
vd->vdev_ops->vdev_op_leaf)
vd->vdev_ops->vdev_op_close(vd);
- /*
- * If we have brought this vdev back into service, we need
- * to notify fmd so that it can gracefully repair any outstanding
- * cases due to a missing device. We do this in all cases, even those
- * that probably don't correlate to a repaired fault. This is sure to
- * catch all cases, and we let the zfs-retire agent sort it out. If
- * this is a transient state it's OK, as the retire agent will
- * double-check the state of the vdev before repairing it.
- */
- if (state == VDEV_STATE_HEALTHY && vd->vdev_ops->vdev_op_leaf &&
- vd->vdev_prevstate != state)
- zfs_post_state_change(spa, vd);
-
if (vd->vdev_removed &&
state == VDEV_STATE_CANT_OPEN &&
(aux == VDEV_AUX_OPEN_FAILED || vd->vdev_checkremove)) {
@@ -3258,6 +3583,9 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
case VDEV_AUX_BAD_LABEL:
class = FM_EREPORT_ZFS_DEVICE_BAD_LABEL;
break;
+ case VDEV_AUX_BAD_ASHIFT:
+ class = FM_EREPORT_ZFS_DEVICE_BAD_ASHIFT;
+ break;
default:
class = FM_EREPORT_ZFS_DEVICE_UNKNOWN;
}
@@ -3271,44 +3599,44 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
vd->vdev_removed = B_FALSE;
}
+ /*
+ * Notify ZED of any significant state-change on a leaf vdev.
+ *
+ */
+ if (vd->vdev_ops->vdev_op_leaf) {
+ /* preserve original state from a vdev_reopen() */
+ if ((vd->vdev_prevstate != VDEV_STATE_UNKNOWN) &&
+ (vd->vdev_prevstate != vd->vdev_state) &&
+ (save_state <= VDEV_STATE_CLOSED))
+ save_state = vd->vdev_prevstate;
+
+ /* filter out state change due to initial vdev_open */
+ if (save_state > VDEV_STATE_CLOSED)
+ zfs_post_state_change(spa, vd, save_state);
+ }
+
if (!isopen && vd->vdev_parent)
vdev_propagate_state(vd->vdev_parent);
}
/*
* Check the vdev configuration to ensure that it's capable of supporting
- * a root pool.
+ * a root pool. We do not support partial configuration.
*/
boolean_t
vdev_is_bootable(vdev_t *vd)
{
-#if defined(__sun__) || defined(__sun)
- /*
- * Currently, we do not support RAID-Z or partial configuration.
- * In addition, only a single top-level vdev is allowed and none of the
- * leaves can be wholedisks.
- */
- int c;
-
if (!vd->vdev_ops->vdev_op_leaf) {
- char *vdev_type = vd->vdev_ops->vdev_op_type;
+ const char *vdev_type = vd->vdev_ops->vdev_op_type;
- if (strcmp(vdev_type, VDEV_TYPE_ROOT) == 0 &&
- vd->vdev_children > 1) {
+ if (strcmp(vdev_type, VDEV_TYPE_MISSING) == 0)
return (B_FALSE);
- } else if (strcmp(vdev_type, VDEV_TYPE_RAIDZ) == 0 ||
- strcmp(vdev_type, VDEV_TYPE_MISSING) == 0) {
- return (B_FALSE);
- }
- } else if (vd->vdev_wholedisk == 1) {
- return (B_FALSE);
}
- for (c = 0; c < vd->vdev_children; c++) {
+ for (int c = 0; c < vd->vdev_children; c++) {
if (!vdev_is_bootable(vd->vdev_child[c]))
return (B_FALSE);
}
-#endif /* __sun__ || __sun */
return (B_TRUE);
}
@@ -3443,9 +3771,10 @@ EXPORT_SYMBOL(vdev_degrade);
EXPORT_SYMBOL(vdev_online);
EXPORT_SYMBOL(vdev_offline);
EXPORT_SYMBOL(vdev_clear);
-
+/* BEGIN CSTYLED */
module_param(metaslabs_per_vdev, int, 0644);
MODULE_PARM_DESC(metaslabs_per_vdev,
"Divide added vdev into approximately (but no more than) this number "
"of metaslabs");
+/* END CSTYLED */
#endif
diff --git a/zfs/module/zfs/vdev_cache.c b/zfs/module/zfs/vdev_cache.c
index 389fa6fd9d07..7317373b8a02 100644
--- a/zfs/module/zfs/vdev_cache.c
+++ b/zfs/module/zfs/vdev_cache.c
@@ -23,7 +23,7 @@
* Use is subject to license terms.
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -31,6 +31,7 @@
#include <sys/vdev_impl.h>
#include <sys/zio.h>
#include <sys/kstat.h>
+#include <sys/abd.h>
/*
* Virtual device read-ahead caching.
@@ -102,31 +103,26 @@ static vdc_stats_t vdc_stats = {
{ "misses", KSTAT_DATA_UINT64 }
};
-#define VDCSTAT_BUMP(stat) atomic_add_64(&vdc_stats.stat.value.ui64, 1);
+#define VDCSTAT_BUMP(stat) atomic_inc_64(&vdc_stats.stat.value.ui64);
-static int
+static inline int
vdev_cache_offset_compare(const void *a1, const void *a2)
{
- const vdev_cache_entry_t *ve1 = a1;
- const vdev_cache_entry_t *ve2 = a2;
-
- if (ve1->ve_offset < ve2->ve_offset)
- return (-1);
- if (ve1->ve_offset > ve2->ve_offset)
- return (1);
- return (0);
+ const vdev_cache_entry_t *ve1 = (const vdev_cache_entry_t *)a1;
+ const vdev_cache_entry_t *ve2 = (const vdev_cache_entry_t *)a2;
+
+ return (AVL_CMP(ve1->ve_offset, ve2->ve_offset));
}
static int
vdev_cache_lastused_compare(const void *a1, const void *a2)
{
- const vdev_cache_entry_t *ve1 = a1;
- const vdev_cache_entry_t *ve2 = a2;
+ const vdev_cache_entry_t *ve1 = (const vdev_cache_entry_t *)a1;
+ const vdev_cache_entry_t *ve2 = (const vdev_cache_entry_t *)a2;
- if (ddi_time_before(ve1->ve_lastused, ve2->ve_lastused))
- return (-1);
- if (ddi_time_after(ve1->ve_lastused, ve2->ve_lastused))
- return (1);
+ int cmp = AVL_CMP(ve1->ve_lastused, ve2->ve_lastused);
+ if (likely(cmp))
+ return (cmp);
/*
* Among equally old entries, sort by offset to ensure uniqueness.
@@ -141,12 +137,12 @@ static void
vdev_cache_evict(vdev_cache_t *vc, vdev_cache_entry_t *ve)
{
ASSERT(MUTEX_HELD(&vc->vc_lock));
- ASSERT(ve->ve_fill_io == NULL);
- ASSERT(ve->ve_data != NULL);
+ ASSERT3P(ve->ve_fill_io, ==, NULL);
+ ASSERT3P(ve->ve_abd, !=, NULL);
avl_remove(&vc->vc_lastused_tree, ve);
avl_remove(&vc->vc_offset_tree, ve);
- zio_buf_free(ve->ve_data, VCBS);
+ abd_free(ve->ve_abd);
kmem_free(ve, sizeof (vdev_cache_entry_t));
}
@@ -176,14 +172,14 @@ vdev_cache_allocate(zio_t *zio)
ve = avl_first(&vc->vc_lastused_tree);
if (ve->ve_fill_io != NULL)
return (NULL);
- ASSERT(ve->ve_hits != 0);
+ ASSERT3U(ve->ve_hits, !=, 0);
vdev_cache_evict(vc, ve);
}
ve = kmem_zalloc(sizeof (vdev_cache_entry_t), KM_SLEEP);
ve->ve_offset = offset;
ve->ve_lastused = ddi_get_lbolt();
- ve->ve_data = zio_buf_alloc(VCBS);
+ ve->ve_abd = abd_alloc_for_io(VCBS, B_TRUE);
avl_add(&vc->vc_offset_tree, ve);
avl_add(&vc->vc_lastused_tree, ve);
@@ -197,7 +193,7 @@ vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio)
uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS);
ASSERT(MUTEX_HELD(&vc->vc_lock));
- ASSERT(ve->ve_fill_io == NULL);
+ ASSERT3P(ve->ve_fill_io, ==, NULL);
if (ve->ve_lastused != ddi_get_lbolt()) {
avl_remove(&vc->vc_lastused_tree, ve);
@@ -206,7 +202,7 @@ vdev_cache_hit(vdev_cache_t *vc, vdev_cache_entry_t *ve, zio_t *zio)
}
ve->ve_hits++;
- bcopy(ve->ve_data + cache_phase, zio->io_data, zio->io_size);
+ abd_copy_off(zio->io_abd, ve->ve_abd, 0, cache_phase, zio->io_size);
}
/*
@@ -219,17 +215,18 @@ vdev_cache_fill(zio_t *fio)
vdev_cache_t *vc = &vd->vdev_cache;
vdev_cache_entry_t *ve = fio->io_private;
zio_t *pio;
+ zio_link_t *zl;
- ASSERT(fio->io_size == VCBS);
+ ASSERT3U(fio->io_size, ==, VCBS);
/*
* Add data to the cache.
*/
mutex_enter(&vc->vc_lock);
- ASSERT(ve->ve_fill_io == fio);
- ASSERT(ve->ve_offset == fio->io_offset);
- ASSERT(ve->ve_data == fio->io_data);
+ ASSERT3P(ve->ve_fill_io, ==, fio);
+ ASSERT3U(ve->ve_offset, ==, fio->io_offset);
+ ASSERT3P(ve->ve_abd, ==, fio->io_abd);
ve->ve_fill_io = NULL;
@@ -238,7 +235,8 @@ vdev_cache_fill(zio_t *fio)
* any reads that were queued up before the missed update are still
* valid, so we can satisfy them from this line before we evict it.
*/
- while ((pio = zio_walk_parents(fio)) != NULL)
+ zl = NULL;
+ while ((pio = zio_walk_parents(fio, &zl)) != NULL)
vdev_cache_hit(vc, ve, pio);
if (fio->io_error || ve->ve_missed_update)
@@ -259,7 +257,7 @@ vdev_cache_read(zio_t *zio)
zio_t *fio;
ASSERTV(uint64_t cache_phase = P2PHASE(zio->io_offset, VCBS));
- ASSERT(zio->io_type == ZIO_TYPE_READ);
+ ASSERT3U(zio->io_type, ==, ZIO_TYPE_READ);
if (zio->io_flags & ZIO_FLAG_DONT_CACHE)
return (B_FALSE);
@@ -273,7 +271,7 @@ vdev_cache_read(zio_t *zio)
if (P2BOUNDARY(zio->io_offset, zio->io_size, VCBS))
return (B_FALSE);
- ASSERT(cache_phase + zio->io_size <= VCBS);
+ ASSERT3U(cache_phase + zio->io_size, <=, VCBS);
mutex_enter(&vc->vc_lock);
@@ -312,7 +310,7 @@ vdev_cache_read(zio_t *zio)
}
fio = zio_vdev_delegated_io(zio->io_vd, cache_offset,
- ve->ve_data, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_NOW,
+ ve->ve_abd, VCBS, ZIO_TYPE_READ, ZIO_PRIORITY_NOW,
ZIO_FLAG_DONT_CACHE, vdev_cache_fill, ve);
ve->ve_fill_io = fio;
@@ -340,7 +338,7 @@ vdev_cache_write(zio_t *zio)
uint64_t max_offset = P2ROUNDUP(io_end, VCBS);
avl_index_t where;
- ASSERT(zio->io_type == ZIO_TYPE_WRITE);
+ ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE);
mutex_enter(&vc->vc_lock);
@@ -357,8 +355,9 @@ vdev_cache_write(zio_t *zio)
if (ve->ve_fill_io != NULL) {
ve->ve_missed_update = 1;
} else {
- bcopy((char *)zio->io_data + start - io_start,
- ve->ve_data + start - ve->ve_offset, end - start);
+ abd_copy_off(ve->ve_abd, zio->io_abd,
+ start - ve->ve_offset, start - io_start,
+ end - start);
}
ve = AVL_NEXT(&vc->vc_offset_tree, ve);
}
diff --git a/zfs/module/zfs/vdev_disk.c b/zfs/module/zfs/vdev_disk.c
index 9968b0379cf5..5ae50a315342 100644
--- a/zfs/module/zfs/vdev_disk.c
+++ b/zfs/module/zfs/vdev_disk.c
@@ -23,13 +23,14 @@
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
* Rewritten for Linux by Brian Behlendorf <behlendorf1 at llnl.gov>.
* LLNL-CODE-403049.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/vdev_disk.h>
#include <sys/vdev_impl.h>
+#include <sys/abd.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <sys/sunldi.h>
@@ -98,9 +99,9 @@ vdev_disk_error(zio_t *zio)
{
#ifdef ZFS_DEBUG
printk("ZFS: zio error=%d type=%d offset=%llu size=%llu "
- "flags=%x delay=%llu\n", zio->io_error, zio->io_type,
+ "flags=%x\n", zio->io_error, zio->io_type,
(u_longlong_t)zio->io_offset, (u_longlong_t)zio->io_size,
- zio->io_flags, (u_longlong_t)zio->io_delay);
+ zio->io_flags);
#endif
}
@@ -407,12 +408,12 @@ vdev_disk_dio_put(dio_request_t *dr)
vdev_disk_dio_free(dr);
if (zio) {
- zio->io_delay = jiffies_64 - zio->io_delay;
zio->io_error = error;
ASSERT3S(zio->io_error, >=, 0);
if (zio->io_error)
vdev_disk_error(zio);
- zio_interrupt(zio);
+
+ zio_delay_interrupt(zio);
}
}
@@ -435,17 +436,10 @@ BIO_END_IO_PROTO(vdev_disk_physio_completion, bio, error)
#endif
}
- /* Drop reference aquired by __vdev_disk_physio */
+ /* Drop reference acquired by __vdev_disk_physio */
rc = vdev_disk_dio_put(dr);
}
-static inline unsigned long
-bio_nr_pages(void *bio_ptr, unsigned int bio_size)
-{
- return ((((unsigned long)bio_ptr + bio_size + PAGE_SIZE - 1) >>
- PAGE_SHIFT) - ((unsigned long)bio_ptr >> PAGE_SHIFT));
-}
-
static unsigned int
bio_map(struct bio *bio, void *bio_ptr, unsigned int bio_size)
{
@@ -485,6 +479,15 @@ bio_map(struct bio *bio, void *bio_ptr, unsigned int bio_size)
return (bio_size);
}
+static unsigned int
+bio_map_abd_off(struct bio *bio, abd_t *abd, unsigned int size, size_t off)
+{
+ if (abd_is_linear(abd))
+ return (bio_map(bio, ((char *)abd_to_buf(abd)) + off, size));
+
+ return (abd_scatter_bio_map_off(bio, abd, size, off));
+}
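bio_map_abd_off() keeps the old flat-pointer path for linear ABDs and only walks pages for scattered ones. A userspace sketch of that dispatch; demo_abd and the helpers are illustrative stand-ins, not the ABD API:

#include <stddef.h>
#include <stdio.h>

typedef struct demo_abd {
	int linear;		/* 1: one flat buffer, 0: page chunks */
	char *buf;		/* linear representation only */
	size_t pagesize;
} demo_abd_t;

static size_t
map_linear(const char *buf, size_t size, size_t off)
{
	(void) buf;
	printf("map %zu bytes at offset %zu (flat)\n", size, off);
	return (0);		/* 0 bytes left unmapped */
}

static size_t
map_scatter(const demo_abd_t *abd, size_t size, size_t off)
{
	size_t done = 0;

	while (done < size) {	/* one segment per page */
		size_t pg = (off + done) / abd->pagesize;
		size_t pgoff = (off + done) % abd->pagesize;
		size_t n = abd->pagesize - pgoff;

		if (n > size - done)
			n = size - done;
		printf("map %zu bytes in page %zu\n", n, pg);
		done += n;
	}
	return (0);
}

static size_t
map_abd_off(const demo_abd_t *abd, size_t size, size_t off)
{
	if (abd->linear)
		return (map_linear(abd->buf, size, off));
	return (map_scatter(abd, size, off));
}

int
main(void)
{
	char flat[4096];
	demo_abd_t lin = { 1, flat, sizeof (flat) };
	demo_abd_t sct = { 0, NULL, 1024 };

	map_abd_off(&lin, 512, 64);
	map_abd_off(&sct, 1536, 512);
	return (0);
}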
+
static inline void
vdev_submit_bio_impl(struct bio *bio)
{
@@ -495,6 +498,14 @@ vdev_submit_bio_impl(struct bio *bio)
#endif
}
+#ifndef HAVE_BIO_SET_DEV
+static inline void
+bio_set_dev(struct bio *bio, struct block_device *bdev)
+{
+ bio->bi_bdev = bdev;
+}
+#endif /* !HAVE_BIO_SET_DEV */
+
static inline void
vdev_submit_bio(struct bio *bio)
{
@@ -512,11 +523,11 @@ vdev_submit_bio(struct bio *bio)
}
static int
-__vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
- size_t kbuf_size, uint64_t kbuf_offset, int rw, int flags)
+__vdev_disk_physio(struct block_device *bdev, zio_t *zio,
+ size_t io_size, uint64_t io_offset, int rw, int flags)
{
dio_request_t *dr;
- caddr_t bio_ptr;
+ uint64_t abd_offset;
uint64_t bio_offset;
int bio_size, bio_count = 16;
int i = 0, error = 0;
@@ -524,7 +535,8 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
struct blk_plug plug;
#endif
- ASSERT3U(kbuf_offset + kbuf_size, <=, bdev->bd_inode->i_size);
+ ASSERT(zio != NULL);
+ ASSERT3U(io_offset + io_size, <=, bdev->bd_inode->i_size);
retry:
dr = vdev_disk_dio_alloc(bio_count);
@@ -543,9 +555,10 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
* their volume block size to match the maximum request size and
* the common case will be one bio per vdev IO request.
*/
- bio_ptr = kbuf_ptr;
- bio_offset = kbuf_offset;
- bio_size = kbuf_size;
+
+ abd_offset = 0;
+ bio_offset = io_offset;
+ bio_size = io_size;
for (i = 0; i <= dr->dr_bio_count; i++) {
/* Finished constructing bio's for given buffer */
@@ -565,7 +578,8 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
/* bio_alloc() with __GFP_WAIT never returns NULL */
dr->dr_bio[i] = bio_alloc(GFP_NOIO,
- MIN(bio_nr_pages(bio_ptr, bio_size), BIO_MAX_PAGES));
+ MIN(abd_nr_pages_off(zio->io_abd, bio_size, abd_offset),
+ BIO_MAX_PAGES));
if (unlikely(dr->dr_bio[i] == NULL)) {
vdev_disk_dio_free(dr);
return (ENOMEM);
@@ -574,28 +588,23 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio, caddr_t kbuf_ptr,
/* Matching put called by vdev_disk_physio_completion */
vdev_disk_dio_get(dr);
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,14,0)
- dr->dr_bio[i]->bi_bdev = bdev;
-#else
bio_set_dev(dr->dr_bio[i], bdev);
-#endif
BIO_BI_SECTOR(dr->dr_bio[i]) = bio_offset >> 9;
dr->dr_bio[i]->bi_end_io = vdev_disk_physio_completion;
dr->dr_bio[i]->bi_private = dr;
bio_set_op_attrs(dr->dr_bio[i], rw, flags);
/* Remaining size is returned to become the new size */
- bio_size = bio_map(dr->dr_bio[i], bio_ptr, bio_size);
+ bio_size = bio_map_abd_off(dr->dr_bio[i], zio->io_abd,
+ bio_size, abd_offset);
/* Advance in buffer and construct another bio if needed */
- bio_ptr += BIO_BI_SIZE(dr->dr_bio[i]);
+ abd_offset += BIO_BI_SIZE(dr->dr_bio[i]);
bio_offset += BIO_BI_SIZE(dr->dr_bio[i]);
}
/* Extra reference to protect dio_request during vdev_submit_bio */
vdev_disk_dio_get(dr);
- if (zio)
- zio->io_delay = jiffies_64;
#if defined(HAVE_BLK_QUEUE_HAVE_BLK_PLUG)
if (dr->dr_bio_count > 1)
@@ -626,7 +635,6 @@ BIO_END_IO_PROTO(vdev_disk_io_flush_completion, bio, error)
zio->io_error = -error;
#endif
- zio->io_delay = jiffies_64 - zio->io_delay;
if (zio->io_error && (zio->io_error == EOPNOTSUPP))
zio->io_vd->vdev_nowritecache = B_TRUE;
@@ -654,12 +662,7 @@ vdev_disk_io_flush(struct block_device *bdev, zio_t *zio)
bio->bi_end_io = vdev_disk_io_flush_completion;
bio->bi_private = zio;
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,14,0)
- bio->bi_bdev = bdev;
-#else
bio_set_dev(bio, bdev);
-#endif
- zio->io_delay = jiffies_64;
bio_set_flush(bio);
vdev_submit_bio(bio);
invalidate_bdev(bdev);
@@ -699,8 +702,6 @@ vdev_disk_io_start(zio_t *zio)
return;
zio->io_error = error;
- if (error == ENOTSUP)
- v->vdev_nowritecache = B_TRUE;
break;
@@ -738,7 +739,8 @@ vdev_disk_io_start(zio_t *zio)
return;
}
- error = __vdev_disk_physio(vd->vd_bdev, zio, zio->io_data,
+ zio->io_target_timestamp = zio_handle_io_delay(zio);
+ error = __vdev_disk_physio(vd->vd_bdev, zio,
zio->io_size, zio->io_offset, rw, flags);
if (error) {
zio->io_error = error;
@@ -803,6 +805,7 @@ vdev_ops_t vdev_disk_ops = {
vdev_disk_io_start,
vdev_disk_io_done,
NULL,
+ NULL,
vdev_disk_hold,
vdev_disk_rele,
VDEV_TYPE_DISK, /* name of this vdev type */
diff --git a/zfs/module/zfs/vdev_file.c b/zfs/module/zfs/vdev_file.c
index a29ea7bf9515..13c32e0836f5 100644
--- a/zfs/module/zfs/vdev_file.c
+++ b/zfs/module/zfs/vdev_file.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -31,11 +31,14 @@
#include <sys/zio.h>
#include <sys/fs/zfs.h>
#include <sys/fm/fs/zfs.h>
+#include <sys/abd.h>
/*
* Virtual device vector for files.
*/
+static taskq_t *vdev_file_taskq;
+
static void
vdev_file_hold(vdev_t *vd)
{
@@ -150,16 +153,26 @@ vdev_file_io_strategy(void *arg)
vdev_t *vd = zio->io_vd;
vdev_file_t *vf = vd->vdev_tsd;
ssize_t resid;
+ void *buf;
+
+ if (zio->io_type == ZIO_TYPE_READ)
+ buf = abd_borrow_buf(zio->io_abd, zio->io_size);
+ else
+ buf = abd_borrow_buf_copy(zio->io_abd, zio->io_size);
zio->io_error = vn_rdwr(zio->io_type == ZIO_TYPE_READ ?
- UIO_READ : UIO_WRITE, vf->vf_vnode, zio->io_data,
- zio->io_size, zio->io_offset, UIO_SYSSPACE,
- 0, RLIM64_INFINITY, kcred, &resid);
+ UIO_READ : UIO_WRITE, vf->vf_vnode, buf, zio->io_size,
+ zio->io_offset, UIO_SYSSPACE, 0, RLIM64_INFINITY, kcred, &resid);
+
+ if (zio->io_type == ZIO_TYPE_READ)
+ abd_return_buf_copy(zio->io_abd, buf, zio->io_size);
+ else
+ abd_return_buf(zio->io_abd, buf, zio->io_size);
if (resid != 0 && zio->io_error == 0)
zio->io_error = SET_ERROR(ENOSPC);
- zio_interrupt(zio);
+ zio_delay_interrupt(zio);
}
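The hunk above is the standard pattern for handing an ABD to an interface that wants a flat pointer: reads borrow an uninitialized buffer and copy it back into the ABD afterwards; writes borrow a pre-copied buffer and return it without copying. A userspace sketch with illustrative names:

#include <assert.h>
#include <stdlib.h>
#include <string.h>

typedef struct demo_abd {
	char data[64];
} demo_abd_t;

static char *
borrow_buf(demo_abd_t *abd, size_t size)	/* read path */
{
	(void) abd;
	return (malloc(size));		/* contents don't matter yet */
}

static char *
borrow_buf_copy(demo_abd_t *abd, size_t size)	/* write path */
{
	char *buf = malloc(size);

	memcpy(buf, abd->data, size);	/* writes need the data now */
	return (buf);
}

static void
return_buf_copy(demo_abd_t *abd, char *buf, size_t size)	/* read */
{
	memcpy(abd->data, buf, size);	/* publish what was read */
	free(buf);
}

static void
return_buf(demo_abd_t *abd, char *buf, size_t size)	/* write */
{
	(void) abd;
	(void) size;
	free(buf);			/* nothing to copy back */
}

int
main(void)
{
	demo_abd_t abd;
	char *buf = borrow_buf(&abd, sizeof (abd.data));

	memset(buf, 'x', sizeof (abd.data));	/* stands in for vn_rdwr() */
	return_buf_copy(&abd, buf, sizeof (abd.data));
	assert(abd.data[0] == 'x');

	buf = borrow_buf_copy(&abd, sizeof (abd.data));
	return_buf(&abd, buf, sizeof (abd.data));
	return (0);
}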
static void
@@ -200,9 +213,10 @@ vdev_file_io_start(zio_t *zio)
* already set, see xfs_vm_writepage(). Therefore
* the sync must be dispatched to a different context.
*/
- if (spl_fstrans_check()) {
- VERIFY3U(taskq_dispatch(system_taskq,
- vdev_file_io_fsync, zio, TQ_SLEEP), !=, 0);
+ if (__spl_pf_fstrans_check()) {
+ VERIFY3U(taskq_dispatch(vdev_file_taskq,
+ vdev_file_io_fsync, zio, TQ_SLEEP), !=,
+ TASKQID_INVALID);
return;
}
@@ -217,8 +231,10 @@ vdev_file_io_start(zio_t *zio)
return;
}
- VERIFY3U(taskq_dispatch(system_taskq, vdev_file_io_strategy, zio,
- TQ_SLEEP), !=, 0);
+ zio->io_target_timestamp = zio_handle_io_delay(zio);
+
+ VERIFY3U(taskq_dispatch(vdev_file_taskq, vdev_file_io_strategy, zio,
+ TQ_SLEEP), !=, TASKQID_INVALID);
}
/* ARGSUSED */
@@ -234,12 +250,28 @@ vdev_ops_t vdev_file_ops = {
vdev_file_io_start,
vdev_file_io_done,
NULL,
+ NULL,
vdev_file_hold,
vdev_file_rele,
VDEV_TYPE_FILE, /* name of this vdev type */
B_TRUE /* leaf vdev */
};
+void
+vdev_file_init(void)
+{
+ vdev_file_taskq = taskq_create("z_vdev_file", MAX(boot_ncpus, 16),
+ minclsyspri, boot_ncpus, INT_MAX, TASKQ_DYNAMIC);
+
+ VERIFY(vdev_file_taskq);
+}
+
+void
+vdev_file_fini(void)
+{
+ taskq_destroy(vdev_file_taskq);
+}
+
/*
* From userland we access disks just like files.
*/
@@ -252,6 +284,7 @@ vdev_ops_t vdev_disk_ops = {
vdev_file_io_start,
vdev_file_io_done,
NULL,
+ NULL,
vdev_file_hold,
vdev_file_rele,
VDEV_TYPE_DISK, /* name of this vdev type */
diff --git a/zfs/module/zfs/vdev_label.c b/zfs/module/zfs/vdev_label.c
index 7f588ed6b0b5..27e79ae60e33 100644
--- a/zfs/module/zfs/vdev_label.c
+++ b/zfs/module/zfs/vdev_label.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
*/
/*
@@ -145,6 +145,7 @@
#include <sys/metaslab.h>
#include <sys/zio.h>
#include <sys/dsl_scan.h>
+#include <sys/abd.h>
#include <sys/fs/zfs.h>
/*
@@ -178,11 +179,12 @@ vdev_label_number(uint64_t psize, uint64_t offset)
}
static void
-vdev_label_read(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset,
- uint64_t size, zio_done_func_t *done, void *private, int flags)
+vdev_label_read(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset,
+ uint64_t size, zio_done_func_t *done, void *private, int flags)
{
- ASSERT(spa_config_held(zio->io_spa, SCL_STATE_ALL, RW_WRITER) ==
- SCL_STATE_ALL);
+ ASSERT(
+ spa_config_held(zio->io_spa, SCL_STATE, RW_READER) == SCL_STATE ||
+ spa_config_held(zio->io_spa, SCL_STATE, RW_WRITER) == SCL_STATE);
ASSERT(flags & ZIO_FLAG_CONFIG_WRITER);
zio_nowait(zio_read_phys(zio, vd,
@@ -191,14 +193,13 @@ vdev_label_read(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset,
ZIO_PRIORITY_SYNC_READ, flags, B_TRUE));
}
-static void
-vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset,
- uint64_t size, zio_done_func_t *done, void *private, int flags)
+void
+vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset,
+ uint64_t size, zio_done_func_t *done, void *private, int flags)
{
- ASSERT(spa_config_held(zio->io_spa, SCL_ALL, RW_WRITER) == SCL_ALL ||
- (spa_config_held(zio->io_spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
- (SCL_CONFIG | SCL_STATE) &&
- dsl_pool_sync_context(spa_get_dsl(zio->io_spa))));
+ ASSERT(
+ spa_config_held(zio->io_spa, SCL_STATE, RW_READER) == SCL_STATE ||
+ spa_config_held(zio->io_spa, SCL_STATE, RW_WRITER) == SCL_STATE);
ASSERT(flags & ZIO_FLAG_CONFIG_WRITER);
zio_nowait(zio_write_phys(zio, vd,
@@ -207,6 +208,149 @@ vdev_label_write(zio_t *zio, vdev_t *vd, int l, void *buf, uint64_t offset,
ZIO_PRIORITY_SYNC_WRITE, flags, B_TRUE));
}
+/*
+ * Generate the nvlist representing this vdev's stats
+ */
+void
+vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv)
+{
+ nvlist_t *nvx;
+ vdev_stat_t *vs;
+ vdev_stat_ex_t *vsx;
+
+ vs = kmem_alloc(sizeof (*vs), KM_SLEEP);
+ vsx = kmem_alloc(sizeof (*vsx), KM_SLEEP);
+
+ vdev_get_stats_ex(vd, vs, vsx);
+ fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
+ (uint64_t *)vs, sizeof (*vs) / sizeof (uint64_t));
+
+ kmem_free(vs, sizeof (*vs));
+
+ /*
+ * Add extended stats into a special extended stats nvlist. This keeps
+ * all the extended stats nicely grouped together. The extended stats
+ * nvlist is then added to the main nvlist.
+ */
+ nvx = fnvlist_alloc();
+
+ /* ZIOs in flight to disk */
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE,
+ vsx->vsx_active_queue[ZIO_PRIORITY_SYNC_READ]);
+
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SYNC_W_ACTIVE_QUEUE,
+ vsx->vsx_active_queue[ZIO_PRIORITY_SYNC_WRITE]);
+
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE,
+ vsx->vsx_active_queue[ZIO_PRIORITY_ASYNC_READ]);
+
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE,
+ vsx->vsx_active_queue[ZIO_PRIORITY_ASYNC_WRITE]);
+
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE,
+ vsx->vsx_active_queue[ZIO_PRIORITY_SCRUB]);
+
+ /* ZIOs pending */
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE,
+ vsx->vsx_pend_queue[ZIO_PRIORITY_SYNC_READ]);
+
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE,
+ vsx->vsx_pend_queue[ZIO_PRIORITY_SYNC_WRITE]);
+
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE,
+ vsx->vsx_pend_queue[ZIO_PRIORITY_ASYNC_READ]);
+
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE,
+ vsx->vsx_pend_queue[ZIO_PRIORITY_ASYNC_WRITE]);
+
+ fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE,
+ vsx->vsx_pend_queue[ZIO_PRIORITY_SCRUB]);
+
+ /* Histograms */
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,
+ vsx->vsx_total_histo[ZIO_TYPE_READ],
+ ARRAY_SIZE(vsx->vsx_total_histo[ZIO_TYPE_READ]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO,
+ vsx->vsx_total_histo[ZIO_TYPE_WRITE],
+ ARRAY_SIZE(vsx->vsx_total_histo[ZIO_TYPE_WRITE]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO,
+ vsx->vsx_disk_histo[ZIO_TYPE_READ],
+ ARRAY_SIZE(vsx->vsx_disk_histo[ZIO_TYPE_READ]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO,
+ vsx->vsx_disk_histo[ZIO_TYPE_WRITE],
+ ARRAY_SIZE(vsx->vsx_disk_histo[ZIO_TYPE_WRITE]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_R_LAT_HISTO,
+ vsx->vsx_queue_histo[ZIO_PRIORITY_SYNC_READ],
+ ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_SYNC_READ]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_W_LAT_HISTO,
+ vsx->vsx_queue_histo[ZIO_PRIORITY_SYNC_WRITE],
+ ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_SYNC_WRITE]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_R_LAT_HISTO,
+ vsx->vsx_queue_histo[ZIO_PRIORITY_ASYNC_READ],
+ ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_ASYNC_READ]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO,
+ vsx->vsx_queue_histo[ZIO_PRIORITY_ASYNC_WRITE],
+ ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_ASYNC_WRITE]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO,
+ vsx->vsx_queue_histo[ZIO_PRIORITY_SCRUB],
+ ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_SCRUB]));
+
+ /* Request sizes */
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO,
+ vsx->vsx_ind_histo[ZIO_PRIORITY_SYNC_READ],
+ ARRAY_SIZE(vsx->vsx_ind_histo[ZIO_PRIORITY_SYNC_READ]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_IND_W_HISTO,
+ vsx->vsx_ind_histo[ZIO_PRIORITY_SYNC_WRITE],
+ ARRAY_SIZE(vsx->vsx_ind_histo[ZIO_PRIORITY_SYNC_WRITE]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_IND_R_HISTO,
+ vsx->vsx_ind_histo[ZIO_PRIORITY_ASYNC_READ],
+ ARRAY_SIZE(vsx->vsx_ind_histo[ZIO_PRIORITY_ASYNC_READ]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO,
+ vsx->vsx_ind_histo[ZIO_PRIORITY_ASYNC_WRITE],
+ ARRAY_SIZE(vsx->vsx_ind_histo[ZIO_PRIORITY_ASYNC_WRITE]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO,
+ vsx->vsx_ind_histo[ZIO_PRIORITY_SCRUB],
+ ARRAY_SIZE(vsx->vsx_ind_histo[ZIO_PRIORITY_SCRUB]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO,
+ vsx->vsx_agg_histo[ZIO_PRIORITY_SYNC_READ],
+ ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_SYNC_READ]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO,
+ vsx->vsx_agg_histo[ZIO_PRIORITY_SYNC_WRITE],
+ ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_SYNC_WRITE]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO,
+ vsx->vsx_agg_histo[ZIO_PRIORITY_ASYNC_READ],
+ ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_ASYNC_READ]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO,
+ vsx->vsx_agg_histo[ZIO_PRIORITY_ASYNC_WRITE],
+ ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_ASYNC_WRITE]));
+
+ fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO,
+ vsx->vsx_agg_histo[ZIO_PRIORITY_SCRUB],
+ ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_SCRUB]));
+
+ /* Add extended stats nvlist to main nvlist */
+ fnvlist_add_nvlist(nv, ZPOOL_CONFIG_VDEV_STATS_EX, nvx);
+
+ fnvlist_free(nvx);
+ kmem_free(vsx, sizeof (*vsx));
+}
+
/*
* Generate the nvlist representing this vdev's config.
*/
@@ -215,7 +359,6 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
vdev_config_flag_t flags)
{
nvlist_t *nv = NULL;
-
nv = fnvlist_alloc();
fnvlist_add_string(nv, ZPOOL_CONFIG_TYPE, vd->vdev_ops->vdev_op_type);
@@ -233,6 +376,10 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
fnvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
vd->vdev_physpath);
+ if (vd->vdev_enc_sysfs_path != NULL)
+ fnvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
+ vd->vdev_enc_sysfs_path);
+
if (vd->vdev_fru != NULL)
fnvlist_add_string(nv, ZPOOL_CONFIG_FRU, vd->vdev_fru);
@@ -291,13 +438,24 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
if (vd->vdev_crtxg)
fnvlist_add_uint64(nv, ZPOOL_CONFIG_CREATE_TXG, vd->vdev_crtxg);
+ if (flags & VDEV_CONFIG_MOS) {
+ if (vd->vdev_leaf_zap != 0) {
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
+ fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_LEAF_ZAP,
+ vd->vdev_leaf_zap);
+ }
+
+ if (vd->vdev_top_zap != 0) {
+ ASSERT(vd == vd->vdev_top);
+ fnvlist_add_uint64(nv, ZPOOL_CONFIG_VDEV_TOP_ZAP,
+ vd->vdev_top_zap);
+ }
+ }
+
if (getstats) {
- vdev_stat_t vs;
pool_scan_stat_t ps;
- vdev_get_stats(vd, &vs);
- fnvlist_add_uint64_array(nv, ZPOOL_CONFIG_VDEV_STATS,
- (uint64_t *)&vs, sizeof (vs) / sizeof (uint64_t));
+ vdev_config_generate_stats(vd, nv);
/* provide either current or previous scan information */
if (spa_scan_get_stats(spa, &ps) == 0) {
@@ -361,6 +519,7 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
if (vd->vdev_ishole)
fnvlist_add_uint64(nv, ZPOOL_CONFIG_IS_HOLE, B_TRUE);
+ /* Set the reason why we're FAULTED/DEGRADED. */
switch (vd->vdev_stat.vs_aux) {
case VDEV_AUX_ERR_EXCEEDED:
aux = "err_exceeded";
@@ -371,8 +530,15 @@ vdev_config_generate(spa_t *spa, vdev_t *vd, boolean_t getstats,
break;
}
- if (aux != NULL)
+ if (aux != NULL && !vd->vdev_tmpoffline) {
fnvlist_add_string(nv, ZPOOL_CONFIG_AUX_STATE, aux);
+ } else {
+ /*
+ * We're healthy - clear any previous AUX_STATE values.
+ */
+ if (nvlist_exists(nv, ZPOOL_CONFIG_AUX_STATE))
+ nvlist_remove_all(nv, ZPOOL_CONFIG_AUX_STATE);
+ }
if (vd->vdev_splitting && vd->vdev_orig_guid != 0LL) {
fnvlist_add_uint64(nv, ZPOOL_CONFIG_ORIG_GUID,
@@ -430,6 +596,7 @@ vdev_label_read_config(vdev_t *vd, uint64_t txg)
spa_t *spa = vd->vdev_spa;
nvlist_t *config = NULL;
vdev_phys_t *vp;
+ abd_t *vp_abd;
zio_t *zio;
uint64_t best_txg = 0;
int error = 0;
@@ -442,7 +609,8 @@ vdev_label_read_config(vdev_t *vd, uint64_t txg)
if (!vdev_readable(vd))
return (NULL);
- vp = zio_buf_alloc(sizeof (vdev_phys_t));
+ vp_abd = abd_alloc_linear(sizeof (vdev_phys_t), B_TRUE);
+ vp = abd_to_buf(vp_abd);
retry:
for (l = 0; l < VDEV_LABELS; l++) {
@@ -450,7 +618,7 @@ vdev_label_read_config(vdev_t *vd, uint64_t txg)
zio = zio_root(spa, NULL, NULL, flags);
- vdev_label_read(zio, vd, l, vp,
+ vdev_label_read(zio, vd, l, vp_abd,
offsetof(vdev_label_t, vl_vdev_phys),
sizeof (vdev_phys_t), NULL, NULL, flags);
@@ -489,7 +657,7 @@ vdev_label_read_config(vdev_t *vd, uint64_t txg)
goto retry;
}
- zio_buf_free(vp, sizeof (vdev_phys_t));
+ abd_free(vp_abd);
return (config);
}
@@ -625,8 +793,10 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
spa_t *spa = vd->vdev_spa;
nvlist_t *label;
vdev_phys_t *vp;
- char *pad2;
+ abd_t *vp_abd;
+ abd_t *pad2;
uberblock_t *ub;
+ abd_t *ub_abd;
zio_t *zio;
char *buf;
size_t buflen;
@@ -710,8 +880,9 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
/*
* Initialize its label.
*/
- vp = zio_buf_alloc(sizeof (vdev_phys_t));
- bzero(vp, sizeof (vdev_phys_t));
+ vp_abd = abd_alloc_linear(sizeof (vdev_phys_t), B_TRUE);
+ abd_zero(vp_abd, sizeof (vdev_phys_t));
+ vp = abd_to_buf(vp_abd);
/*
* Generate a label describing the pool and our top-level vdev.
@@ -771,7 +942,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
error = nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP);
if (error != 0) {
nvlist_free(label);
- zio_buf_free(vp, sizeof (vdev_phys_t));
+ abd_free(vp_abd);
/* EFAULT means nvlist_pack ran out of room */
return (error == EFAULT ? ENAMETOOLONG : EINVAL);
}
@@ -779,14 +950,15 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
/*
* Initialize uberblock template.
*/
- ub = zio_buf_alloc(VDEV_UBERBLOCK_RING);
- bzero(ub, VDEV_UBERBLOCK_RING);
- *ub = spa->spa_uberblock;
+ ub_abd = abd_alloc_linear(VDEV_UBERBLOCK_RING, B_TRUE);
+ abd_zero(ub_abd, VDEV_UBERBLOCK_RING);
+ abd_copy_from_buf(ub_abd, &spa->spa_uberblock, sizeof (uberblock_t));
+ ub = abd_to_buf(ub_abd);
ub->ub_txg = 0;
/* Initialize the 2nd padding area. */
- pad2 = zio_buf_alloc(VDEV_PAD_SIZE);
- bzero(pad2, VDEV_PAD_SIZE);
+ pad2 = abd_alloc_for_io(VDEV_PAD_SIZE, B_TRUE);
+ abd_zero(pad2, VDEV_PAD_SIZE);
/*
* Write everything in parallel.
@@ -796,7 +968,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
for (l = 0; l < VDEV_LABELS; l++) {
- vdev_label_write(zio, vd, l, vp,
+ vdev_label_write(zio, vd, l, vp_abd,
offsetof(vdev_label_t, vl_vdev_phys),
sizeof (vdev_phys_t), NULL, NULL, flags);
@@ -809,7 +981,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
offsetof(vdev_label_t, vl_pad2),
VDEV_PAD_SIZE, NULL, NULL, flags);
- vdev_label_write(zio, vd, l, ub,
+ vdev_label_write(zio, vd, l, ub_abd,
offsetof(vdev_label_t, vl_uberblock),
VDEV_UBERBLOCK_RING, NULL, NULL, flags);
}
@@ -822,9 +994,9 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
}
nvlist_free(label);
- zio_buf_free(pad2, VDEV_PAD_SIZE);
- zio_buf_free(ub, VDEV_UBERBLOCK_RING);
- zio_buf_free(vp, sizeof (vdev_phys_t));
+ abd_free(pad2);
+ abd_free(ub_abd);
+ abd_free(vp_abd);
/*
* If this vdev hasn't been previously identified as a spare, then we
@@ -862,19 +1034,13 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
* among uberblocks with equal txg, choose the one with the latest timestamp.
*/
static int
-vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
+vdev_uberblock_compare(const uberblock_t *ub1, const uberblock_t *ub2)
{
- if (ub1->ub_txg < ub2->ub_txg)
- return (-1);
- if (ub1->ub_txg > ub2->ub_txg)
- return (1);
+ int cmp = AVL_CMP(ub1->ub_txg, ub2->ub_txg);
+ if (likely(cmp))
+ return (cmp);
- if (ub1->ub_timestamp < ub2->ub_timestamp)
- return (-1);
- if (ub1->ub_timestamp > ub2->ub_timestamp)
- return (1);
-
- return (0);
+ return (AVL_CMP(ub1->ub_timestamp, ub2->ub_timestamp));
}
struct ubl_cbdata {
@@ -888,7 +1054,7 @@ vdev_uberblock_load_done(zio_t *zio)
vdev_t *vd = zio->io_vd;
spa_t *spa = zio->io_spa;
zio_t *rio = zio->io_private;
- uberblock_t *ub = zio->io_data;
+ uberblock_t *ub = abd_to_buf(zio->io_abd);
struct ubl_cbdata *cbp = rio->io_private;
ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(vd));
@@ -909,24 +1075,22 @@ vdev_uberblock_load_done(zio_t *zio)
mutex_exit(&rio->io_lock);
}
- zio_buf_free(zio->io_data, zio->io_size);
+ abd_free(zio->io_abd);
}
static void
vdev_uberblock_load_impl(zio_t *zio, vdev_t *vd, int flags,
struct ubl_cbdata *cbp)
{
- int c, l, n;
-
- for (c = 0; c < vd->vdev_children; c++)
+ for (int c = 0; c < vd->vdev_children; c++)
vdev_uberblock_load_impl(zio, vd->vdev_child[c], flags, cbp);
if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) {
- for (l = 0; l < VDEV_LABELS; l++) {
- for (n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
+ for (int l = 0; l < VDEV_LABELS; l++) {
+ for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
vdev_label_read(zio, vd, l,
- zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd)),
- VDEV_UBERBLOCK_OFFSET(vd, n),
+ abd_alloc_linear(VDEV_UBERBLOCK_SIZE(vd),
+ B_TRUE), VDEV_UBERBLOCK_OFFSET(vd, n),
VDEV_UBERBLOCK_SIZE(vd),
vdev_uberblock_load_done, zio, flags);
}
@@ -974,6 +1138,60 @@ vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config)
spa_config_exit(spa, SCL_ALL, FTAG);
}
+/*
+ * For use when a leaf vdev is expanded.
+ * The location of labels 2 and 3 changed, and at the new location the
+ * uberblock rings are either empty or contain garbage. The sync will write
+ * new configs there because the vdev is dirty, but expansion also needs the
+ * uberblock rings copied. Read them from label 0 which did not move.
+ *
+ * Since the point is to populate labels {2,3} with valid uberblocks,
+ * we zero uberblocks we fail to read or which are not valid.
+ */
+
+static void
+vdev_copy_uberblocks(vdev_t *vd)
+{
+ abd_t *ub_abd;
+ zio_t *write_zio;
+ int locks = (SCL_L2ARC | SCL_ZIO);
+ int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
+ ZIO_FLAG_SPECULATIVE;
+
+ ASSERT(spa_config_held(vd->vdev_spa, SCL_STATE, RW_READER) ==
+ SCL_STATE);
+ ASSERT(vd->vdev_ops->vdev_op_leaf);
+
+ spa_config_enter(vd->vdev_spa, locks, FTAG, RW_READER);
+
+ ub_abd = abd_alloc_linear(VDEV_UBERBLOCK_SIZE(vd), B_TRUE);
+
+ write_zio = zio_root(vd->vdev_spa, NULL, NULL, flags);
+ for (int n = 0; n < VDEV_UBERBLOCK_COUNT(vd); n++) {
+ const int src_label = 0;
+ zio_t *zio;
+
+ zio = zio_root(vd->vdev_spa, NULL, NULL, flags);
+ vdev_label_read(zio, vd, src_label, ub_abd,
+ VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd),
+ NULL, NULL, flags);
+
+ if (zio_wait(zio) || uberblock_verify(abd_to_buf(ub_abd)))
+ abd_zero(ub_abd, VDEV_UBERBLOCK_SIZE(vd));
+
+ for (int l = 2; l < VDEV_LABELS; l++)
+ vdev_label_write(write_zio, vd, l, ub_abd,
+ VDEV_UBERBLOCK_OFFSET(vd, n),
+ VDEV_UBERBLOCK_SIZE(vd), NULL, NULL,
+ flags | ZIO_FLAG_DONT_PROPAGATE);
+ }
+ (void) zio_wait(write_zio);
+
+ spa_config_exit(vd->vdev_spa, locks, FTAG);
+
+ abd_free(ub_abd);
+}
+
/*
* On success, increment root zio's count of good writes.
* We only get credit for writes to known-visible vdevs; see spa_vdev_add().
@@ -984,7 +1202,7 @@ vdev_uberblock_sync_done(zio_t *zio)
uint64_t *good_writes = zio->io_private;
if (zio->io_error == 0 && zio->io_vd->vdev_top->vdev_ms_array != 0)
- atomic_add_64(good_writes, 1);
+ atomic_inc_64(good_writes);
}
/*
@@ -993,10 +1211,7 @@ vdev_uberblock_sync_done(zio_t *zio)
static void
vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags)
{
- uberblock_t *ubbuf;
- int c, l, n;
-
- for (c = 0; c < vd->vdev_children; c++)
+ for (int c = 0; c < vd->vdev_children; c++)
vdev_uberblock_sync(zio, ub, vd->vdev_child[c], flags);
if (!vd->vdev_ops->vdev_op_leaf)
@@ -1005,19 +1220,28 @@ vdev_uberblock_sync(zio_t *zio, uberblock_t *ub, vdev_t *vd, int flags)
if (!vdev_writeable(vd))
return;
- n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1);
+ /* If the vdev was expanded, need to copy uberblock rings. */
+ if (vd->vdev_state == VDEV_STATE_HEALTHY &&
+ vd->vdev_copy_uberblocks == B_TRUE) {
+ vdev_copy_uberblocks(vd);
+ vd->vdev_copy_uberblocks = B_FALSE;
+ }
- ubbuf = zio_buf_alloc(VDEV_UBERBLOCK_SIZE(vd));
- bzero(ubbuf, VDEV_UBERBLOCK_SIZE(vd));
- *ubbuf = *ub;
+ int m = spa_multihost(vd->vdev_spa) ? MMP_BLOCKS_PER_LABEL : 0;
+ int n = ub->ub_txg % (VDEV_UBERBLOCK_COUNT(vd) - m);
- for (l = 0; l < VDEV_LABELS; l++)
- vdev_label_write(zio, vd, l, ubbuf,
+ /* Copy the uberblock_t into the ABD */
+ abd_t *ub_abd = abd_alloc_for_io(VDEV_UBERBLOCK_SIZE(vd), B_TRUE);
+ abd_zero(ub_abd, VDEV_UBERBLOCK_SIZE(vd));
+ abd_copy_from_buf(ub_abd, ub, sizeof (uberblock_t));
+
+ for (int l = 0; l < VDEV_LABELS; l++)
+ vdev_label_write(zio, vd, l, ub_abd,
VDEV_UBERBLOCK_OFFSET(vd, n), VDEV_UBERBLOCK_SIZE(vd),
vdev_uberblock_sync_done, zio->io_private,
flags | ZIO_FLAG_DONT_PROPAGATE);
- zio_buf_free(ubbuf, VDEV_UBERBLOCK_SIZE(vd));
+ abd_free(ub_abd);
}
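
Note the switch from the old power-of-two mask (ub_txg & (COUNT - 1)) to a
modulo: with multihost enabled the ring loses MMP_BLOCKS_PER_LABEL slots, so
the usable count is no longer guaranteed to be a power of two. A worked sketch
of the slot arithmetic (ring size and reservation are illustrative values, not
taken from this patch):

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	const int count = 128;	/* illustrative uberblock ring size */
	const int mmp_blocks_per_label = 1;

	for (int multihost = 0; multihost <= 1; multihost++) {
		int m = multihost ? mmp_blocks_per_label : 0;
		uint64_t txg = 1000;
		/* same expression as the patched vdev_uberblock_sync() */
		int n = (int)(txg % (count - m));
		printf("multihost=%d: txg %llu -> slot %d of %d\n",
		    multihost, (unsigned long long)txg, n, count - m);
	}
	return (0);
}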
/* Sync the uberblocks to all vdevs in svd[] */
@@ -1060,7 +1284,7 @@ vdev_label_sync_done(zio_t *zio)
uint64_t *good_writes = zio->io_private;
if (zio->io_error == 0)
- atomic_add_64(good_writes, 1);
+ atomic_inc_64(good_writes);
}
/*
@@ -1094,6 +1318,7 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags)
{
nvlist_t *label;
vdev_phys_t *vp;
+ abd_t *vp_abd;
char *buf;
size_t buflen;
int c;
@@ -1112,15 +1337,16 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags)
*/
label = spa_config_generate(vd->vdev_spa, vd, txg, B_FALSE);
- vp = zio_buf_alloc(sizeof (vdev_phys_t));
- bzero(vp, sizeof (vdev_phys_t));
+ vp_abd = abd_alloc_linear(sizeof (vdev_phys_t), B_TRUE);
+ abd_zero(vp_abd, sizeof (vdev_phys_t));
+ vp = abd_to_buf(vp_abd);
buf = vp->vp_nvlist;
buflen = sizeof (vp->vp_nvlist);
if (!nvlist_pack(label, &buf, &buflen, NV_ENCODE_XDR, KM_SLEEP)) {
for (; l < VDEV_LABELS; l += 2) {
- vdev_label_write(zio, vd, l, vp,
+ vdev_label_write(zio, vd, l, vp_abd,
offsetof(vdev_label_t, vl_vdev_phys),
sizeof (vdev_phys_t),
vdev_label_sync_done, zio->io_private,
@@ -1128,7 +1354,7 @@ vdev_label_sync(zio_t *zio, vdev_t *vd, int l, uint64_t txg, int flags)
}
}
- zio_buf_free(vp, sizeof (vdev_phys_t));
+ abd_free(vp_abd);
nvlist_free(label);
}
@@ -1187,15 +1413,16 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags)
* at any time, you can just call it again, and it will resume its work.
*/
int
-vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, boolean_t tryhard)
+vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
{
spa_t *spa = svd[0]->vdev_spa;
uberblock_t *ub = &spa->spa_uberblock;
vdev_t *vd;
zio_t *zio;
- int error;
+ int error = 0;
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
+retry:
/*
* Normally, we don't want to try too hard to write every label and
* uberblock. If there is a flaky disk, we don't want the rest of the
@@ -1203,8 +1430,11 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, boolean_t tryhard)
* single label out, we should retry with ZIO_FLAG_TRYHARD before
* bailing out and declaring the pool faulted.
*/
- if (tryhard)
+ if (error != 0) {
+ if ((flags & ZIO_FLAG_TRYHARD) != 0)
+ return (error);
flags |= ZIO_FLAG_TRYHARD;
+ }
ASSERT(ub->ub_txg <= txg);
@@ -1214,10 +1444,13 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, boolean_t tryhard)
* and the vdev configuration hasn't changed,
* then there's nothing to do.
*/
- if (ub->ub_txg < txg &&
- uberblock_update(ub, spa->spa_root_vdev, txg) == B_FALSE &&
- list_is_empty(&spa->spa_config_dirty_list))
- return (0);
+ if (ub->ub_txg < txg) {
+ boolean_t changed = uberblock_update(ub, spa->spa_root_vdev,
+ txg, spa->spa_mmp.mmp_delay);
+
+ if (!changed && list_is_empty(&spa->spa_config_dirty_list))
+ return (0);
+ }
if (txg > spa_freeze_txg(spa))
return (0);
@@ -1248,7 +1481,7 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, boolean_t tryhard)
* are committed to stable storage before the uberblock update.
*/
if ((error = vdev_label_sync_list(spa, 0, txg, flags)) != 0)
- return (error);
+ goto retry;
/*
* Sync the uberblocks to all vdevs in svd[].
@@ -1266,7 +1499,11 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, boolean_t tryhard)
* to the new uberblocks.
*/
if ((error = vdev_uberblock_sync_list(svd, svdcount, ub, flags)) != 0)
- return (error);
+ goto retry;
+
+ if (spa_multihost(spa))
+ mmp_update_uberblock(spa, ub);
/*
* Sync out odd labels for every dirty vdev. If the system dies
@@ -1278,5 +1515,8 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, boolean_t tryhard)
* to disk to ensure that all odd-label updates are committed to
* stable storage before the next transaction group begins.
*/
- return (vdev_label_sync_list(spa, 1, txg, flags));
+ if ((error = vdev_label_sync_list(spa, 1, txg, flags)) != 0)
+ goto retry;
+
+ return (0);
}
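
The tryhard argument is gone because the function now retries itself: on the
first failure it sets ZIO_FLAG_TRYHARD and jumps back to the top, and a second
failure is returned to the caller. A minimal sketch of that control flow
(do_pass() is a hypothetical stand-in for the label/uberblock write passes):

#include <stdio.h>

#define FLAG_TRYHARD	0x1

/* Hypothetical stand-in for one label/uberblock write pass. */
static int
do_pass(int flags)
{
	/* pretend the normal attempt fails with EIO (5), the tryhard one works */
	return ((flags & FLAG_TRYHARD) ? 0 : 5);
}

static int
config_sync(void)
{
	int error = 0;
	int flags = 0;

retry:
	if (error != 0) {
		if ((flags & FLAG_TRYHARD) != 0)
			return (error);		/* already retried: give up */
		flags |= FLAG_TRYHARD;		/* retry once, trying harder */
	}
	if ((error = do_pass(flags)) != 0)
		goto retry;
	return (0);
}

int
main(void)
{
	printf("config_sync() = %d\n", config_sync());	/* 0 after one retry */
	return (0);
}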
diff --git a/zfs/module/zfs/vdev_mirror.c b/zfs/module/zfs/vdev_mirror.c
index 6b699e883e37..15d1f204ffed 100644
--- a/zfs/module/zfs/vdev_mirror.c
+++ b/zfs/module/zfs/vdev_mirror.c
@@ -24,13 +24,14 @@
*/
/*
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/vdev_impl.h>
#include <sys/zio.h>
+#include <sys/abd.h>
#include <sys/fs/zfs.h>
/*
@@ -41,44 +42,70 @@ typedef struct mirror_child {
vdev_t *mc_vd;
uint64_t mc_offset;
int mc_error;
- int mc_pending;
+ int mc_load;
uint8_t mc_tried;
uint8_t mc_skipped;
uint8_t mc_speculative;
} mirror_child_t;
typedef struct mirror_map {
+ int *mm_preferred;
+ int mm_preferred_cnt;
int mm_children;
- int mm_replacing;
- int mm_preferred;
- int mm_root;
- mirror_child_t mm_child[1];
+ boolean_t mm_replacing;
+ boolean_t mm_root;
+ mirror_child_t mm_child[];
} mirror_map_t;
+static int vdev_mirror_shift = 21;
+
/*
- * When the children are equally busy queue incoming requests to a single
- * child for N microseconds. This is done to maximize the likelihood that
- * the Linux elevator will be able to merge requests while it is plugged.
- * Otherwise, requests are queued to the least busy device.
- *
- * For rotational disks the Linux elevator will plug for 10ms which is
- * why zfs_vdev_mirror_switch_us is set to 10ms by default. For non-
- * rotational disks the elevator will not plug, but 10ms is still a small
- * enough value that the requests will get spread over all the children.
+ * The load configuration settings below are tuned by default for
+ * the case where all devices are of the same rotational type.
*
- * For fast SSDs it may make sense to decrease zfs_vdev_mirror_switch_us
- * significantly to bound the worst case latencies. It would probably be
- * ideal to calculate a decaying average of the last observed latencies and
- * use that to dynamically adjust the zfs_vdev_mirror_switch_us time.
+ * If there is a mixture of rotating and non-rotating media, setting
+ * zfs_vdev_mirror_non_rotating_seek_inc to 0 may well provide better results
+ * as it will direct more reads to the non-rotating vdevs, which are more
+ * likely to deliver higher performance.
*/
-int zfs_vdev_mirror_switch_us = 10000;
+
+/* Rotating media load calculation configuration. */
+static int zfs_vdev_mirror_rotating_inc = 0;
+static int zfs_vdev_mirror_rotating_seek_inc = 5;
+static int zfs_vdev_mirror_rotating_seek_offset = 1 * 1024 * 1024;
+
+/* Non-rotating media load calculation configuration. */
+static int zfs_vdev_mirror_non_rotating_inc = 0;
+static int zfs_vdev_mirror_non_rotating_seek_inc = 1;
+
+static inline size_t
+vdev_mirror_map_size(int children)
+{
+ return (offsetof(mirror_map_t, mm_child[children]) +
+ sizeof (int) * children);
+}
+
+static inline mirror_map_t *
+vdev_mirror_map_alloc(int children, boolean_t replacing, boolean_t root)
+{
+ mirror_map_t *mm;
+
+ mm = kmem_zalloc(vdev_mirror_map_size(children), KM_SLEEP);
+ mm->mm_children = children;
+ mm->mm_replacing = replacing;
+ mm->mm_root = root;
+ mm->mm_preferred = (int *)((uintptr_t)mm +
+ offsetof(mirror_map_t, mm_child[children]));
+
+ return (mm);
+}
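
The map is a single allocation: the flexible mm_child[] array is followed in
the same block by the mm_preferred[] int array, which mm_preferred is pointed
at via offsetof. A self-contained sketch of this layout trick using plain
calloc (offsetof with a runtime index is a compiler extension, and is exactly
what the code above relies on):

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <stdint.h>

typedef struct child { int c_load; } child_t;

typedef struct map {
	int *m_preferred;	/* points into the same allocation */
	int m_children;
	child_t m_child[];	/* flexible array member */
} map_t;

static size_t
map_size(int children)
{
	/* children child_t's, then children ints for the preferred list */
	return (offsetof(map_t, m_child[children]) + sizeof (int) * children);
}

static map_t *
map_alloc(int children)
{
	map_t *m = calloc(1, map_size(children));
	if (m == NULL)
		return (NULL);
	m->m_children = children;
	m->m_preferred = (int *)((uintptr_t)m +
	    offsetof(map_t, m_child[children]));
	return (m);
}

int
main(void)
{
	map_t *m = map_alloc(3);
	m->m_preferred[0] = 2;	/* lives past m_child[2], still in-bounds */
	printf("size=%zu preferred[0]=%d\n", map_size(3), m->m_preferred[0]);
	free(m);
	return (0);
}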
static void
vdev_mirror_map_free(zio_t *zio)
{
mirror_map_t *mm = zio->io_vsd;
- kmem_free(mm, offsetof(mirror_map_t, mm_child[mm->mm_children]));
+ kmem_free(mm, vdev_mirror_map_size(mm->mm_children));
}
static const zio_vsd_ops_t vdev_mirror_vsd_ops = {
@@ -87,9 +114,54 @@ static const zio_vsd_ops_t vdev_mirror_vsd_ops = {
};
static int
-vdev_mirror_pending(vdev_t *vd)
+vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset)
{
- return (avl_numnodes(&vd->vdev_queue.vq_active_tree));
+ uint64_t lastoffset;
+ int load;
+
+ /* All DVAs have equal weight at the root. */
+ if (mm->mm_root)
+ return (INT_MAX);
+
+ /*
+ * We don't return INT_MAX if the device is resilvering (i.e.
+ * vdev_resilver_txg != 0): in testing, overall performance was slightly
+ * worse with that exclusion than without it.
+ */
+
+ /* Standard load based on pending queue length. */
+ load = vdev_queue_length(vd);
+ lastoffset = vdev_queue_lastoffset(vd);
+
+ if (vd->vdev_nonrot) {
+ /* Non-rotating media. */
+ if (lastoffset == zio_offset)
+ return (load + zfs_vdev_mirror_non_rotating_inc);
+
+ /*
+ * Apply a seek penalty even for non-rotating devices as
+ * sequential I/O's can be aggregated into fewer operations on
+ * the device, thus avoiding unnecessary per-command overhead
+ * and boosting performance.
+ */
+ return (load + zfs_vdev_mirror_non_rotating_seek_inc);
+ }
+
+ /* Rotating media I/O's which directly follow the last I/O. */
+ if (lastoffset == zio_offset)
+ return (load + zfs_vdev_mirror_rotating_inc);
+
+ /*
+ * Apply half the seek increment to I/O's within seek offset
+ * of the last I/O queued to this vdev, as they should incur less
+ * of a seek penalty.
+ */
+ if (ABS(lastoffset - zio_offset) <
+ zfs_vdev_mirror_rotating_seek_offset)
+ return (load + (zfs_vdev_mirror_rotating_seek_inc / 2));
+
+ /* Apply the full seek increment to all other I/O's. */
+ return (load + zfs_vdev_mirror_rotating_seek_inc);
}
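
So a child's load is its pending queue length plus a media-aware increment,
with nearby rotating-media seeks charged half the increment. With this patch's
defaults a toy calculation looks like this (a standalone model, not the kernel
code; vdev state is faked with plain parameters):

#include <stdio.h>
#include <stdint.h>

/* Defaults from this patch. */
static int rotating_inc = 0;
static int rotating_seek_inc = 5;
static uint64_t rotating_seek_offset = 1 * 1024 * 1024;
static int non_rotating_inc = 0;
static int non_rotating_seek_inc = 1;

/* Toy model: pending I/O count plus a media-aware seek increment. */
static int
mirror_load(int nonrot, int pending, uint64_t last, uint64_t off)
{
	uint64_t dist = (last > off) ? last - off : off - last;

	if (nonrot)
		return (pending + (dist == 0 ?
		    non_rotating_inc : non_rotating_seek_inc));
	if (dist == 0)
		return (pending + rotating_inc);
	if (dist < rotating_seek_offset)
		return (pending + rotating_seek_inc / 2);	/* nearby seek */
	return (pending + rotating_seek_inc);			/* full seek */
}

int
main(void)
{
	printf("ssd, sequential: %d\n", mirror_load(1, 4, 4096, 4096));
	printf("hdd, short seek: %d\n", mirror_load(0, 4, 4096, 65536));
	printf("hdd, long seek:  %d\n", mirror_load(0, 4, 4096, 16ULL << 20));
	return (0);
}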
/*
@@ -97,38 +169,19 @@ vdev_mirror_pending(vdev_t *vd)
* is this function's only caller, as small as possible on the stack.
*/
noinline static mirror_map_t *
-vdev_mirror_map_alloc(zio_t *zio)
+vdev_mirror_map_init(zio_t *zio)
{
mirror_map_t *mm = NULL;
mirror_child_t *mc;
vdev_t *vd = zio->io_vd;
- int c, d;
+ int c;
if (vd == NULL) {
dva_t *dva = zio->io_bp->blk_dva;
spa_t *spa = zio->io_spa;
- c = BP_GET_NDVAS(zio->io_bp);
-
- mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]),
- KM_SLEEP);
- mm->mm_children = c;
- mm->mm_replacing = B_FALSE;
- mm->mm_preferred = spa_get_random(c);
- mm->mm_root = B_TRUE;
-
- /*
- * Check the other, lower-index DVAs to see if they're on
- * the same vdev as the child we picked. If they are, use
- * them since they are likely to have been allocated from
- * the primary metaslab in use at the time, and hence are
- * more likely to have locality with single-copy data.
- */
- for (c = mm->mm_preferred, d = c - 1; d >= 0; d--) {
- if (DVA_GET_VDEV(&dva[d]) == DVA_GET_VDEV(&dva[c]))
- mm->mm_preferred = d;
- }
-
+ mm = vdev_mirror_map_alloc(BP_GET_NDVAS(zio->io_bp), B_FALSE,
+ B_TRUE);
for (c = 0; c < mm->mm_children; c++) {
mc = &mm->mm_child[c];
@@ -136,56 +189,13 @@ vdev_mirror_map_alloc(zio_t *zio)
mc->mc_offset = DVA_GET_OFFSET(&dva[c]);
}
} else {
- int lowest_pending = INT_MAX;
- int lowest_nr = 1;
-
- c = vd->vdev_children;
-
- mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]),
- KM_SLEEP);
- mm->mm_children = c;
- mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops ||
- vd->vdev_ops == &vdev_spare_ops);
- mm->mm_preferred = 0;
- mm->mm_root = B_FALSE;
-
+ mm = vdev_mirror_map_alloc(vd->vdev_children,
+ (vd->vdev_ops == &vdev_replacing_ops ||
+ vd->vdev_ops == &vdev_spare_ops), B_FALSE);
for (c = 0; c < mm->mm_children; c++) {
mc = &mm->mm_child[c];
mc->mc_vd = vd->vdev_child[c];
mc->mc_offset = zio->io_offset;
-
- if (mm->mm_replacing)
- continue;
-
- if (!vdev_readable(mc->mc_vd)) {
- mc->mc_error = SET_ERROR(ENXIO);
- mc->mc_tried = 1;
- mc->mc_skipped = 1;
- mc->mc_pending = INT_MAX;
- continue;
- }
-
- mc->mc_pending = vdev_mirror_pending(mc->mc_vd);
- if (mc->mc_pending < lowest_pending) {
- lowest_pending = mc->mc_pending;
- lowest_nr = 1;
- } else if (mc->mc_pending == lowest_pending) {
- lowest_nr++;
- }
- }
-
- d = gethrtime() / (NSEC_PER_USEC * zfs_vdev_mirror_switch_us);
- d = (d % lowest_nr) + 1;
-
- for (c = 0; c < mm->mm_children; c++) {
- mc = &mm->mm_child[c];
-
- if (mm->mm_child[c].mc_pending == lowest_pending) {
- if (--d == 0) {
- mm->mm_preferred = c;
- break;
- }
- }
}
}
@@ -257,18 +267,19 @@ vdev_mirror_scrub_done(zio_t *zio)
if (zio->io_error == 0) {
zio_t *pio;
+ zio_link_t *zl = NULL;
mutex_enter(&zio->io_lock);
- while ((pio = zio_walk_parents(zio)) != NULL) {
+ while ((pio = zio_walk_parents(zio, &zl)) != NULL) {
mutex_enter(&pio->io_lock);
ASSERT3U(zio->io_size, >=, pio->io_size);
- bcopy(zio->io_data, pio->io_data, pio->io_size);
+ abd_copy(pio->io_abd, zio->io_abd, pio->io_size);
mutex_exit(&pio->io_lock);
}
mutex_exit(&zio->io_lock);
}
- zio_buf_free(zio->io_data, zio->io_size);
+ abd_free(zio->io_abd);
mc->mc_error = zio->io_error;
mc->mc_tried = 1;
@@ -276,6 +287,54 @@ vdev_mirror_scrub_done(zio_t *zio)
}
/*
+ * Check the other, lower-index DVAs to see if they're on the same
+ * vdev as the child we picked. If they are, use them since they
+ * are likely to have been allocated from the primary metaslab in
+ * use at the time, and hence are more likely to have locality with
+ * single-copy data.
+ */
+static int
+vdev_mirror_dva_select(zio_t *zio, int p)
+{
+ dva_t *dva = zio->io_bp->blk_dva;
+ mirror_map_t *mm = zio->io_vsd;
+ int preferred;
+ int c;
+
+ preferred = mm->mm_preferred[p];
+ for (p--; p >= 0; p--) {
+ c = mm->mm_preferred[p];
+ if (DVA_GET_VDEV(&dva[c]) == DVA_GET_VDEV(&dva[preferred]))
+ preferred = c;
+ }
+ return (preferred);
+}
+
+static int
+vdev_mirror_preferred_child_randomize(zio_t *zio)
+{
+ mirror_map_t *mm = zio->io_vsd;
+ int p;
+
+ if (mm->mm_root) {
+ p = spa_get_random(mm->mm_preferred_cnt);
+ return (vdev_mirror_dva_select(zio, p));
+ }
+
+ /*
+ * To ensure we don't always favour the first matching vdev,
+ * which could lead to wear-leveling issues on SSDs, we
+ * use the I/O offset as a pseudo-random seed into the vdevs
+ * which have the lowest load.
+ */
+ p = (zio->io_offset >> vdev_mirror_shift) % mm->mm_preferred_cnt;
+ return (mm->mm_preferred[p]);
+}
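
With the default vdev_mirror_shift of 21, every 2 MiB region of the device
maps deterministically onto one of the lowest-load children, so repeated reads
of a region hit the same child while distinct regions spread across the set.
A tiny sketch of the selection:

#include <stdio.h>
#include <stdint.h>

/* Default from this patch: 2 MiB regions. */
static int mirror_shift = 21;

static int
pick_preferred(uint64_t io_offset, int preferred_cnt)
{
	/* I/O offset as a pseudo-random seed into the lowest-load children. */
	return ((int)((io_offset >> mirror_shift) % preferred_cnt));
}

int
main(void)
{
	uint64_t offs[] = { 0, 1 << 20, 2 << 20, 6 << 20 };
	for (int i = 0; i < 4; i++)
		printf("offset %8llu -> child %d\n",
		    (unsigned long long)offs[i], pick_preferred(offs[i], 3));
	return (0);
}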
+
+/*
+ * Try to find a vdev whose DTL doesn't contain the block we want to read
+ * preferring vdevs based on their determined load.
+ *
* Try to find a child whose DTL doesn't contain the block we want to read.
* If we can't, try the read on any vdev we haven't already tried.
*/
@@ -283,43 +342,70 @@ static int
vdev_mirror_child_select(zio_t *zio)
{
mirror_map_t *mm = zio->io_vsd;
- mirror_child_t *mc;
uint64_t txg = zio->io_txg;
- int i, c;
+ int c, lowest_load;
ASSERT(zio->io_bp == NULL || BP_PHYSICAL_BIRTH(zio->io_bp) == txg);
- /*
- * Try to find a child whose DTL doesn't contain the block to read.
- * If a child is known to be completely inaccessible (indicated by
- * vdev_readable() returning B_FALSE), don't even try.
- */
- for (i = 0, c = mm->mm_preferred; i < mm->mm_children; i++, c++) {
- if (c >= mm->mm_children)
- c = 0;
+ lowest_load = INT_MAX;
+ mm->mm_preferred_cnt = 0;
+ for (c = 0; c < mm->mm_children; c++) {
+ mirror_child_t *mc;
+
mc = &mm->mm_child[c];
if (mc->mc_tried || mc->mc_skipped)
continue;
+
if (mc->mc_vd == NULL || !vdev_readable(mc->mc_vd)) {
mc->mc_error = SET_ERROR(ENXIO);
mc->mc_tried = 1; /* don't even try */
mc->mc_skipped = 1;
continue;
}
- if (!vdev_dtl_contains(mc->mc_vd, DTL_MISSING, txg, 1))
- return (c);
- mc->mc_error = SET_ERROR(ESTALE);
- mc->mc_skipped = 1;
- mc->mc_speculative = 1;
+
+ if (vdev_dtl_contains(mc->mc_vd, DTL_MISSING, txg, 1)) {
+ mc->mc_error = SET_ERROR(ESTALE);
+ mc->mc_skipped = 1;
+ mc->mc_speculative = 1;
+ continue;
+ }
+
+ mc->mc_load = vdev_mirror_load(mm, mc->mc_vd, mc->mc_offset);
+ if (mc->mc_load > lowest_load)
+ continue;
+
+ if (mc->mc_load < lowest_load) {
+ lowest_load = mc->mc_load;
+ mm->mm_preferred_cnt = 0;
+ }
+ mm->mm_preferred[mm->mm_preferred_cnt] = c;
+ mm->mm_preferred_cnt++;
+ }
+
+ if (mm->mm_preferred_cnt == 1) {
+ vdev_queue_register_lastoffset(
+ mm->mm_child[mm->mm_preferred[0]].mc_vd, zio);
+ return (mm->mm_preferred[0]);
+ }
+
+ if (mm->mm_preferred_cnt > 1) {
+ int c = vdev_mirror_preferred_child_randomize(zio);
+
+ vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd, zio);
+ return (c);
}
/*
* Every device is either missing or has this txg in its DTL.
* Look for any child we haven't already tried before giving up.
*/
- for (c = 0; c < mm->mm_children; c++)
- if (!mm->mm_child[c].mc_tried)
+ for (c = 0; c < mm->mm_children; c++) {
+ if (!mm->mm_child[c].mc_tried) {
+ vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd,
+ zio);
return (c);
+ }
+ }
/*
* Every child failed. There's no place left to look.
@@ -334,7 +420,7 @@ vdev_mirror_io_start(zio_t *zio)
mirror_child_t *mc;
int c, children;
- mm = vdev_mirror_map_alloc(zio);
+ mm = vdev_mirror_map_init(zio);
if (zio->io_type == ZIO_TYPE_READ) {
if ((zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_replacing) {
@@ -348,7 +434,8 @@ vdev_mirror_io_start(zio_t *zio)
mc = &mm->mm_child[c];
zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
mc->mc_vd, mc->mc_offset,
- zio_buf_alloc(zio->io_size), zio->io_size,
+ abd_alloc_sametype(zio->io_abd,
+ zio->io_size), zio->io_size,
zio->io_type, zio->io_priority, 0,
vdev_mirror_scrub_done, mc));
}
@@ -373,7 +460,7 @@ vdev_mirror_io_start(zio_t *zio)
while (children--) {
mc = &mm->mm_child[c];
zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
- mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size,
+ mc->mc_vd, mc->mc_offset, zio->io_abd, zio->io_size,
zio->io_type, zio->io_priority, 0,
vdev_mirror_child_done, mc));
c++;
@@ -458,7 +545,7 @@ vdev_mirror_io_done(zio_t *zio)
mc = &mm->mm_child[c];
zio_vdev_io_redone(zio);
zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
- mc->mc_vd, mc->mc_offset, zio->io_data, zio->io_size,
+ mc->mc_vd, mc->mc_offset, zio->io_abd, zio->io_size,
ZIO_TYPE_READ, zio->io_priority, 0,
vdev_mirror_child_done, mc));
return;
@@ -499,7 +586,7 @@ vdev_mirror_io_done(zio_t *zio)
zio_nowait(zio_vdev_child_io(zio, zio->io_bp,
mc->mc_vd, mc->mc_offset,
- zio->io_data, zio->io_size,
+ zio->io_abd, zio->io_size,
ZIO_TYPE_WRITE, ZIO_PRIORITY_ASYNC_WRITE,
ZIO_FLAG_IO_REPAIR | (unexpected_errors ?
ZIO_FLAG_SELF_HEAL : 0), NULL, NULL));
@@ -528,6 +615,7 @@ vdev_ops_t vdev_mirror_ops = {
vdev_mirror_state_change,
NULL,
NULL,
+ NULL,
VDEV_TYPE_MIRROR, /* name of this vdev type */
B_FALSE /* not a leaf vdev */
};
@@ -541,6 +629,7 @@ vdev_ops_t vdev_replacing_ops = {
vdev_mirror_state_change,
NULL,
NULL,
+ NULL,
VDEV_TYPE_REPLACING, /* name of this vdev type */
B_FALSE /* not a leaf vdev */
};
@@ -554,11 +643,33 @@ vdev_ops_t vdev_spare_ops = {
vdev_mirror_state_change,
NULL,
NULL,
+ NULL,
VDEV_TYPE_SPARE, /* name of this vdev type */
B_FALSE /* not a leaf vdev */
};
#if defined(_KERNEL) && defined(HAVE_SPL)
-module_param(zfs_vdev_mirror_switch_us, int, 0644);
-MODULE_PARM_DESC(zfs_vdev_mirror_switch_us, "Switch mirrors every N usecs");
+/* BEGIN CSTYLED */
+module_param(zfs_vdev_mirror_rotating_inc, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_mirror_rotating_inc,
+ "Rotating media load increment for non-seeking I/O's");
+
+module_param(zfs_vdev_mirror_rotating_seek_inc, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_mirror_rotating_seek_inc,
+ "Rotating media load increment for seeking I/O's");
+
+module_param(zfs_vdev_mirror_rotating_seek_offset, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_mirror_rotating_seek_offset,
+ "Offset in bytes from the last I/O which "
+ "triggers a reduced rotating media seek increment");
+
+module_param(zfs_vdev_mirror_non_rotating_inc, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_mirror_non_rotating_inc,
+ "Non-rotating media load increment for non-seeking I/O's");
+
+module_param(zfs_vdev_mirror_non_rotating_seek_inc, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_mirror_non_rotating_seek_inc,
+ "Non-rotating media load increment for seeking I/O's");
+/* END CSTYLED */
#endif
diff --git a/zfs/module/zfs/vdev_missing.c b/zfs/module/zfs/vdev_missing.c
index 228757334234..d7d017fb8fbe 100644
--- a/zfs/module/zfs/vdev_missing.c
+++ b/zfs/module/zfs/vdev_missing.c
@@ -88,6 +88,7 @@ vdev_ops_t vdev_missing_ops = {
NULL,
NULL,
NULL,
+ NULL,
VDEV_TYPE_MISSING, /* name of this vdev type */
B_TRUE /* leaf vdev */
};
@@ -101,6 +102,7 @@ vdev_ops_t vdev_hole_ops = {
NULL,
NULL,
NULL,
+ NULL,
VDEV_TYPE_HOLE, /* name of this vdev type */
B_TRUE /* leaf vdev */
};
diff --git a/zfs/module/zfs/vdev_queue.c b/zfs/module/zfs/vdev_queue.c
index 49f6fc57a573..6b3e87291590 100644
--- a/zfs/module/zfs/vdev_queue.c
+++ b/zfs/module/zfs/vdev_queue.c
@@ -33,9 +33,11 @@
#include <sys/zio.h>
#include <sys/avl.h>
#include <sys/dsl_pool.h>
+#include <sys/metaslab_impl.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/kstat.h>
+#include <sys/abd.h>
/*
* ZFS I/O Scheduler
@@ -171,23 +173,35 @@ int zfs_vdev_aggregation_limit = SPA_OLD_MAXBLOCKSIZE;
int zfs_vdev_read_gap_limit = 32 << 10;
int zfs_vdev_write_gap_limit = 4 << 10;
+/*
+ * Define the queue depth percentage for each top-level vdev. This percentage
+ * is used in conjunction with zfs_vdev_async_write_max_active to determine
+ * how many allocations a specific top-level vdev should handle. Once the
+ * queue depth reaches
+ * zfs_vdev_queue_depth_pct * zfs_vdev_async_write_max_active / 100,
+ * the allocator will stop allocating blocks on that top-level device.
+ * The default kernel setting is 1000%, which will yield 100 allocations per
+ * device. For userland testing, the default setting is 300%, which equates
+ * to 30 allocations per device.
+ */
+#ifdef _KERNEL
+int zfs_vdev_queue_depth_pct = 1000;
+#else
+int zfs_vdev_queue_depth_pct = 300;
+#endif
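
Assuming zfs_vdev_async_write_max_active keeps its usual default of 10 (it is
set elsewhere in this file, outside this hunk), the cutoffs work out to:

#include <stdio.h>

int
main(void)
{
	int async_write_max_active = 10;	/* assumed default */

	/* Allocations per top-level vdev before the allocator backs off. */
	printf("kernel:   %d\n", 1000 * async_write_max_active / 100); /* 100 */
	printf("userland: %d\n", 300 * async_write_max_active / 100);  /* 30 */
	return (0);
}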
+
int
vdev_queue_offset_compare(const void *x1, const void *x2)
{
- const zio_t *z1 = x1;
- const zio_t *z2 = x2;
+ const zio_t *z1 = (const zio_t *)x1;
+ const zio_t *z2 = (const zio_t *)x2;
- if (z1->io_offset < z2->io_offset)
- return (-1);
- if (z1->io_offset > z2->io_offset)
- return (1);
+ int cmp = AVL_CMP(z1->io_offset, z2->io_offset);
- if (z1 < z2)
- return (-1);
- if (z1 > z2)
- return (1);
+ if (likely(cmp))
+ return (cmp);
- return (0);
+ return (AVL_PCMP(z1, z2));
}
static inline avl_tree_t *
@@ -209,20 +223,15 @@ vdev_queue_type_tree(vdev_queue_t *vq, zio_type_t t)
int
vdev_queue_timestamp_compare(const void *x1, const void *x2)
{
- const zio_t *z1 = x1;
- const zio_t *z2 = x2;
+ const zio_t *z1 = (const zio_t *)x1;
+ const zio_t *z2 = (const zio_t *)x2;
- if (z1->io_timestamp < z2->io_timestamp)
- return (-1);
- if (z1->io_timestamp > z2->io_timestamp)
- return (1);
+ int cmp = AVL_CMP(z1->io_timestamp, z2->io_timestamp);
- if (z1 < z2)
- return (-1);
- if (z1 > z2)
- return (1);
+ if (likely(cmp))
+ return (cmp);
- return (0);
+ return (AVL_PCMP(z1, z2));
}
static int
@@ -362,11 +371,11 @@ vdev_queue_init(vdev_t *vd)
avl_create(&vq->vq_active_tree, vdev_queue_offset_compare,
sizeof (zio_t), offsetof(struct zio, io_queue_node));
avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_READ),
- vdev_queue_offset_compare, sizeof (zio_t),
- offsetof(struct zio, io_offset_node));
+ vdev_queue_offset_compare, sizeof (zio_t),
+ offsetof(struct zio, io_offset_node));
avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE),
- vdev_queue_offset_compare, sizeof (zio_t),
- offsetof(struct zio, io_offset_node));
+ vdev_queue_offset_compare, sizeof (zio_t),
+ offsetof(struct zio, io_offset_node));
for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) {
int (*compfn) (const void *, const void *);
@@ -381,8 +390,10 @@ vdev_queue_init(vdev_t *vd)
else
compfn = vdev_queue_offset_compare;
avl_create(vdev_queue_class_tree(vq, p), compfn,
- sizeof (zio_t), offsetof(struct zio, io_queue_node));
+ sizeof (zio_t), offsetof(struct zio, io_queue_node));
}
+
+ vq->vq_lastoffset = 0;
}
void
@@ -484,13 +495,14 @@ vdev_queue_agg_io_done(zio_t *aio)
{
if (aio->io_type == ZIO_TYPE_READ) {
zio_t *pio;
- while ((pio = zio_walk_parents(aio)) != NULL) {
- bcopy((char *)aio->io_data + (pio->io_offset -
- aio->io_offset), pio->io_data, pio->io_size);
+ zio_link_t *zl = NULL;
+ while ((pio = zio_walk_parents(aio, &zl)) != NULL) {
+ abd_copy_off(pio->io_abd, aio->io_abd,
+ 0, pio->io_offset - aio->io_offset, pio->io_size);
}
}
- zio_buf_free(aio->io_data, aio->io_size);
+ abd_free(aio->io_abd);
}
/*
@@ -508,20 +520,18 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
zio_t *first, *last, *aio, *dio, *mandatory, *nio;
uint64_t maxgap = 0;
uint64_t size;
+ uint64_t limit;
+ int maxblocksize;
boolean_t stretch = B_FALSE;
avl_tree_t *t = vdev_queue_type_tree(vq, zio->io_type);
enum zio_flag flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT;
- void *buf;
+ abd_t *abd;
- if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE)
- return (NULL);
+ maxblocksize = spa_maxblocksize(vq->vq_vdev->vdev_spa);
+ limit = MAX(MIN(zfs_vdev_aggregation_limit, maxblocksize), 0);
- /*
- * Prevent users from setting the zfs_vdev_aggregation_limit
- * tuning larger than SPA_MAXBLOCKSIZE.
- */
- zfs_vdev_aggregation_limit =
- MIN(zfs_vdev_aggregation_limit, SPA_MAXBLOCKSIZE);
+ if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE || limit == 0)
+ return (NULL);
first = last = zio;
@@ -549,7 +559,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
*/
while ((dio = AVL_PREV(t, first)) != NULL &&
(dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
- IO_SPAN(dio, last) <= zfs_vdev_aggregation_limit &&
+ IO_SPAN(dio, last) <= limit &&
IO_GAP(dio, first) <= maxgap) {
first = dio;
if (mandatory == NULL && !(first->io_flags & ZIO_FLAG_OPTIONAL))
@@ -573,8 +583,9 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
*/
while ((dio = AVL_NEXT(t, last)) != NULL &&
(dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags &&
- (IO_SPAN(first, dio) <= zfs_vdev_aggregation_limit ||
+ (IO_SPAN(first, dio) <= limit ||
(dio->io_flags & ZIO_FLAG_OPTIONAL)) &&
+ IO_SPAN(first, dio) <= maxblocksize &&
IO_GAP(last, dio) <= maxgap) {
last = dio;
if (!(last->io_flags & ZIO_FLAG_OPTIONAL))
@@ -605,7 +616,12 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
}
if (stretch) {
- /* This may be a no-op. */
+ /*
+ * We are going to include an optional io in our aggregated
+ * span, thus closing the write gap. Only mandatory i/os can
+ * start aggregated spans, so make sure that the next i/o
+ * after our span is mandatory.
+ */
dio = AVL_NEXT(t, last);
dio->io_flags &= ~ZIO_FLAG_OPTIONAL;
} else {
@@ -621,13 +637,14 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
return (NULL);
size = IO_SPAN(first, last);
+ ASSERT3U(size, <=, maxblocksize);
- buf = zio_buf_alloc_flags(size, KM_NOSLEEP);
- if (buf == NULL)
+ abd = abd_alloc_for_io(size, B_TRUE);
+ if (abd == NULL)
return (NULL);
aio = zio_vdev_delegated_io(first->io_vd, first->io_offset,
- buf, size, first->io_type, zio->io_priority,
+ abd, size, first->io_type, zio->io_priority,
flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
vdev_queue_agg_io_done, NULL);
aio->io_timestamp = first->io_timestamp;
@@ -640,12 +657,11 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio)
if (dio->io_flags & ZIO_FLAG_NODATA) {
ASSERT3U(dio->io_type, ==, ZIO_TYPE_WRITE);
- bzero((char *)aio->io_data + (dio->io_offset -
- aio->io_offset), dio->io_size);
+ abd_zero_off(aio->io_abd,
+ dio->io_offset - aio->io_offset, dio->io_size);
} else if (dio->io_type == ZIO_TYPE_WRITE) {
- bcopy(dio->io_data, (char *)aio->io_data +
- (dio->io_offset - aio->io_offset),
- dio->io_size);
+ abd_copy_off(aio->io_abd, dio->io_abd,
+ dio->io_offset - aio->io_offset, 0, dio->io_size);
}
zio_add_child(dio, aio);
@@ -767,9 +783,6 @@ vdev_queue_io_done(zio_t *zio)
vdev_queue_t *vq = &zio->io_vd->vdev_queue;
zio_t *nio;
- if (zio_injection_enabled)
- delay(SEC_TO_TICK(zio_handle_io_delay(zio)));
-
mutex_enter(&vq->vq_lock);
vdev_queue_pending_remove(vq, zio);
@@ -792,6 +805,30 @@ vdev_queue_io_done(zio_t *zio)
mutex_exit(&vq->vq_lock);
}
+/*
+ * As these three methods are only used for load calculations, we're not
+ * concerned if we get an incorrect value on 32-bit platforms due to the lack
+ * of vq_lock mutex use here; instead, we prefer to keep it lock-free for
+ * performance.
+ */
+int
+vdev_queue_length(vdev_t *vd)
+{
+ return (avl_numnodes(&vd->vdev_queue.vq_active_tree));
+}
+
+uint64_t
+vdev_queue_lastoffset(vdev_t *vd)
+{
+ return (vd->vdev_queue.vq_lastoffset);
+}
+
+void
+vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio)
+{
+ vd->vdev_queue.vq_lastoffset = zio->io_offset + zio->io_size;
+}
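
The intended pattern, visible in vdev_mirror.c above, is to sample the
(possibly stale) length and last offset while scoring children, then register
the winner's new end offset. A single-threaded sketch of the accessors:

#include <stdio.h>
#include <stdint.h>

/* Racy by design: approximate values are acceptable for load heuristics. */
typedef struct queue { int q_length; uint64_t q_lastoffset; } queue_t;

static int      queue_length(queue_t *q)     { return (q->q_length); }
static uint64_t queue_lastoffset(queue_t *q) { return (q->q_lastoffset); }

static void
queue_register_lastoffset(queue_t *q, uint64_t off, uint64_t size)
{
	q->q_lastoffset = off + size;	/* next sequential I/O starts here */
}

int
main(void)
{
	queue_t q = { 3, 0 };
	queue_register_lastoffset(&q, 4096, 8192);
	printf("len=%d last=%llu\n", queue_length(&q),
	    (unsigned long long)queue_lastoffset(&q));
	return (0);
}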
+
#if defined(_KERNEL) && defined(HAVE_SPL)
module_param(zfs_vdev_aggregation_limit, int, 0644);
MODULE_PARM_DESC(zfs_vdev_aggregation_limit, "Max vdev I/O aggregation size");
@@ -850,4 +887,8 @@ MODULE_PARM_DESC(zfs_vdev_sync_write_max_active,
module_param(zfs_vdev_sync_write_min_active, int, 0644);
MODULE_PARM_DESC(zfs_vdev_sync_write_min_active,
"Min active sync write I/Os per vdev");
+
+module_param(zfs_vdev_queue_depth_pct, int, 0644);
+MODULE_PARM_DESC(zfs_vdev_queue_depth_pct,
+ "Queue depth percentage for each top-level vdev");
#endif
diff --git a/zfs/module/zfs/vdev_raidz.c b/zfs/module/zfs/vdev_raidz.c
index b9479092c842..ba850b4f83d8 100644
--- a/zfs/module/zfs/vdev_raidz.c
+++ b/zfs/module/zfs/vdev_raidz.c
@@ -22,6 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2016 Gvozden Nešković. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -29,8 +30,11 @@
#include <sys/vdev_impl.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
+#include <sys/abd.h>
#include <sys/fs/zfs.h>
#include <sys/fm/fs/zfs.h>
+#include <sys/vdev_raidz.h>
+#include <sys/vdev_raidz_impl.h>
/*
* Virtual device vector for RAID-Z.
@@ -99,34 +103,6 @@
* or in concert to recover missing data columns.
*/
-typedef struct raidz_col {
- uint64_t rc_devidx; /* child device index for I/O */
- uint64_t rc_offset; /* device offset */
- uint64_t rc_size; /* I/O size */
- void *rc_data; /* I/O data */
- void *rc_gdata; /* used to store the "good" version */
- int rc_error; /* I/O error for this device */
- uint8_t rc_tried; /* Did we attempt this I/O column? */
- uint8_t rc_skipped; /* Did we skip this I/O column? */
-} raidz_col_t;
-
-typedef struct raidz_map {
- uint64_t rm_cols; /* Regular column count */
- uint64_t rm_scols; /* Count including skipped columns */
- uint64_t rm_bigcols; /* Number of oversized columns */
- uint64_t rm_asize; /* Actual total I/O size */
- uint64_t rm_missingdata; /* Count of missing data devices */
- uint64_t rm_missingparity; /* Count of missing parity devices */
- uint64_t rm_firstdatacol; /* First data column/parity count */
- uint64_t rm_nskip; /* Skipped sectors for padding */
- uint64_t rm_skipstart; /* Column index of padding start */
- void *rm_datacopy; /* rm_asize-buffer of copied data */
- uintptr_t rm_reports; /* # of referencing checksum reports */
- uint8_t rm_freed; /* map no longer has referencing ZIO */
- uint8_t rm_ecksuminjected; /* checksum error was injected */
- raidz_col_t rm_col[1]; /* Flexible array of I/O columns */
-} raidz_map_t;
-
#define VDEV_RAIDZ_P 0
#define VDEV_RAIDZ_Q 1
#define VDEV_RAIDZ_R 2
@@ -154,123 +130,23 @@ typedef struct raidz_map {
VDEV_RAIDZ_64MUL_2((x), mask); \
}
-/*
- * Force reconstruction to use the general purpose method.
- */
-int vdev_raidz_default_to_general;
-
-/* Powers of 2 in the Galois field defined above. */
-static const uint8_t vdev_raidz_pow2[256] = {
- 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
- 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
- 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
- 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
- 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
- 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
- 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
- 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
- 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
- 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
- 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
- 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
- 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
- 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
- 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
- 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
- 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
- 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
- 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
- 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
- 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
- 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
- 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
- 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
- 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
- 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
- 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
- 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
- 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
- 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
- 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
- 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
-};
-/* Logs of 2 in the Galois field defined above. */
-static const uint8_t vdev_raidz_log2[256] = {
- 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
- 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
- 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
- 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
- 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
- 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
- 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
- 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
- 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
- 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
- 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
- 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
- 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
- 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
- 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
- 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
- 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
- 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
- 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
- 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
- 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
- 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
- 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
- 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
- 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
- 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
- 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
- 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
- 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
- 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
- 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
- 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,
-};
-
-static void vdev_raidz_generate_parity(raidz_map_t *rm);
-
-/*
- * Multiply a given number by 2 raised to the given power.
- */
-static uint8_t
-vdev_raidz_exp2(uint_t a, int exp)
-{
- if (a == 0)
- return (0);
-
- ASSERT(exp >= 0);
- ASSERT(vdev_raidz_log2[a] > 0 || a == 1);
-
- exp += vdev_raidz_log2[a];
- if (exp > 255)
- exp -= 255;
-
- return (vdev_raidz_pow2[exp]);
-}
-
-static void
+void
vdev_raidz_map_free(raidz_map_t *rm)
{
int c;
- size_t size;
for (c = 0; c < rm->rm_firstdatacol; c++) {
- zio_buf_free(rm->rm_col[c].rc_data, rm->rm_col[c].rc_size);
+ abd_free(rm->rm_col[c].rc_abd);
if (rm->rm_col[c].rc_gdata != NULL)
- zio_buf_free(rm->rm_col[c].rc_gdata,
- rm->rm_col[c].rc_size);
+ abd_free(rm->rm_col[c].rc_gdata);
}
- size = 0;
for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++)
- size += rm->rm_col[c].rc_size;
+ abd_put(rm->rm_col[c].rc_abd);
- if (rm->rm_datacopy != NULL)
- zio_buf_free(rm->rm_datacopy, size);
+ if (rm->rm_abd_copy != NULL)
+ abd_free(rm->rm_abd_copy);
kmem_free(rm, offsetof(raidz_map_t, rm_col[rm->rm_scols]));
}
@@ -300,14 +176,14 @@ vdev_raidz_cksum_free(void *arg, size_t ignored)
}
static void
-vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
+vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
{
raidz_map_t *rm = zcr->zcr_cbdata;
- size_t c = zcr->zcr_cbinfo;
- size_t x;
+ const size_t c = zcr->zcr_cbinfo;
+ size_t x, offset;
- const char *good = NULL;
- const char *bad = rm->rm_col[c].rc_data;
+ const abd_t *good = NULL;
+ const abd_t *bad = rm->rm_col[c].rc_abd;
if (good_data == NULL) {
zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE);
@@ -321,8 +197,7 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
* data never changes for a given logical ZIO)
*/
if (rm->rm_col[0].rc_gdata == NULL) {
- char *bad_parity[VDEV_RAIDZ_MAXPARITY];
- char *buf;
+ abd_t *bad_parity[VDEV_RAIDZ_MAXPARITY];
/*
* Set up the rm_col[]s to generate the parity for
@@ -330,16 +205,22 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
* replacing them with buffers to hold the result.
*/
for (x = 0; x < rm->rm_firstdatacol; x++) {
- bad_parity[x] = rm->rm_col[x].rc_data;
- rm->rm_col[x].rc_data = rm->rm_col[x].rc_gdata =
- zio_buf_alloc(rm->rm_col[x].rc_size);
+ bad_parity[x] = rm->rm_col[x].rc_abd;
+ rm->rm_col[x].rc_abd =
+ rm->rm_col[x].rc_gdata =
+ abd_alloc_sametype(rm->rm_col[x].rc_abd,
+ rm->rm_col[x].rc_size);
}
/* fill in the data columns from good_data */
- buf = (char *)good_data;
+ offset = 0;
for (; x < rm->rm_cols; x++) {
- rm->rm_col[x].rc_data = buf;
- buf += rm->rm_col[x].rc_size;
+ abd_put(rm->rm_col[x].rc_abd);
+
+ rm->rm_col[x].rc_abd =
+ abd_get_offset_size((abd_t *)good_data,
+ offset, rm->rm_col[x].rc_size);
+ offset += rm->rm_col[x].rc_size;
}
/*
@@ -349,27 +230,34 @@ vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const void *good_data)
/* restore everything back to its original state */
for (x = 0; x < rm->rm_firstdatacol; x++)
- rm->rm_col[x].rc_data = bad_parity[x];
+ rm->rm_col[x].rc_abd = bad_parity[x];
- buf = rm->rm_datacopy;
+ offset = 0;
for (x = rm->rm_firstdatacol; x < rm->rm_cols; x++) {
- rm->rm_col[x].rc_data = buf;
- buf += rm->rm_col[x].rc_size;
+ abd_put(rm->rm_col[x].rc_abd);
+ rm->rm_col[x].rc_abd = abd_get_offset_size(
+ rm->rm_abd_copy, offset,
+ rm->rm_col[x].rc_size);
+ offset += rm->rm_col[x].rc_size;
}
}
ASSERT3P(rm->rm_col[c].rc_gdata, !=, NULL);
- good = rm->rm_col[c].rc_gdata;
+ good = abd_get_offset_size(rm->rm_col[c].rc_gdata, 0,
+ rm->rm_col[c].rc_size);
} else {
/* adjust good_data to point at the start of our column */
- good = good_data;
-
+ offset = 0;
for (x = rm->rm_firstdatacol; x < c; x++)
- good += rm->rm_col[x].rc_size;
+ offset += rm->rm_col[x].rc_size;
+
+ good = abd_get_offset_size((abd_t *)good_data, offset,
+ rm->rm_col[c].rc_size);
}
/* we drop the ereport if it ends up that the data was good */
zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE);
+ abd_put((abd_t *)good);
}
/*
@@ -382,7 +270,7 @@ static void
vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg)
{
size_t c = (size_t)(uintptr_t)arg;
- caddr_t buf;
+ size_t offset;
raidz_map_t *rm = zio->io_vsd;
size_t size;
@@ -396,7 +284,7 @@ vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg)
rm->rm_reports++;
ASSERT3U(rm->rm_reports, >, 0);
- if (rm->rm_datacopy != NULL)
+ if (rm->rm_abd_copy != NULL)
return;
/*
@@ -412,17 +300,21 @@ vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg)
for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++)
size += rm->rm_col[c].rc_size;
- buf = rm->rm_datacopy = zio_buf_alloc(size);
+ rm->rm_abd_copy = abd_alloc_for_io(size, B_FALSE);
- for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+ for (offset = 0, c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
raidz_col_t *col = &rm->rm_col[c];
+ abd_t *tmp = abd_get_offset_size(rm->rm_abd_copy, offset,
+ col->rc_size);
+
+ abd_copy(tmp, col->rc_abd, col->rc_size);
- bcopy(col->rc_data, buf, col->rc_size);
- col->rc_data = buf;
+ abd_put(col->rc_abd);
+ col->rc_abd = tmp;
- buf += col->rc_size;
+ offset += col->rc_size;
}
- ASSERT3P(buf - (caddr_t)rm->rm_datacopy, ==, size);
+ ASSERT3U(offset, ==, size);
}
static const zio_vsd_ops_t vdev_raidz_vsd_ops = {
@@ -437,20 +329,21 @@ static const zio_vsd_ops_t vdev_raidz_vsd_ops = {
* Avoid inlining the function to keep vdev_raidz_io_start(), which
* is this function's only caller, as small as possible on the stack.
*/
-noinline static raidz_map_t *
-vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols,
+noinline raidz_map_t *
+vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols,
uint64_t nparity)
{
raidz_map_t *rm;
/* The starting RAIDZ (parent) vdev sector of the block. */
- uint64_t b = zio->io_offset >> unit_shift;
+ uint64_t b = zio->io_offset >> ashift;
/* The zio's size in units of the vdev's minimum sector size. */
- uint64_t s = zio->io_size >> unit_shift;
+ uint64_t s = zio->io_size >> ashift;
/* The first column for this stripe. */
uint64_t f = b % dcols;
/* The starting byte offset on each child vdev. */
- uint64_t o = (b / dcols) << unit_shift;
+ uint64_t o = (b / dcols) << ashift;
uint64_t q, r, c, bc, col, acols, scols, coff, devidx, asize, tot;
+ uint64_t off = 0;
/*
* "Quotient": The number of data sectors for this stripe on all but
@@ -495,7 +388,7 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols,
rm->rm_missingdata = 0;
rm->rm_missingparity = 0;
rm->rm_firstdatacol = nparity;
- rm->rm_datacopy = NULL;
+ rm->rm_abd_copy = NULL;
rm->rm_reports = 0;
rm->rm_freed = 0;
rm->rm_ecksuminjected = 0;
@@ -507,11 +400,11 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols,
coff = o;
if (col >= dcols) {
col -= dcols;
- coff += 1ULL << unit_shift;
+ coff += 1ULL << ashift;
}
rm->rm_col[c].rc_devidx = col;
rm->rm_col[c].rc_offset = coff;
- rm->rm_col[c].rc_data = NULL;
+ rm->rm_col[c].rc_abd = NULL;
rm->rm_col[c].rc_gdata = NULL;
rm->rm_col[c].rc_error = 0;
rm->rm_col[c].rc_tried = 0;
@@ -520,27 +413,32 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols,
if (c >= acols)
rm->rm_col[c].rc_size = 0;
else if (c < bc)
- rm->rm_col[c].rc_size = (q + 1) << unit_shift;
+ rm->rm_col[c].rc_size = (q + 1) << ashift;
else
- rm->rm_col[c].rc_size = q << unit_shift;
+ rm->rm_col[c].rc_size = q << ashift;
asize += rm->rm_col[c].rc_size;
}
- ASSERT3U(asize, ==, tot << unit_shift);
- rm->rm_asize = roundup(asize, (nparity + 1) << unit_shift);
+ ASSERT3U(asize, ==, tot << ashift);
+ rm->rm_asize = roundup(asize, (nparity + 1) << ashift);
rm->rm_nskip = roundup(tot, nparity + 1) - tot;
- ASSERT3U(rm->rm_asize - asize, ==, rm->rm_nskip << unit_shift);
+ ASSERT3U(rm->rm_asize - asize, ==, rm->rm_nskip << ashift);
ASSERT3U(rm->rm_nskip, <=, nparity);
for (c = 0; c < rm->rm_firstdatacol; c++)
- rm->rm_col[c].rc_data = zio_buf_alloc(rm->rm_col[c].rc_size);
+ rm->rm_col[c].rc_abd =
+ abd_alloc_linear(rm->rm_col[c].rc_size, B_FALSE);
- rm->rm_col[c].rc_data = zio->io_data;
+ rm->rm_col[c].rc_abd = abd_get_offset_size(zio->io_abd, 0,
+ rm->rm_col[c].rc_size);
+ off = rm->rm_col[c].rc_size;
- for (c = c + 1; c < acols; c++)
- rm->rm_col[c].rc_data = (char *)rm->rm_col[c - 1].rc_data +
- rm->rm_col[c - 1].rc_size;
+ for (c = c + 1; c < acols; c++) {
+ rm->rm_col[c].rc_abd = abd_get_offset_size(zio->io_abd, off,
+ rm->rm_col[c].rc_size);
+ off += rm->rm_col[c].rc_size;
+ }
/*
* If all data stored spans all columns, there's a danger that parity
@@ -579,32 +477,91 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t unit_shift, uint64_t dcols,
zio->io_vsd = rm;
zio->io_vsd_ops = &vdev_raidz_vsd_ops;
+
+ /* init RAIDZ parity ops */
+ rm->rm_ops = vdev_raidz_math_get_ops();
+
return (rm);
}
+struct pqr_struct {
+ uint64_t *p;
+ uint64_t *q;
+ uint64_t *r;
+};
+
+static int
+vdev_raidz_p_func(void *buf, size_t size, void *private)
+{
+ struct pqr_struct *pqr = private;
+ const uint64_t *src = buf;
+ int i, cnt = size / sizeof (src[0]);
+
+ ASSERT(pqr->p && !pqr->q && !pqr->r);
+
+ for (i = 0; i < cnt; i++, src++, pqr->p++)
+ *pqr->p ^= *src;
+
+ return (0);
+}
+
+static int
+vdev_raidz_pq_func(void *buf, size_t size, void *private)
+{
+ struct pqr_struct *pqr = private;
+ const uint64_t *src = buf;
+ uint64_t mask;
+ int i, cnt = size / sizeof (src[0]);
+
+ ASSERT(pqr->p && pqr->q && !pqr->r);
+
+ for (i = 0; i < cnt; i++, src++, pqr->p++, pqr->q++) {
+ *pqr->p ^= *src;
+ VDEV_RAIDZ_64MUL_2(*pqr->q, mask);
+ *pqr->q ^= *src;
+ }
+
+ return (0);
+}
+
+static int
+vdev_raidz_pqr_func(void *buf, size_t size, void *private)
+{
+ struct pqr_struct *pqr = private;
+ const uint64_t *src = buf;
+ uint64_t mask;
+ int i, cnt = size / sizeof (src[0]);
+
+ ASSERT(pqr->p && pqr->q && pqr->r);
+
+ for (i = 0; i < cnt; i++, src++, pqr->p++, pqr->q++, pqr->r++) {
+ *pqr->p ^= *src;
+ VDEV_RAIDZ_64MUL_2(*pqr->q, mask);
+ *pqr->q ^= *src;
+ VDEV_RAIDZ_64MUL_4(*pqr->r, mask);
+ *pqr->r ^= *src;
+ }
+
+ return (0);
+}
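
These callbacks let parity generation walk an ABD chunk by chunk instead of
assuming one flat buffer. A sketch of the same P/Q update driven through an
iterate-style callback over a plain linear buffer (abd_iterate_func itself is
not reimplemented here; the multiply-by-2 macro matches the definition earlier
in this file):

#include <stdio.h>
#include <stdint.h>

/* Byte-wise multiply-by-2 in GF(2^8), packed 8 bytes per 64-bit word. */
#define VDEV_RAIDZ_64MUL_2(x, mask) \
{ \
	(mask) = (x) & 0x8080808080808080ULL; \
	(mask) = ((mask) << 1) - ((mask) >> 7); \
	(x) = (((x) << 1) & 0xfefefefefefefefeULL) ^ \
	    ((mask) & 0x1d1d1d1d1d1d1d1dULL); \
}

struct pqr { uint64_t *p, *q; };

/* Same shape as vdev_raidz_pq_func(): invoked once per contiguous chunk. */
static int
pq_func(void *buf, size_t size, void *private)
{
	struct pqr *pqr = private;
	const uint64_t *src = buf;
	uint64_t mask;
	size_t cnt = size / sizeof (src[0]);

	for (size_t i = 0; i < cnt; i++, src++, pqr->p++, pqr->q++) {
		*pqr->p ^= *src;
		VDEV_RAIDZ_64MUL_2(*pqr->q, mask);
		*pqr->q ^= *src;
	}
	return (0);
}

/* Stand-in driver: a linear buffer is one chunk; a real ABD may be many. */
static int
iterate_func(void *buf, size_t size,
    int (*fn)(void *, size_t, void *), void *priv)
{
	return (fn(buf, size, priv));
}

int
main(void)
{
	uint64_t data[4] = { 1, 2, 3, 4 }, p[4] = { 0 }, q[4] = { 0 };
	struct pqr pqr = { p, q };

	(void) iterate_func(data, sizeof (data), pq_func, &pqr);
	printf("p[0]=%llu q[0]=%llu\n",
	    (unsigned long long)p[0], (unsigned long long)q[0]);
	return (0);
}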
+
static void
vdev_raidz_generate_parity_p(raidz_map_t *rm)
{
- uint64_t *p, *src, pcount, ccount, i;
+ uint64_t *p;
int c;
-
- pcount = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]);
+ abd_t *src;
for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
- src = rm->rm_col[c].rc_data;
- p = rm->rm_col[VDEV_RAIDZ_P].rc_data;
- ccount = rm->rm_col[c].rc_size / sizeof (src[0]);
+ src = rm->rm_col[c].rc_abd;
+ p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd);
if (c == rm->rm_firstdatacol) {
- ASSERT(ccount == pcount);
- for (i = 0; i < ccount; i++, src++, p++) {
- *p = *src;
- }
+ abd_copy_to_buf(p, src, rm->rm_col[c].rc_size);
} else {
- ASSERT(ccount <= pcount);
- for (i = 0; i < ccount; i++, src++, p++) {
- *p ^= *src;
- }
+ struct pqr_struct pqr = { p, NULL, NULL };
+ (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size,
+ vdev_raidz_p_func, &pqr);
}
}
}
@@ -612,50 +569,43 @@ vdev_raidz_generate_parity_p(raidz_map_t *rm)
static void
vdev_raidz_generate_parity_pq(raidz_map_t *rm)
{
- uint64_t *p, *q, *src, pcnt, ccnt, mask, i;
+ uint64_t *p, *q, pcnt, ccnt, mask, i;
int c;
+ abd_t *src;
- pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]);
+ pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (p[0]);
ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size ==
rm->rm_col[VDEV_RAIDZ_Q].rc_size);
for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
- src = rm->rm_col[c].rc_data;
- p = rm->rm_col[VDEV_RAIDZ_P].rc_data;
- q = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
+ src = rm->rm_col[c].rc_abd;
+ p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd);
+ q = abd_to_buf(rm->rm_col[VDEV_RAIDZ_Q].rc_abd);
- ccnt = rm->rm_col[c].rc_size / sizeof (src[0]);
+ ccnt = rm->rm_col[c].rc_size / sizeof (p[0]);
if (c == rm->rm_firstdatacol) {
ASSERT(ccnt == pcnt || ccnt == 0);
- for (i = 0; i < ccnt; i++, src++, p++, q++) {
- *p = *src;
- *q = *src;
- }
- for (; i < pcnt; i++, src++, p++, q++) {
- *p = 0;
- *q = 0;
+ abd_copy_to_buf(p, src, rm->rm_col[c].rc_size);
+ (void) memcpy(q, p, rm->rm_col[c].rc_size);
+
+ for (i = ccnt; i < pcnt; i++) {
+ p[i] = 0;
+ q[i] = 0;
}
} else {
- ASSERT(ccnt <= pcnt);
-
- /*
- * Apply the algorithm described above by multiplying
- * the previous result and adding in the new value.
- */
- for (i = 0; i < ccnt; i++, src++, p++, q++) {
- *p ^= *src;
+ struct pqr_struct pqr = { p, q, NULL };
- VDEV_RAIDZ_64MUL_2(*q, mask);
- *q ^= *src;
- }
+ ASSERT(ccnt <= pcnt);
+ (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size,
+ vdev_raidz_pq_func, &pqr);
/*
* Treat short columns as though they are full of 0s.
* Note that there's therefore nothing needed for P.
*/
- for (; i < pcnt; i++, q++) {
- VDEV_RAIDZ_64MUL_2(*q, mask);
+ for (i = ccnt; i < pcnt; i++) {
+ VDEV_RAIDZ_64MUL_2(q[i], mask);
}
}
}
@@ -664,59 +614,49 @@ vdev_raidz_generate_parity_pq(raidz_map_t *rm)
static void
vdev_raidz_generate_parity_pqr(raidz_map_t *rm)
{
- uint64_t *p, *q, *r, *src, pcnt, ccnt, mask, i;
+ uint64_t *p, *q, *r, pcnt, ccnt, mask, i;
int c;
+ abd_t *src;
- pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]);
+ pcnt = rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (p[0]);
ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size ==
rm->rm_col[VDEV_RAIDZ_Q].rc_size);
ASSERT(rm->rm_col[VDEV_RAIDZ_P].rc_size ==
rm->rm_col[VDEV_RAIDZ_R].rc_size);
for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
- src = rm->rm_col[c].rc_data;
- p = rm->rm_col[VDEV_RAIDZ_P].rc_data;
- q = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
- r = rm->rm_col[VDEV_RAIDZ_R].rc_data;
+ src = rm->rm_col[c].rc_abd;
+ p = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd);
+ q = abd_to_buf(rm->rm_col[VDEV_RAIDZ_Q].rc_abd);
+ r = abd_to_buf(rm->rm_col[VDEV_RAIDZ_R].rc_abd);
- ccnt = rm->rm_col[c].rc_size / sizeof (src[0]);
+ ccnt = rm->rm_col[c].rc_size / sizeof (p[0]);
if (c == rm->rm_firstdatacol) {
ASSERT(ccnt == pcnt || ccnt == 0);
- for (i = 0; i < ccnt; i++, src++, p++, q++, r++) {
- *p = *src;
- *q = *src;
- *r = *src;
- }
- for (; i < pcnt; i++, src++, p++, q++, r++) {
- *p = 0;
- *q = 0;
- *r = 0;
+ abd_copy_to_buf(p, src, rm->rm_col[c].rc_size);
+ (void) memcpy(q, p, rm->rm_col[c].rc_size);
+ (void) memcpy(r, p, rm->rm_col[c].rc_size);
+
+ for (i = ccnt; i < pcnt; i++) {
+ p[i] = 0;
+ q[i] = 0;
+ r[i] = 0;
}
} else {
- ASSERT(ccnt <= pcnt);
-
- /*
- * Apply the algorithm described above by multiplying
- * the previous result and adding in the new value.
- */
- for (i = 0; i < ccnt; i++, src++, p++, q++, r++) {
- *p ^= *src;
-
- VDEV_RAIDZ_64MUL_2(*q, mask);
- *q ^= *src;
+ struct pqr_struct pqr = { p, q, r };
- VDEV_RAIDZ_64MUL_4(*r, mask);
- *r ^= *src;
- }
+ ASSERT(ccnt <= pcnt);
+ (void) abd_iterate_func(src, 0, rm->rm_col[c].rc_size,
+ vdev_raidz_pqr_func, &pqr);
/*
* Treat short columns as though they are full of 0s.
* Note that there's therefore nothing needed for P.
*/
- for (; i < pcnt; i++, q++, r++) {
- VDEV_RAIDZ_64MUL_2(*q, mask);
- VDEV_RAIDZ_64MUL_4(*r, mask);
+ for (i = ccnt; i < pcnt; i++) {
+ VDEV_RAIDZ_64MUL_2(q[i], mask);
+ VDEV_RAIDZ_64MUL_4(r[i], mask);
}
}
}
@@ -726,9 +666,13 @@ vdev_raidz_generate_parity_pqr(raidz_map_t *rm)
* Generate RAID parity in the first virtual columns according to the number of
* parity columns available.
*/
-static void
+void
vdev_raidz_generate_parity(raidz_map_t *rm)
{
+ /* Generate using the new math implementation */
+ if (vdev_raidz_math_generate(rm) != RAIDZ_ORIGINAL_IMPL)
+ return;
+
switch (rm->rm_firstdatacol) {
case 1:
vdev_raidz_generate_parity_p(rm);
@@ -744,40 +688,159 @@ vdev_raidz_generate_parity(raidz_map_t *rm)
}
}
+/* ARGSUSED */
+static int
+vdev_raidz_reconst_p_func(void *dbuf, void *sbuf, size_t size, void *private)
+{
+ uint64_t *dst = dbuf;
+ uint64_t *src = sbuf;
+ int cnt = size / sizeof (src[0]);
+ int i;
+
+ for (i = 0; i < cnt; i++) {
+ dst[i] ^= src[i];
+ }
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+vdev_raidz_reconst_q_pre_func(void *dbuf, void *sbuf, size_t size,
+ void *private)
+{
+ uint64_t *dst = dbuf;
+ uint64_t *src = sbuf;
+ uint64_t mask;
+ int cnt = size / sizeof (dst[0]);
+ int i;
+
+ for (i = 0; i < cnt; i++, dst++, src++) {
+ VDEV_RAIDZ_64MUL_2(*dst, mask);
+ *dst ^= *src;
+ }
+
+ return (0);
+}
+
+/* ARGSUSED */
+static int
+vdev_raidz_reconst_q_pre_tail_func(void *buf, size_t size, void *private)
+{
+ uint64_t *dst = buf;
+ uint64_t mask;
+ int cnt = size / sizeof (dst[0]);
+ int i;
+
+ for (i = 0; i < cnt; i++, dst++) {
+ /* same operation as vdev_raidz_reconst_q_pre_func() on dst */
+ VDEV_RAIDZ_64MUL_2(*dst, mask);
+ }
+
+ return (0);
+}
+
+struct reconst_q_struct {
+ uint64_t *q;
+ int exp;
+};
+
+static int
+vdev_raidz_reconst_q_post_func(void *buf, size_t size, void *private)
+{
+ struct reconst_q_struct *rq = private;
+ uint64_t *dst = buf;
+ int cnt = size / sizeof (dst[0]);
+ int i;
+
+ for (i = 0; i < cnt; i++, dst++, rq->q++) {
+ int j;
+ uint8_t *b;
+
+ *dst ^= *rq->q;
+ for (j = 0, b = (uint8_t *)dst; j < 8; j++, b++) {
+ *b = vdev_raidz_exp2(*b, rq->exp);
+ }
+ }
+
+ return (0);
+}
+
+struct reconst_pq_struct {
+ uint8_t *p;
+ uint8_t *q;
+ uint8_t *pxy;
+ uint8_t *qxy;
+ int aexp;
+ int bexp;
+};
+
+static int
+vdev_raidz_reconst_pq_func(void *xbuf, void *ybuf, size_t size, void *private)
+{
+ struct reconst_pq_struct *rpq = private;
+ uint8_t *xd = xbuf;
+ uint8_t *yd = ybuf;
+ int i;
+
+ for (i = 0; i < size;
+ i++, rpq->p++, rpq->q++, rpq->pxy++, rpq->qxy++, xd++, yd++) {
+ *xd = vdev_raidz_exp2(*rpq->p ^ *rpq->pxy, rpq->aexp) ^
+ vdev_raidz_exp2(*rpq->q ^ *rpq->qxy, rpq->bexp);
+ *yd = *rpq->p ^ *rpq->pxy ^ *xd;
+ }
+
+ return (0);
+}
+
+static int
+vdev_raidz_reconst_pq_tail_func(void *xbuf, size_t size, void *private)
+{
+ struct reconst_pq_struct *rpq = private;
+ uint8_t *xd = xbuf;
+ int i;
+
+ for (i = 0; i < size;
+ i++, rpq->p++, rpq->q++, rpq->pxy++, rpq->qxy++, xd++) {
+ /* same operation as vdev_raidz_reconst_pq_func() on xd */
+ *xd = vdev_raidz_exp2(*rpq->p ^ *rpq->pxy, rpq->aexp) ^
+ vdev_raidz_exp2(*rpq->q ^ *rpq->qxy, rpq->bexp);
+ }
+
+ return (0);
+}
+
static int
vdev_raidz_reconstruct_p(raidz_map_t *rm, int *tgts, int ntgts)
{
- uint64_t *dst, *src, xcount, ccount, count, i;
int x = tgts[0];
int c;
+ abd_t *dst, *src;
ASSERT(ntgts == 1);
ASSERT(x >= rm->rm_firstdatacol);
ASSERT(x < rm->rm_cols);
- xcount = rm->rm_col[x].rc_size / sizeof (src[0]);
- ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_P].rc_size / sizeof (src[0]));
- ASSERT(xcount > 0);
+ ASSERT(rm->rm_col[x].rc_size <= rm->rm_col[VDEV_RAIDZ_P].rc_size);
+ ASSERT(rm->rm_col[x].rc_size > 0);
- src = rm->rm_col[VDEV_RAIDZ_P].rc_data;
- dst = rm->rm_col[x].rc_data;
- for (i = 0; i < xcount; i++, dst++, src++) {
- *dst = *src;
- }
+ src = rm->rm_col[VDEV_RAIDZ_P].rc_abd;
+ dst = rm->rm_col[x].rc_abd;
+
+ abd_copy_from_buf(dst, abd_to_buf(src), rm->rm_col[x].rc_size);
for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
- src = rm->rm_col[c].rc_data;
- dst = rm->rm_col[x].rc_data;
+ uint64_t size = MIN(rm->rm_col[x].rc_size,
+ rm->rm_col[c].rc_size);
+
+ src = rm->rm_col[c].rc_abd;
+ dst = rm->rm_col[x].rc_abd;
if (c == x)
continue;
- ccount = rm->rm_col[c].rc_size / sizeof (src[0]);
- count = MIN(ccount, xcount);
-
- for (i = 0; i < count; i++, dst++, src++) {
- *dst ^= *src;
- }
+ (void) abd_iterate_func2(dst, src, 0, 0, size,
+ vdev_raidz_reconst_p_func, NULL);
}
return (1 << VDEV_RAIDZ_P);
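
The abd_iterate_func2() calls above replace the old pointer-walking loops:
the ABD layer walks both buffers range by range and hands each mapped pair to
a callback such as vdev_raidz_reconst_p_func(). A simplified userspace sketch
of that iteration pattern over linear buffers (the iterator here is a
stand-in, not the kernel ABD API):

    #include <stddef.h>
    #include <stdint.h>

    typedef int (*iter2_fn_t)(void *dbuf, void *sbuf, size_t size,
        void *private);

    /* Stand-in for abd_iterate_func2() on plain linear buffers. */
    static int
    iterate_func2_sketch(uint8_t *d, uint8_t *s, size_t size, size_t chunk,
        iter2_fn_t fn, void *private)
    {
            while (size > 0) {
                    size_t n = (size < chunk) ? size : chunk;
                    int ret = fn(d, s, n, private);

                    if (ret != 0)
                            return (ret);
                    d += n;
                    s += n;
                    size -= n;
            }
            return (0);
    }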
@@ -786,57 +849,46 @@ vdev_raidz_reconstruct_p(raidz_map_t *rm, int *tgts, int ntgts)
static int
vdev_raidz_reconstruct_q(raidz_map_t *rm, int *tgts, int ntgts)
{
- uint64_t *dst, *src, xcount, ccount, count, mask, i;
- uint8_t *b;
int x = tgts[0];
- int c, j, exp;
+ int c, exp;
+ abd_t *dst, *src;
+ struct reconst_q_struct rq;
ASSERT(ntgts == 1);
- xcount = rm->rm_col[x].rc_size / sizeof (src[0]);
- ASSERT(xcount <= rm->rm_col[VDEV_RAIDZ_Q].rc_size / sizeof (src[0]));
+ ASSERT(rm->rm_col[x].rc_size <= rm->rm_col[VDEV_RAIDZ_Q].rc_size);
for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
- src = rm->rm_col[c].rc_data;
- dst = rm->rm_col[x].rc_data;
-
- if (c == x)
- ccount = 0;
- else
- ccount = rm->rm_col[c].rc_size / sizeof (src[0]);
+ uint64_t size = (c == x) ? 0 : MIN(rm->rm_col[x].rc_size,
+ rm->rm_col[c].rc_size);
- count = MIN(ccount, xcount);
+ src = rm->rm_col[c].rc_abd;
+ dst = rm->rm_col[x].rc_abd;
if (c == rm->rm_firstdatacol) {
- for (i = 0; i < count; i++, dst++, src++) {
- *dst = *src;
- }
- for (; i < xcount; i++, dst++) {
- *dst = 0;
- }
+ abd_copy(dst, src, size);
+ if (rm->rm_col[x].rc_size > size)
+ abd_zero_off(dst, size,
+ rm->rm_col[x].rc_size - size);
} else {
- for (i = 0; i < count; i++, dst++, src++) {
- VDEV_RAIDZ_64MUL_2(*dst, mask);
- *dst ^= *src;
- }
-
- for (; i < xcount; i++, dst++) {
- VDEV_RAIDZ_64MUL_2(*dst, mask);
- }
+ ASSERT3U(size, <=, rm->rm_col[x].rc_size);
+ (void) abd_iterate_func2(dst, src, 0, 0, size,
+ vdev_raidz_reconst_q_pre_func, NULL);
+ (void) abd_iterate_func(dst,
+ size, rm->rm_col[x].rc_size - size,
+ vdev_raidz_reconst_q_pre_tail_func, NULL);
}
}
- src = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
- dst = rm->rm_col[x].rc_data;
+ src = rm->rm_col[VDEV_RAIDZ_Q].rc_abd;
+ dst = rm->rm_col[x].rc_abd;
exp = 255 - (rm->rm_cols - 1 - x);
+ rq.q = abd_to_buf(src);
+ rq.exp = exp;
- for (i = 0; i < xcount; i++, dst++, src++) {
- *dst ^= *src;
- for (j = 0, b = (uint8_t *)dst; j < 8; j++, b++) {
- *b = vdev_raidz_exp2(*b, exp);
- }
- }
+ (void) abd_iterate_func(dst, 0, rm->rm_col[x].rc_size,
+ vdev_raidz_reconst_q_post_func, &rq);
return (1 << VDEV_RAIDZ_Q);
}
@@ -844,11 +896,13 @@ vdev_raidz_reconstruct_q(raidz_map_t *rm, int *tgts, int ntgts)
static int
vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts)
{
- uint8_t *p, *q, *pxy, *qxy, *xd, *yd, tmp, a, b, aexp, bexp;
- void *pdata, *qdata;
- uint64_t xsize, ysize, i;
+ uint8_t *p, *q, *pxy, *qxy, tmp, a, b, aexp, bexp;
+ abd_t *pdata, *qdata;
+ uint64_t xsize, ysize;
int x = tgts[0];
int y = tgts[1];
+ abd_t *xd, *yd;
+ struct reconst_pq_struct rpq;
ASSERT(ntgts == 2);
ASSERT(x < y);
@@ -864,15 +918,15 @@ vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts)
* parity so we make those columns appear to be full of zeros by
* setting their lengths to zero.
*/
- pdata = rm->rm_col[VDEV_RAIDZ_P].rc_data;
- qdata = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
+ pdata = rm->rm_col[VDEV_RAIDZ_P].rc_abd;
+ qdata = rm->rm_col[VDEV_RAIDZ_Q].rc_abd;
xsize = rm->rm_col[x].rc_size;
ysize = rm->rm_col[y].rc_size;
- rm->rm_col[VDEV_RAIDZ_P].rc_data =
- zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_P].rc_size);
- rm->rm_col[VDEV_RAIDZ_Q].rc_data =
- zio_buf_alloc(rm->rm_col[VDEV_RAIDZ_Q].rc_size);
+ rm->rm_col[VDEV_RAIDZ_P].rc_abd =
+ abd_alloc_linear(rm->rm_col[VDEV_RAIDZ_P].rc_size, B_TRUE);
+ rm->rm_col[VDEV_RAIDZ_Q].rc_abd =
+ abd_alloc_linear(rm->rm_col[VDEV_RAIDZ_Q].rc_size, B_TRUE);
rm->rm_col[x].rc_size = 0;
rm->rm_col[y].rc_size = 0;
@@ -881,12 +935,12 @@ vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts)
rm->rm_col[x].rc_size = xsize;
rm->rm_col[y].rc_size = ysize;
- p = pdata;
- q = qdata;
- pxy = rm->rm_col[VDEV_RAIDZ_P].rc_data;
- qxy = rm->rm_col[VDEV_RAIDZ_Q].rc_data;
- xd = rm->rm_col[x].rc_data;
- yd = rm->rm_col[y].rc_data;
+ p = abd_to_buf(pdata);
+ q = abd_to_buf(qdata);
+ pxy = abd_to_buf(rm->rm_col[VDEV_RAIDZ_P].rc_abd);
+ qxy = abd_to_buf(rm->rm_col[VDEV_RAIDZ_Q].rc_abd);
+ xd = rm->rm_col[x].rc_abd;
+ yd = rm->rm_col[y].rc_abd;
/*
* We now have:
@@ -910,24 +964,27 @@ vdev_raidz_reconstruct_pq(raidz_map_t *rm, int *tgts, int ntgts)
aexp = vdev_raidz_log2[vdev_raidz_exp2(a, tmp)];
bexp = vdev_raidz_log2[vdev_raidz_exp2(b, tmp)];
- for (i = 0; i < xsize; i++, p++, q++, pxy++, qxy++, xd++, yd++) {
- *xd = vdev_raidz_exp2(*p ^ *pxy, aexp) ^
- vdev_raidz_exp2(*q ^ *qxy, bexp);
+ ASSERT3U(xsize, >=, ysize);
+ rpq.p = p;
+ rpq.q = q;
+ rpq.pxy = pxy;
+ rpq.qxy = qxy;
+ rpq.aexp = aexp;
+ rpq.bexp = bexp;
- if (i < ysize)
- *yd = *p ^ *pxy ^ *xd;
- }
+ (void) abd_iterate_func2(xd, yd, 0, 0, ysize,
+ vdev_raidz_reconst_pq_func, &rpq);
+ (void) abd_iterate_func(xd, ysize, xsize - ysize,
+ vdev_raidz_reconst_pq_tail_func, &rpq);
- zio_buf_free(rm->rm_col[VDEV_RAIDZ_P].rc_data,
- rm->rm_col[VDEV_RAIDZ_P].rc_size);
- zio_buf_free(rm->rm_col[VDEV_RAIDZ_Q].rc_data,
- rm->rm_col[VDEV_RAIDZ_Q].rc_size);
+ abd_free(rm->rm_col[VDEV_RAIDZ_P].rc_abd);
+ abd_free(rm->rm_col[VDEV_RAIDZ_Q].rc_abd);
/*
* Restore the saved parity data.
*/
- rm->rm_col[VDEV_RAIDZ_P].rc_data = pdata;
- rm->rm_col[VDEV_RAIDZ_Q].rc_data = qdata;
+ rm->rm_col[VDEV_RAIDZ_P].rc_abd = pdata;
+ rm->rm_col[VDEV_RAIDZ_Q].rc_abd = qdata;
return ((1 << VDEV_RAIDZ_P) | (1 << VDEV_RAIDZ_Q));
}
@@ -1217,8 +1274,8 @@ vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing,
int i, j, x, cc, c;
uint8_t *src;
uint64_t ccount;
- uint8_t *dst[VDEV_RAIDZ_MAXPARITY];
- uint64_t dcount[VDEV_RAIDZ_MAXPARITY];
+ uint8_t *dst[VDEV_RAIDZ_MAXPARITY] = { NULL };
+ uint64_t dcount[VDEV_RAIDZ_MAXPARITY] = { 0 };
uint8_t log = 0;
uint8_t val;
int ll;
@@ -1245,7 +1302,7 @@ vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing,
c = used[i];
ASSERT3U(c, <, rm->rm_cols);
- src = rm->rm_col[c].rc_data;
+ src = abd_to_buf(rm->rm_col[c].rc_abd);
ccount = rm->rm_col[c].rc_size;
for (j = 0; j < nmissing; j++) {
cc = missing[j] + rm->rm_firstdatacol;
@@ -1253,7 +1310,7 @@ vdev_raidz_matrix_reconstruct(raidz_map_t *rm, int n, int nmissing,
ASSERT3U(cc, <, rm->rm_cols);
ASSERT3U(cc, !=, c);
- dst[j] = rm->rm_col[cc].rc_data;
+ dst[j] = abd_to_buf(rm->rm_col[cc].rc_abd);
dcount[j] = rm->rm_col[cc].rc_size;
}
@@ -1301,8 +1358,25 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts)
uint8_t *invrows[VDEV_RAIDZ_MAXPARITY];
uint8_t *used;
+ abd_t **bufs = NULL;
+
int code = 0;
+ /*
+ * Matrix reconstruction can't use scatter ABDs yet, so we allocate
+ * temporary linear ABDs.
+ */
+ if (!abd_is_linear(rm->rm_col[rm->rm_firstdatacol].rc_abd)) {
+ bufs = kmem_alloc(rm->rm_cols * sizeof (abd_t *), KM_PUSHPAGE);
+
+ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+ raidz_col_t *col = &rm->rm_col[c];
+
+ bufs[c] = col->rc_abd;
+ col->rc_abd = abd_alloc_linear(col->rc_size, B_TRUE);
+ abd_copy(col->rc_abd, bufs[c], col->rc_size);
+ }
+ }
n = rm->rm_cols - rm->rm_firstdatacol;
@@ -1389,15 +1463,29 @@ vdev_raidz_reconstruct_general(raidz_map_t *rm, int *tgts, int ntgts)
kmem_free(p, psize);
+ /*
+	 * Copy back from the temporary linear ABDs and free them
+ */
+ if (bufs) {
+ for (c = rm->rm_firstdatacol; c < rm->rm_cols; c++) {
+ raidz_col_t *col = &rm->rm_col[c];
+
+ abd_copy(bufs[c], col->rc_abd, col->rc_size);
+ abd_free(col->rc_abd);
+ col->rc_abd = bufs[c];
+ }
+ kmem_free(bufs, rm->rm_cols * sizeof (abd_t *));
+ }
+
return (code);
}
-static int
-vdev_raidz_reconstruct(raidz_map_t *rm, int *t, int nt)
+int
+vdev_raidz_reconstruct(raidz_map_t *rm, const int *t, int nt)
{
int tgts[VDEV_RAIDZ_MAXPARITY], *dt;
int ntgts;
- int i, c;
+ int i, c, ret;
int code;
int nbadparity, nbaddata;
int parity_valid[VDEV_RAIDZ_MAXPARITY];
@@ -1435,34 +1523,37 @@ vdev_raidz_reconstruct(raidz_map_t *rm, int *t, int nt)
dt = &tgts[nbadparity];
+ /* Reconstruct using the new math implementation */
+ ret = vdev_raidz_math_reconstruct(rm, parity_valid, dt, nbaddata);
+ if (ret != RAIDZ_ORIGINAL_IMPL)
+ return (ret);
+
/*
* See if we can use any of our optimized reconstruction routines.
*/
- if (!vdev_raidz_default_to_general) {
- switch (nbaddata) {
- case 1:
- if (parity_valid[VDEV_RAIDZ_P])
- return (vdev_raidz_reconstruct_p(rm, dt, 1));
+ switch (nbaddata) {
+ case 1:
+ if (parity_valid[VDEV_RAIDZ_P])
+ return (vdev_raidz_reconstruct_p(rm, dt, 1));
- ASSERT(rm->rm_firstdatacol > 1);
+ ASSERT(rm->rm_firstdatacol > 1);
- if (parity_valid[VDEV_RAIDZ_Q])
- return (vdev_raidz_reconstruct_q(rm, dt, 1));
+ if (parity_valid[VDEV_RAIDZ_Q])
+ return (vdev_raidz_reconstruct_q(rm, dt, 1));
- ASSERT(rm->rm_firstdatacol > 2);
- break;
+ ASSERT(rm->rm_firstdatacol > 2);
+ break;
- case 2:
- ASSERT(rm->rm_firstdatacol > 1);
+ case 2:
+ ASSERT(rm->rm_firstdatacol > 1);
- if (parity_valid[VDEV_RAIDZ_P] &&
- parity_valid[VDEV_RAIDZ_Q])
- return (vdev_raidz_reconstruct_pq(rm, dt, 2));
+ if (parity_valid[VDEV_RAIDZ_P] &&
+ parity_valid[VDEV_RAIDZ_Q])
+ return (vdev_raidz_reconstruct_pq(rm, dt, 2));
- ASSERT(rm->rm_firstdatacol > 2);
+ ASSERT(rm->rm_firstdatacol > 2);
- break;
- }
+ break;
}
code = vdev_raidz_reconstruct_general(rm, tgts, ntgts);
@@ -1589,7 +1680,7 @@ vdev_raidz_io_start(zio_t *zio)
rc = &rm->rm_col[c];
cvd = vd->vdev_child[rc->rc_devidx];
zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
- rc->rc_offset, rc->rc_data, rc->rc_size,
+ rc->rc_offset, rc->rc_abd, rc->rc_size,
zio->io_type, zio->io_priority, 0,
vdev_raidz_child_done, rc));
}
@@ -1646,7 +1737,7 @@ vdev_raidz_io_start(zio_t *zio)
if (c >= rm->rm_firstdatacol || rm->rm_missingdata > 0 ||
(zio->io_flags & (ZIO_FLAG_SCRUB | ZIO_FLAG_RESILVER))) {
zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
- rc->rc_offset, rc->rc_data, rc->rc_size,
+ rc->rc_offset, rc->rc_abd, rc->rc_size,
zio->io_type, zio->io_priority, 0,
vdev_raidz_child_done, rc));
}
@@ -1660,7 +1751,7 @@ vdev_raidz_io_start(zio_t *zio)
* Report a checksum error for a child of a RAID-Z device.
*/
static void
-raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data)
+raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data)
{
vdev_t *vd = zio->io_vd->vdev_child[rc->rc_devidx];
@@ -1676,7 +1767,7 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, void *bad_data)
zbc.zbc_injected = rm->rm_ecksuminjected;
zfs_ereport_post_checksum(zio->io_spa, vd, zio,
- rc->rc_offset, rc->rc_size, rc->rc_data, bad_data,
+ rc->rc_offset, rc->rc_size, rc->rc_abd, bad_data,
&zbc);
}
}
@@ -1710,16 +1801,24 @@ raidz_checksum_verify(zio_t *zio)
static int
raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
{
- void *orig[VDEV_RAIDZ_MAXPARITY];
+ abd_t *orig[VDEV_RAIDZ_MAXPARITY];
int c, ret = 0;
raidz_col_t *rc;
+ blkptr_t *bp = zio->io_bp;
+ enum zio_checksum checksum = (bp == NULL ? zio->io_prop.zp_checksum :
+ (BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp)));
+
+ if (checksum == ZIO_CHECKSUM_NOPARITY)
+ return (ret);
+
for (c = 0; c < rm->rm_firstdatacol; c++) {
rc = &rm->rm_col[c];
if (!rc->rc_tried || rc->rc_error != 0)
continue;
- orig[c] = zio_buf_alloc(rc->rc_size);
- bcopy(rc->rc_data, orig[c], rc->rc_size);
+
+ orig[c] = abd_alloc_sametype(rc->rc_abd, rc->rc_size);
+ abd_copy(orig[c], rc->rc_abd, rc->rc_size);
}
vdev_raidz_generate_parity(rm);
@@ -1728,22 +1827,17 @@ raidz_parity_verify(zio_t *zio, raidz_map_t *rm)
rc = &rm->rm_col[c];
if (!rc->rc_tried || rc->rc_error != 0)
continue;
- if (bcmp(orig[c], rc->rc_data, rc->rc_size) != 0) {
+ if (abd_cmp(orig[c], rc->rc_abd) != 0) {
raidz_checksum_error(zio, rc, orig[c]);
rc->rc_error = SET_ERROR(ECKSUM);
ret++;
}
- zio_buf_free(orig[c], rc->rc_size);
+ abd_free(orig[c]);
}
return (ret);
}
-/*
- * Keep statistics on all the ways that we used parity to correct data.
- */
-static uint64_t raidz_corrected[1 << VDEV_RAIDZ_MAXPARITY];
-
static int
vdev_raidz_worst_error(raidz_map_t *rm)
{
@@ -1768,7 +1862,7 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
{
raidz_map_t *rm = zio->io_vsd;
raidz_col_t *rc;
- void *orig[VDEV_RAIDZ_MAXPARITY];
+ abd_t *orig[VDEV_RAIDZ_MAXPARITY];
int tstore[VDEV_RAIDZ_MAXPARITY + 2];
int *tgts = &tstore[1];
int curr, next, i, c, n;
@@ -1817,7 +1911,8 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
ASSERT(orig[i] != NULL);
}
- orig[n - 1] = zio_buf_alloc(rm->rm_col[0].rc_size);
+ orig[n - 1] = abd_alloc_sametype(rm->rm_col[0].rc_abd,
+ rm->rm_col[0].rc_size);
curr = 0;
next = tgts[curr];
@@ -1836,7 +1931,7 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
ASSERT3S(c, >=, 0);
ASSERT3S(c, <, rm->rm_cols);
rc = &rm->rm_col[c];
- bcopy(rc->rc_data, orig[i], rc->rc_size);
+ abd_copy(orig[i], rc->rc_abd, rc->rc_size);
}
/*
@@ -1845,7 +1940,6 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
*/
code = vdev_raidz_reconstruct(rm, tgts, n);
if (raidz_checksum_verify(zio) == 0) {
- atomic_inc_64(&raidz_corrected[code]);
for (i = 0; i < n; i++) {
c = tgts[i];
@@ -1867,7 +1961,7 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
for (i = 0; i < n; i++) {
c = tgts[i];
rc = &rm->rm_col[c];
- bcopy(orig[i], rc->rc_data, rc->rc_size);
+ abd_copy(rc->rc_abd, orig[i], rc->rc_size);
}
do {
@@ -1904,9 +1998,8 @@ vdev_raidz_combrec(zio_t *zio, int total_errors, int data_errors)
}
n--;
done:
- for (i = 0; i < n; i++) {
- zio_buf_free(orig[i], rm->rm_col[0].rc_size);
- }
+ for (i = 0; i < n; i++)
+ abd_free(orig[i]);
return (ret);
}
@@ -2058,8 +2151,6 @@ vdev_raidz_io_done(zio_t *zio)
code = vdev_raidz_reconstruct(rm, tgts, n);
if (raidz_checksum_verify(zio) == 0) {
- atomic_inc_64(&raidz_corrected[code]);
-
/*
* If we read more parity disks than were used
* for reconstruction, confirm that the other
@@ -2108,7 +2199,7 @@ vdev_raidz_io_done(zio_t *zio)
continue;
zio_nowait(zio_vdev_child_io(zio, NULL,
vd->vdev_child[rc->rc_devidx],
- rc->rc_offset, rc->rc_data, rc->rc_size,
+ rc->rc_offset, rc->rc_abd, rc->rc_size,
zio->io_type, zio->io_priority, 0,
vdev_raidz_child_done, rc));
} while (++c < rm->rm_cols);
@@ -2188,7 +2279,7 @@ vdev_raidz_io_done(zio_t *zio)
continue;
zio_nowait(zio_vdev_child_io(zio, NULL, cvd,
- rc->rc_offset, rc->rc_data, rc->rc_size,
+ rc->rc_offset, rc->rc_abd, rc->rc_size,
ZIO_TYPE_WRITE, ZIO_PRIORITY_ASYNC_WRITE,
ZIO_FLAG_IO_REPAIR | (unexpected_errors ?
ZIO_FLAG_SELF_HEAL : 0), NULL, NULL));
@@ -2208,6 +2299,44 @@ vdev_raidz_state_change(vdev_t *vd, int faulted, int degraded)
vdev_set_state(vd, B_FALSE, VDEV_STATE_HEALTHY, VDEV_AUX_NONE);
}
+/*
+ * Determine if any portion of the provided block resides on a child vdev
+ * with a dirty DTL and therefore needs to be resilvered. The function
+ * assumes that at least one DTL is dirty, which implies that full stripe
+ * width blocks must be resilvered.
+ */
+static boolean_t
+vdev_raidz_need_resilver(vdev_t *vd, uint64_t offset, size_t psize)
+{
+ uint64_t dcols = vd->vdev_children;
+ uint64_t nparity = vd->vdev_nparity;
+ uint64_t ashift = vd->vdev_top->vdev_ashift;
+ /* The starting RAIDZ (parent) vdev sector of the block. */
+ uint64_t b = offset >> ashift;
+ /* The zio's size in units of the vdev's minimum sector size. */
+ uint64_t s = ((psize - 1) >> ashift) + 1;
+ /* The first column for this stripe. */
+ uint64_t f = b % dcols;
+
+ if (s + nparity >= dcols)
+ return (B_TRUE);
+
+ for (uint64_t c = 0; c < s + nparity; c++) {
+ uint64_t devidx = (f + c) % dcols;
+ vdev_t *cvd = vd->vdev_child[devidx];
+
+ /*
+ * dsl_scan_need_resilver() already checked vd with
+ * vdev_dtl_contains(). So here just check cvd with
+ * vdev_dtl_empty(), cheaper and a good approximation.
+ */
+ if (!vdev_dtl_empty(cvd, DTL_PARTIAL))
+ return (B_TRUE);
+ }
+
+ return (B_FALSE);
+}
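
The geometry here mirrors the map-allocation math: sector b of the block
lands on child (f + c) % dcols. A worked sketch of which children a given
block touches, under the same assumptions (illustrative code, not from the
tree):

    #include <stdint.h>
    #include <stdio.h>

    /*
     * Print the child vdevs touched by a block: dcols children, nparity
     * parity columns, sectors of 1 << ashift bytes.
     */
    static void
    print_touched_children(uint64_t offset, uint64_t psize,
        uint64_t dcols, uint64_t nparity, uint64_t ashift)
    {
            uint64_t b = offset >> ashift;            /* first parent sector */
            uint64_t s = ((psize - 1) >> ashift) + 1; /* size in sectors */
            uint64_t f = b % dcols;                   /* first column */

            if (s + nparity >= dcols) {
                    printf("full stripe: all %llu children\n",
                        (unsigned long long)dcols);
                    return;
            }
            for (uint64_t c = 0; c < s + nparity; c++)
                    printf("child %llu\n",
                        (unsigned long long)((f + c) % dcols));
    }

    int
    main(void)
    {
            /*
             * An 8 KiB block at offset 1 MiB on a 6-wide raidz2 with 4 KiB
             * sectors: b = 256, s = 2, f = 4, so children 4, 5, 0, 1.
             */
            print_touched_children(1ULL << 20, 8192, 6, 2, 12);
            return (0);
    }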
+
vdev_ops_t vdev_raidz_ops = {
vdev_raidz_open,
vdev_raidz_close,
@@ -2215,6 +2344,7 @@ vdev_ops_t vdev_raidz_ops = {
vdev_raidz_io_start,
vdev_raidz_io_done,
vdev_raidz_state_change,
+ vdev_raidz_need_resilver,
NULL,
NULL,
VDEV_TYPE_RAIDZ, /* name of this vdev type */
diff --git a/zfs/module/zfs/vdev_raidz_math.c b/zfs/module/zfs/vdev_raidz_math.c
new file mode 100644
index 000000000000..a64e3b023574
--- /dev/null
+++ b/zfs/module/zfs/vdev_raidz_math.c
@@ -0,0 +1,652 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <sys/types.h>
+#include <sys/zio.h>
+#include <sys/debug.h>
+#include <sys/zfs_debug.h>
+
+#include <sys/vdev_raidz.h>
+#include <sys/vdev_raidz_impl.h>
+
+extern boolean_t raidz_will_scalar_work(void);
+
+/* Opaque implementation with NULL methods to represent original methods */
+static const raidz_impl_ops_t vdev_raidz_original_impl = {
+ .name = "original",
+ .is_supported = raidz_will_scalar_work,
+};
+
+/* RAIDZ parity op that contains the fastest methods */
+static raidz_impl_ops_t vdev_raidz_fastest_impl = {
+ .name = "fastest"
+};
+
+/* All compiled-in implementations */
+const raidz_impl_ops_t *raidz_all_maths[] = {
+ &vdev_raidz_original_impl,
+ &vdev_raidz_scalar_impl,
+#if defined(__x86_64) && defined(HAVE_SSE2) /* only x86_64 for now */
+ &vdev_raidz_sse2_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_SSSE3) /* only x86_64 for now */
+ &vdev_raidz_ssse3_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_AVX2) /* only x86_64 for now */
+ &vdev_raidz_avx2_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_AVX512F) /* only x86_64 for now */
+ &vdev_raidz_avx512f_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_AVX512BW) /* only x86_64 for now */
+ &vdev_raidz_avx512bw_impl,
+#endif
+#if defined(__aarch64__)
+ &vdev_raidz_aarch64_neon_impl,
+ &vdev_raidz_aarch64_neonx2_impl,
+#endif
+};
+
+/* Indicates that the benchmark has been completed */
+static boolean_t raidz_math_initialized = B_FALSE;
+
+/* Select raidz implementation */
+#define IMPL_FASTEST (UINT32_MAX)
+#define IMPL_CYCLE (UINT32_MAX - 1)
+#define IMPL_ORIGINAL (0)
+#define IMPL_SCALAR (1)
+
+#define RAIDZ_IMPL_READ(i) (*(volatile uint32_t *) &(i))
+
+static uint32_t zfs_vdev_raidz_impl = IMPL_SCALAR;
+static uint32_t user_sel_impl = IMPL_FASTEST;
+
+/* Hold all supported implementations */
+static size_t raidz_supp_impl_cnt = 0;
+static raidz_impl_ops_t *raidz_supp_impl[ARRAY_SIZE(raidz_all_maths)];
+
+/*
+ * kstat values for supported implementations
+ * Values represent the per-disk throughput of an 8 disk + parity raidz vdev [B/s]
+ */
+static raidz_impl_kstat_t raidz_impl_kstats[ARRAY_SIZE(raidz_all_maths) + 1];
+
+/* kstat for benchmarked implementations */
+static kstat_t *raidz_math_kstat = NULL;
+
+/*
+ * Selects the raidz operation for raidz_map
+ * If rm_ops is set to NULL, the original raidz implementation will be used
+ */
+raidz_impl_ops_t *
+vdev_raidz_math_get_ops()
+{
+ raidz_impl_ops_t *ops = NULL;
+ const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
+
+ switch (impl) {
+ case IMPL_FASTEST:
+ ASSERT(raidz_math_initialized);
+ ops = &vdev_raidz_fastest_impl;
+ break;
+#if !defined(_KERNEL)
+ case IMPL_CYCLE:
+ {
+ ASSERT(raidz_math_initialized);
+ ASSERT3U(raidz_supp_impl_cnt, >, 0);
+ /* Cycle through all supported implementations */
+ static size_t cycle_impl_idx = 0;
+ size_t idx = (++cycle_impl_idx) % raidz_supp_impl_cnt;
+ ops = raidz_supp_impl[idx];
+ }
+ break;
+#endif
+ case IMPL_ORIGINAL:
+ ops = (raidz_impl_ops_t *)&vdev_raidz_original_impl;
+ break;
+ case IMPL_SCALAR:
+ ops = (raidz_impl_ops_t *)&vdev_raidz_scalar_impl;
+ break;
+ default:
+ ASSERT3U(impl, <, raidz_supp_impl_cnt);
+ ASSERT3U(raidz_supp_impl_cnt, >, 0);
+ ops = raidz_supp_impl[impl];
+ break;
+ }
+
+ ASSERT3P(ops, !=, NULL);
+
+ return (ops);
+}
+
+/*
+ * Select parity generation method for raidz_map
+ */
+int
+vdev_raidz_math_generate(raidz_map_t *rm)
+{
+ raidz_gen_f gen_parity = NULL;
+
+ switch (raidz_parity(rm)) {
+ case 1:
+ gen_parity = rm->rm_ops->gen[RAIDZ_GEN_P];
+ break;
+ case 2:
+ gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQ];
+ break;
+ case 3:
+ gen_parity = rm->rm_ops->gen[RAIDZ_GEN_PQR];
+ break;
+ default:
+ gen_parity = NULL;
+ cmn_err(CE_PANIC, "invalid RAID-Z configuration %d",
+ raidz_parity(rm));
+ break;
+ }
+
+	/* If the method is NULL, execute the original implementation */
+ if (gen_parity == NULL)
+ return (RAIDZ_ORIGINAL_IMPL);
+
+ gen_parity(rm);
+
+ return (0);
+}
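
The selection above is just an indexed lookup into the per-implementation op
vector, with NULL meaning "no accelerated kernel, fall back to the original
code". A minimal sketch of that dispatch shape (types here are hypothetical
simplifications of raidz_impl_ops_t):

    #include <stddef.h>

    typedef struct raidz_map_sketch raidz_map_sketch_t;
    typedef void (*gen_fn_t)(raidz_map_sketch_t *);

    enum { GEN_P, GEN_PQ, GEN_PQR, GEN_NUM };

    typedef struct {
            const char *name;
            gen_fn_t gen[GEN_NUM];  /* may contain NULLs */
    } impl_ops_sketch_t;

    /* Return -1 (caller falls back) when the op is not provided. */
    static int
    generate_sketch(const impl_ops_sketch_t *ops, raidz_map_sketch_t *rm,
        int parity)
    {
            gen_fn_t fn = ops->gen[parity - 1];

            if (fn == NULL)
                    return (-1);
            fn(rm);
            return (0);
    }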
+
+static raidz_rec_f
+reconstruct_fun_p_sel(raidz_map_t *rm, const int *parity_valid,
+ const int nbaddata)
+{
+ if (nbaddata == 1 && parity_valid[CODE_P]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_P]);
+ }
+ return ((raidz_rec_f) NULL);
+}
+
+static raidz_rec_f
+reconstruct_fun_pq_sel(raidz_map_t *rm, const int *parity_valid,
+ const int nbaddata)
+{
+ if (nbaddata == 1) {
+ if (parity_valid[CODE_P]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_P]);
+ } else if (parity_valid[CODE_Q]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_Q]);
+ }
+ } else if (nbaddata == 2 &&
+ parity_valid[CODE_P] && parity_valid[CODE_Q]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
+ }
+ return ((raidz_rec_f) NULL);
+}
+
+static raidz_rec_f
+reconstruct_fun_pqr_sel(raidz_map_t *rm, const int *parity_valid,
+ const int nbaddata)
+{
+ if (nbaddata == 1) {
+ if (parity_valid[CODE_P]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_P]);
+ } else if (parity_valid[CODE_Q]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_Q]);
+ } else if (parity_valid[CODE_R]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_R]);
+ }
+ } else if (nbaddata == 2) {
+ if (parity_valid[CODE_P] && parity_valid[CODE_Q]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_PQ]);
+ } else if (parity_valid[CODE_P] && parity_valid[CODE_R]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_PR]);
+ } else if (parity_valid[CODE_Q] && parity_valid[CODE_R]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_QR]);
+ }
+ } else if (nbaddata == 3 &&
+ parity_valid[CODE_P] && parity_valid[CODE_Q] &&
+ parity_valid[CODE_R]) {
+ return (rm->rm_ops->rec[RAIDZ_REC_PQR]);
+ }
+ return ((raidz_rec_f) NULL);
+}
+
+/*
+ * Select data reconstruction method for raidz_map
+ * @parity_valid - Parity validity flag
+ * @dt - Failed data index array
+ * @nbaddata - Number of failed data columns
+ */
+int
+vdev_raidz_math_reconstruct(raidz_map_t *rm, const int *parity_valid,
+ const int *dt, const int nbaddata)
+{
+ raidz_rec_f rec_fn = NULL;
+
+ switch (raidz_parity(rm)) {
+ case PARITY_P:
+ rec_fn = reconstruct_fun_p_sel(rm, parity_valid, nbaddata);
+ break;
+ case PARITY_PQ:
+ rec_fn = reconstruct_fun_pq_sel(rm, parity_valid, nbaddata);
+ break;
+ case PARITY_PQR:
+ rec_fn = reconstruct_fun_pqr_sel(rm, parity_valid, nbaddata);
+ break;
+ default:
+ cmn_err(CE_PANIC, "invalid RAID-Z configuration %d",
+ raidz_parity(rm));
+ break;
+ }
+
+ if (rec_fn == NULL)
+ return (RAIDZ_ORIGINAL_IMPL);
+ else
+ return (rec_fn(rm, dt));
+}
+
+const char *raidz_gen_name[] = {
+ "gen_p", "gen_pq", "gen_pqr"
+};
+const char *raidz_rec_name[] = {
+ "rec_p", "rec_q", "rec_r",
+ "rec_pq", "rec_pr", "rec_qr", "rec_pqr"
+};
+
+#define RAIDZ_KSTAT_LINE_LEN (17 + 10*12 + 1)
+
+static int
+raidz_math_kstat_headers(char *buf, size_t size)
+{
+ int i;
+ ssize_t off;
+
+ ASSERT3U(size, >=, RAIDZ_KSTAT_LINE_LEN);
+
+ off = snprintf(buf, size, "%-17s", "implementation");
+
+ for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++)
+ off += snprintf(buf + off, size - off, "%-16s",
+ raidz_gen_name[i]);
+
+ for (i = 0; i < ARRAY_SIZE(raidz_rec_name); i++)
+ off += snprintf(buf + off, size - off, "%-16s",
+ raidz_rec_name[i]);
+
+ (void) snprintf(buf + off, size - off, "\n");
+
+ return (0);
+}
+
+static int
+raidz_math_kstat_data(char *buf, size_t size, void *data)
+{
+ raidz_impl_kstat_t *fstat = &raidz_impl_kstats[raidz_supp_impl_cnt];
+ raidz_impl_kstat_t *cstat = (raidz_impl_kstat_t *)data;
+ ssize_t off = 0;
+ int i;
+
+ ASSERT3U(size, >=, RAIDZ_KSTAT_LINE_LEN);
+
+ if (cstat == fstat) {
+ off += snprintf(buf + off, size - off, "%-17s", "fastest");
+
+ for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++) {
+ int id = fstat->gen[i];
+ off += snprintf(buf + off, size - off, "%-16s",
+ raidz_supp_impl[id]->name);
+ }
+ for (i = 0; i < ARRAY_SIZE(raidz_rec_name); i++) {
+ int id = fstat->rec[i];
+ off += snprintf(buf + off, size - off, "%-16s",
+ raidz_supp_impl[id]->name);
+ }
+ } else {
+ ptrdiff_t id = cstat - raidz_impl_kstats;
+
+ off += snprintf(buf + off, size - off, "%-17s",
+ raidz_supp_impl[id]->name);
+
+ for (i = 0; i < ARRAY_SIZE(raidz_gen_name); i++)
+ off += snprintf(buf + off, size - off, "%-16llu",
+ (u_longlong_t)cstat->gen[i]);
+
+ for (i = 0; i < ARRAY_SIZE(raidz_rec_name); i++)
+ off += snprintf(buf + off, size - off, "%-16llu",
+ (u_longlong_t)cstat->rec[i]);
+ }
+
+ (void) snprintf(buf + off, size - off, "\n");
+
+ return (0);
+}
+
+static void *
+raidz_math_kstat_addr(kstat_t *ksp, loff_t n)
+{
+ if (n <= raidz_supp_impl_cnt)
+ ksp->ks_private = (void *) (raidz_impl_kstats + n);
+ else
+ ksp->ks_private = NULL;
+
+ return (ksp->ks_private);
+}
+
+#define BENCH_D_COLS (8ULL)
+#define BENCH_COLS (BENCH_D_COLS + PARITY_PQR)
+#define BENCH_ZIO_SIZE (1ULL << SPA_OLD_MAXBLOCKSHIFT) /* 128 kiB */
+#define BENCH_NS MSEC2NSEC(25) /* 25ms */
+
+typedef void (*benchmark_fn)(raidz_map_t *rm, const int fn);
+
+static void
+benchmark_gen_impl(raidz_map_t *rm, const int fn)
+{
+ (void) fn;
+ vdev_raidz_generate_parity(rm);
+}
+
+static void
+benchmark_rec_impl(raidz_map_t *rm, const int fn)
+{
+ static const int rec_tgt[7][3] = {
+ {1, 2, 3}, /* rec_p: bad QR & D[0] */
+ {0, 2, 3}, /* rec_q: bad PR & D[0] */
+ {0, 1, 3}, /* rec_r: bad PQ & D[0] */
+ {2, 3, 4}, /* rec_pq: bad R & D[0][1] */
+ {1, 3, 4}, /* rec_pr: bad Q & D[0][1] */
+ {0, 3, 4}, /* rec_qr: bad P & D[0][1] */
+ {3, 4, 5} /* rec_pqr: bad & D[0][1][2] */
+ };
+
+ vdev_raidz_reconstruct(rm, rec_tgt[fn], 3);
+}
+
+/*
+ * Benchmarking of all supported implementations (raidz_supp_impl_cnt)
+ * is performed by setting the rm_ops pointer and calling the top-level
+ * generate/reconstruct methods of bench_rm.
+ */
+static void
+benchmark_raidz_impl(raidz_map_t *bench_rm, const int fn, benchmark_fn bench_fn)
+{
+ uint64_t run_cnt, speed, best_speed = 0;
+ hrtime_t t_start, t_diff;
+ raidz_impl_ops_t *curr_impl;
+ raidz_impl_kstat_t *fstat = &raidz_impl_kstats[raidz_supp_impl_cnt];
+ int impl, i;
+
+ for (impl = 0; impl < raidz_supp_impl_cnt; impl++) {
+ /* set an implementation to benchmark */
+ curr_impl = raidz_supp_impl[impl];
+ bench_rm->rm_ops = curr_impl;
+
+ run_cnt = 0;
+ t_start = gethrtime();
+
+ do {
+ for (i = 0; i < 25; i++, run_cnt++)
+ bench_fn(bench_rm, fn);
+
+ t_diff = gethrtime() - t_start;
+ } while (t_diff < BENCH_NS);
+
+ speed = run_cnt * BENCH_ZIO_SIZE * NANOSEC;
+ speed /= (t_diff * BENCH_COLS);
+
+ if (bench_fn == benchmark_gen_impl)
+ raidz_impl_kstats[impl].gen[fn] = speed;
+ else
+ raidz_impl_kstats[impl].rec[fn] = speed;
+
+ /* Update fastest implementation method */
+ if (speed > best_speed) {
+ best_speed = speed;
+
+ if (bench_fn == benchmark_gen_impl) {
+ fstat->gen[fn] = impl;
+ vdev_raidz_fastest_impl.gen[fn] =
+ curr_impl->gen[fn];
+ } else {
+ fstat->rec[fn] = impl;
+ vdev_raidz_fastest_impl.rec[fn] =
+ curr_impl->rec[fn];
+ }
+ }
+ }
+}
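
To make the units concrete: if an implementation completes run_cnt = 1000
iterations of the 128 KiB benchmark zio within the 25 ms window, then with
BENCH_COLS = 11 the computed speed is 1000 * 131072 * 10^9 / (25 * 10^6 * 11)
≈ 4.8 * 10^8 B/s, i.e. roughly 455 MiB/s of per-disk throughput (numbers
chosen purely for illustration).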
+
+void
+vdev_raidz_math_init(void)
+{
+ raidz_impl_ops_t *curr_impl;
+ zio_t *bench_zio = NULL;
+ raidz_map_t *bench_rm = NULL;
+ uint64_t bench_parity;
+ int i, c, fn;
+
+ /* move supported impl into raidz_supp_impl */
+ for (i = 0, c = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
+ curr_impl = (raidz_impl_ops_t *)raidz_all_maths[i];
+
+ /* initialize impl */
+ if (curr_impl->init)
+ curr_impl->init();
+
+ if (curr_impl->is_supported())
+ raidz_supp_impl[c++] = (raidz_impl_ops_t *)curr_impl;
+ }
+ membar_producer(); /* complete raidz_supp_impl[] init */
+ raidz_supp_impl_cnt = c; /* number of supported impl */
+
+#if !defined(_KERNEL)
+ /* Skip benchmarking and use last implementation as fastest */
+ memcpy(&vdev_raidz_fastest_impl, raidz_supp_impl[raidz_supp_impl_cnt-1],
+ sizeof (vdev_raidz_fastest_impl));
+ strcpy(vdev_raidz_fastest_impl.name, "fastest");
+
+ raidz_math_initialized = B_TRUE;
+
+ /* Use 'cycle' math selection method for userspace */
+ VERIFY0(vdev_raidz_impl_set("cycle"));
+ return;
+#endif
+
+	/* Fake a zio and run the benchmark on a warmed-up buffer */
+ bench_zio = kmem_zalloc(sizeof (zio_t), KM_SLEEP);
+ bench_zio->io_offset = 0;
+ bench_zio->io_size = BENCH_ZIO_SIZE; /* only data columns */
+ bench_zio->io_abd = abd_alloc_linear(BENCH_ZIO_SIZE, B_TRUE);
+ memset(abd_to_buf(bench_zio->io_abd), 0xAA, BENCH_ZIO_SIZE);
+
+ /* Benchmark parity generation methods */
+ for (fn = 0; fn < RAIDZ_GEN_NUM; fn++) {
+ bench_parity = fn + 1;
+ /* New raidz_map is needed for each generate_p/q/r */
+ bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
+ BENCH_D_COLS + bench_parity, bench_parity);
+
+ benchmark_raidz_impl(bench_rm, fn, benchmark_gen_impl);
+
+ vdev_raidz_map_free(bench_rm);
+ }
+
+ /* Benchmark data reconstruction methods */
+ bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
+ BENCH_COLS, PARITY_PQR);
+
+ for (fn = 0; fn < RAIDZ_REC_NUM; fn++)
+ benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);
+
+ vdev_raidz_map_free(bench_rm);
+
+ /* cleanup the bench zio */
+ abd_free(bench_zio->io_abd);
+ kmem_free(bench_zio, sizeof (zio_t));
+
+ /* install kstats for all impl */
+ raidz_math_kstat = kstat_create("zfs", 0, "vdev_raidz_bench", "misc",
+ KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
+
+ if (raidz_math_kstat != NULL) {
+ raidz_math_kstat->ks_data = NULL;
+ raidz_math_kstat->ks_ndata = UINT32_MAX;
+ kstat_set_raw_ops(raidz_math_kstat,
+ raidz_math_kstat_headers,
+ raidz_math_kstat_data,
+ raidz_math_kstat_addr);
+ kstat_install(raidz_math_kstat);
+ }
+
+ /* Finish initialization */
+ atomic_swap_32(&zfs_vdev_raidz_impl, user_sel_impl);
+ raidz_math_initialized = B_TRUE;
+}
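
Once kstat_install() succeeds, the benchmark table is readable as a raw
kstat; on Linux builds it typically surfaces as
/proc/spl/kstat/zfs/vdev_raidz_bench, one row per benchmarked implementation
plus the synthesized "fastest" row.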
+
+void
+vdev_raidz_math_fini(void)
+{
+ raidz_impl_ops_t const *curr_impl;
+ int i;
+
+ if (raidz_math_kstat != NULL) {
+ kstat_delete(raidz_math_kstat);
+ raidz_math_kstat = NULL;
+ }
+
+ /* fini impl */
+ for (i = 0; i < ARRAY_SIZE(raidz_all_maths); i++) {
+ curr_impl = raidz_all_maths[i];
+ if (curr_impl->fini)
+ curr_impl->fini();
+ }
+}
+
+static const struct {
+ char *name;
+ uint32_t sel;
+} math_impl_opts[] = {
+#if !defined(_KERNEL)
+ { "cycle", IMPL_CYCLE },
+#endif
+ { "fastest", IMPL_FASTEST },
+ { "original", IMPL_ORIGINAL },
+ { "scalar", IMPL_SCALAR }
+};
+
+/*
+ * Function sets the desired raidz implementation.
+ *
+ * If we are called before init(), the user preference will be saved in
+ * user_sel_impl and applied in the later init() call. This occurs when the
+ * module parameter is specified on module load. Otherwise, zfs_vdev_raidz_impl
+ * is updated directly.
+ *
+ * @val Name of raidz implementation to use
+ */
+int
+vdev_raidz_impl_set(const char *val)
+{
+ int err = -EINVAL;
+ char req_name[RAIDZ_IMPL_NAME_MAX];
+ uint32_t impl = RAIDZ_IMPL_READ(user_sel_impl);
+ size_t i;
+
+ /* sanitize input */
+ i = strnlen(val, RAIDZ_IMPL_NAME_MAX);
+ if (i == 0 || i == RAIDZ_IMPL_NAME_MAX)
+ return (err);
+
+ strlcpy(req_name, val, RAIDZ_IMPL_NAME_MAX);
+ while (i > 0 && !!isspace(req_name[i-1]))
+ i--;
+ req_name[i] = '\0';
+
+ /* Check mandatory options */
+ for (i = 0; i < ARRAY_SIZE(math_impl_opts); i++) {
+ if (strcmp(req_name, math_impl_opts[i].name) == 0) {
+ impl = math_impl_opts[i].sel;
+ err = 0;
+ break;
+ }
+ }
+
+ /* check all supported impl if init() was already called */
+ if (err != 0 && raidz_math_initialized) {
+ /* check all supported implementations */
+ for (i = 0; i < raidz_supp_impl_cnt; i++) {
+ if (strcmp(req_name, raidz_supp_impl[i]->name) == 0) {
+ impl = i;
+ err = 0;
+ break;
+ }
+ }
+ }
+
+ if (err == 0) {
+ if (raidz_math_initialized)
+ atomic_swap_32(&zfs_vdev_raidz_impl, impl);
+ else
+ atomic_swap_32(&user_sel_impl, impl);
+ }
+
+ return (err);
+}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+#include <linux/mod_compat.h>
+
+static int
+zfs_vdev_raidz_impl_set(const char *val, zfs_kernel_param_t *kp)
+{
+ return (vdev_raidz_impl_set(val));
+}
+
+static int
+zfs_vdev_raidz_impl_get(char *buffer, zfs_kernel_param_t *kp)
+{
+ int i, cnt = 0;
+ char *fmt;
+ const uint32_t impl = RAIDZ_IMPL_READ(zfs_vdev_raidz_impl);
+
+ ASSERT(raidz_math_initialized);
+
+ /* list mandatory options */
+ for (i = 0; i < ARRAY_SIZE(math_impl_opts) - 2; i++) {
+ fmt = (impl == math_impl_opts[i].sel) ? "[%s] " : "%s ";
+ cnt += sprintf(buffer + cnt, fmt, math_impl_opts[i].name);
+ }
+
+ /* list all supported implementations */
+ for (i = 0; i < raidz_supp_impl_cnt; i++) {
+ fmt = (i == impl) ? "[%s] " : "%s ";
+ cnt += sprintf(buffer + cnt, fmt, raidz_supp_impl[i]->name);
+ }
+
+ return (cnt);
+}
+
+module_param_call(zfs_vdev_raidz_impl, zfs_vdev_raidz_impl_set,
+ zfs_vdev_raidz_impl_get, NULL, 0644);
+MODULE_PARM_DESC(zfs_vdev_raidz_impl, "Select raidz implementation.");
+#endif
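
With the module_param_call() hook above in place, the active implementation
can be inspected or switched at runtime through the standard module parameter
interface, typically /sys/module/zfs/parameters/zfs_vdev_raidz_impl;
zfs_vdev_raidz_impl_get() prints the currently selected entry in brackets.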
diff --git a/zfs/module/zfs/vdev_raidz_math_aarch64_neon.c b/zfs/module/zfs/vdev_raidz_math_aarch64_neon.c
new file mode 100644
index 000000000000..e3ad06776503
--- /dev/null
+++ b/zfs/module/zfs/vdev_raidz_math_aarch64_neon.c
@@ -0,0 +1,2279 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Romain Dolbeau. All rights reserved.
+ */
+
+#include <sys/isa_defs.h>
+#include <sys/types.h>
+
+#if defined(__aarch64__)
+
+#include "vdev_raidz_math_aarch64_neon_common.h"
+
+#define SYN_STRIDE 4
+
+#define ZERO_STRIDE 4
+#define ZERO_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_33_36()
+#define ZERO_D 0, 1, 2, 3
+
+#define COPY_STRIDE 4
+#define COPY_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_33_36()
+#define COPY_D 0, 1, 2, 3
+
+#define ADD_STRIDE 4
+#define ADD_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_33_36()
+#define ADD_D 0, 1, 2, 3
+
+#define MUL_STRIDE 4
+#define MUL_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_33_36()
+#define MUL_D 0, 1, 2, 3
+
+#define GEN_P_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_33_36()
+#define GEN_P_STRIDE 4
+#define GEN_P_P 0, 1, 2, 3
+
+#define GEN_PQ_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define GEN_PQ_STRIDE 4
+#define GEN_PQ_D 0, 1, 2, 3
+#define GEN_PQ_C 4, 5, 6, 7
+
+#define GEN_PQR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define GEN_PQR_STRIDE 4
+#define GEN_PQR_D 0, 1, 2, 3
+#define GEN_PQR_C 4, 5, 6, 7
+
+#define SYN_Q_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define SYN_Q_STRIDE 4
+#define SYN_Q_D 0, 1, 2, 3
+#define SYN_Q_X 4, 5, 6, 7
+
+#define SYN_R_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define SYN_R_STRIDE 4
+#define SYN_R_D 0, 1, 2, 3
+#define SYN_R_X 4, 5, 6, 7
+
+#define SYN_PQ_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define SYN_PQ_STRIDE 4
+#define SYN_PQ_D 0, 1, 2, 3
+#define SYN_PQ_X 4, 5, 6, 7
+
+#define REC_PQ_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_31() \
+ GEN_X_DEFINE_32() \
+ GEN_X_DEFINE_33_36()
+#define REC_PQ_STRIDE 2
+#define REC_PQ_X 0, 1
+#define REC_PQ_Y 2, 3
+#define REC_PQ_T 4, 5
+
+#define SYN_PR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define SYN_PR_STRIDE 4
+#define SYN_PR_D 0, 1, 2, 3
+#define SYN_PR_X 4, 5, 6, 7
+
+#define REC_PR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_31() \
+ GEN_X_DEFINE_32() \
+ GEN_X_DEFINE_33_36()
+#define REC_PR_STRIDE 2
+#define REC_PR_X 0, 1
+#define REC_PR_Y 2, 3
+#define REC_PR_T 4, 5
+
+#define SYN_QR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define SYN_QR_STRIDE 4
+#define SYN_QR_D 0, 1, 2, 3
+#define SYN_QR_X 4, 5, 6, 7
+
+#define REC_QR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_31() \
+ GEN_X_DEFINE_32() \
+ GEN_X_DEFINE_33_36()
+#define REC_QR_STRIDE 2
+#define REC_QR_X 0, 1
+#define REC_QR_Y 2, 3
+#define REC_QR_T 4, 5
+
+#define SYN_PQR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define SYN_PQR_STRIDE 4
+#define SYN_PQR_D 0, 1, 2, 3
+#define SYN_PQR_X 4, 5, 6, 7
+
+#define REC_PQR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_8_9() \
+ GEN_X_DEFINE_31() \
+ GEN_X_DEFINE_32() \
+ GEN_X_DEFINE_33_36()
+#define REC_PQR_STRIDE 2
+#define REC_PQR_X 0, 1
+#define REC_PQR_Y 2, 3
+#define REC_PQR_Z 4, 5
+#define REC_PQR_XS 6, 7
+#define REC_PQR_YS 8, 9
+
+
+#include <sys/vdev_raidz_impl.h>
+#include "vdev_raidz_math_impl.h"
+
+DEFINE_GEN_METHODS(aarch64_neon);
+DEFINE_REC_METHODS(aarch64_neon);
+
+static boolean_t
+raidz_will_aarch64_neon_work(void)
+{
+	return (B_TRUE); // __aarch64__ requires NEON
+}
+
+const raidz_impl_ops_t vdev_raidz_aarch64_neon_impl = {
+ .init = NULL,
+ .fini = NULL,
+ .gen = RAIDZ_GEN_METHODS(aarch64_neon),
+ .rec = RAIDZ_REC_METHODS(aarch64_neon),
+ .is_supported = &raidz_will_aarch64_neon_work,
+ .name = "aarch64_neon"
+};
+
+#endif /* defined(__aarch64__) */
+
+
+#if defined(__aarch64__)
+/* BEGIN CSTYLED */
+const uint8_t
+__attribute__((aligned(256))) gf_clmul_mod_lt[4*256][16] = {
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
+ 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09,
+ 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
+ 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b,
+ 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12,
+ 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15,
+ 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38,
+ 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f,
+ 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36,
+ 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31,
+ 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24,
+ 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23,
+ 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a,
+ 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d,
+ 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53,
+ 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53,
+ 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+ 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53,
+ 0xf5, 0xe8, 0xcf, 0xd2, 0x81, 0x9c, 0xbb, 0xa6 },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e,
+ 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53,
+ 0xf5, 0xe8, 0xcf, 0xd2, 0x81, 0x9c, 0xbb, 0xa6 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79,
+ 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x69, 0x74, 0x53, 0x4e,
+ 0xd2, 0xcf, 0xe8, 0xf5, 0xbb, 0xa6, 0x81, 0x9c },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c,
+ 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x69, 0x74, 0x53, 0x4e,
+ 0xd2, 0xcf, 0xe8, 0xf5, 0xbb, 0xa6, 0x81, 0x9c },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b,
+ 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x69, 0x74, 0x53, 0x4e,
+ 0xcf, 0xd2, 0xf5, 0xe8, 0xa6, 0xbb, 0x9c, 0x81 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62,
+ 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x69, 0x74, 0x53, 0x4e,
+ 0xcf, 0xd2, 0xf5, 0xe8, 0xa6, 0xbb, 0x9c, 0x81 },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65,
+ 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+ { 0x00, 0x1d, 0x27, 0x3a, 0x4e, 0x53, 0x69, 0x74,
+ 0x9c, 0x81, 0xbb, 0xa6, 0xd2, 0xcf, 0xf5, 0xe8 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48,
+ 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+ { 0x00, 0x1d, 0x27, 0x3a, 0x4e, 0x53, 0x69, 0x74,
+ 0x9c, 0x81, 0xbb, 0xa6, 0xd2, 0xcf, 0xf5, 0xe8 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f,
+ 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+ { 0x00, 0x1d, 0x27, 0x3a, 0x4e, 0x53, 0x69, 0x74,
+ 0x81, 0x9c, 0xa6, 0xbb, 0xcf, 0xd2, 0xe8, 0xf5 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46,
+ 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+ { 0x00, 0x1d, 0x27, 0x3a, 0x4e, 0x53, 0x69, 0x74,
+ 0x81, 0x9c, 0xa6, 0xbb, 0xcf, 0xd2, 0xe8, 0xf5 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41,
+ 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+ { 0x00, 0x1d, 0x27, 0x3a, 0x53, 0x4e, 0x74, 0x69,
+ 0xa6, 0xbb, 0x81, 0x9c, 0xf5, 0xe8, 0xd2, 0xcf },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54,
+ 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+ { 0x00, 0x1d, 0x27, 0x3a, 0x53, 0x4e, 0x74, 0x69,
+ 0xa6, 0xbb, 0x81, 0x9c, 0xf5, 0xe8, 0xd2, 0xcf },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53,
+ 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+ { 0x00, 0x1d, 0x27, 0x3a, 0x53, 0x4e, 0x74, 0x69,
+ 0xbb, 0xa6, 0x9c, 0x81, 0xe8, 0xf5, 0xcf, 0xd2 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a,
+ 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+ { 0x00, 0x1d, 0x27, 0x3a, 0x53, 0x4e, 0x74, 0x69,
+ 0xbb, 0xa6, 0x9c, 0x81, 0xe8, 0xf5, 0xcf, 0xd2 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d,
+ 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6,
+ 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6,
+ 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7,
+ 0x08, 0x29, 0x4a, 0x6b, 0x8c, 0xad, 0xce, 0xef },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6,
+ 0xd0, 0xea, 0xa4, 0x9e, 0x38, 0x02, 0x4c, 0x76 },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee,
+ 0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6,
+ 0xd0, 0xea, 0xa4, 0x9e, 0x38, 0x02, 0x4c, 0x76 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9,
+ 0x18, 0x3b, 0x5e, 0x7d, 0x94, 0xb7, 0xd2, 0xf1 },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xf5, 0xcf, 0x81, 0xbb,
+ 0xf7, 0xcd, 0x83, 0xb9, 0x02, 0x38, 0x76, 0x4c },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc,
+ 0x20, 0x04, 0x68, 0x4c, 0xb0, 0x94, 0xf8, 0xdc },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xf5, 0xcf, 0x81, 0xbb,
+ 0xf7, 0xcd, 0x83, 0xb9, 0x02, 0x38, 0x76, 0x4c },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb,
+ 0x28, 0x0d, 0x62, 0x47, 0xbc, 0x99, 0xf6, 0xd3 },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xf5, 0xcf, 0x81, 0xbb,
+ 0xea, 0xd0, 0x9e, 0xa4, 0x1f, 0x25, 0x6b, 0x51 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2,
+ 0x30, 0x16, 0x7c, 0x5a, 0xa8, 0x8e, 0xe4, 0xc2 },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xf5, 0xcf, 0x81, 0xbb,
+ 0xea, 0xd0, 0x9e, 0xa4, 0x1f, 0x25, 0x6b, 0x51 },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5,
+ 0x38, 0x1f, 0x76, 0x51, 0xa4, 0x83, 0xea, 0xcd },
+ { 0x00, 0x3a, 0x69, 0x53, 0xd2, 0xe8, 0xbb, 0x81,
+ 0xb9, 0x83, 0xd0, 0xea, 0x6b, 0x51, 0x02, 0x38 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8,
+ 0x40, 0x68, 0x10, 0x38, 0xe0, 0xc8, 0xb0, 0x98 },
+ { 0x00, 0x3a, 0x69, 0x53, 0xd2, 0xe8, 0xbb, 0x81,
+ 0xb9, 0x83, 0xd0, 0xea, 0x6b, 0x51, 0x02, 0x38 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf,
+ 0x48, 0x61, 0x1a, 0x33, 0xec, 0xc5, 0xbe, 0x97 },
+ { 0x00, 0x3a, 0x69, 0x53, 0xd2, 0xe8, 0xbb, 0x81,
+ 0xa4, 0x9e, 0xcd, 0xf7, 0x76, 0x4c, 0x1f, 0x25 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6,
+ 0x50, 0x7a, 0x04, 0x2e, 0xf8, 0xd2, 0xac, 0x86 },
+ { 0x00, 0x3a, 0x69, 0x53, 0xd2, 0xe8, 0xbb, 0x81,
+ 0xa4, 0x9e, 0xcd, 0xf7, 0x76, 0x4c, 0x1f, 0x25 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1,
+ 0x58, 0x73, 0x0e, 0x25, 0xf4, 0xdf, 0xa2, 0x89 },
+ { 0x00, 0x3a, 0x69, 0x53, 0xcf, 0xf5, 0xa6, 0x9c,
+ 0x83, 0xb9, 0xea, 0xd0, 0x4c, 0x76, 0x25, 0x1f },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4,
+ 0x60, 0x4c, 0x38, 0x14, 0xd0, 0xfc, 0x88, 0xa4 },
+ { 0x00, 0x3a, 0x69, 0x53, 0xcf, 0xf5, 0xa6, 0x9c,
+ 0x83, 0xb9, 0xea, 0xd0, 0x4c, 0x76, 0x25, 0x1f },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3,
+ 0x68, 0x45, 0x32, 0x1f, 0xdc, 0xf1, 0x86, 0xab },
+ { 0x00, 0x3a, 0x69, 0x53, 0xcf, 0xf5, 0xa6, 0x9c,
+ 0x9e, 0xa4, 0xf7, 0xcd, 0x51, 0x6b, 0x38, 0x02 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca,
+ 0x70, 0x5e, 0x2c, 0x02, 0xc8, 0xe6, 0x94, 0xba },
+ { 0x00, 0x3a, 0x69, 0x53, 0xcf, 0xf5, 0xa6, 0x9c,
+ 0x9e, 0xa4, 0xf7, 0xcd, 0x51, 0x6b, 0x38, 0x02 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd,
+ 0x78, 0x57, 0x26, 0x09, 0xc4, 0xeb, 0x9a, 0xb5 },
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5,
+ 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5,
+ 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97,
+ 0x88, 0xb9, 0xea, 0xdb, 0x4c, 0x7d, 0x2e, 0x1f },
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5,
+ 0x38, 0x1f, 0x76, 0x51, 0xa4, 0x83, 0xea, 0xcd },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e,
+ 0x90, 0xa2, 0xf4, 0xc6, 0x58, 0x6a, 0x3c, 0x0e },
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5,
+ 0x38, 0x1f, 0x76, 0x51, 0xa4, 0x83, 0xea, 0xcd },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99,
+ 0x98, 0xab, 0xfe, 0xcd, 0x54, 0x67, 0x32, 0x01 },
+ { 0x00, 0x27, 0x4e, 0x69, 0x81, 0xa6, 0xcf, 0xe8,
+ 0x1f, 0x38, 0x51, 0x76, 0x9e, 0xb9, 0xd0, 0xf7 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c,
+ 0xa0, 0x94, 0xc8, 0xfc, 0x70, 0x44, 0x18, 0x2c },
+ { 0x00, 0x27, 0x4e, 0x69, 0x81, 0xa6, 0xcf, 0xe8,
+ 0x1f, 0x38, 0x51, 0x76, 0x9e, 0xb9, 0xd0, 0xf7 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b,
+ 0xa8, 0x9d, 0xc2, 0xf7, 0x7c, 0x49, 0x16, 0x23 },
+ { 0x00, 0x27, 0x4e, 0x69, 0x81, 0xa6, 0xcf, 0xe8,
+ 0x02, 0x25, 0x4c, 0x6b, 0x83, 0xa4, 0xcd, 0xea },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82,
+ 0xb0, 0x86, 0xdc, 0xea, 0x68, 0x5e, 0x04, 0x32 },
+ { 0x00, 0x27, 0x4e, 0x69, 0x81, 0xa6, 0xcf, 0xe8,
+ 0x02, 0x25, 0x4c, 0x6b, 0x83, 0xa4, 0xcd, 0xea },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85,
+ 0xb8, 0x8f, 0xd6, 0xe1, 0x64, 0x53, 0x0a, 0x3d },
+ { 0x00, 0x27, 0x53, 0x74, 0xa6, 0x81, 0xf5, 0xd2,
+ 0x51, 0x76, 0x02, 0x25, 0xf7, 0xd0, 0xa4, 0x83 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8,
+ 0xc0, 0xf8, 0xb0, 0x88, 0x20, 0x18, 0x50, 0x68 },
+ { 0x00, 0x27, 0x53, 0x74, 0xa6, 0x81, 0xf5, 0xd2,
+ 0x51, 0x76, 0x02, 0x25, 0xf7, 0xd0, 0xa4, 0x83 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf,
+ 0xc8, 0xf1, 0xba, 0x83, 0x2c, 0x15, 0x5e, 0x67 },
+ { 0x00, 0x27, 0x53, 0x74, 0xa6, 0x81, 0xf5, 0xd2,
+ 0x4c, 0x6b, 0x1f, 0x38, 0xea, 0xcd, 0xb9, 0x9e },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6,
+ 0xd0, 0xea, 0xa4, 0x9e, 0x38, 0x02, 0x4c, 0x76 },
+ { 0x00, 0x27, 0x53, 0x74, 0xa6, 0x81, 0xf5, 0xd2,
+ 0x4c, 0x6b, 0x1f, 0x38, 0xea, 0xcd, 0xb9, 0x9e },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1,
+ 0xd8, 0xe3, 0xae, 0x95, 0x34, 0x0f, 0x42, 0x79 },
+ { 0x00, 0x27, 0x53, 0x74, 0xbb, 0x9c, 0xe8, 0xcf,
+ 0x6b, 0x4c, 0x38, 0x1f, 0xd0, 0xf7, 0x83, 0xa4 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4,
+ 0xe0, 0xdc, 0x98, 0xa4, 0x10, 0x2c, 0x68, 0x54 },
+ { 0x00, 0x27, 0x53, 0x74, 0xbb, 0x9c, 0xe8, 0xcf,
+ 0x6b, 0x4c, 0x38, 0x1f, 0xd0, 0xf7, 0x83, 0xa4 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3,
+ 0xe8, 0xd5, 0x92, 0xaf, 0x1c, 0x21, 0x66, 0x5b },
+ { 0x00, 0x27, 0x53, 0x74, 0xbb, 0x9c, 0xe8, 0xcf,
+ 0x76, 0x51, 0x25, 0x02, 0xcd, 0xea, 0x9e, 0xb9 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba,
+ 0xf0, 0xce, 0x8c, 0xb2, 0x08, 0x36, 0x74, 0x4a },
+ { 0x00, 0x27, 0x53, 0x74, 0xbb, 0x9c, 0xe8, 0xcf,
+ 0x76, 0x51, 0x25, 0x02, 0xcd, 0xea, 0x9e, 0xb9 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd,
+ 0xf8, 0xc7, 0x86, 0xb9, 0x04, 0x3b, 0x7a, 0x45 },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51,
+ 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51,
+ 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x41, 0x82, 0xc3, 0x04, 0x45, 0x86, 0xc7,
+ 0x08, 0x49, 0x8a, 0xcb, 0x0c, 0x4d, 0x8e, 0xcf },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51,
+ 0x9a, 0xee, 0x72, 0x06, 0x57, 0x23, 0xbf, 0xcb },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x42, 0x84, 0xc6, 0x08, 0x4a, 0x8c, 0xce,
+ 0x10, 0x52, 0x94, 0xd6, 0x18, 0x5a, 0x9c, 0xde },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51,
+ 0x9a, 0xee, 0x72, 0x06, 0x57, 0x23, 0xbf, 0xcb },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x43, 0x86, 0xc5, 0x0c, 0x4f, 0x8a, 0xc9,
+ 0x18, 0x5b, 0x9e, 0xdd, 0x14, 0x57, 0x92, 0xd1 },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xd0, 0xa4, 0x38, 0x4c,
+ 0xbd, 0xc9, 0x55, 0x21, 0x6d, 0x19, 0x85, 0xf1 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x44, 0x88, 0xcc, 0x10, 0x54, 0x98, 0xdc,
+ 0x20, 0x64, 0xa8, 0xec, 0x30, 0x74, 0xb8, 0xfc },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xd0, 0xa4, 0x38, 0x4c,
+ 0xbd, 0xc9, 0x55, 0x21, 0x6d, 0x19, 0x85, 0xf1 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x45, 0x8a, 0xcf, 0x14, 0x51, 0x9e, 0xdb,
+ 0x28, 0x6d, 0xa2, 0xe7, 0x3c, 0x79, 0xb6, 0xf3 },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xd0, 0xa4, 0x38, 0x4c,
+ 0xa0, 0xd4, 0x48, 0x3c, 0x70, 0x04, 0x98, 0xec },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x46, 0x8c, 0xca, 0x18, 0x5e, 0x94, 0xd2,
+ 0x30, 0x76, 0xbc, 0xfa, 0x28, 0x6e, 0xa4, 0xe2 },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xd0, 0xa4, 0x38, 0x4c,
+ 0xa0, 0xd4, 0x48, 0x3c, 0x70, 0x04, 0x98, 0xec },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x47, 0x8e, 0xc9, 0x1c, 0x5b, 0x92, 0xd5,
+ 0x38, 0x7f, 0xb6, 0xf1, 0x24, 0x63, 0xaa, 0xed },
+ { 0x00, 0x74, 0xf5, 0x81, 0xf7, 0x83, 0x02, 0x76,
+ 0xf3, 0x87, 0x06, 0x72, 0x04, 0x70, 0xf1, 0x85 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x48, 0x90, 0xd8, 0x20, 0x68, 0xb0, 0xf8,
+ 0x40, 0x08, 0xd0, 0x98, 0x60, 0x28, 0xf0, 0xb8 },
+ { 0x00, 0x74, 0xf5, 0x81, 0xf7, 0x83, 0x02, 0x76,
+ 0xf3, 0x87, 0x06, 0x72, 0x04, 0x70, 0xf1, 0x85 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x49, 0x92, 0xdb, 0x24, 0x6d, 0xb6, 0xff,
+ 0x48, 0x01, 0xda, 0x93, 0x6c, 0x25, 0xfe, 0xb7 },
+ { 0x00, 0x74, 0xf5, 0x81, 0xf7, 0x83, 0x02, 0x76,
+ 0xee, 0x9a, 0x1b, 0x6f, 0x19, 0x6d, 0xec, 0x98 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x4a, 0x94, 0xde, 0x28, 0x62, 0xbc, 0xf6,
+ 0x50, 0x1a, 0xc4, 0x8e, 0x78, 0x32, 0xec, 0xa6 },
+ { 0x00, 0x74, 0xf5, 0x81, 0xf7, 0x83, 0x02, 0x76,
+ 0xee, 0x9a, 0x1b, 0x6f, 0x19, 0x6d, 0xec, 0x98 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x4b, 0x96, 0xdd, 0x2c, 0x67, 0xba, 0xf1,
+ 0x58, 0x13, 0xce, 0x85, 0x74, 0x3f, 0xe2, 0xa9 },
+ { 0x00, 0x74, 0xf5, 0x81, 0xea, 0x9e, 0x1f, 0x6b,
+ 0xc9, 0xbd, 0x3c, 0x48, 0x23, 0x57, 0xd6, 0xa2 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x4c, 0x98, 0xd4, 0x30, 0x7c, 0xa8, 0xe4,
+ 0x60, 0x2c, 0xf8, 0xb4, 0x50, 0x1c, 0xc8, 0x84 },
+ { 0x00, 0x74, 0xf5, 0x81, 0xea, 0x9e, 0x1f, 0x6b,
+ 0xc9, 0xbd, 0x3c, 0x48, 0x23, 0x57, 0xd6, 0xa2 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x4d, 0x9a, 0xd7, 0x34, 0x79, 0xae, 0xe3,
+ 0x68, 0x25, 0xf2, 0xbf, 0x5c, 0x11, 0xc6, 0x8b },
+ { 0x00, 0x74, 0xf5, 0x81, 0xea, 0x9e, 0x1f, 0x6b,
+ 0xd4, 0xa0, 0x21, 0x55, 0x3e, 0x4a, 0xcb, 0xbf },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x38, 0x76, 0xa4, 0xea,
+ 0x70, 0x3e, 0xec, 0xa2, 0x48, 0x06, 0xd4, 0x9a },
+ { 0x00, 0x74, 0xf5, 0x81, 0xea, 0x9e, 0x1f, 0x6b,
+ 0xd4, 0xa0, 0x21, 0x55, 0x3e, 0x4a, 0xcb, 0xbf },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x4f, 0x9e, 0xd1, 0x3c, 0x73, 0xa2, 0xed,
+ 0x78, 0x37, 0xe6, 0xa9, 0x44, 0x0b, 0xda, 0x95 },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02,
+ 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02,
+ 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x51, 0xa2, 0xf3, 0x44, 0x15, 0xe6, 0xb7,
+ 0x88, 0xd9, 0x2a, 0x7b, 0xcc, 0x9d, 0x6e, 0x3f },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02,
+ 0x72, 0x1b, 0xa0, 0xc9, 0xcb, 0xa2, 0x19, 0x70 },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x52, 0xa4, 0xf6, 0x48, 0x1a, 0xec, 0xbe,
+ 0x90, 0xc2, 0x34, 0x66, 0xd8, 0x8a, 0x7c, 0x2e },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02,
+ 0x72, 0x1b, 0xa0, 0xc9, 0xcb, 0xa2, 0x19, 0x70 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x4c, 0x1f, 0xea, 0xb9,
+ 0x98, 0xcb, 0x3e, 0x6d, 0xd4, 0x87, 0x72, 0x21 },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xa4, 0xcd, 0x76, 0x1f,
+ 0x55, 0x3c, 0x87, 0xee, 0xf1, 0x98, 0x23, 0x4a },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x54, 0xa8, 0xfc, 0x50, 0x04, 0xf8, 0xac,
+ 0xa0, 0xf4, 0x08, 0x5c, 0xf0, 0xa4, 0x58, 0x0c },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xa4, 0xcd, 0x76, 0x1f,
+ 0x55, 0x3c, 0x87, 0xee, 0xf1, 0x98, 0x23, 0x4a },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x55, 0xaa, 0xff, 0x54, 0x01, 0xfe, 0xab,
+ 0xa8, 0xfd, 0x02, 0x57, 0xfc, 0xa9, 0x56, 0x03 },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xa4, 0xcd, 0x76, 0x1f,
+ 0x48, 0x21, 0x9a, 0xf3, 0xec, 0x85, 0x3e, 0x57 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x56, 0xac, 0xfa, 0x58, 0x0e, 0xf4, 0xa2,
+ 0xb0, 0xe6, 0x1c, 0x4a, 0xe8, 0xbe, 0x44, 0x12 },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xa4, 0xcd, 0x76, 0x1f,
+ 0x48, 0x21, 0x9a, 0xf3, 0xec, 0x85, 0x3e, 0x57 },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x57, 0xae, 0xf9, 0x5c, 0x0b, 0xf2, 0xa5,
+ 0xb8, 0xef, 0x16, 0x41, 0xe4, 0xb3, 0x4a, 0x1d },
+ { 0x00, 0x69, 0xcf, 0xa6, 0x83, 0xea, 0x4c, 0x25,
+ 0x1b, 0x72, 0xd4, 0xbd, 0x98, 0xf1, 0x57, 0x3e },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x58, 0xb0, 0xe8, 0x60, 0x38, 0xd0, 0x88,
+ 0xc0, 0x98, 0x70, 0x28, 0xa0, 0xf8, 0x10, 0x48 },
+ { 0x00, 0x69, 0xcf, 0xa6, 0x83, 0xea, 0x4c, 0x25,
+ 0x1b, 0x72, 0xd4, 0xbd, 0x98, 0xf1, 0x57, 0x3e },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x59, 0xb2, 0xeb, 0x64, 0x3d, 0xd6, 0x8f,
+ 0xc8, 0x91, 0x7a, 0x23, 0xac, 0xf5, 0x1e, 0x47 },
+ { 0x00, 0x69, 0xcf, 0xa6, 0x83, 0xea, 0x4c, 0x25,
+ 0x06, 0x6f, 0xc9, 0xa0, 0x85, 0xec, 0x4a, 0x23 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x5a, 0xb4, 0xee, 0x68, 0x32, 0xdc, 0x86,
+ 0xd0, 0x8a, 0x64, 0x3e, 0xb8, 0xe2, 0x0c, 0x56 },
+ { 0x00, 0x69, 0xcf, 0xa6, 0x83, 0xea, 0x4c, 0x25,
+ 0x06, 0x6f, 0xc9, 0xa0, 0x85, 0xec, 0x4a, 0x23 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x5b, 0xb6, 0xed, 0x6c, 0x37, 0xda, 0x81,
+ 0xd8, 0x83, 0x6e, 0x35, 0xb4, 0xef, 0x02, 0x59 },
+ { 0x00, 0x69, 0xcf, 0xa6, 0x9e, 0xf7, 0x51, 0x38,
+ 0x21, 0x48, 0xee, 0x87, 0xbf, 0xd6, 0x70, 0x19 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x5c, 0xb8, 0xe4, 0x70, 0x2c, 0xc8, 0x94,
+ 0xe0, 0xbc, 0x58, 0x04, 0x90, 0xcc, 0x28, 0x74 },
+ { 0x00, 0x69, 0xcf, 0xa6, 0x9e, 0xf7, 0x51, 0x38,
+ 0x21, 0x48, 0xee, 0x87, 0xbf, 0xd6, 0x70, 0x19 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x5d, 0xba, 0xe7, 0x74, 0x29, 0xce, 0x93,
+ 0xe8, 0xb5, 0x52, 0x0f, 0x9c, 0xc1, 0x26, 0x7b },
+ { 0x00, 0x69, 0xcf, 0xa6, 0x9e, 0xf7, 0x51, 0x38,
+ 0x3c, 0x55, 0xf3, 0x9a, 0xa2, 0xcb, 0x6d, 0x04 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x5e, 0xbc, 0xe2, 0x78, 0x26, 0xc4, 0x9a,
+ 0xf0, 0xae, 0x4c, 0x12, 0x88, 0xd6, 0x34, 0x6a },
+ { 0x00, 0x69, 0xcf, 0xa6, 0x9e, 0xf7, 0x51, 0x38,
+ 0x3c, 0x55, 0xf3, 0x9a, 0xa2, 0xcb, 0x6d, 0x04 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x5f, 0xbe, 0xe1, 0x7c, 0x23, 0xc2, 0x9d,
+ 0xf8, 0xa7, 0x46, 0x19, 0x84, 0xdb, 0x3a, 0x65 },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7,
+ 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7,
+ 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x61, 0xc2, 0xa3, 0x84, 0xe5, 0x46, 0x27,
+ 0x08, 0x69, 0xca, 0xab, 0x8c, 0xed, 0x4e, 0x2f },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7,
+ 0x57, 0x19, 0xcb, 0x85, 0x72, 0x3c, 0xee, 0xa0 },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x62, 0xc4, 0xa6, 0x88, 0xea, 0x4c, 0x2e,
+ 0x10, 0x72, 0xd4, 0xb6, 0x98, 0xfa, 0x5c, 0x3e },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7,
+ 0x57, 0x19, 0xcb, 0x85, 0x72, 0x3c, 0xee, 0xa0 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x63, 0xc6, 0xa5, 0x8c, 0xef, 0x4a, 0x29,
+ 0x18, 0x7b, 0xde, 0xbd, 0x94, 0xf7, 0x52, 0x31 },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x38, 0x76, 0xa4, 0xea,
+ 0x70, 0x3e, 0xec, 0xa2, 0x48, 0x06, 0xd4, 0x9a },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x64, 0xc8, 0xac, 0x90, 0xf4, 0x58, 0x3c,
+ 0x20, 0x44, 0xe8, 0x8c, 0xb0, 0xd4, 0x78, 0x1c },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x38, 0x76, 0xa4, 0xea,
+ 0x70, 0x3e, 0xec, 0xa2, 0x48, 0x06, 0xd4, 0x9a },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x65, 0xca, 0xaf, 0x94, 0xf1, 0x5e, 0x3b,
+ 0x28, 0x4d, 0xe2, 0x87, 0xbc, 0xd9, 0x76, 0x13 },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x38, 0x76, 0xa4, 0xea,
+ 0x6d, 0x23, 0xf1, 0xbf, 0x55, 0x1b, 0xc9, 0x87 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x66, 0xcc, 0xaa, 0x98, 0xfe, 0x54, 0x32,
+ 0x30, 0x56, 0xfc, 0x9a, 0xa8, 0xce, 0x64, 0x02 },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x38, 0x76, 0xa4, 0xea,
+ 0x6d, 0x23, 0xf1, 0xbf, 0x55, 0x1b, 0xc9, 0x87 },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x67, 0xce, 0xa9, 0x9c, 0xfb, 0x52, 0x35,
+ 0x38, 0x5f, 0xf6, 0x91, 0xa4, 0xc3, 0x6a, 0x0d },
+ { 0x00, 0x4e, 0x81, 0xcf, 0x1f, 0x51, 0x9e, 0xd0,
+ 0x3e, 0x70, 0xbf, 0xf1, 0x21, 0x6f, 0xa0, 0xee },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x68, 0xd0, 0xb8, 0xa0, 0xc8, 0x70, 0x18,
+ 0x40, 0x28, 0x90, 0xf8, 0xe0, 0x88, 0x30, 0x58 },
+ { 0x00, 0x4e, 0x81, 0xcf, 0x1f, 0x51, 0x9e, 0xd0,
+ 0x3e, 0x70, 0xbf, 0xf1, 0x21, 0x6f, 0xa0, 0xee },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xa4, 0xcd, 0x76, 0x1f,
+ 0x48, 0x21, 0x9a, 0xf3, 0xec, 0x85, 0x3e, 0x57 },
+ { 0x00, 0x4e, 0x81, 0xcf, 0x1f, 0x51, 0x9e, 0xd0,
+ 0x23, 0x6d, 0xa2, 0xec, 0x3c, 0x72, 0xbd, 0xf3 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x6a, 0xd4, 0xbe, 0xa8, 0xc2, 0x7c, 0x16,
+ 0x50, 0x3a, 0x84, 0xee, 0xf8, 0x92, 0x2c, 0x46 },
+ { 0x00, 0x4e, 0x81, 0xcf, 0x1f, 0x51, 0x9e, 0xd0,
+ 0x23, 0x6d, 0xa2, 0xec, 0x3c, 0x72, 0xbd, 0xf3 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x6b, 0xd6, 0xbd, 0xac, 0xc7, 0x7a, 0x11,
+ 0x58, 0x33, 0x8e, 0xe5, 0xf4, 0x9f, 0x22, 0x49 },
+ { 0x00, 0x4e, 0x81, 0xcf, 0x02, 0x4c, 0x83, 0xcd,
+ 0x04, 0x4a, 0x85, 0xcb, 0x06, 0x48, 0x87, 0xc9 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x6c, 0xd8, 0xb4, 0xb0, 0xdc, 0x68, 0x04,
+ 0x60, 0x0c, 0xb8, 0xd4, 0xd0, 0xbc, 0x08, 0x64 },
+ { 0x00, 0x4e, 0x81, 0xcf, 0x02, 0x4c, 0x83, 0xcd,
+ 0x04, 0x4a, 0x85, 0xcb, 0x06, 0x48, 0x87, 0xc9 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x6d, 0xda, 0xb7, 0xb4, 0xd9, 0x6e, 0x03,
+ 0x68, 0x05, 0xb2, 0xdf, 0xdc, 0xb1, 0x06, 0x6b },
+ { 0x00, 0x4e, 0x81, 0xcf, 0x02, 0x4c, 0x83, 0xcd,
+ 0x19, 0x57, 0x98, 0xd6, 0x1b, 0x55, 0x9a, 0xd4 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x6e, 0xdc, 0xb2, 0xb8, 0xd6, 0x64, 0x0a,
+ 0x70, 0x1e, 0xac, 0xc2, 0xc8, 0xa6, 0x14, 0x7a },
+ { 0x00, 0x4e, 0x81, 0xcf, 0x02, 0x4c, 0x83, 0xcd,
+ 0x19, 0x57, 0x98, 0xd6, 0x1b, 0x55, 0x9a, 0xd4 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x6f, 0xde, 0xb1, 0xbc, 0xd3, 0x62, 0x0d,
+ 0x78, 0x17, 0xa6, 0xc9, 0xc4, 0xab, 0x1a, 0x75 },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4,
+ 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4,
+ 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x71, 0xe2, 0x93, 0xc4, 0xb5, 0x26, 0x57,
+ 0x88, 0xf9, 0x6a, 0x1b, 0x4c, 0x3d, 0xae, 0xdf },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4,
+ 0xbf, 0xec, 0x19, 0x4a, 0xee, 0xbd, 0x48, 0x1b },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x72, 0xe4, 0x96, 0xc8, 0xba, 0x2c, 0x5e,
+ 0x90, 0xe2, 0x74, 0x06, 0x58, 0x2a, 0xbc, 0xce },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4,
+ 0xbf, 0xec, 0x19, 0x4a, 0xee, 0xbd, 0x48, 0x1b },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x73, 0xe6, 0x95, 0xcc, 0xbf, 0x2a, 0x59,
+ 0x98, 0xeb, 0x7e, 0x0d, 0x54, 0x27, 0xb2, 0xc1 },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x4c, 0x1f, 0xea, 0xb9,
+ 0x98, 0xcb, 0x3e, 0x6d, 0xd4, 0x87, 0x72, 0x21 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xd0, 0xa4, 0x38, 0x4c,
+ 0xa0, 0xd4, 0x48, 0x3c, 0x70, 0x04, 0x98, 0xec },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x4c, 0x1f, 0xea, 0xb9,
+ 0x98, 0xcb, 0x3e, 0x6d, 0xd4, 0x87, 0x72, 0x21 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x75, 0xea, 0x9f, 0xd4, 0xa1, 0x3e, 0x4b,
+ 0xa8, 0xdd, 0x42, 0x37, 0x7c, 0x09, 0x96, 0xe3 },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x4c, 0x1f, 0xea, 0xb9,
+ 0x85, 0xd6, 0x23, 0x70, 0xc9, 0x9a, 0x6f, 0x3c },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x76, 0xec, 0x9a, 0xd8, 0xae, 0x34, 0x42,
+ 0xb0, 0xc6, 0x5c, 0x2a, 0x68, 0x1e, 0x84, 0xf2 },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x4c, 0x1f, 0xea, 0xb9,
+ 0x85, 0xd6, 0x23, 0x70, 0xc9, 0x9a, 0x6f, 0x3c },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x77, 0xee, 0x99, 0xdc, 0xab, 0x32, 0x45,
+ 0xb8, 0xcf, 0x56, 0x21, 0x64, 0x13, 0x8a, 0xfd },
+ { 0x00, 0x53, 0xbb, 0xe8, 0x6b, 0x38, 0xd0, 0x83,
+ 0xd6, 0x85, 0x6d, 0x3e, 0xbd, 0xee, 0x06, 0x55 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x78, 0xf0, 0x88, 0xe0, 0x98, 0x10, 0x68,
+ 0xc0, 0xb8, 0x30, 0x48, 0x20, 0x58, 0xd0, 0xa8 },
+ { 0x00, 0x53, 0xbb, 0xe8, 0x6b, 0x38, 0xd0, 0x83,
+ 0xd6, 0x85, 0x6d, 0x3e, 0xbd, 0xee, 0x06, 0x55 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x79, 0xf2, 0x8b, 0xe4, 0x9d, 0x16, 0x6f,
+ 0xc8, 0xb1, 0x3a, 0x43, 0x2c, 0x55, 0xde, 0xa7 },
+ { 0x00, 0x53, 0xbb, 0xe8, 0x6b, 0x38, 0xd0, 0x83,
+ 0xcb, 0x98, 0x70, 0x23, 0xa0, 0xf3, 0x1b, 0x48 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x7a, 0xf4, 0x8e, 0xe8, 0x92, 0x1c, 0x66,
+ 0xd0, 0xaa, 0x24, 0x5e, 0x38, 0x42, 0xcc, 0xb6 },
+ { 0x00, 0x53, 0xbb, 0xe8, 0x6b, 0x38, 0xd0, 0x83,
+ 0xcb, 0x98, 0x70, 0x23, 0xa0, 0xf3, 0x1b, 0x48 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x7b, 0xf6, 0x8d, 0xec, 0x97, 0x1a, 0x61,
+ 0xd8, 0xa3, 0x2e, 0x55, 0x34, 0x4f, 0xc2, 0xb9 },
+ { 0x00, 0x53, 0xbb, 0xe8, 0x76, 0x25, 0xcd, 0x9e,
+ 0xec, 0xbf, 0x57, 0x04, 0x9a, 0xc9, 0x21, 0x72 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x7c, 0xf8, 0x84, 0xf0, 0x8c, 0x08, 0x74,
+ 0xe0, 0x9c, 0x18, 0x64, 0x10, 0x6c, 0xe8, 0x94 },
+ { 0x00, 0x53, 0xbb, 0xe8, 0x76, 0x25, 0xcd, 0x9e,
+ 0xec, 0xbf, 0x57, 0x04, 0x9a, 0xc9, 0x21, 0x72 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x7d, 0xfa, 0x87, 0xf4, 0x89, 0x0e, 0x73,
+ 0xe8, 0x95, 0x12, 0x6f, 0x1c, 0x61, 0xe6, 0x9b },
+ { 0x00, 0x53, 0xbb, 0xe8, 0x76, 0x25, 0xcd, 0x9e,
+ 0xf1, 0xa2, 0x4a, 0x19, 0x87, 0xd4, 0x3c, 0x6f },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x7e, 0xfc, 0x82, 0xf8, 0x86, 0x04, 0x7a,
+ 0xf0, 0x8e, 0x0c, 0x72, 0x08, 0x76, 0xf4, 0x8a },
+ { 0x00, 0x53, 0xbb, 0xe8, 0x76, 0x25, 0xcd, 0x9e,
+ 0xf1, 0xa2, 0x4a, 0x19, 0x87, 0xd4, 0x3c, 0x6f },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x7f, 0xfe, 0x81, 0xfc, 0x83, 0x02, 0x7d,
+ 0xf8, 0x87, 0x06, 0x79, 0x04, 0x7b, 0xfa, 0x85 },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2,
+ 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2,
+ 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x81, 0x02, 0x83, 0x04, 0x85, 0x06, 0x87,
+ 0x08, 0x89, 0x0a, 0x8b, 0x0c, 0x8d, 0x0e, 0x8f },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2,
+ 0x0e, 0xe6, 0xc3, 0x2b, 0x89, 0x61, 0x44, 0xac },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x82, 0x04, 0x86, 0x08, 0x8a, 0x0c, 0x8e,
+ 0x10, 0x92, 0x14, 0x96, 0x18, 0x9a, 0x1c, 0x9e },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2,
+ 0x0e, 0xe6, 0xc3, 0x2b, 0x89, 0x61, 0x44, 0xac },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x83, 0x06, 0x85, 0x0c, 0x8f, 0x0a, 0x89,
+ 0x18, 0x9b, 0x1e, 0x9d, 0x14, 0x97, 0x12, 0x91 },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x9a, 0x72, 0x57, 0xbf,
+ 0x29, 0xc1, 0xe4, 0x0c, 0xb3, 0x5b, 0x7e, 0x96 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x84, 0x08, 0x8c, 0x10, 0x94, 0x18, 0x9c,
+ 0x20, 0xa4, 0x28, 0xac, 0x30, 0xb4, 0x38, 0xbc },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x9a, 0x72, 0x57, 0xbf,
+ 0x29, 0xc1, 0xe4, 0x0c, 0xb3, 0x5b, 0x7e, 0x96 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x85, 0x0a, 0x8f, 0x14, 0x91, 0x1e, 0x9b,
+ 0x28, 0xad, 0x22, 0xa7, 0x3c, 0xb9, 0x36, 0xb3 },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x9a, 0x72, 0x57, 0xbf,
+ 0x34, 0xdc, 0xf9, 0x11, 0xae, 0x46, 0x63, 0x8b },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x86, 0x0c, 0x8a, 0x18, 0x9e, 0x14, 0x92,
+ 0x30, 0xb6, 0x3c, 0xba, 0x28, 0xae, 0x24, 0xa2 },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x9a, 0x72, 0x57, 0xbf,
+ 0x34, 0xdc, 0xf9, 0x11, 0xae, 0x46, 0x63, 0x8b },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x87, 0x0e, 0x89, 0x1c, 0x9b, 0x12, 0x95,
+ 0x38, 0xbf, 0x36, 0xb1, 0x24, 0xa3, 0x2a, 0xad },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xbd, 0x55, 0x6d, 0x85,
+ 0x67, 0x8f, 0xb7, 0x5f, 0xda, 0x32, 0x0a, 0xe2 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x88, 0x10, 0x98, 0x20, 0xa8, 0x30, 0xb8,
+ 0x40, 0xc8, 0x50, 0xd8, 0x60, 0xe8, 0x70, 0xf8 },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xbd, 0x55, 0x6d, 0x85,
+ 0x67, 0x8f, 0xb7, 0x5f, 0xda, 0x32, 0x0a, 0xe2 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x89, 0x12, 0x9b, 0x24, 0xad, 0x36, 0xbf,
+ 0x48, 0xc1, 0x5a, 0xd3, 0x6c, 0xe5, 0x7e, 0xf7 },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xbd, 0x55, 0x6d, 0x85,
+ 0x7a, 0x92, 0xaa, 0x42, 0xc7, 0x2f, 0x17, 0xff },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x8a, 0x14, 0x9e, 0x28, 0xa2, 0x3c, 0xb6,
+ 0x50, 0xda, 0x44, 0xce, 0x78, 0xf2, 0x6c, 0xe6 },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xbd, 0x55, 0x6d, 0x85,
+ 0x7a, 0x92, 0xaa, 0x42, 0xc7, 0x2f, 0x17, 0xff },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x8b, 0x16, 0x9d, 0x2c, 0xa7, 0x3a, 0xb1,
+ 0x58, 0xd3, 0x4e, 0xc5, 0x74, 0xff, 0x62, 0xe9 },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xa0, 0x48, 0x70, 0x98,
+ 0x5d, 0xb5, 0x8d, 0x65, 0xfd, 0x15, 0x2d, 0xc5 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x8c, 0x18, 0x94, 0x30, 0xbc, 0x28, 0xa4,
+ 0x60, 0xec, 0x78, 0xf4, 0x50, 0xdc, 0x48, 0xc4 },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xa0, 0x48, 0x70, 0x98,
+ 0x5d, 0xb5, 0x8d, 0x65, 0xfd, 0x15, 0x2d, 0xc5 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x8d, 0x1a, 0x97, 0x34, 0xb9, 0x2e, 0xa3,
+ 0x68, 0xe5, 0x72, 0xff, 0x5c, 0xd1, 0x46, 0xcb },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xa0, 0x48, 0x70, 0x98,
+ 0x40, 0xa8, 0x90, 0x78, 0xe0, 0x08, 0x30, 0xd8 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x8e, 0x1c, 0x92, 0x38, 0xb6, 0x24, 0xaa,
+ 0x70, 0xfe, 0x6c, 0xe2, 0x48, 0xc6, 0x54, 0xda },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xa0, 0x48, 0x70, 0x98,
+ 0x40, 0xa8, 0x90, 0x78, 0xe0, 0x08, 0x30, 0xd8 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x8f, 0x1e, 0x91, 0x3c, 0xb3, 0x22, 0xad,
+ 0x78, 0xf7, 0x66, 0xe9, 0x44, 0xcb, 0x5a, 0xd5 },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1,
+ 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1,
+ 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x91, 0x22, 0xb3, 0x44, 0xd5, 0x66, 0xf7,
+ 0x88, 0x19, 0xaa, 0x3b, 0xcc, 0x5d, 0xee, 0x7f },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1,
+ 0xe6, 0x13, 0x11, 0xe4, 0x15, 0xe0, 0xe2, 0x17 },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x92, 0x24, 0xb6, 0x48, 0xda, 0x6c, 0xfe,
+ 0x90, 0x02, 0xb4, 0x26, 0xd8, 0x4a, 0xfc, 0x6e },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1,
+ 0xe6, 0x13, 0x11, 0xe4, 0x15, 0xe0, 0xe2, 0x17 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x93, 0x26, 0xb5, 0x4c, 0xdf, 0x6a, 0xf9,
+ 0x98, 0x0b, 0xbe, 0x2d, 0xd4, 0x47, 0xf2, 0x61 },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xee, 0x1b, 0x19, 0xec,
+ 0xc1, 0x34, 0x36, 0xc3, 0x2f, 0xda, 0xd8, 0x2d },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x94, 0x28, 0xbc, 0x50, 0xc4, 0x78, 0xec,
+ 0xa0, 0x34, 0x88, 0x1c, 0xf0, 0x64, 0xd8, 0x4c },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xee, 0x1b, 0x19, 0xec,
+ 0xc1, 0x34, 0x36, 0xc3, 0x2f, 0xda, 0xd8, 0x2d },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x95, 0x2a, 0xbf, 0x54, 0xc1, 0x7e, 0xeb,
+ 0xa8, 0x3d, 0x82, 0x17, 0xfc, 0x69, 0xd6, 0x43 },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xee, 0x1b, 0x19, 0xec,
+ 0xdc, 0x29, 0x2b, 0xde, 0x32, 0xc7, 0xc5, 0x30 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x96, 0x2c, 0xba, 0x58, 0xce, 0x74, 0xe2,
+ 0xb0, 0x26, 0x9c, 0x0a, 0xe8, 0x7e, 0xc4, 0x52 },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xee, 0x1b, 0x19, 0xec,
+ 0xdc, 0x29, 0x2b, 0xde, 0x32, 0xc7, 0xc5, 0x30 },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x97, 0x2e, 0xb9, 0x5c, 0xcb, 0x72, 0xe5,
+ 0xb8, 0x2f, 0x96, 0x01, 0xe4, 0x73, 0xca, 0x5d },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xc9, 0x3c, 0x23, 0xd6,
+ 0x8f, 0x7a, 0x65, 0x90, 0x46, 0xb3, 0xac, 0x59 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x98, 0x30, 0xa8, 0x60, 0xf8, 0x50, 0xc8,
+ 0xc0, 0x58, 0xf0, 0x68, 0xa0, 0x38, 0x90, 0x08 },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xc9, 0x3c, 0x23, 0xd6,
+ 0x8f, 0x7a, 0x65, 0x90, 0x46, 0xb3, 0xac, 0x59 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x99, 0x32, 0xab, 0x64, 0xfd, 0x56, 0xcf,
+ 0xc8, 0x51, 0xfa, 0x63, 0xac, 0x35, 0x9e, 0x07 },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xc9, 0x3c, 0x23, 0xd6,
+ 0x92, 0x67, 0x78, 0x8d, 0x5b, 0xae, 0xb1, 0x44 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x9a, 0x34, 0xae, 0x68, 0xf2, 0x5c, 0xc6,
+ 0xd0, 0x4a, 0xe4, 0x7e, 0xb8, 0x22, 0x8c, 0x16 },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xc9, 0x3c, 0x23, 0xd6,
+ 0x92, 0x67, 0x78, 0x8d, 0x5b, 0xae, 0xb1, 0x44 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x9b, 0x36, 0xad, 0x6c, 0xf7, 0x5a, 0xc1,
+ 0xd8, 0x43, 0xee, 0x75, 0xb4, 0x2f, 0x82, 0x19 },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xd4, 0x21, 0x3e, 0xcb,
+ 0xb5, 0x40, 0x5f, 0xaa, 0x61, 0x94, 0x8b, 0x7e },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x70, 0xec, 0x48, 0xd4,
+ 0xe0, 0x7c, 0xd8, 0x44, 0x90, 0x0c, 0xa8, 0x34 },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xd4, 0x21, 0x3e, 0xcb,
+ 0xb5, 0x40, 0x5f, 0xaa, 0x61, 0x94, 0x8b, 0x7e },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x9d, 0x3a, 0xa7, 0x74, 0xe9, 0x4e, 0xd3,
+ 0xe8, 0x75, 0xd2, 0x4f, 0x9c, 0x01, 0xa6, 0x3b },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xd4, 0x21, 0x3e, 0xcb,
+ 0xa8, 0x5d, 0x42, 0xb7, 0x7c, 0x89, 0x96, 0x63 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x9e, 0x3c, 0xa2, 0x78, 0xe6, 0x44, 0xda,
+ 0xf0, 0x6e, 0xcc, 0x52, 0x88, 0x16, 0xb4, 0x2a },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xd4, 0x21, 0x3e, 0xcb,
+ 0xa8, 0x5d, 0x42, 0xb7, 0x7c, 0x89, 0x96, 0x63 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x9f, 0x3e, 0xa1, 0x7c, 0xe3, 0x42, 0xdd,
+ 0xf8, 0x67, 0xc6, 0x59, 0x84, 0x1b, 0xba, 0x25 },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04,
+ 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04,
+ 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa1, 0x42, 0xe3, 0x84, 0x25, 0xc6, 0x67,
+ 0x08, 0xa9, 0x4a, 0xeb, 0x8c, 0x2d, 0xce, 0x6f },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04,
+ 0xc3, 0x11, 0x7a, 0xa8, 0xac, 0x7e, 0x15, 0xc7 },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa2, 0x44, 0xe6, 0x88, 0x2a, 0xcc, 0x6e,
+ 0x10, 0xb2, 0x54, 0xf6, 0x98, 0x3a, 0xdc, 0x7e },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04,
+ 0xc3, 0x11, 0x7a, 0xa8, 0xac, 0x7e, 0x15, 0xc7 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa3, 0x46, 0xe5, 0x8c, 0x2f, 0xca, 0x69,
+ 0x18, 0xbb, 0x5e, 0xfd, 0x94, 0x37, 0xd2, 0x71 },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x72, 0xa0, 0xcb, 0x19,
+ 0xe4, 0x36, 0x5d, 0x8f, 0x96, 0x44, 0x2f, 0xfd },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa4, 0x48, 0xec, 0x90, 0x34, 0xd8, 0x7c,
+ 0x20, 0x84, 0x68, 0xcc, 0xb0, 0x14, 0xf8, 0x5c },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x72, 0xa0, 0xcb, 0x19,
+ 0xe4, 0x36, 0x5d, 0x8f, 0x96, 0x44, 0x2f, 0xfd },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa5, 0x4a, 0xef, 0x94, 0x31, 0xde, 0x7b,
+ 0x28, 0x8d, 0x62, 0xc7, 0xbc, 0x19, 0xf6, 0x53 },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x72, 0xa0, 0xcb, 0x19,
+ 0xf9, 0x2b, 0x40, 0x92, 0x8b, 0x59, 0x32, 0xe0 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x98, 0x3e, 0xd4, 0x72,
+ 0x30, 0x96, 0x7c, 0xda, 0xa8, 0x0e, 0xe4, 0x42 },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x72, 0xa0, 0xcb, 0x19,
+ 0xf9, 0x2b, 0x40, 0x92, 0x8b, 0x59, 0x32, 0xe0 },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa7, 0x4e, 0xe9, 0x9c, 0x3b, 0xd2, 0x75,
+ 0x38, 0x9f, 0x76, 0xd1, 0xa4, 0x03, 0xea, 0x4d },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x55, 0x87, 0xf1, 0x23,
+ 0xaa, 0x78, 0x0e, 0xdc, 0xff, 0x2d, 0x5b, 0x89 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa8, 0x50, 0xf8, 0xa0, 0x08, 0xf0, 0x58,
+ 0x40, 0xe8, 0x10, 0xb8, 0xe0, 0x48, 0xb0, 0x18 },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x55, 0x87, 0xf1, 0x23,
+ 0xaa, 0x78, 0x0e, 0xdc, 0xff, 0x2d, 0x5b, 0x89 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa9, 0x52, 0xfb, 0xa4, 0x0d, 0xf6, 0x5f,
+ 0x48, 0xe1, 0x1a, 0xb3, 0xec, 0x45, 0xbe, 0x17 },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x55, 0x87, 0xf1, 0x23,
+ 0xb7, 0x65, 0x13, 0xc1, 0xe2, 0x30, 0x46, 0x94 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xaa, 0x54, 0xfe, 0xa8, 0x02, 0xfc, 0x56,
+ 0x50, 0xfa, 0x04, 0xae, 0xf8, 0x52, 0xac, 0x06 },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x55, 0x87, 0xf1, 0x23,
+ 0xb7, 0x65, 0x13, 0xc1, 0xe2, 0x30, 0x46, 0x94 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xab, 0x56, 0xfd, 0xac, 0x07, 0xfa, 0x51,
+ 0x58, 0xf3, 0x0e, 0xa5, 0xf4, 0x5f, 0xa2, 0x09 },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x48, 0x9a, 0xec, 0x3e,
+ 0x90, 0x42, 0x34, 0xe6, 0xd8, 0x0a, 0x7c, 0xae },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xac, 0x58, 0xf4, 0xb0, 0x1c, 0xe8, 0x44,
+ 0x60, 0xcc, 0x38, 0x94, 0xd0, 0x7c, 0x88, 0x24 },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x48, 0x9a, 0xec, 0x3e,
+ 0x90, 0x42, 0x34, 0xe6, 0xd8, 0x0a, 0x7c, 0xae },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xad, 0x5a, 0xf7, 0xb4, 0x19, 0xee, 0x43,
+ 0x68, 0xc5, 0x32, 0x9f, 0xdc, 0x71, 0x86, 0x2b },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x48, 0x9a, 0xec, 0x3e,
+ 0x8d, 0x5f, 0x29, 0xfb, 0xc5, 0x17, 0x61, 0xb3 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xae, 0x5c, 0xf2, 0xb8, 0x16, 0xe4, 0x4a,
+ 0x70, 0xde, 0x2c, 0x82, 0xc8, 0x66, 0x94, 0x3a },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x48, 0x9a, 0xec, 0x3e,
+ 0x8d, 0x5f, 0x29, 0xfb, 0xc5, 0x17, 0x61, 0xb3 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xaf, 0x5e, 0xf1, 0xbc, 0x13, 0xe2, 0x4d,
+ 0x78, 0xd7, 0x26, 0x89, 0xc4, 0x6b, 0x9a, 0x35 },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57,
+ 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57,
+ 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb1, 0x62, 0xd3, 0xc4, 0x75, 0xa6, 0x17,
+ 0x88, 0x39, 0xea, 0x5b, 0x4c, 0xfd, 0x2e, 0x9f },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57,
+ 0x2b, 0xe4, 0xa8, 0x67, 0x30, 0xff, 0xb3, 0x7c },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb2, 0x64, 0xd6, 0xc8, 0x7a, 0xac, 0x1e,
+ 0x90, 0x22, 0xf4, 0x46, 0x58, 0xea, 0x3c, 0x8e },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57,
+ 0x2b, 0xe4, 0xa8, 0x67, 0x30, 0xff, 0xb3, 0x7c },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb3, 0x66, 0xd5, 0xcc, 0x7f, 0xaa, 0x19,
+ 0x98, 0x2b, 0xfe, 0x4d, 0x54, 0xe7, 0x32, 0x81 },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x06, 0xc9, 0x85, 0x4a,
+ 0x0c, 0xc3, 0x8f, 0x40, 0x0a, 0xc5, 0x89, 0x46 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb4, 0x68, 0xdc, 0xd0, 0x64, 0xb8, 0x0c,
+ 0xa0, 0x14, 0xc8, 0x7c, 0x70, 0xc4, 0x18, 0xac },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x06, 0xc9, 0x85, 0x4a,
+ 0x0c, 0xc3, 0x8f, 0x40, 0x0a, 0xc5, 0x89, 0x46 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb5, 0x6a, 0xdf, 0xd4, 0x61, 0xbe, 0x0b,
+ 0xa8, 0x1d, 0xc2, 0x77, 0x7c, 0xc9, 0x16, 0xa3 },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x06, 0xc9, 0x85, 0x4a,
+ 0x11, 0xde, 0x92, 0x5d, 0x17, 0xd8, 0x94, 0x5b },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb6, 0x6c, 0xda, 0xd8, 0x6e, 0xb4, 0x02,
+ 0xb0, 0x06, 0xdc, 0x6a, 0x68, 0xde, 0x04, 0xb2 },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x06, 0xc9, 0x85, 0x4a,
+ 0x11, 0xde, 0x92, 0x5d, 0x17, 0xd8, 0x94, 0x5b },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb7, 0x6e, 0xd9, 0xdc, 0x6b, 0xb2, 0x05,
+ 0xb8, 0x0f, 0xd6, 0x61, 0x64, 0xd3, 0x0a, 0xbd },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x21, 0xee, 0xbf, 0x70,
+ 0x42, 0x8d, 0xdc, 0x13, 0x63, 0xac, 0xfd, 0x32 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb8, 0x70, 0xc8, 0xe0, 0x58, 0x90, 0x28,
+ 0xc0, 0x78, 0xb0, 0x08, 0x20, 0x98, 0x50, 0xe8 },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x21, 0xee, 0xbf, 0x70,
+ 0x42, 0x8d, 0xdc, 0x13, 0x63, 0xac, 0xfd, 0x32 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb9, 0x72, 0xcb, 0xe4, 0x5d, 0x96, 0x2f,
+ 0xc8, 0x71, 0xba, 0x03, 0x2c, 0x95, 0x5e, 0xe7 },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x21, 0xee, 0xbf, 0x70,
+ 0x5f, 0x90, 0xc1, 0x0e, 0x7e, 0xb1, 0xe0, 0x2f },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xba, 0x74, 0xce, 0xe8, 0x52, 0x9c, 0x26,
+ 0xd0, 0x6a, 0xa4, 0x1e, 0x38, 0x82, 0x4c, 0xf6 },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x21, 0xee, 0xbf, 0x70,
+ 0x5f, 0x90, 0xc1, 0x0e, 0x7e, 0xb1, 0xe0, 0x2f },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xec, 0x57, 0x9a, 0x21,
+ 0xd8, 0x63, 0xae, 0x15, 0x34, 0x8f, 0x42, 0xf9 },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x3c, 0xf3, 0xa2, 0x6d,
+ 0x78, 0xb7, 0xe6, 0x29, 0x44, 0x8b, 0xda, 0x15 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xbc, 0x78, 0xc4, 0xf0, 0x4c, 0x88, 0x34,
+ 0xe0, 0x5c, 0x98, 0x24, 0x10, 0xac, 0x68, 0xd4 },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x3c, 0xf3, 0xa2, 0x6d,
+ 0x78, 0xb7, 0xe6, 0x29, 0x44, 0x8b, 0xda, 0x15 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xbd, 0x7a, 0xc7, 0xf4, 0x49, 0x8e, 0x33,
+ 0xe8, 0x55, 0x92, 0x2f, 0x1c, 0xa1, 0x66, 0xdb },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x3c, 0xf3, 0xa2, 0x6d,
+ 0x65, 0xaa, 0xfb, 0x34, 0x59, 0x96, 0xc7, 0x08 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xbe, 0x7c, 0xc2, 0xf8, 0x46, 0x84, 0x3a,
+ 0xf0, 0x4e, 0x8c, 0x32, 0x08, 0xb6, 0x74, 0xca },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x3c, 0xf3, 0xa2, 0x6d,
+ 0x65, 0xaa, 0xfb, 0x34, 0x59, 0x96, 0xc7, 0x08 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xbf, 0x7e, 0xc1, 0xfc, 0x43, 0x82, 0x3d,
+ 0xf8, 0x47, 0x86, 0x39, 0x04, 0xbb, 0x7a, 0xc5 },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3,
+ 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3,
+ 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc1, 0x82, 0x43, 0x04, 0xc5, 0x86, 0x47,
+ 0x08, 0xc9, 0x8a, 0x4b, 0x0c, 0xcd, 0x8e, 0x4f },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3,
+ 0x89, 0x15, 0xac, 0x30, 0xc3, 0x5f, 0xe6, 0x7a },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc2, 0x84, 0x46, 0x08, 0xca, 0x8c, 0x4e,
+ 0x10, 0xd2, 0x94, 0x56, 0x18, 0xda, 0x9c, 0x5e },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3,
+ 0x89, 0x15, 0xac, 0x30, 0xc3, 0x5f, 0xe6, 0x7a },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc3, 0x86, 0x45, 0x0c, 0xcf, 0x8a, 0x49,
+ 0x18, 0xdb, 0x9e, 0x5d, 0x14, 0xd7, 0x92, 0x51 },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x57, 0xcb, 0x72, 0xee,
+ 0xae, 0x32, 0x8b, 0x17, 0xf9, 0x65, 0xdc, 0x40 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc4, 0x88, 0x4c, 0x10, 0xd4, 0x98, 0x5c,
+ 0x20, 0xe4, 0xa8, 0x6c, 0x30, 0xf4, 0xb8, 0x7c },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x57, 0xcb, 0x72, 0xee,
+ 0xae, 0x32, 0x8b, 0x17, 0xf9, 0x65, 0xdc, 0x40 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc5, 0x8a, 0x4f, 0x14, 0xd1, 0x9e, 0x5b,
+ 0x28, 0xed, 0xa2, 0x67, 0x3c, 0xf9, 0xb6, 0x73 },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x57, 0xcb, 0x72, 0xee,
+ 0xb3, 0x2f, 0x96, 0x0a, 0xe4, 0x78, 0xc1, 0x5d },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc6, 0x8c, 0x4a, 0x18, 0xde, 0x94, 0x52,
+ 0x30, 0xf6, 0xbc, 0x7a, 0x28, 0xee, 0xa4, 0x62 },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x57, 0xcb, 0x72, 0xee,
+ 0xb3, 0x2f, 0x96, 0x0a, 0xe4, 0x78, 0xc1, 0x5d },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc7, 0x8e, 0x49, 0x1c, 0xdb, 0x92, 0x55,
+ 0x38, 0xff, 0xb6, 0x71, 0x24, 0xe3, 0xaa, 0x6d },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x70, 0xec, 0x48, 0xd4,
+ 0xe0, 0x7c, 0xd8, 0x44, 0x90, 0x0c, 0xa8, 0x34 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc8, 0x90, 0x58, 0x20, 0xe8, 0xb0, 0x78,
+ 0x40, 0x88, 0xd0, 0x18, 0x60, 0xa8, 0xf0, 0x38 },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x70, 0xec, 0x48, 0xd4,
+ 0xe0, 0x7c, 0xd8, 0x44, 0x90, 0x0c, 0xa8, 0x34 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc9, 0x92, 0x5b, 0x24, 0xed, 0xb6, 0x7f,
+ 0x48, 0x81, 0xda, 0x13, 0x6c, 0xa5, 0xfe, 0x37 },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x70, 0xec, 0x48, 0xd4,
+ 0xfd, 0x61, 0xc5, 0x59, 0x8d, 0x11, 0xb5, 0x29 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xca, 0x94, 0x5e, 0x28, 0xe2, 0xbc, 0x76,
+ 0x50, 0x9a, 0xc4, 0x0e, 0x78, 0xb2, 0xec, 0x26 },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x70, 0xec, 0x48, 0xd4,
+ 0xfd, 0x61, 0xc5, 0x59, 0x8d, 0x11, 0xb5, 0x29 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xcb, 0x96, 0x5d, 0x2c, 0xe7, 0xba, 0x71,
+ 0x58, 0x93, 0xce, 0x05, 0x74, 0xbf, 0xe2, 0x29 },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x6d, 0xf1, 0x55, 0xc9,
+ 0xda, 0x46, 0xe2, 0x7e, 0xb7, 0x2b, 0x8f, 0x13 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xcc, 0x98, 0x54, 0x30, 0xfc, 0xa8, 0x64,
+ 0x60, 0xac, 0xf8, 0x34, 0x50, 0x9c, 0xc8, 0x04 },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x6d, 0xf1, 0x55, 0xc9,
+ 0xda, 0x46, 0xe2, 0x7e, 0xb7, 0x2b, 0x8f, 0x13 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xcd, 0x9a, 0x57, 0x34, 0xf9, 0xae, 0x63,
+ 0x68, 0xa5, 0xf2, 0x3f, 0x5c, 0x91, 0xc6, 0x0b },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x6d, 0xf1, 0x55, 0xc9,
+ 0xc7, 0x5b, 0xff, 0x63, 0xaa, 0x36, 0x92, 0x0e },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xce, 0x9c, 0x52, 0x38, 0xf6, 0xa4, 0x6a,
+ 0x70, 0xbe, 0xec, 0x22, 0x48, 0x86, 0xd4, 0x1a },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x6d, 0xf1, 0x55, 0xc9,
+ 0xc7, 0x5b, 0xff, 0x63, 0xaa, 0x36, 0x92, 0x0e },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x3c, 0xf3, 0xa2, 0x6d,
+ 0x78, 0xb7, 0xe6, 0x29, 0x44, 0x8b, 0xda, 0x15 },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0,
+ 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0,
+ 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd1, 0xa2, 0x73, 0x44, 0x95, 0xe6, 0x37,
+ 0x88, 0x59, 0x2a, 0xfb, 0xcc, 0x1d, 0x6e, 0xbf },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0,
+ 0x61, 0xe0, 0x7e, 0xff, 0x5f, 0xde, 0x40, 0xc1 },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x48, 0x9a, 0xec, 0x3e,
+ 0x90, 0x42, 0x34, 0xe6, 0xd8, 0x0a, 0x7c, 0xae },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0,
+ 0x61, 0xe0, 0x7e, 0xff, 0x5f, 0xde, 0x40, 0xc1 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd3, 0xa6, 0x75, 0x4c, 0x9f, 0xea, 0x39,
+ 0x98, 0x4b, 0x3e, 0xed, 0xd4, 0x07, 0x72, 0xa1 },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x23, 0xa2, 0x3c, 0xbd,
+ 0x46, 0xc7, 0x59, 0xd8, 0x65, 0xe4, 0x7a, 0xfb },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd4, 0xa8, 0x7c, 0x50, 0x84, 0xf8, 0x2c,
+ 0xa0, 0x74, 0x08, 0xdc, 0xf0, 0x24, 0x58, 0x8c },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x23, 0xa2, 0x3c, 0xbd,
+ 0x46, 0xc7, 0x59, 0xd8, 0x65, 0xe4, 0x7a, 0xfb },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd5, 0xaa, 0x7f, 0x54, 0x81, 0xfe, 0x2b,
+ 0xa8, 0x7d, 0x02, 0xd7, 0xfc, 0x29, 0x56, 0x83 },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x23, 0xa2, 0x3c, 0xbd,
+ 0x5b, 0xda, 0x44, 0xc5, 0x78, 0xf9, 0x67, 0xe6 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd6, 0xac, 0x7a, 0x58, 0x8e, 0xf4, 0x22,
+ 0xb0, 0x66, 0x1c, 0xca, 0xe8, 0x3e, 0x44, 0x92 },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x23, 0xa2, 0x3c, 0xbd,
+ 0x5b, 0xda, 0x44, 0xc5, 0x78, 0xf9, 0x67, 0xe6 },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd7, 0xae, 0x79, 0x5c, 0x8b, 0xf2, 0x25,
+ 0xb8, 0x6f, 0x16, 0xc1, 0xe4, 0x33, 0x4a, 0x9d },
+ { 0x00, 0x81, 0x02, 0x83, 0x04, 0x85, 0x06, 0x87,
+ 0x08, 0x89, 0x0a, 0x8b, 0x0c, 0x8d, 0x0e, 0x8f },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd8, 0xb0, 0x68, 0x60, 0xb8, 0xd0, 0x08,
+ 0xc0, 0x18, 0x70, 0xa8, 0xa0, 0x78, 0x10, 0xc8 },
+ { 0x00, 0x81, 0x02, 0x83, 0x04, 0x85, 0x06, 0x87,
+ 0x08, 0x89, 0x0a, 0x8b, 0x0c, 0x8d, 0x0e, 0x8f },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd9, 0xb2, 0x6b, 0x64, 0xbd, 0xd6, 0x0f,
+ 0xc8, 0x11, 0x7a, 0xa3, 0xac, 0x75, 0x1e, 0xc7 },
+ { 0x00, 0x81, 0x02, 0x83, 0x04, 0x85, 0x06, 0x87,
+ 0x15, 0x94, 0x17, 0x96, 0x11, 0x90, 0x13, 0x92 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xda, 0xb4, 0x6e, 0x68, 0xb2, 0xdc, 0x06,
+ 0xd0, 0x0a, 0x64, 0xbe, 0xb8, 0x62, 0x0c, 0xd6 },
+ { 0x00, 0x81, 0x02, 0x83, 0x04, 0x85, 0x06, 0x87,
+ 0x15, 0x94, 0x17, 0x96, 0x11, 0x90, 0x13, 0x92 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xdb, 0xb6, 0x6d, 0x6c, 0xb7, 0xda, 0x01,
+ 0xd8, 0x03, 0x6e, 0xb5, 0xb4, 0x6f, 0x02, 0xd9 },
+ { 0x00, 0x81, 0x02, 0x83, 0x19, 0x98, 0x1b, 0x9a,
+ 0x32, 0xb3, 0x30, 0xb1, 0x2b, 0xaa, 0x29, 0xa8 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xdc, 0xb8, 0x64, 0x70, 0xac, 0xc8, 0x14,
+ 0xe0, 0x3c, 0x58, 0x84, 0x90, 0x4c, 0x28, 0xf4 },
+ { 0x00, 0x81, 0x02, 0x83, 0x19, 0x98, 0x1b, 0x9a,
+ 0x32, 0xb3, 0x30, 0xb1, 0x2b, 0xaa, 0x29, 0xa8 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xdd, 0xba, 0x67, 0x74, 0xa9, 0xce, 0x13,
+ 0xe8, 0x35, 0x52, 0x8f, 0x9c, 0x41, 0x26, 0xfb },
+ { 0x00, 0x81, 0x02, 0x83, 0x19, 0x98, 0x1b, 0x9a,
+ 0x2f, 0xae, 0x2d, 0xac, 0x36, 0xb7, 0x34, 0xb5 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xde, 0xbc, 0x62, 0x78, 0xa6, 0xc4, 0x1a,
+ 0xf0, 0x2e, 0x4c, 0x92, 0x88, 0x56, 0x34, 0xea },
+ { 0x00, 0x81, 0x02, 0x83, 0x19, 0x98, 0x1b, 0x9a,
+ 0x2f, 0xae, 0x2d, 0xac, 0x36, 0xb7, 0x34, 0xb5 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xdf, 0xbe, 0x61, 0x7c, 0xa3, 0xc2, 0x1d,
+ 0xf8, 0x27, 0x46, 0x99, 0x84, 0x5b, 0x3a, 0xe5 },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55,
+ 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55,
+ 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe1, 0xc2, 0x23, 0x84, 0x65, 0x46, 0xa7,
+ 0x08, 0xe9, 0xca, 0x2b, 0x8c, 0x6d, 0x4e, 0xaf },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55,
+ 0x44, 0xe2, 0x15, 0xb3, 0xe6, 0x40, 0xb7, 0x11 },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe2, 0xc4, 0x26, 0x88, 0x6a, 0x4c, 0xae,
+ 0x10, 0xf2, 0xd4, 0x36, 0x98, 0x7a, 0x5c, 0xbe },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55,
+ 0x44, 0xe2, 0x15, 0xb3, 0xe6, 0x40, 0xb7, 0x11 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe3, 0xc6, 0x25, 0x8c, 0x6f, 0x4a, 0xa9,
+ 0x18, 0xfb, 0xde, 0x3d, 0x94, 0x77, 0x52, 0xb1 },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xbf, 0x19, 0xee, 0x48,
+ 0x63, 0xc5, 0x32, 0x94, 0xdc, 0x7a, 0x8d, 0x2b },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe4, 0xc8, 0x2c, 0x90, 0x74, 0x58, 0xbc,
+ 0x20, 0xc4, 0xe8, 0x0c, 0xb0, 0x54, 0x78, 0x9c },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xbf, 0x19, 0xee, 0x48,
+ 0x63, 0xc5, 0x32, 0x94, 0xdc, 0x7a, 0x8d, 0x2b },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe5, 0xca, 0x2f, 0x94, 0x71, 0x5e, 0xbb,
+ 0x28, 0xcd, 0xe2, 0x07, 0xbc, 0x59, 0x76, 0x93 },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xbf, 0x19, 0xee, 0x48,
+ 0x7e, 0xd8, 0x2f, 0x89, 0xc1, 0x67, 0x90, 0x36 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe6, 0xcc, 0x2a, 0x98, 0x7e, 0x54, 0xb2,
+ 0x30, 0xd6, 0xfc, 0x1a, 0xa8, 0x4e, 0x64, 0x82 },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xbf, 0x19, 0xee, 0x48,
+ 0x7e, 0xd8, 0x2f, 0x89, 0xc1, 0x67, 0x90, 0x36 },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe7, 0xce, 0x29, 0x9c, 0x7b, 0x52, 0xb5,
+ 0x38, 0xdf, 0xf6, 0x11, 0xa4, 0x43, 0x6a, 0x8d },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x98, 0x3e, 0xd4, 0x72,
+ 0x2d, 0x8b, 0x61, 0xc7, 0xb5, 0x13, 0xf9, 0x5f },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xa0, 0x48, 0x70, 0x98,
+ 0x40, 0xa8, 0x90, 0x78, 0xe0, 0x08, 0x30, 0xd8 },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x98, 0x3e, 0xd4, 0x72,
+ 0x2d, 0x8b, 0x61, 0xc7, 0xb5, 0x13, 0xf9, 0x5f },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe9, 0xd2, 0x3b, 0xa4, 0x4d, 0x76, 0x9f,
+ 0x48, 0xa1, 0x9a, 0x73, 0xec, 0x05, 0x3e, 0xd7 },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x98, 0x3e, 0xd4, 0x72,
+ 0x30, 0x96, 0x7c, 0xda, 0xa8, 0x0e, 0xe4, 0x42 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xea, 0xd4, 0x3e, 0xa8, 0x42, 0x7c, 0x96,
+ 0x50, 0xba, 0x84, 0x6e, 0xf8, 0x12, 0x2c, 0xc6 },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x98, 0x3e, 0xd4, 0x72,
+ 0x30, 0x96, 0x7c, 0xda, 0xa8, 0x0e, 0xe4, 0x42 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xeb, 0xd6, 0x3d, 0xac, 0x47, 0x7a, 0x91,
+ 0x58, 0xb3, 0x8e, 0x65, 0xf4, 0x1f, 0x22, 0xc9 },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x85, 0x23, 0xc9, 0x6f,
+ 0x17, 0xb1, 0x5b, 0xfd, 0x92, 0x34, 0xde, 0x78 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xec, 0xd8, 0x34, 0xb0, 0x5c, 0x68, 0x84,
+ 0x60, 0x8c, 0xb8, 0x54, 0xd0, 0x3c, 0x08, 0xe4 },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x85, 0x23, 0xc9, 0x6f,
+ 0x17, 0xb1, 0x5b, 0xfd, 0x92, 0x34, 0xde, 0x78 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xed, 0xda, 0x37, 0xb4, 0x59, 0x6e, 0x83,
+ 0x68, 0x85, 0xb2, 0x5f, 0xdc, 0x31, 0x06, 0xeb },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x85, 0x23, 0xc9, 0x6f,
+ 0x0a, 0xac, 0x46, 0xe0, 0x8f, 0x29, 0xc3, 0x65 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xee, 0xdc, 0x32, 0xb8, 0x56, 0x64, 0x8a,
+ 0x70, 0x9e, 0xac, 0x42, 0xc8, 0x26, 0x14, 0xfa },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x85, 0x23, 0xc9, 0x6f,
+ 0x0a, 0xac, 0x46, 0xe0, 0x8f, 0x29, 0xc3, 0x65 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xef, 0xde, 0x31, 0xbc, 0x53, 0x62, 0x8d,
+ 0x78, 0x97, 0xa6, 0x49, 0xc4, 0x2b, 0x1a, 0xf5 },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06,
+ 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06,
+ 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf1, 0xe2, 0x13, 0xc4, 0x35, 0x26, 0xd7,
+ 0x88, 0x79, 0x6a, 0x9b, 0x4c, 0xbd, 0xae, 0x5f },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06,
+ 0xac, 0x17, 0xc7, 0x7c, 0x7a, 0xc1, 0x11, 0xaa },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf2, 0xe4, 0x16, 0xc8, 0x3a, 0x2c, 0xde,
+ 0x90, 0x62, 0x74, 0x86, 0x58, 0xaa, 0xbc, 0x4e },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06,
+ 0xac, 0x17, 0xc7, 0x7c, 0x7a, 0xc1, 0x11, 0xaa },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf3, 0xe6, 0x15, 0xcc, 0x3f, 0x2a, 0xd9,
+ 0x98, 0x6b, 0x7e, 0x8d, 0x54, 0xa7, 0xb2, 0x41 },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xcb, 0x70, 0xa0, 0x1b,
+ 0x8b, 0x30, 0xe0, 0x5b, 0x40, 0xfb, 0x2b, 0x90 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf4, 0xe8, 0x1c, 0xd0, 0x24, 0x38, 0xcc,
+ 0xa0, 0x54, 0x48, 0xbc, 0x70, 0x84, 0x98, 0x6c },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xcb, 0x70, 0xa0, 0x1b,
+ 0x8b, 0x30, 0xe0, 0x5b, 0x40, 0xfb, 0x2b, 0x90 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xd4, 0x21, 0x3e, 0xcb,
+ 0xa8, 0x5d, 0x42, 0xb7, 0x7c, 0x89, 0x96, 0x63 },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xcb, 0x70, 0xa0, 0x1b,
+ 0x96, 0x2d, 0xfd, 0x46, 0x5d, 0xe6, 0x36, 0x8d },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf6, 0xec, 0x1a, 0xd8, 0x2e, 0x34, 0xc2,
+ 0xb0, 0x46, 0x5c, 0xaa, 0x68, 0x9e, 0x84, 0x72 },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xcb, 0x70, 0xa0, 0x1b,
+ 0x96, 0x2d, 0xfd, 0x46, 0x5d, 0xe6, 0x36, 0x8d },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf7, 0xee, 0x19, 0xdc, 0x2b, 0x32, 0xc5,
+ 0xb8, 0x4f, 0x56, 0xa1, 0x64, 0x93, 0x8a, 0x7d },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xec, 0x57, 0x9a, 0x21,
+ 0xc5, 0x7e, 0xb3, 0x08, 0x29, 0x92, 0x5f, 0xe4 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf8, 0xf0, 0x08, 0xe0, 0x18, 0x10, 0xe8,
+ 0xc0, 0x38, 0x30, 0xc8, 0x20, 0xd8, 0xd0, 0x28 },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xec, 0x57, 0x9a, 0x21,
+ 0xc5, 0x7e, 0xb3, 0x08, 0x29, 0x92, 0x5f, 0xe4 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf9, 0xf2, 0x0b, 0xe4, 0x1d, 0x16, 0xef,
+ 0xc8, 0x31, 0x3a, 0xc3, 0x2c, 0xd5, 0xde, 0x27 },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xec, 0x57, 0x9a, 0x21,
+ 0xd8, 0x63, 0xae, 0x15, 0x34, 0x8f, 0x42, 0xf9 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xfa, 0xf4, 0x0e, 0xe8, 0x12, 0x1c, 0xe6,
+ 0xd0, 0x2a, 0x24, 0xde, 0x38, 0xc2, 0xcc, 0x36 },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xec, 0x57, 0x9a, 0x21,
+ 0xd8, 0x63, 0xae, 0x15, 0x34, 0x8f, 0x42, 0xf9 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xfb, 0xf6, 0x0d, 0xec, 0x17, 0x1a, 0xe1,
+ 0xd8, 0x23, 0x2e, 0xd5, 0x34, 0xcf, 0xc2, 0x39 },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xf1, 0x4a, 0x87, 0x3c,
+ 0xff, 0x44, 0x89, 0x32, 0x0e, 0xb5, 0x78, 0xc3 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xfc, 0xf8, 0x04, 0xf0, 0x0c, 0x08, 0xf4,
+ 0xe0, 0x1c, 0x18, 0xe4, 0x10, 0xec, 0xe8, 0x14 },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xf1, 0x4a, 0x87, 0x3c,
+ 0xff, 0x44, 0x89, 0x32, 0x0e, 0xb5, 0x78, 0xc3 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xfd, 0xfa, 0x07, 0xf4, 0x09, 0x0e, 0xf3,
+ 0xe8, 0x15, 0x12, 0xef, 0x1c, 0xe1, 0xe6, 0x1b },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xf1, 0x4a, 0x87, 0x3c,
+ 0xe2, 0x59, 0x94, 0x2f, 0x13, 0xa8, 0x65, 0xde },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xfe, 0xfc, 0x02, 0xf8, 0x06, 0x04, 0xfa,
+ 0xf0, 0x0e, 0x0c, 0xf2, 0x08, 0xf6, 0xf4, 0x0a },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xf1, 0x4a, 0x87, 0x3c,
+ 0xe2, 0x59, 0x94, 0x2f, 0x13, 0xa8, 0x65, 0xde },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xff, 0xfe, 0x01, 0xfc, 0x03, 0x02, 0xfd,
+ 0xf8, 0x07, 0x06, 0xf9, 0x04, 0xfb, 0xfa, 0x05 }
+};
+/* END CSTYLED */
+#endif /* defined(__aarch64__) */
diff --git a/zfs/module/zfs/vdev_raidz_math_aarch64_neon_common.h b/zfs/module/zfs/vdev_raidz_math_aarch64_neon_common.h
new file mode 100644
index 000000000000..cb9ff86c10c1
--- /dev/null
+++ b/zfs/module/zfs/vdev_raidz_math_aarch64_neon_common.h
@@ -0,0 +1,684 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Romain Dolbeau. All rights reserved.
+ */
+
+#include <sys/types.h>
+#include <linux/simd_aarch64.h>
+
+#define __asm __asm__ __volatile__
+
+#define _REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
+#define REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)
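+/*
+ * REG_CNT() counts its arguments with the classic variadic trick:
+ * the caller's arguments shift the descending list 8..1 to the
+ * right, so the ninth parameter N of _REG_CNT() lands on the count.
+ * A sketch of the expansion:
+ *
+ *	REG_CNT(a, b, c, d)
+ *	  -> _REG_CNT(a, b, c, d, 8, 7, 6, 5, 4, 3, 2, 1)
+ *	  -> 4
+ */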
+
+#define VR0_(REG, ...) "%[w"#REG"]"
+#define VR1_(_1, REG, ...) "%[w"#REG"]"
+#define VR2_(_1, _2, REG, ...) "%[w"#REG"]"
+#define VR3_(_1, _2, _3, REG, ...) "%[w"#REG"]"
+#define VR4_(_1, _2, _3, _4, REG, ...) "%[w"#REG"]"
+#define VR5_(_1, _2, _3, _4, _5, REG, ...) "%[w"#REG"]"
+#define VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "%[w"#REG"]"
+#define VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "%[w"#REG"]"
+
+/*
+ * Here we need registers that are not used otherwise.
+ * They are referenced by the unused asm alternatives for the
+ * cases that take fewer registers than the maximum, but GCC
+ * still has to check that the constraints are valid, and
+ * duplicate constraints are illegal; so we use the "register"
+ * number as the operand name.
+ */
+
+#define VR0(r...) VR0_(r)
+#define VR1(r...) VR1_(r)
+#define VR2(r...) VR2_(r, 36)
+#define VR3(r...) VR3_(r, 36, 35)
+#define VR4(r...) VR4_(r, 36, 35, 34, 33)
+#define VR5(r...) VR5_(r, 36, 35, 34, 33, 32)
+#define VR6(r...) VR6_(r, 36, 35, 34, 33, 32, 31)
+#define VR7(r...) VR7_(r, 36, 35, 34, 33, 32, 31, 30)
+
+#define VR(X) "%[w"#X"]"
+
+#define RVR0_(REG, ...) [w##REG] "w" (w##REG)
+#define RVR1_(_1, REG, ...) [w##REG] "w" (w##REG)
+#define RVR2_(_1, _2, REG, ...) [w##REG] "w" (w##REG)
+#define RVR3_(_1, _2, _3, REG, ...) [w##REG] "w" (w##REG)
+#define RVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "w" (w##REG)
+#define RVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "w" (w##REG)
+#define RVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "w" (w##REG)
+#define RVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "w" (w##REG)
+
+#define RVR0(r...) RVR0_(r)
+#define RVR1(r...) RVR1_(r)
+#define RVR2(r...) RVR2_(r, 36)
+#define RVR3(r...) RVR3_(r, 36, 35)
+#define RVR4(r...) RVR4_(r, 36, 35, 34, 33)
+#define RVR5(r...) RVR5_(r, 36, 35, 34, 33, 32)
+#define RVR6(r...) RVR6_(r, 36, 35, 34, 33, 32, 31)
+#define RVR7(r...) RVR7_(r, 36, 35, 34, 33, 32, 31, 30)
+
+#define RVR(X) [w##X] "w" (w##X)
+
+#define WVR0_(REG, ...) [w##REG] "=w" (w##REG)
+#define WVR1_(_1, REG, ...) [w##REG] "=w" (w##REG)
+#define WVR2_(_1, _2, REG, ...) [w##REG] "=w" (w##REG)
+#define WVR3_(_1, _2, _3, REG, ...) [w##REG] "=w" (w##REG)
+#define WVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "=w" (w##REG)
+#define WVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "=w" (w##REG)
+#define WVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "=w" (w##REG)
+#define WVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "=w" (w##REG)
+
+#define WVR0(r...) WVR0_(r)
+#define WVR1(r...) WVR1_(r)
+#define WVR2(r...) WVR2_(r, 36)
+#define WVR3(r...) WVR3_(r, 36, 35)
+#define WVR4(r...) WVR4_(r, 36, 35, 34, 33)
+#define WVR5(r...) WVR5_(r, 36, 35, 34, 33, 32)
+#define WVR6(r...) WVR6_(r, 36, 35, 34, 33, 32, 31)
+#define WVR7(r...) WVR7_(r, 36, 35, 34, 33, 32, 31, 30)
+
+#define WVR(X) [w##X] "=w" (w##X)
+
+#define UVR0_(REG, ...) [w##REG] "+&w" (w##REG)
+#define UVR1_(_1, REG, ...) [w##REG] "+&w" (w##REG)
+#define UVR2_(_1, _2, REG, ...) [w##REG] "+&w" (w##REG)
+#define UVR3_(_1, _2, _3, REG, ...) [w##REG] "+&w" (w##REG)
+#define UVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "+&w" (w##REG)
+#define UVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "+&w" (w##REG)
+#define UVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "+&w" (w##REG)
+#define UVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "+&w" (w##REG)
+
+#define UVR0(r...) UVR0_(r)
+#define UVR1(r...) UVR1_(r)
+#define UVR2(r...) UVR2_(r, 36)
+#define UVR3(r...) UVR3_(r, 36, 35)
+#define UVR4(r...) UVR4_(r, 36, 35, 34, 33)
+#define UVR5(r...) UVR5_(r, 36, 35, 34, 33, 32)
+#define UVR6(r...) UVR6_(r, 36, 35, 34, 33, 32, 31)
+#define UVR7(r...) UVR7_(r, 36, 35, 34, 33, 32, 31, 30)
+
+#define UVR(X) [w##X] "+&w" (w##X)
+
+#define R_01(REG1, REG2, ...) REG1, REG2
+#define _R_23(_0, _1, REG2, REG3, ...) REG2, REG3
+#define R_23(REG...) _R_23(REG, 1, 2, 3)
+
+#define ASM_BUG() ASSERT(0)
+
+#define OFFSET(ptr, val) (((unsigned char *)(ptr))+val)
+
+extern const uint8_t gf_clmul_mod_lt[4*256][16];
+
+#define ELEM_SIZE 16
+
+typedef struct v {
+ uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
+} v_t;
+
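+/*
+ * XOR_ACC(src, r...) loads 16-byte chunks from src and XORs them
+ * into the given register set, i.e. r[i] ^= src[i] for each 128-bit
+ * lane. A scalar sketch of the same operation (treating the
+ * register set as one contiguous byte array, which is illustrative
+ * only):
+ *
+ *	for (i = 0; i < REG_CNT(r) * 16; i++)
+ *		((uint8_t *)r)[i] ^= ((const uint8_t *)src)[i];
+ */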
+#define XOR_ACC(src, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 8: \
+ __asm( \
+ "ld1 { v21.4s },%[SRC0]\n" \
+ "ld1 { v20.4s },%[SRC1]\n" \
+ "ld1 { v19.4s },%[SRC2]\n" \
+ "ld1 { v18.4s },%[SRC3]\n" \
+ "eor " VR0(r) ".16b," VR0(r) ".16b,v21.16b\n" \
+ "eor " VR1(r) ".16b," VR1(r) ".16b,v20.16b\n" \
+ "eor " VR2(r) ".16b," VR2(r) ".16b,v19.16b\n" \
+ "eor " VR3(r) ".16b," VR3(r) ".16b,v18.16b\n" \
+ "ld1 { v21.4s },%[SRC4]\n" \
+ "ld1 { v20.4s },%[SRC5]\n" \
+ "ld1 { v19.4s },%[SRC6]\n" \
+ "ld1 { v18.4s },%[SRC7]\n" \
+ "eor " VR4(r) ".16b," VR4(r) ".16b,v21.16b\n" \
+ "eor " VR5(r) ".16b," VR5(r) ".16b,v20.16b\n" \
+ "eor " VR6(r) ".16b," VR6(r) ".16b,v19.16b\n" \
+ "eor " VR7(r) ".16b," VR7(r) ".16b,v18.16b\n" \
+ : UVR0(r), UVR1(r), UVR2(r), UVR3(r), \
+ UVR4(r), UVR5(r), UVR6(r), UVR7(r) \
+ : [SRC0] "Q" (*(OFFSET(src, 0))), \
+ [SRC1] "Q" (*(OFFSET(src, 16))), \
+ [SRC2] "Q" (*(OFFSET(src, 32))), \
+ [SRC3] "Q" (*(OFFSET(src, 48))), \
+ [SRC4] "Q" (*(OFFSET(src, 64))), \
+ [SRC5] "Q" (*(OFFSET(src, 80))), \
+ [SRC6] "Q" (*(OFFSET(src, 96))), \
+ [SRC7] "Q" (*(OFFSET(src, 112))) \
+ : "v18", "v19", "v20", "v21"); \
+ break; \
+ case 4: \
+ __asm( \
+ "ld1 { v21.4s },%[SRC0]\n" \
+ "ld1 { v20.4s },%[SRC1]\n" \
+ "ld1 { v19.4s },%[SRC2]\n" \
+ "ld1 { v18.4s },%[SRC3]\n" \
+ "eor " VR0(r) ".16b," VR0(r) ".16b,v21.16b\n" \
+ "eor " VR1(r) ".16b," VR1(r) ".16b,v20.16b\n" \
+ "eor " VR2(r) ".16b," VR2(r) ".16b,v19.16b\n" \
+ "eor " VR3(r) ".16b," VR3(r) ".16b,v18.16b\n" \
+ : UVR0(r), UVR1(r), UVR2(r), UVR3(r) \
+ : [SRC0] "Q" (*(OFFSET(src, 0))), \
+ [SRC1] "Q" (*(OFFSET(src, 16))), \
+ [SRC2] "Q" (*(OFFSET(src, 32))), \
+ [SRC3] "Q" (*(OFFSET(src, 48))) \
+ : "v18", "v19", "v20", "v21"); \
+ break; \
+ case 2: \
+ __asm( \
+ "ld1 { v21.4s },%[SRC0]\n" \
+ "ld1 { v20.4s },%[SRC1]\n" \
+ "eor " VR0(r) ".16b," VR0(r) ".16b,v21.16b\n" \
+ "eor " VR1(r) ".16b," VR1(r) ".16b,v20.16b\n" \
+ : UVR0(r), UVR1(r) \
+ : [SRC0] "Q" (*(OFFSET(src, 0))), \
+ [SRC1] "Q" (*(OFFSET(src, 16))) \
+ : "v20", "v21"); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define XOR(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 8: \
+ __asm( \
+ "eor " VR4(r) ".16b," VR4(r) ".16b," VR0(r) ".16b\n" \
+ "eor " VR5(r) ".16b," VR5(r) ".16b," VR1(r) ".16b\n" \
+ "eor " VR6(r) ".16b," VR6(r) ".16b," VR2(r) ".16b\n" \
+ "eor " VR7(r) ".16b," VR7(r) ".16b," VR3(r) ".16b\n" \
+ : UVR4(r), UVR5(r), UVR6(r), UVR7(r) \
+ : RVR0(r), RVR1(r), RVR2(r), RVR3(r)); \
+ break; \
+ case 4: \
+ __asm( \
+ "eor " VR2(r) ".16b," VR2(r) ".16b," VR0(r) ".16b\n" \
+ "eor " VR3(r) ".16b," VR3(r) ".16b," VR1(r) ".16b\n" \
+ : UVR2(r), UVR3(r) \
+ : RVR0(r), RVR1(r)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define ZERO(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 8: \
+ __asm( \
+ "eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n" \
+ "eor " VR1(r) ".16b," VR1(r) ".16b," VR1(r) ".16b\n" \
+ "eor " VR2(r) ".16b," VR2(r) ".16b," VR2(r) ".16b\n" \
+ "eor " VR3(r) ".16b," VR3(r) ".16b," VR3(r) ".16b\n" \
+ "eor " VR4(r) ".16b," VR4(r) ".16b," VR4(r) ".16b\n" \
+ "eor " VR5(r) ".16b," VR5(r) ".16b," VR5(r) ".16b\n" \
+ "eor " VR6(r) ".16b," VR6(r) ".16b," VR6(r) ".16b\n" \
+ "eor " VR7(r) ".16b," VR7(r) ".16b," VR7(r) ".16b\n" \
+ : WVR0(r), WVR1(r), WVR2(r), WVR3(r), \
+ WVR4(r), WVR5(r), WVR6(r), WVR7(r)); \
+ break; \
+ case 4: \
+ __asm( \
+ "eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n" \
+ "eor " VR1(r) ".16b," VR1(r) ".16b," VR1(r) ".16b\n" \
+ "eor " VR2(r) ".16b," VR2(r) ".16b," VR2(r) ".16b\n" \
+ "eor " VR3(r) ".16b," VR3(r) ".16b," VR3(r) ".16b\n" \
+ : WVR0(r), WVR1(r), WVR2(r), WVR3(r)); \
+ break; \
+ case 2: \
+ __asm( \
+ "eor " VR0(r) ".16b," VR0(r) ".16b," VR0(r) ".16b\n" \
+ "eor " VR1(r) ".16b," VR1(r) ".16b," VR1(r) ".16b\n" \
+ : WVR0(r), WVR1(r)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define COPY(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 8: \
+ __asm( \
+ "mov " VR4(r) ".16b," VR0(r) ".16b\n" \
+ "mov " VR5(r) ".16b," VR1(r) ".16b\n" \
+ "mov " VR6(r) ".16b," VR2(r) ".16b\n" \
+ "mov " VR7(r) ".16b," VR3(r) ".16b\n" \
+ : WVR4(r), WVR5(r), WVR6(r), WVR7(r) \
+ : RVR0(r), RVR1(r), RVR2(r), RVR3(r)); \
+ break; \
+ case 4: \
+ __asm( \
+ "mov " VR2(r) ".16b," VR0(r) ".16b\n" \
+ "mov " VR3(r) ".16b," VR1(r) ".16b\n" \
+ : WVR2(r), WVR3(r) \
+ : RVR0(r), RVR1(r)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define LOAD(src, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 8: \
+ __asm( \
+ "ld1 { " VR0(r) ".4s },%[SRC0]\n" \
+ "ld1 { " VR1(r) ".4s },%[SRC1]\n" \
+ "ld1 { " VR2(r) ".4s },%[SRC2]\n" \
+ "ld1 { " VR3(r) ".4s },%[SRC3]\n" \
+ "ld1 { " VR4(r) ".4s },%[SRC4]\n" \
+ "ld1 { " VR5(r) ".4s },%[SRC5]\n" \
+ "ld1 { " VR6(r) ".4s },%[SRC6]\n" \
+ "ld1 { " VR7(r) ".4s },%[SRC7]\n" \
+ : WVR0(r), WVR1(r), WVR2(r), WVR3(r), \
+ WVR4(r), WVR5(r), WVR6(r), WVR7(r) \
+ : [SRC0] "Q" (*(OFFSET(src, 0))), \
+ [SRC1] "Q" (*(OFFSET(src, 16))), \
+ [SRC2] "Q" (*(OFFSET(src, 32))), \
+ [SRC3] "Q" (*(OFFSET(src, 48))), \
+ [SRC4] "Q" (*(OFFSET(src, 64))), \
+ [SRC5] "Q" (*(OFFSET(src, 80))), \
+ [SRC6] "Q" (*(OFFSET(src, 96))), \
+ [SRC7] "Q" (*(OFFSET(src, 112)))); \
+ break; \
+ case 4: \
+ __asm( \
+ "ld1 { " VR0(r) ".4s },%[SRC0]\n" \
+ "ld1 { " VR1(r) ".4s },%[SRC1]\n" \
+ "ld1 { " VR2(r) ".4s },%[SRC2]\n" \
+ "ld1 { " VR3(r) ".4s },%[SRC3]\n" \
+ : WVR0(r), WVR1(r), WVR2(r), WVR3(r) \
+ : [SRC0] "Q" (*(OFFSET(src, 0))), \
+ [SRC1] "Q" (*(OFFSET(src, 16))), \
+ [SRC2] "Q" (*(OFFSET(src, 32))), \
+ [SRC3] "Q" (*(OFFSET(src, 48)))); \
+ break; \
+ case 2: \
+ __asm( \
+ "ld1 { " VR0(r) ".4s },%[SRC0]\n" \
+ "ld1 { " VR1(r) ".4s },%[SRC1]\n" \
+ : WVR0(r), WVR1(r) \
+ : [SRC0] "Q" (*(OFFSET(src, 0))), \
+ [SRC1] "Q" (*(OFFSET(src, 16)))); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define STORE(dst, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 8: \
+ __asm( \
+ "st1 { " VR0(r) ".4s },%[DST0]\n" \
+ "st1 { " VR1(r) ".4s },%[DST1]\n" \
+ "st1 { " VR2(r) ".4s },%[DST2]\n" \
+ "st1 { " VR3(r) ".4s },%[DST3]\n" \
+ "st1 { " VR4(r) ".4s },%[DST4]\n" \
+ "st1 { " VR5(r) ".4s },%[DST5]\n" \
+ "st1 { " VR6(r) ".4s },%[DST6]\n" \
+ "st1 { " VR7(r) ".4s },%[DST7]\n" \
+ : [DST0] "=Q" (*(OFFSET(dst, 0))), \
+ [DST1] "=Q" (*(OFFSET(dst, 16))), \
+ [DST2] "=Q" (*(OFFSET(dst, 32))), \
+ [DST3] "=Q" (*(OFFSET(dst, 48))), \
+ [DST4] "=Q" (*(OFFSET(dst, 64))), \
+ [DST5] "=Q" (*(OFFSET(dst, 80))), \
+ [DST6] "=Q" (*(OFFSET(dst, 96))), \
+ [DST7] "=Q" (*(OFFSET(dst, 112))) \
+ : RVR0(r), RVR1(r), RVR2(r), RVR3(r), \
+ RVR4(r), RVR5(r), RVR6(r), RVR7(r)); \
+ break; \
+ case 4: \
+ __asm( \
+ "st1 { " VR0(r) ".4s },%[DST0]\n" \
+ "st1 { " VR1(r) ".4s },%[DST1]\n" \
+ "st1 { " VR2(r) ".4s },%[DST2]\n" \
+ "st1 { " VR3(r) ".4s },%[DST3]\n" \
+ : [DST0] "=Q" (*(OFFSET(dst, 0))), \
+ [DST1] "=Q" (*(OFFSET(dst, 16))), \
+ [DST2] "=Q" (*(OFFSET(dst, 32))), \
+ [DST3] "=Q" (*(OFFSET(dst, 48))) \
+ : RVR0(r), RVR1(r), RVR2(r), RVR3(r)); \
+ break; \
+ case 2: \
+ __asm( \
+ "st1 { " VR0(r) ".4s },%[DST0]\n" \
+ "st1 { " VR1(r) ".4s },%[DST1]\n" \
+ : [DST0] "=Q" (*(OFFSET(dst, 0))), \
+ [DST1] "=Q" (*(OFFSET(dst, 16))) \
+ : RVR0(r), RVR1(r)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+/*
+ * Unfortunately these macros cannot be used in the asm below,
+ * because GCC would substitute the macro name rather than its
+ * value later on. They are kept as a reference for which
+ * register each numbered variable corresponds to.
+ */
+#define _00 "v17"
+#define _1d "v16"
+#define _temp0 "v19"
+#define _temp1 "v18"
+
+#define MUL2_SETUP() \
+{ \
+ __asm( \
+ "eor " VR(17) ".16b," VR(17) ".16b," VR(17) ".16b\n" \
+ "movi " VR(16) ".16b,#0x1d\n" \
+ : WVR(16), WVR(17)); \
+}
+
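+/*
+ * MUL2() doubles each byte in GF(2^8) with the RAID-Z polynomial
+ * 0x11d: shift left by one and, where the top bit was set, XOR in
+ * the reduction constant 0x1d (v16, set up by MUL2_SETUP(); v17 is
+ * kept zero so cmgt produces an all-ones mask for bytes >= 0x80).
+ * A scalar sketch of the per-byte math:
+ *
+ *	uint8_t gf_mul2(uint8_t a)
+ *	{
+ *		return ((a << 1) ^ ((a & 0x80) ? 0x1d : 0));
+ *	}
+ */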
+#define MUL2(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ __asm( \
+ "cmgt v19.16b," VR(17) ".16b," VR0(r) ".16b\n" \
+ "cmgt v18.16b," VR(17) ".16b," VR1(r) ".16b\n" \
+ "cmgt v21.16b," VR(17) ".16b," VR2(r) ".16b\n" \
+ "cmgt v20.16b," VR(17) ".16b," VR3(r) ".16b\n" \
+ "and v19.16b,v19.16b," VR(16) ".16b\n" \
+ "and v18.16b,v18.16b," VR(16) ".16b\n" \
+ "and v21.16b,v21.16b," VR(16) ".16b\n" \
+ "and v20.16b,v20.16b," VR(16) ".16b\n" \
+ "shl " VR0(r) ".16b," VR0(r) ".16b,#1\n" \
+ "shl " VR1(r) ".16b," VR1(r) ".16b,#1\n" \
+ "shl " VR2(r) ".16b," VR2(r) ".16b,#1\n" \
+ "shl " VR3(r) ".16b," VR3(r) ".16b,#1\n" \
+ "eor " VR0(r) ".16b,v19.16b," VR0(r) ".16b\n" \
+ "eor " VR1(r) ".16b,v18.16b," VR1(r) ".16b\n" \
+ "eor " VR2(r) ".16b,v21.16b," VR2(r) ".16b\n" \
+ "eor " VR3(r) ".16b,v20.16b," VR3(r) ".16b\n" \
+ : UVR0(r), UVR1(r), UVR2(r), UVR3(r) \
+ : RVR(17), RVR(16) \
+ : "v18", "v19", "v20", "v21"); \
+ break; \
+ case 2: \
+ __asm( \
+ "cmgt v19.16b," VR(17) ".16b," VR0(r) ".16b\n" \
+ "cmgt v18.16b," VR(17) ".16b," VR1(r) ".16b\n" \
+ "and v19.16b,v19.16b," VR(16) ".16b\n" \
+ "and v18.16b,v18.16b," VR(16) ".16b\n" \
+ "shl " VR0(r) ".16b," VR0(r) ".16b,#1\n" \
+ "shl " VR1(r) ".16b," VR1(r) ".16b,#1\n" \
+ "eor " VR0(r) ".16b,v19.16b," VR0(r) ".16b\n" \
+ "eor " VR1(r) ".16b,v18.16b," VR1(r) ".16b\n" \
+ : UVR0(r), UVR1(r) \
+ : RVR(17), RVR(16) \
+ : "v18", "v19"); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
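+/* Multiplying by 4 in GF(2^8) is just two successive doublings. */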
+#define MUL4(r...) \
+{ \
+ MUL2(r); \
+ MUL2(r); \
+}
+
+/*
+ * Unfortunately these macros cannot be used in the asm below,
+ * because GCC would substitute the macro name rather than its
+ * value later on. They are kept as a reference for which
+ * register each name corresponds to (here actual registers are
+ * used for the clobbered ones).
+ */
+#define _0f "v15"
+#define _a_save "v14"
+#define _b_save "v13"
+#define _lt_mod_a "v12"
+#define _lt_clmul_a "v11"
+#define _lt_mod_b "v10"
+#define _lt_clmul_b "v15"
+
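+/*
+ * _MULx2() multiplies two vectors of bytes by the constant c in
+ * GF(2^8) using nibble-wide table lookups: each byte is split into
+ * its high and low nibble, TBL performs a 16-entry lookup for each
+ * half, and the four partial products are XORed together. Per byte
+ * this computes, roughly:
+ *
+ *	res = lt[4*c + 0][hi] ^ lt[4*c + 1][hi]
+ *	    ^ lt[4*c + 2][lo] ^ lt[4*c + 3][lo];
+ *
+ * where lt is gf_clmul_mod_lt and hi/lo are the byte's nibbles.
+ */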
+#define _MULx2(c, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 2: \
+ __asm( \
+ /* lts for upper part */ \
+ "movi v15.16b,#0x0f\n" \
+ "ld1 { v10.4s },%[lt0]\n" \
+ "ld1 { v11.4s },%[lt1]\n" \
+ /* upper part */ \
+ "and v14.16b," VR0(r) ".16b,v15.16b\n" \
+ "and v13.16b," VR1(r) ".16b,v15.16b\n" \
+ "sshr " VR0(r) ".8h," VR0(r) ".8h,#4\n" \
+ "sshr " VR1(r) ".8h," VR1(r) ".8h,#4\n" \
+ "and " VR0(r) ".16b," VR0(r) ".16b,v15.16b\n" \
+ "and " VR1(r) ".16b," VR1(r) ".16b,v15.16b\n" \
+ \
+ "tbl v12.16b,{v10.16b}," VR0(r) ".16b\n" \
+ "tbl v10.16b,{v10.16b}," VR1(r) ".16b\n" \
+ "tbl v15.16b,{v11.16b}," VR0(r) ".16b\n" \
+ "tbl v11.16b,{v11.16b}," VR1(r) ".16b\n" \
+ \
+ "eor " VR0(r) ".16b,v15.16b,v12.16b\n" \
+ "eor " VR1(r) ".16b,v11.16b,v10.16b\n" \
+ /* lts for lower part */ \
+ "ld1 { v10.4s },%[lt2]\n" \
+ "ld1 { v15.4s },%[lt3]\n" \
+ /* lower part */ \
+ "tbl v12.16b,{v10.16b},v14.16b\n" \
+ "tbl v10.16b,{v10.16b},v13.16b\n" \
+ "tbl v11.16b,{v15.16b},v14.16b\n" \
+ "tbl v15.16b,{v15.16b},v13.16b\n" \
+ \
+ "eor " VR0(r) ".16b," VR0(r) ".16b,v12.16b\n" \
+ "eor " VR1(r) ".16b," VR1(r) ".16b,v10.16b\n" \
+ "eor " VR0(r) ".16b," VR0(r) ".16b,v11.16b\n" \
+ "eor " VR1(r) ".16b," VR1(r) ".16b,v15.16b\n" \
+ : UVR0(r), UVR1(r) \
+ : [lt0] "Q" ((gf_clmul_mod_lt[4*(c)+0][0])), \
+ [lt1] "Q" ((gf_clmul_mod_lt[4*(c)+1][0])), \
+ [lt2] "Q" ((gf_clmul_mod_lt[4*(c)+2][0])), \
+ [lt3] "Q" ((gf_clmul_mod_lt[4*(c)+3][0])) \
+ : "v10", "v11", "v12", "v13", "v14", "v15"); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define MUL(c, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ _MULx2(c, R_23(r)); \
+ _MULx2(c, R_01(r)); \
+ break; \
+ case 2: \
+ _MULx2(c, R_01(r)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
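+/*
+ * The NEON register file is not saved and restored for kernel code
+ * by default, so kfpu_begin()/kfpu_end() must bracket every block
+ * of vector work.
+ */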
+#define raidz_math_begin() kfpu_begin()
+#define raidz_math_end() kfpu_end()
+
+/* Overkill: define every vector variable any kernel might reference. */
+#if defined(_KERNEL)
+#define GEN_X_DEFINE_0_3() \
+register unsigned char w0 asm("v0") __attribute__((vector_size(16))); \
+register unsigned char w1 asm("v1") __attribute__((vector_size(16))); \
+register unsigned char w2 asm("v2") __attribute__((vector_size(16))); \
+register unsigned char w3 asm("v3") __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_4_5() \
+register unsigned char w4 asm("v4") __attribute__((vector_size(16))); \
+register unsigned char w5 asm("v5") __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_6_7() \
+register unsigned char w6 asm("v6") __attribute__((vector_size(16))); \
+register unsigned char w7 asm("v7") __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_8_9() \
+register unsigned char w8 asm("v8") __attribute__((vector_size(16))); \
+register unsigned char w9 asm("v9") __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_10_11() \
+register unsigned char w10 asm("v10") __attribute__((vector_size(16))); \
+register unsigned char w11 asm("v11") __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_12_15() \
+register unsigned char w12 asm("v12") __attribute__((vector_size(16))); \
+register unsigned char w13 asm("v13") __attribute__((vector_size(16))); \
+register unsigned char w14 asm("v14") __attribute__((vector_size(16))); \
+register unsigned char w15 asm("v15") __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_16() \
+register unsigned char w16 asm("v16") __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_17() \
+register unsigned char w17 asm("v17") __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_18_21() \
+register unsigned char w18 asm("v18") __attribute__((vector_size(16))); \
+register unsigned char w19 asm("v19") __attribute__((vector_size(16))); \
+register unsigned char w20 asm("v20") __attribute__((vector_size(16))); \
+register unsigned char w21 asm("v21") __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_22_23() \
+register unsigned char w22 asm("v22") __attribute__((vector_size(16))); \
+register unsigned char w23 asm("v23") __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_24_27() \
+register unsigned char w24 asm("v24") __attribute__((vector_size(16))); \
+register unsigned char w25 asm("v25") __attribute__((vector_size(16))); \
+register unsigned char w26 asm("v26") __attribute__((vector_size(16))); \
+register unsigned char w27 asm("v27") __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_28_30() \
+register unsigned char w28 asm("v28") __attribute__((vector_size(16))); \
+register unsigned char w29 asm("v29") __attribute__((vector_size(16))); \
+register unsigned char w30 asm("v30") __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_31() \
+register unsigned char w31 asm("v31") __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_32() \
+register unsigned char w32 asm("v31") __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_33_36() \
+register unsigned char w33 asm("v31") __attribute__((vector_size(16))); \
+register unsigned char w34 asm("v31") __attribute__((vector_size(16))); \
+register unsigned char w35 asm("v31") __attribute__((vector_size(16))); \
+register unsigned char w36 asm("v31") __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_37_38() \
+register unsigned char w37 asm("v31") __attribute__((vector_size(16))); \
+register unsigned char w38 asm("v31") __attribute__((vector_size(16)));
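+/*
+ * w32..w38 all alias v31; they appear to exist only to give the
+ * padding register numbers passed by the reduced-arity
+ * VR/RVR/WVR/UVR macros something to bind to.
+ */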
+#define GEN_X_DEFINE_ALL() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_8_9() \
+ GEN_X_DEFINE_10_11() \
+ GEN_X_DEFINE_12_15() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_18_21() \
+ GEN_X_DEFINE_22_23() \
+ GEN_X_DEFINE_24_27() \
+ GEN_X_DEFINE_28_30() \
+ GEN_X_DEFINE_31() \
+ GEN_X_DEFINE_32() \
+ GEN_X_DEFINE_33_36() \
+ GEN_X_DEFINE_37_38()
+#else
+#define GEN_X_DEFINE_0_3() \
+ unsigned char w0 __attribute__((vector_size(16))); \
+ unsigned char w1 __attribute__((vector_size(16))); \
+ unsigned char w2 __attribute__((vector_size(16))); \
+ unsigned char w3 __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_4_5() \
+ unsigned char w4 __attribute__((vector_size(16))); \
+ unsigned char w5 __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_6_7() \
+ unsigned char w6 __attribute__((vector_size(16))); \
+ unsigned char w7 __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_8_9() \
+ unsigned char w8 __attribute__((vector_size(16))); \
+ unsigned char w9 __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_10_11() \
+ unsigned char w10 __attribute__((vector_size(16))); \
+ unsigned char w11 __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_12_15() \
+ unsigned char w12 __attribute__((vector_size(16))); \
+ unsigned char w13 __attribute__((vector_size(16))); \
+ unsigned char w14 __attribute__((vector_size(16))); \
+ unsigned char w15 __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_16() \
+ unsigned char w16 __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_17() \
+ unsigned char w17 __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_18_21() \
+ unsigned char w18 __attribute__((vector_size(16))); \
+ unsigned char w19 __attribute__((vector_size(16))); \
+ unsigned char w20 __attribute__((vector_size(16))); \
+ unsigned char w21 __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_22_23() \
+ unsigned char w22 __attribute__((vector_size(16))); \
+ unsigned char w23 __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_24_27() \
+ unsigned char w24 __attribute__((vector_size(16))); \
+ unsigned char w25 __attribute__((vector_size(16))); \
+ unsigned char w26 __attribute__((vector_size(16))); \
+ unsigned char w27 __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_28_30() \
+ unsigned char w28 __attribute__((vector_size(16))); \
+ unsigned char w29 __attribute__((vector_size(16))); \
+ unsigned char w30 __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_31() \
+ unsigned char w31 __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_32() \
+ unsigned char w32 __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_33_36() \
+ unsigned char w33 __attribute__((vector_size(16))); \
+ unsigned char w34 __attribute__((vector_size(16))); \
+ unsigned char w35 __attribute__((vector_size(16))); \
+ unsigned char w36 __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_37_38() \
+ unsigned char w37 __attribute__((vector_size(16))); \
+ unsigned char w38 __attribute__((vector_size(16)));
+#define GEN_X_DEFINE_ALL() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_8_9() \
+ GEN_X_DEFINE_10_11() \
+ GEN_X_DEFINE_12_15() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_18_21() \
+ GEN_X_DEFINE_22_23() \
+ GEN_X_DEFINE_24_27() \
+ GEN_X_DEFINE_28_30() \
+ GEN_X_DEFINE_31() \
+ GEN_X_DEFINE_32() \
+ GEN_X_DEFINE_33_36() \
+ GEN_X_DEFINE_37_38()
+#endif
diff --git a/zfs/module/zfs/vdev_raidz_math_aarch64_neonx2.c b/zfs/module/zfs/vdev_raidz_math_aarch64_neonx2.c
new file mode 100644
index 000000000000..f8688a06a8f6
--- /dev/null
+++ b/zfs/module/zfs/vdev_raidz_math_aarch64_neonx2.c
@@ -0,0 +1,232 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Romain Dolbeau. All rights reserved.
+ */
+
+#include <sys/isa_defs.h>
+
+#if defined(__aarch64__)
+
+#include "vdev_raidz_math_aarch64_neon_common.h"
+
+#define SYN_STRIDE 4
+
+#define ZERO_STRIDE 8
+#define ZERO_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7()
+#define ZERO_D 0, 1, 2, 3, 4, 5, 6, 7
+
+#define COPY_STRIDE 8
+#define COPY_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7()
+#define COPY_D 0, 1, 2, 3, 4, 5, 6, 7
+
+#define ADD_STRIDE 8
+#define ADD_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7()
+#define ADD_D 0, 1, 2, 3, 4, 5, 6, 7
+
+#define MUL_STRIDE 4
+#define MUL_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_33_36()
+#define MUL_D 0, 1, 2, 3
+
+#define GEN_P_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_33_36()
+#define GEN_P_STRIDE 4
+#define GEN_P_P 0, 1, 2, 3
+
+#define GEN_PQ_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define GEN_PQ_STRIDE 4
+#define GEN_PQ_D 0, 1, 2, 3
+#define GEN_PQ_C 4, 5, 6, 7
+
+#define GEN_PQR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define GEN_PQR_STRIDE 4
+#define GEN_PQR_D 0, 1, 2, 3
+#define GEN_PQR_C 4, 5, 6, 7
+
+#define SYN_Q_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define SYN_Q_STRIDE 4
+#define SYN_Q_D 0, 1, 2, 3
+#define SYN_Q_X 4, 5, 6, 7
+
+#define SYN_R_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define SYN_R_STRIDE 4
+#define SYN_R_D 0, 1, 2, 3
+#define SYN_R_X 4, 5, 6, 7
+
+#define SYN_PQ_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define SYN_PQ_STRIDE 4
+#define SYN_PQ_D 0, 1, 2, 3
+#define SYN_PQ_X 4, 5, 6, 7
+
+#define REC_PQ_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_8_9() \
+ GEN_X_DEFINE_22_23() \
+ GEN_X_DEFINE_33_36()
+#define REC_PQ_STRIDE 4
+#define REC_PQ_X 0, 1, 2, 3
+#define REC_PQ_Y 4, 5, 6, 7
+#define REC_PQ_T 8, 9, 22, 23
+
+#define SYN_PR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define SYN_PR_STRIDE 4
+#define SYN_PR_D 0, 1, 2, 3
+#define SYN_PR_X 4, 5, 6, 7
+
+#define REC_PR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_8_9() \
+ GEN_X_DEFINE_22_23() \
+ GEN_X_DEFINE_33_36()
+#define REC_PR_STRIDE 4
+#define REC_PR_X 0, 1, 2, 3
+#define REC_PR_Y 4, 5, 6, 7
+#define REC_PR_T 8, 9, 22, 23
+
+#define SYN_QR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define SYN_QR_STRIDE 4
+#define SYN_QR_D 0, 1, 2, 3
+#define SYN_QR_X 4, 5, 6, 7
+
+#define REC_QR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_8_9() \
+ GEN_X_DEFINE_22_23() \
+ GEN_X_DEFINE_33_36()
+#define REC_QR_STRIDE 4
+#define REC_QR_X 0, 1, 2, 3
+#define REC_QR_Y 4, 5, 6, 7
+#define REC_QR_T 8, 9, 22, 23
+
+#define SYN_PQR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_16() \
+ GEN_X_DEFINE_17() \
+ GEN_X_DEFINE_33_36()
+#define SYN_PQR_STRIDE 4
+#define SYN_PQR_D 0, 1, 2, 3
+#define SYN_PQR_X 4, 5, 6, 7
+
+#define REC_PQR_DEFINE() \
+ GEN_X_DEFINE_0_3() \
+ GEN_X_DEFINE_4_5() \
+ GEN_X_DEFINE_6_7() \
+ GEN_X_DEFINE_8_9() \
+ GEN_X_DEFINE_31() \
+ GEN_X_DEFINE_32() \
+ GEN_X_DEFINE_33_36()
+#define REC_PQR_STRIDE 2
+#define REC_PQR_X 0, 1
+#define REC_PQR_Y 2, 3
+#define REC_PQR_Z 4, 5
+#define REC_PQR_XS 6, 7
+#define REC_PQR_YS 8, 9
+
+#include <sys/vdev_raidz_impl.h>
+#include "vdev_raidz_math_impl.h"
+
+DEFINE_GEN_METHODS(aarch64_neonx2);
+/*
+ * If compiled with -O0, gcc doesn't do any stack frame coalescing
+ * and -Wframe-larger-than=1024 is triggered in debug mode.
+ */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wframe-larger-than="
+DEFINE_REC_METHODS(aarch64_neonx2);
+#pragma GCC diagnostic pop
+
+static boolean_t
+raidz_will_aarch64_neonx2_work(void)
+{
+ return (B_TRUE); /* __aarch64__ requires NEON */
+}
+
+const raidz_impl_ops_t vdev_raidz_aarch64_neonx2_impl = {
+ .init = NULL,
+ .fini = NULL,
+ .gen = RAIDZ_GEN_METHODS(aarch64_neonx2),
+ .rec = RAIDZ_REC_METHODS(aarch64_neonx2),
+ .is_supported = &raidz_will_aarch64_neonx2_work,
+ .name = "aarch64_neonx2"
+};
+
+#endif /* defined(__aarch64__) */
diff --git a/zfs/module/zfs/vdev_raidz_math_avx2.c b/zfs/module/zfs/vdev_raidz_math_avx2.c
new file mode 100644
index 000000000000..07113a2352ce
--- /dev/null
+++ b/zfs/module/zfs/vdev_raidz_math_avx2.c
@@ -0,0 +1,411 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+#include <sys/isa_defs.h>
+
+#if defined(__x86_64) && defined(HAVE_AVX2)
+
+#include <sys/types.h>
+#include <linux/simd_x86.h>
+
+#define __asm __asm__ __volatile__
+
+#define _REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
+#define REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)
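
REG_CNT() is the usual variadic argument-counting trick: the descending list
appended by REG_CNT() shifts right by one slot per argument passed, so the N
parameter of _REG_CNT() always lands on the count. A standalone illustration
(hypothetical example, not part of the patch):

	#include <stdio.h>

	#define _REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
	#define REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)

	int
	main(void)
	{
		printf("%d\n", REG_CNT(0, 1));		/* prints 2 */
		printf("%d\n", REG_CNT(0, 1, 2, 3));	/* prints 4 */
		return (0);
	}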
+
+#define VR0_(REG, ...) "ymm"#REG
+#define VR1_(_1, REG, ...) "ymm"#REG
+#define VR2_(_1, _2, REG, ...) "ymm"#REG
+#define VR3_(_1, _2, _3, REG, ...) "ymm"#REG
+#define VR4_(_1, _2, _3, _4, REG, ...) "ymm"#REG
+#define VR5_(_1, _2, _3, _4, _5, REG, ...) "ymm"#REG
+#define VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "ymm"#REG
+#define VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "ymm"#REG
+
+#define VR0(r...) VR0_(r)
+#define VR1(r...) VR1_(r)
+#define VR2(r...) VR2_(r, 1)
+#define VR3(r...) VR3_(r, 1, 2)
+#define VR4(r...) VR4_(r, 1, 2)
+#define VR5(r...) VR5_(r, 1, 2, 3)
+#define VR6(r...) VR6_(r, 1, 2, 3, 4)
+#define VR7(r...) VR7_(r, 1, 2, 3, 4, 5)
+
+#define R_01(REG1, REG2, ...) REG1, REG2
+#define _R_23(_0, _1, REG2, REG3, ...) REG2, REG3
+#define R_23(REG...) _R_23(REG, 1, 2, 3)
+
+#define ASM_BUG() ASSERT(0)
+
+extern const uint8_t gf_clmul_mod_lt[4*256][16];
+
+#define ELEM_SIZE 32
+
+typedef struct v {
+ uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
+} v_t;
+
+
+#define XOR_ACC(src, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ __asm( \
+ "vpxor 0x00(%[SRC]), %%" VR0(r)", %%" VR0(r) "\n" \
+ "vpxor 0x20(%[SRC]), %%" VR1(r)", %%" VR1(r) "\n" \
+ "vpxor 0x40(%[SRC]), %%" VR2(r)", %%" VR2(r) "\n" \
+ "vpxor 0x60(%[SRC]), %%" VR3(r)", %%" VR3(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ case 2: \
+ __asm( \
+ "vpxor 0x00(%[SRC]), %%" VR0(r)", %%" VR0(r) "\n" \
+ "vpxor 0x20(%[SRC]), %%" VR1(r)", %%" VR1(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define XOR(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 8: \
+ __asm( \
+ "vpxor %" VR0(r) ", %" VR4(r)", %" VR4(r) "\n" \
+ "vpxor %" VR1(r) ", %" VR5(r)", %" VR5(r) "\n" \
+ "vpxor %" VR2(r) ", %" VR6(r)", %" VR6(r) "\n" \
+ "vpxor %" VR3(r) ", %" VR7(r)", %" VR7(r)); \
+ break; \
+ case 4: \
+ __asm( \
+ "vpxor %" VR0(r) ", %" VR2(r)", %" VR2(r) "\n" \
+ "vpxor %" VR1(r) ", %" VR3(r)", %" VR3(r)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define ZERO(r...) XOR(r, r)
+
+#define COPY(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 8: \
+ __asm( \
+ "vmovdqa %" VR0(r) ", %" VR4(r) "\n" \
+ "vmovdqa %" VR1(r) ", %" VR5(r) "\n" \
+ "vmovdqa %" VR2(r) ", %" VR6(r) "\n" \
+ "vmovdqa %" VR3(r) ", %" VR7(r)); \
+ break; \
+ case 4: \
+ __asm( \
+ "vmovdqa %" VR0(r) ", %" VR2(r) "\n" \
+ "vmovdqa %" VR1(r) ", %" VR3(r)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define LOAD(src, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ __asm( \
+ "vmovdqa 0x00(%[SRC]), %%" VR0(r) "\n" \
+ "vmovdqa 0x20(%[SRC]), %%" VR1(r) "\n" \
+ "vmovdqa 0x40(%[SRC]), %%" VR2(r) "\n" \
+ "vmovdqa 0x60(%[SRC]), %%" VR3(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ case 2: \
+ __asm( \
+ "vmovdqa 0x00(%[SRC]), %%" VR0(r) "\n" \
+ "vmovdqa 0x20(%[SRC]), %%" VR1(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define STORE(dst, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ __asm( \
+ "vmovdqa %%" VR0(r) ", 0x00(%[DST])\n" \
+ "vmovdqa %%" VR1(r) ", 0x20(%[DST])\n" \
+ "vmovdqa %%" VR2(r) ", 0x40(%[DST])\n" \
+ "vmovdqa %%" VR3(r) ", 0x60(%[DST])\n" \
+ : : [DST] "r" (dst)); \
+ break; \
+ case 2: \
+ __asm( \
+ "vmovdqa %%" VR0(r) ", 0x00(%[DST])\n" \
+ "vmovdqa %%" VR1(r) ", 0x20(%[DST])\n" \
+ : : [DST] "r" (dst)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define FLUSH() \
+{ \
+ __asm("vzeroupper"); \
+}
+
+#define MUL2_SETUP() \
+{ \
+ __asm("vmovq %0, %%xmm14" :: "r"(0x1d1d1d1d1d1d1d1d)); \
+ __asm("vpbroadcastq %xmm14, %ymm14"); \
+ __asm("vpxor %ymm15, %ymm15 ,%ymm15"); \
+}
+
+#define _MUL2(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 2: \
+ __asm( \
+ "vpcmpgtb %" VR0(r)", %ymm15, %ymm12\n" \
+ "vpcmpgtb %" VR1(r)", %ymm15, %ymm13\n" \
+ "vpaddb %" VR0(r)", %" VR0(r)", %" VR0(r) "\n" \
+ "vpaddb %" VR1(r)", %" VR1(r)", %" VR1(r) "\n" \
+ "vpand %ymm14, %ymm12, %ymm12\n" \
+ "vpand %ymm14, %ymm13, %ymm13\n" \
+ "vpxor %ymm12, %" VR0(r)", %" VR0(r) "\n" \
+ "vpxor %ymm13, %" VR1(r)", %" VR1(r)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define MUL2(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ _MUL2(R_01(r)); \
+ _MUL2(R_23(r)); \
+ break; \
+ case 2: \
+ _MUL2(r); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define MUL4(r...) \
+{ \
+ MUL2(r); \
+ MUL2(r); \
+}
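
_MUL2() above doubles every byte in GF(2^8) with the 0x11d polynomial:
vpcmpgtb builds a mask of bytes whose top bit is set, vpaddb doubles each
byte, and the masked XOR with 0x1d performs the modular reduction. A scalar
sketch of the same operation (hypothetical helper, assuming the RAID-Z
generator polynomial 0x11d):

	#include <stdint.h>

	/* multiply one GF(2^8) symbol by 2, reducing by 0x1d on overflow */
	static inline uint8_t
	gf_mul2(uint8_t a)
	{
		return ((uint8_t)(a << 1) ^ ((a & 0x80) ? 0x1d : 0x00));
	}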
+
+#define _0f "ymm15"
+#define _as "ymm14"
+#define _bs "ymm13"
+#define _ltmod "ymm12"
+#define _ltmul "ymm11"
+#define _ta "ymm10"
+#define _tb "ymm15"
+
+static const uint8_t __attribute__((aligned(32))) _mul_mask = 0x0F;
+
+#define _MULx2(c, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 2: \
+ __asm( \
+ "vpbroadcastb (%[mask]), %%" _0f "\n" \
+ /* upper bits */ \
+ "vbroadcasti128 0x00(%[lt]), %%" _ltmod "\n" \
+ "vbroadcasti128 0x10(%[lt]), %%" _ltmul "\n" \
+ \
+ "vpsraw $0x4, %%" VR0(r) ", %%"_as "\n" \
+ "vpsraw $0x4, %%" VR1(r) ", %%"_bs "\n" \
+ "vpand %%" _0f ", %%" VR0(r) ", %%" VR0(r) "\n" \
+ "vpand %%" _0f ", %%" VR1(r) ", %%" VR1(r) "\n" \
+ "vpand %%" _0f ", %%" _as ", %%" _as "\n" \
+ "vpand %%" _0f ", %%" _bs ", %%" _bs "\n" \
+ \
+ "vpshufb %%" _as ", %%" _ltmod ", %%" _ta "\n" \
+ "vpshufb %%" _bs ", %%" _ltmod ", %%" _tb "\n" \
+ "vpshufb %%" _as ", %%" _ltmul ", %%" _as "\n" \
+ "vpshufb %%" _bs ", %%" _ltmul ", %%" _bs "\n" \
+ /* lower bits */ \
+ "vbroadcasti128 0x20(%[lt]), %%" _ltmod "\n" \
+ "vbroadcasti128 0x30(%[lt]), %%" _ltmul "\n" \
+ \
+ "vpxor %%" _ta ", %%" _as ", %%" _as "\n" \
+ "vpxor %%" _tb ", %%" _bs ", %%" _bs "\n" \
+ \
+ "vpshufb %%" VR0(r) ", %%" _ltmod ", %%" _ta "\n" \
+ "vpshufb %%" VR1(r) ", %%" _ltmod ", %%" _tb "\n" \
+ "vpshufb %%" VR0(r) ", %%" _ltmul ", %%" VR0(r) "\n"\
+ "vpshufb %%" VR1(r) ", %%" _ltmul ", %%" VR1(r) "\n"\
+ \
+ "vpxor %%" _ta ", %%" VR0(r) ", %%" VR0(r) "\n" \
+ "vpxor %%" _as ", %%" VR0(r) ", %%" VR0(r) "\n" \
+ "vpxor %%" _tb ", %%" VR1(r) ", %%" VR1(r) "\n" \
+ "vpxor %%" _bs ", %%" VR1(r) ", %%" VR1(r) "\n" \
+ : : [mask] "r" (&_mul_mask), \
+ [lt] "r" (gf_clmul_mod_lt[4*(c)])); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define MUL(c, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ _MULx2(c, R_01(r)); \
+ _MULx2(c, R_23(r)); \
+ break; \
+ case 2: \
+ _MULx2(c, R_01(r)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
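
_MULx2() performs constant multiplication with vpshufb nibble lookups. The
underlying identity is that GF(2^8) multiplication distributes over XOR, so
c * x == c * (x & 0xf0) ^ c * (x & 0x0f) and two 16-entry tables per constant
suffice; the gf_clmul_mod_lt layout additionally splits each product into a
carry-less part and a reduction part. A scalar sketch of the nibble split
(hypothetical code, assuming a gf_mul() helper):

	#include <stdint.h>

	extern uint8_t gf_mul(uint8_t a, uint8_t b);	/* assumed helper */

	static uint8_t
	gf_mul_const(uint8_t c, uint8_t x)
	{
		uint8_t lo[16], hi[16];
		int n;

		/* per-constant tables; the real code precomputes these */
		for (n = 0; n < 16; n++) {
			lo[n] = gf_mul(c, (uint8_t)n);
			hi[n] = gf_mul(c, (uint8_t)(n << 4));
		}
		return (lo[x & 0x0f] ^ hi[x >> 4]);
	}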
+
+#define raidz_math_begin() kfpu_begin()
+#define raidz_math_end() \
+{ \
+ FLUSH(); \
+ kfpu_end(); \
+}
+
+
+#define SYN_STRIDE 4
+
+#define ZERO_STRIDE 4
+#define ZERO_DEFINE() {}
+#define ZERO_D 0, 1, 2, 3
+
+#define COPY_STRIDE 4
+#define COPY_DEFINE() {}
+#define COPY_D 0, 1, 2, 3
+
+#define ADD_STRIDE 4
+#define ADD_DEFINE() {}
+#define ADD_D 0, 1, 2, 3
+
+#define MUL_STRIDE 4
+#define MUL_DEFINE() {}
+#define MUL_D 0, 1, 2, 3
+
+#define GEN_P_STRIDE 4
+#define GEN_P_DEFINE() {}
+#define GEN_P_P 0, 1, 2, 3
+
+#define GEN_PQ_STRIDE 4
+#define GEN_PQ_DEFINE() {}
+#define GEN_PQ_D 0, 1, 2, 3
+#define GEN_PQ_C 4, 5, 6, 7
+
+#define GEN_PQR_STRIDE 4
+#define GEN_PQR_DEFINE() {}
+#define GEN_PQR_D 0, 1, 2, 3
+#define GEN_PQR_C 4, 5, 6, 7
+
+#define SYN_Q_DEFINE() {}
+#define SYN_Q_D 0, 1, 2, 3
+#define SYN_Q_X 4, 5, 6, 7
+
+#define SYN_R_DEFINE() {}
+#define SYN_R_D 0, 1, 2, 3
+#define SYN_R_X 4, 5, 6, 7
+
+#define SYN_PQ_DEFINE() {}
+#define SYN_PQ_D 0, 1, 2, 3
+#define SYN_PQ_X 4, 5, 6, 7
+
+#define REC_PQ_STRIDE 2
+#define REC_PQ_DEFINE() {}
+#define REC_PQ_X 0, 1
+#define REC_PQ_Y 2, 3
+#define REC_PQ_T 4, 5
+
+#define SYN_PR_DEFINE() {}
+#define SYN_PR_D 0, 1, 2, 3
+#define SYN_PR_X 4, 5, 6, 7
+
+#define REC_PR_STRIDE 2
+#define REC_PR_DEFINE() {}
+#define REC_PR_X 0, 1
+#define REC_PR_Y 2, 3
+#define REC_PR_T 4, 5
+
+#define SYN_QR_DEFINE() {}
+#define SYN_QR_D 0, 1, 2, 3
+#define SYN_QR_X 4, 5, 6, 7
+
+#define REC_QR_STRIDE 2
+#define REC_QR_DEFINE() {}
+#define REC_QR_X 0, 1
+#define REC_QR_Y 2, 3
+#define REC_QR_T 4, 5
+
+#define SYN_PQR_DEFINE() {}
+#define SYN_PQR_D 0, 1, 2, 3
+#define SYN_PQR_X 4, 5, 6, 7
+
+#define REC_PQR_STRIDE 2
+#define REC_PQR_DEFINE() {}
+#define REC_PQR_X 0, 1
+#define REC_PQR_Y 2, 3
+#define REC_PQR_Z 4, 5
+#define REC_PQR_XS 6, 7
+#define REC_PQR_YS 8, 9
+
+
+#include <sys/vdev_raidz_impl.h>
+#include "vdev_raidz_math_impl.h"
+
+DEFINE_GEN_METHODS(avx2);
+DEFINE_REC_METHODS(avx2);
+
+static boolean_t
+raidz_will_avx2_work(void)
+{
+ return (zfs_avx_available() && zfs_avx2_available());
+}
+
+const raidz_impl_ops_t vdev_raidz_avx2_impl = {
+ .init = NULL,
+ .fini = NULL,
+ .gen = RAIDZ_GEN_METHODS(avx2),
+ .rec = RAIDZ_REC_METHODS(avx2),
+ .is_supported = &raidz_will_avx2_work,
+ .name = "avx2"
+};
+
+#endif /* defined(__x86_64) && defined(HAVE_AVX2) */
diff --git a/zfs/module/zfs/vdev_raidz_math_avx512bw.c b/zfs/module/zfs/vdev_raidz_math_avx512bw.c
new file mode 100644
index 000000000000..3d5326b9e6e1
--- /dev/null
+++ b/zfs/module/zfs/vdev_raidz_math_avx512bw.c
@@ -0,0 +1,410 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Romain Dolbeau. All rights reserved.
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#include <sys/isa_defs.h>
+
+#if defined(__x86_64) && defined(HAVE_AVX512BW)
+
+#include <sys/types.h>
+#include <linux/simd_x86.h>
+
+#define __asm __asm__ __volatile__
+
+#define _REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
+#define REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)
+
+#define VR0_(REG, ...) "zmm"#REG
+#define VR1_(_1, REG, ...) "zmm"#REG
+#define VR2_(_1, _2, REG, ...) "zmm"#REG
+#define VR3_(_1, _2, _3, REG, ...) "zmm"#REG
+#define VR4_(_1, _2, _3, _4, REG, ...) "zmm"#REG
+#define VR5_(_1, _2, _3, _4, _5, REG, ...) "zmm"#REG
+#define VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "zmm"#REG
+#define VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "zmm"#REG
+
+#define VR0(r...) VR0_(r)
+#define VR1(r...) VR1_(r)
+#define VR2(r...) VR2_(r, 1)
+#define VR3(r...) VR3_(r, 1, 2)
+#define VR4(r...) VR4_(r, 1, 2)
+#define VR5(r...) VR5_(r, 1, 2, 3)
+#define VR6(r...) VR6_(r, 1, 2, 3, 4)
+#define VR7(r...) VR7_(r, 1, 2, 3, 4, 5)
+
+#define R_01(REG1, REG2, ...) REG1, REG2
+#define _R_23(_0, _1, REG2, REG3, ...) REG2, REG3
+#define R_23(REG...) _R_23(REG, 1, 2, 3)
+
+#define ASM_BUG() ASSERT(0)
+
+extern const uint8_t gf_clmul_mod_lt[4*256][16];
+
+#define ELEM_SIZE 64
+
+typedef struct v {
+ uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
+} v_t;
+
+#define XOR_ACC(src, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ __asm( \
+ "vpxorq 0x00(%[SRC]), %%" VR0(r)", %%" VR0(r) "\n" \
+ "vpxorq 0x40(%[SRC]), %%" VR1(r)", %%" VR1(r) "\n" \
+ "vpxorq 0x80(%[SRC]), %%" VR2(r)", %%" VR2(r) "\n" \
+ "vpxorq 0xc0(%[SRC]), %%" VR3(r)", %%" VR3(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ case 2: \
+ __asm( \
+ "vpxorq 0x00(%[SRC]), %%" VR0(r)", %%" VR0(r) "\n" \
+ "vpxorq 0x40(%[SRC]), %%" VR1(r)", %%" VR1(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define XOR(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 8: \
+ __asm( \
+ "vpxorq %" VR0(r) ", %" VR4(r)", %" VR4(r) "\n" \
+ "vpxorq %" VR1(r) ", %" VR5(r)", %" VR5(r) "\n" \
+ "vpxorq %" VR2(r) ", %" VR6(r)", %" VR6(r) "\n" \
+ "vpxorq %" VR3(r) ", %" VR7(r)", %" VR7(r)); \
+ break; \
+ case 4: \
+ __asm( \
+ "vpxorq %" VR0(r) ", %" VR2(r)", %" VR2(r) "\n" \
+ "vpxorq %" VR1(r) ", %" VR3(r)", %" VR3(r)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define ZERO(r...) XOR(r, r)
+
+#define COPY(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 8: \
+ __asm( \
+ "vmovdqa64 %" VR0(r) ", %" VR4(r) "\n" \
+ "vmovdqa64 %" VR1(r) ", %" VR5(r) "\n" \
+ "vmovdqa64 %" VR2(r) ", %" VR6(r) "\n" \
+ "vmovdqa64 %" VR3(r) ", %" VR7(r)); \
+ break; \
+ case 4: \
+ __asm( \
+ "vmovdqa64 %" VR0(r) ", %" VR2(r) "\n" \
+ "vmovdqa64 %" VR1(r) ", %" VR3(r)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define LOAD(src, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ __asm( \
+ "vmovdqa64 0x00(%[SRC]), %%" VR0(r) "\n" \
+ "vmovdqa64 0x40(%[SRC]), %%" VR1(r) "\n" \
+ "vmovdqa64 0x80(%[SRC]), %%" VR2(r) "\n" \
+ "vmovdqa64 0xc0(%[SRC]), %%" VR3(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ case 2: \
+ __asm( \
+ "vmovdqa64 0x00(%[SRC]), %%" VR0(r) "\n" \
+ "vmovdqa64 0x40(%[SRC]), %%" VR1(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define STORE(dst, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ __asm( \
+ "vmovdqa64 %%" VR0(r) ", 0x00(%[DST])\n" \
+ "vmovdqa64 %%" VR1(r) ", 0x40(%[DST])\n" \
+ "vmovdqa64 %%" VR2(r) ", 0x80(%[DST])\n" \
+ "vmovdqa64 %%" VR3(r) ", 0xc0(%[DST])\n" \
+ : : [DST] "r" (dst)); \
+ break; \
+ case 2: \
+ __asm( \
+ "vmovdqa64 %%" VR0(r) ", 0x00(%[DST])\n" \
+ "vmovdqa64 %%" VR1(r) ", 0x40(%[DST])\n" \
+ : : [DST] "r" (dst)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define MUL2_SETUP() \
+{ \
+ __asm("vmovq %0, %%xmm22" :: "r"(0x1d1d1d1d1d1d1d1d)); \
+ __asm("vpbroadcastq %xmm22, %zmm22"); \
+ __asm("vpxord %zmm23, %zmm23 ,%zmm23"); \
+}
+
+#define _MUL2(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 2: \
+ __asm( \
+ "vpcmpb $1, %zmm23, %" VR0(r)", %k1\n" \
+ "vpcmpb $1, %zmm23, %" VR1(r)", %k2\n" \
+ "vpaddb %" VR0(r)", %" VR0(r)", %" VR0(r) "\n" \
+ "vpaddb %" VR1(r)", %" VR1(r)", %" VR1(r) "\n" \
+ "vpxord %zmm22, %" VR0(r)", %zmm12\n" \
+ "vpxord %zmm22, %" VR1(r)", %zmm13\n" \
+ "vmovdqu8 %zmm12, %" VR0(r) "{%k1}\n" \
+ "vmovdqu8 %zmm13, %" VR1(r) "{%k2}"); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define MUL2(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ _MUL2(R_01(r)); \
+ _MUL2(R_23(r)); \
+ break; \
+ case 2: \
+ _MUL2(r); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define MUL4(r...) \
+{ \
+ MUL2(r); \
+ MUL2(r); \
+}
+
+#define _0f "zmm15"
+#define _as "zmm14"
+#define _bs "zmm13"
+#define _ltmod "zmm12"
+#define _ltmul "zmm11"
+#define _ta "zmm10"
+#define _tb "zmm15"
+
+static const uint8_t __attribute__((aligned(64))) _mul_mask = 0x0F;
+
+#define _MULx2(c, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 2: \
+ __asm( \
+ "vpbroadcastb (%[mask]), %%" _0f "\n" \
+ /* upper bits */ \
+ "vbroadcasti32x4 0x00(%[lt]), %%" _ltmod "\n" \
+ "vbroadcasti32x4 0x10(%[lt]), %%" _ltmul "\n" \
+ \
+ "vpsraw $0x4, %%" VR0(r) ", %%"_as "\n" \
+ "vpsraw $0x4, %%" VR1(r) ", %%"_bs "\n" \
+ "vpandq %%" _0f ", %%" VR0(r) ", %%" VR0(r) "\n" \
+ "vpandq %%" _0f ", %%" VR1(r) ", %%" VR1(r) "\n" \
+ "vpandq %%" _0f ", %%" _as ", %%" _as "\n" \
+ "vpandq %%" _0f ", %%" _bs ", %%" _bs "\n" \
+ \
+ "vpshufb %%" _as ", %%" _ltmod ", %%" _ta "\n" \
+ "vpshufb %%" _bs ", %%" _ltmod ", %%" _tb "\n" \
+ "vpshufb %%" _as ", %%" _ltmul ", %%" _as "\n" \
+ "vpshufb %%" _bs ", %%" _ltmul ", %%" _bs "\n" \
+ /* lower bits */ \
+ "vbroadcasti32x4 0x20(%[lt]), %%" _ltmod "\n" \
+ "vbroadcasti32x4 0x30(%[lt]), %%" _ltmul "\n" \
+ \
+ "vpxorq %%" _ta ", %%" _as ", %%" _as "\n" \
+ "vpxorq %%" _tb ", %%" _bs ", %%" _bs "\n" \
+ \
+ "vpshufb %%" VR0(r) ", %%" _ltmod ", %%" _ta "\n" \
+ "vpshufb %%" VR1(r) ", %%" _ltmod ", %%" _tb "\n" \
+ "vpshufb %%" VR0(r) ", %%" _ltmul ", %%" VR0(r) "\n"\
+ "vpshufb %%" VR1(r) ", %%" _ltmul ", %%" VR1(r) "\n"\
+ \
+ "vpxorq %%" _ta ", %%" VR0(r) ", %%" VR0(r) "\n" \
+ "vpxorq %%" _as ", %%" VR0(r) ", %%" VR0(r) "\n" \
+ "vpxorq %%" _tb ", %%" VR1(r) ", %%" VR1(r) "\n" \
+ "vpxorq %%" _bs ", %%" VR1(r) ", %%" VR1(r) "\n" \
+ : : [mask] "r" (&_mul_mask), \
+ [lt] "r" (gf_clmul_mod_lt[4*(c)])); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define MUL(c, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ _MULx2(c, R_01(r)); \
+ _MULx2(c, R_23(r)); \
+ break; \
+ case 2: \
+ _MULx2(c, R_01(r)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define raidz_math_begin() kfpu_begin()
+#define raidz_math_end() kfpu_end()
+
+/*
+ * ZERO, COPY, and MUL operations are already 2x unrolled, which means that
+ * the stride of these operations for avx512 must not exceed 4. Otherwise, a
+ * single step would exceed the 512B block size.
+ */
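
The arithmetic behind the limit: each zmm register holds sizeof (v_t) == 64
bytes and the callbacks are 2x unrolled, so a stride of 4 already touches
2 * 4 * 64 == 512 bytes per step. A compile-time check that could sit next to
these definitions (illustrative only):

	/* 2x unroll * stride 4 * 64-byte zmm lanes == one 512B block */
	_Static_assert(2 * 4 * sizeof (v_t) == 512,
	    "avx512 stride must not exceed 4");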
+
+#define SYN_STRIDE 4
+
+#define ZERO_STRIDE 4
+#define ZERO_DEFINE() {}
+#define ZERO_D 0, 1, 2, 3
+
+#define COPY_STRIDE 4
+#define COPY_DEFINE() {}
+#define COPY_D 0, 1, 2, 3
+
+#define ADD_STRIDE 4
+#define ADD_DEFINE() {}
+#define ADD_D 0, 1, 2, 3
+
+#define MUL_STRIDE 4
+#define MUL_DEFINE() {}
+#define MUL_D 0, 1, 2, 3
+
+#define GEN_P_STRIDE 4
+#define GEN_P_DEFINE() {}
+#define GEN_P_P 0, 1, 2, 3
+
+#define GEN_PQ_STRIDE 4
+#define GEN_PQ_DEFINE() {}
+#define GEN_PQ_D 0, 1, 2, 3
+#define GEN_PQ_C 4, 5, 6, 7
+
+#define GEN_PQR_STRIDE 4
+#define GEN_PQR_DEFINE() {}
+#define GEN_PQR_D 0, 1, 2, 3
+#define GEN_PQR_C 4, 5, 6, 7
+
+#define SYN_Q_DEFINE() {}
+#define SYN_Q_D 0, 1, 2, 3
+#define SYN_Q_X 4, 5, 6, 7
+
+#define SYN_R_DEFINE() {}
+#define SYN_R_D 0, 1, 2, 3
+#define SYN_R_X 4, 5, 6, 7
+
+#define SYN_PQ_DEFINE() {}
+#define SYN_PQ_D 0, 1, 2, 3
+#define SYN_PQ_X 4, 5, 6, 7
+
+#define REC_PQ_STRIDE 2
+#define REC_PQ_DEFINE() {}
+#define REC_PQ_X 0, 1
+#define REC_PQ_Y 2, 3
+#define REC_PQ_T 4, 5
+
+#define SYN_PR_DEFINE() {}
+#define SYN_PR_D 0, 1, 2, 3
+#define SYN_PR_X 4, 5, 6, 7
+
+#define REC_PR_STRIDE 2
+#define REC_PR_DEFINE() {}
+#define REC_PR_X 0, 1
+#define REC_PR_Y 2, 3
+#define REC_PR_T 4, 5
+
+#define SYN_QR_DEFINE() {}
+#define SYN_QR_D 0, 1, 2, 3
+#define SYN_QR_X 4, 5, 6, 7
+
+#define REC_QR_STRIDE 2
+#define REC_QR_DEFINE() {}
+#define REC_QR_X 0, 1
+#define REC_QR_Y 2, 3
+#define REC_QR_T 4, 5
+
+#define SYN_PQR_DEFINE() {}
+#define SYN_PQR_D 0, 1, 2, 3
+#define SYN_PQR_X 4, 5, 6, 7
+
+#define REC_PQR_STRIDE 2
+#define REC_PQR_DEFINE() {}
+#define REC_PQR_X 0, 1
+#define REC_PQR_Y 2, 3
+#define REC_PQR_Z 4, 5
+#define REC_PQR_XS 6, 7
+#define REC_PQR_YS 8, 9
+
+
+#include <sys/vdev_raidz_impl.h>
+#include "vdev_raidz_math_impl.h"
+
+DEFINE_GEN_METHODS(avx512bw);
+DEFINE_REC_METHODS(avx512bw);
+
+static boolean_t
+raidz_will_avx512bw_work(void)
+{
+ return (zfs_avx_available() &&
+ zfs_avx512f_available() &&
+ zfs_avx512bw_available());
+}
+
+const raidz_impl_ops_t vdev_raidz_avx512bw_impl = {
+ .init = NULL,
+ .fini = NULL,
+ .gen = RAIDZ_GEN_METHODS(avx512bw),
+ .rec = RAIDZ_REC_METHODS(avx512bw),
+ .is_supported = &raidz_will_avx512bw_work,
+ .name = "avx512bw"
+};
+
+#endif /* defined(__x86_64) && defined(HAVE_AVX512BW) */
diff --git a/zfs/module/zfs/vdev_raidz_math_avx512f.c b/zfs/module/zfs/vdev_raidz_math_avx512f.c
new file mode 100644
index 000000000000..f4e4560ced83
--- /dev/null
+++ b/zfs/module/zfs/vdev_raidz_math_avx512f.c
@@ -0,0 +1,487 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Romain Dolbeau. All rights reserved.
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#include <sys/isa_defs.h>
+
+#if defined(__x86_64) && defined(HAVE_AVX512F)
+
+#include <sys/types.h>
+#include <linux/simd_x86.h>
+
+#define __asm __asm__ __volatile__
+
+#define _REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
+#define REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)
+
+#define VR0_(REG, ...) "zmm"#REG
+#define VR1_(_1, REG, ...) "zmm"#REG
+#define VR2_(_1, _2, REG, ...) "zmm"#REG
+#define VR3_(_1, _2, _3, REG, ...) "zmm"#REG
+#define VR4_(_1, _2, _3, _4, REG, ...) "zmm"#REG
+#define VR5_(_1, _2, _3, _4, _5, REG, ...) "zmm"#REG
+#define VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "zmm"#REG
+#define VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "zmm"#REG
+
+#define VR0(r...) VR0_(r)
+#define VR1(r...) VR1_(r)
+#define VR2(r...) VR2_(r, 1)
+#define VR3(r...) VR3_(r, 1, 2)
+#define VR4(r...) VR4_(r, 1, 2)
+#define VR5(r...) VR5_(r, 1, 2, 3)
+#define VR6(r...) VR6_(r, 1, 2, 3, 4)
+#define VR7(r...) VR7_(r, 1, 2, 3, 4, 5)
+
+#define VRy0_(REG, ...) "ymm"#REG
+#define VRy1_(_1, REG, ...) "ymm"#REG
+#define VRy2_(_1, _2, REG, ...) "ymm"#REG
+#define VRy3_(_1, _2, _3, REG, ...) "ymm"#REG
+#define VRy4_(_1, _2, _3, _4, REG, ...) "ymm"#REG
+#define VRy5_(_1, _2, _3, _4, _5, REG, ...) "ymm"#REG
+#define VRy6_(_1, _2, _3, _4, _5, _6, REG, ...) "ymm"#REG
+#define VRy7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "ymm"#REG
+
+#define VRy0(r...) VRy0_(r)
+#define VRy1(r...) VRy1_(r)
+#define VRy2(r...) VRy2_(r, 1)
+#define VRy3(r...) VRy3_(r, 1, 2)
+#define VRy4(r...) VRy4_(r, 1, 2)
+#define VRy5(r...) VRy5_(r, 1, 2, 3)
+#define VRy6(r...) VRy6_(r, 1, 2, 3, 4)
+#define VRy7(r...) VRy7_(r, 1, 2, 3, 4, 5)
+
+#define R_01(REG1, REG2, ...) REG1, REG2
+#define _R_23(_0, _1, REG2, REG3, ...) REG2, REG3
+#define R_23(REG...) _R_23(REG, 1, 2, 3)
+
+#define ELEM_SIZE 64
+
+typedef struct v {
+ uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
+} v_t;
+
+
+#define XOR_ACC(src, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ __asm( \
+ "vpxorq 0x00(%[SRC]), %%" VR0(r)", %%" VR0(r) "\n" \
+ "vpxorq 0x40(%[SRC]), %%" VR1(r)", %%" VR1(r) "\n" \
+ "vpxorq 0x80(%[SRC]), %%" VR2(r)", %%" VR2(r) "\n" \
+ "vpxorq 0xc0(%[SRC]), %%" VR3(r)", %%" VR3(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ } \
+}
+
+#define XOR(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 8: \
+ __asm( \
+ "vpxorq %" VR0(r) ", %" VR4(r)", %" VR4(r) "\n" \
+ "vpxorq %" VR1(r) ", %" VR5(r)", %" VR5(r) "\n" \
+ "vpxorq %" VR2(r) ", %" VR6(r)", %" VR6(r) "\n" \
+ "vpxorq %" VR3(r) ", %" VR7(r)", %" VR7(r)); \
+ break; \
+ case 4: \
+ __asm( \
+ "vpxorq %" VR0(r) ", %" VR2(r)", %" VR2(r) "\n" \
+ "vpxorq %" VR1(r) ", %" VR3(r)", %" VR3(r)); \
+ break; \
+ } \
+}
+
+
+#define ZERO(r...) XOR(r, r)
+
+
+#define COPY(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 8: \
+ __asm( \
+ "vmovdqa64 %" VR0(r) ", %" VR4(r) "\n" \
+ "vmovdqa64 %" VR1(r) ", %" VR5(r) "\n" \
+ "vmovdqa64 %" VR2(r) ", %" VR6(r) "\n" \
+ "vmovdqa64 %" VR3(r) ", %" VR7(r)); \
+ break; \
+ case 4: \
+ __asm( \
+ "vmovdqa64 %" VR0(r) ", %" VR2(r) "\n" \
+ "vmovdqa64 %" VR1(r) ", %" VR3(r)); \
+ break; \
+ } \
+}
+
+#define LOAD(src, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ __asm( \
+ "vmovdqa64 0x00(%[SRC]), %%" VR0(r) "\n" \
+ "vmovdqa64 0x40(%[SRC]), %%" VR1(r) "\n" \
+ "vmovdqa64 0x80(%[SRC]), %%" VR2(r) "\n" \
+ "vmovdqa64 0xc0(%[SRC]), %%" VR3(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ } \
+}
+
+#define STORE(dst, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ __asm( \
+ "vmovdqa64 %%" VR0(r) ", 0x00(%[DST])\n" \
+ "vmovdqa64 %%" VR1(r) ", 0x40(%[DST])\n" \
+ "vmovdqa64 %%" VR2(r) ", 0x80(%[DST])\n" \
+ "vmovdqa64 %%" VR3(r) ", 0xc0(%[DST])\n" \
+ : : [DST] "r" (dst)); \
+ break; \
+ } \
+}
+
+#define MUL2_SETUP() \
+{ \
+ __asm("vmovq %0, %%xmm31" :: "r"(0x1d1d1d1d1d1d1d1d)); \
+ __asm("vpbroadcastq %xmm31, %zmm31"); \
+ __asm("vmovq %0, %%xmm30" :: "r"(0x8080808080808080)); \
+ __asm("vpbroadcastq %xmm30, %zmm30"); \
+ __asm("vmovq %0, %%xmm29" :: "r"(0xfefefefefefefefe)); \
+ __asm("vpbroadcastq %xmm29, %zmm29"); \
+}
+
+#define _MUL2(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 2: \
+ __asm( \
+ "vpandq %" VR0(r)", %zmm30, %zmm26\n" \
+ "vpandq %" VR1(r)", %zmm30, %zmm25\n" \
+ "vpsrlq $7, %zmm26, %zmm28\n" \
+ "vpsrlq $7, %zmm25, %zmm27\n" \
+ "vpsllq $1, %zmm26, %zmm26\n" \
+ "vpsllq $1, %zmm25, %zmm25\n" \
+ "vpsubq %zmm28, %zmm26, %zmm26\n" \
+ "vpsubq %zmm27, %zmm25, %zmm25\n" \
+ "vpsllq $1, %" VR0(r)", %" VR0(r) "\n" \
+ "vpsllq $1, %" VR1(r)", %" VR1(r) "\n" \
+ "vpandq %zmm26, %zmm31, %zmm26\n" \
+ "vpandq %zmm25, %zmm31, %zmm25\n" \
+ "vpternlogd $0x6c,%zmm29, %zmm26, %" VR0(r) "\n" \
+ "vpternlogd $0x6c,%zmm29, %zmm25, %" VR1(r)); \
+ break; \
+ } \
+}
+
+#define MUL2(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ _MUL2(R_01(r)); \
+ _MUL2(R_23(r)); \
+ break; \
+ case 2: \
+ _MUL2(r); \
+ break; \
+ } \
+}
+
+#define MUL4(r...) \
+{ \
+ MUL2(r); \
+ MUL2(r); \
+}
+
+
+/* General multiplication by adding powers of two */
+
+#define _mul_x2_in 21, 22
+#define _mul_x2_acc 23, 24
+
+#define _MUL_PARAM(x, in, acc) \
+{ \
+ if (x & 0x01) { COPY(in, acc); } else { ZERO(acc); } \
+ if (x & 0xfe) { MUL2(in); } \
+ if (x & 0x02) { XOR(in, acc); } \
+ if (x & 0xfc) { MUL2(in); } \
+ if (x & 0x04) { XOR(in, acc); } \
+ if (x & 0xf8) { MUL2(in); } \
+ if (x & 0x08) { XOR(in, acc); } \
+ if (x & 0xf0) { MUL2(in); } \
+ if (x & 0x10) { XOR(in, acc); } \
+ if (x & 0xe0) { MUL2(in); } \
+ if (x & 0x20) { XOR(in, acc); } \
+ if (x & 0xc0) { MUL2(in); } \
+ if (x & 0x40) { XOR(in, acc); } \
+ if (x & 0x80) { MUL2(in); XOR(in, acc); } \
+}
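
_MUL_PARAM() is an unrolled double-and-add multiply: the accumulator picks up
the input whenever the corresponding bit of x is set, the input is doubled
between bits, and the masks such as (x & 0xfe) skip trailing doublings once
no higher bits remain. A scalar version of the same scheme (hypothetical
helper, assuming the 0x11d polynomial):

	#include <stdint.h>

	static uint8_t
	gf_mul_dnd(uint8_t x, uint8_t in)
	{
		uint8_t acc = 0;

		for (; x != 0; x >>= 1) {
			if (x & 1)
				acc ^= in;	/* XOR(in, acc) */
			/* MUL2(in) */
			in = (uint8_t)((in << 1) ^ ((in & 0x80) ? 0x1d : 0));
		}
		return (acc);
	}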
+
+#define MUL_x2_DEFINE(x) \
+static void \
+mul_x2_ ## x(void) { _MUL_PARAM(x, _mul_x2_in, _mul_x2_acc); }
+
+
+MUL_x2_DEFINE(0); MUL_x2_DEFINE(1); MUL_x2_DEFINE(2); MUL_x2_DEFINE(3);
+MUL_x2_DEFINE(4); MUL_x2_DEFINE(5); MUL_x2_DEFINE(6); MUL_x2_DEFINE(7);
+MUL_x2_DEFINE(8); MUL_x2_DEFINE(9); MUL_x2_DEFINE(10); MUL_x2_DEFINE(11);
+MUL_x2_DEFINE(12); MUL_x2_DEFINE(13); MUL_x2_DEFINE(14); MUL_x2_DEFINE(15);
+MUL_x2_DEFINE(16); MUL_x2_DEFINE(17); MUL_x2_DEFINE(18); MUL_x2_DEFINE(19);
+MUL_x2_DEFINE(20); MUL_x2_DEFINE(21); MUL_x2_DEFINE(22); MUL_x2_DEFINE(23);
+MUL_x2_DEFINE(24); MUL_x2_DEFINE(25); MUL_x2_DEFINE(26); MUL_x2_DEFINE(27);
+MUL_x2_DEFINE(28); MUL_x2_DEFINE(29); MUL_x2_DEFINE(30); MUL_x2_DEFINE(31);
+MUL_x2_DEFINE(32); MUL_x2_DEFINE(33); MUL_x2_DEFINE(34); MUL_x2_DEFINE(35);
+MUL_x2_DEFINE(36); MUL_x2_DEFINE(37); MUL_x2_DEFINE(38); MUL_x2_DEFINE(39);
+MUL_x2_DEFINE(40); MUL_x2_DEFINE(41); MUL_x2_DEFINE(42); MUL_x2_DEFINE(43);
+MUL_x2_DEFINE(44); MUL_x2_DEFINE(45); MUL_x2_DEFINE(46); MUL_x2_DEFINE(47);
+MUL_x2_DEFINE(48); MUL_x2_DEFINE(49); MUL_x2_DEFINE(50); MUL_x2_DEFINE(51);
+MUL_x2_DEFINE(52); MUL_x2_DEFINE(53); MUL_x2_DEFINE(54); MUL_x2_DEFINE(55);
+MUL_x2_DEFINE(56); MUL_x2_DEFINE(57); MUL_x2_DEFINE(58); MUL_x2_DEFINE(59);
+MUL_x2_DEFINE(60); MUL_x2_DEFINE(61); MUL_x2_DEFINE(62); MUL_x2_DEFINE(63);
+MUL_x2_DEFINE(64); MUL_x2_DEFINE(65); MUL_x2_DEFINE(66); MUL_x2_DEFINE(67);
+MUL_x2_DEFINE(68); MUL_x2_DEFINE(69); MUL_x2_DEFINE(70); MUL_x2_DEFINE(71);
+MUL_x2_DEFINE(72); MUL_x2_DEFINE(73); MUL_x2_DEFINE(74); MUL_x2_DEFINE(75);
+MUL_x2_DEFINE(76); MUL_x2_DEFINE(77); MUL_x2_DEFINE(78); MUL_x2_DEFINE(79);
+MUL_x2_DEFINE(80); MUL_x2_DEFINE(81); MUL_x2_DEFINE(82); MUL_x2_DEFINE(83);
+MUL_x2_DEFINE(84); MUL_x2_DEFINE(85); MUL_x2_DEFINE(86); MUL_x2_DEFINE(87);
+MUL_x2_DEFINE(88); MUL_x2_DEFINE(89); MUL_x2_DEFINE(90); MUL_x2_DEFINE(91);
+MUL_x2_DEFINE(92); MUL_x2_DEFINE(93); MUL_x2_DEFINE(94); MUL_x2_DEFINE(95);
+MUL_x2_DEFINE(96); MUL_x2_DEFINE(97); MUL_x2_DEFINE(98); MUL_x2_DEFINE(99);
+MUL_x2_DEFINE(100); MUL_x2_DEFINE(101); MUL_x2_DEFINE(102); MUL_x2_DEFINE(103);
+MUL_x2_DEFINE(104); MUL_x2_DEFINE(105); MUL_x2_DEFINE(106); MUL_x2_DEFINE(107);
+MUL_x2_DEFINE(108); MUL_x2_DEFINE(109); MUL_x2_DEFINE(110); MUL_x2_DEFINE(111);
+MUL_x2_DEFINE(112); MUL_x2_DEFINE(113); MUL_x2_DEFINE(114); MUL_x2_DEFINE(115);
+MUL_x2_DEFINE(116); MUL_x2_DEFINE(117); MUL_x2_DEFINE(118); MUL_x2_DEFINE(119);
+MUL_x2_DEFINE(120); MUL_x2_DEFINE(121); MUL_x2_DEFINE(122); MUL_x2_DEFINE(123);
+MUL_x2_DEFINE(124); MUL_x2_DEFINE(125); MUL_x2_DEFINE(126); MUL_x2_DEFINE(127);
+MUL_x2_DEFINE(128); MUL_x2_DEFINE(129); MUL_x2_DEFINE(130); MUL_x2_DEFINE(131);
+MUL_x2_DEFINE(132); MUL_x2_DEFINE(133); MUL_x2_DEFINE(134); MUL_x2_DEFINE(135);
+MUL_x2_DEFINE(136); MUL_x2_DEFINE(137); MUL_x2_DEFINE(138); MUL_x2_DEFINE(139);
+MUL_x2_DEFINE(140); MUL_x2_DEFINE(141); MUL_x2_DEFINE(142); MUL_x2_DEFINE(143);
+MUL_x2_DEFINE(144); MUL_x2_DEFINE(145); MUL_x2_DEFINE(146); MUL_x2_DEFINE(147);
+MUL_x2_DEFINE(148); MUL_x2_DEFINE(149); MUL_x2_DEFINE(150); MUL_x2_DEFINE(151);
+MUL_x2_DEFINE(152); MUL_x2_DEFINE(153); MUL_x2_DEFINE(154); MUL_x2_DEFINE(155);
+MUL_x2_DEFINE(156); MUL_x2_DEFINE(157); MUL_x2_DEFINE(158); MUL_x2_DEFINE(159);
+MUL_x2_DEFINE(160); MUL_x2_DEFINE(161); MUL_x2_DEFINE(162); MUL_x2_DEFINE(163);
+MUL_x2_DEFINE(164); MUL_x2_DEFINE(165); MUL_x2_DEFINE(166); MUL_x2_DEFINE(167);
+MUL_x2_DEFINE(168); MUL_x2_DEFINE(169); MUL_x2_DEFINE(170); MUL_x2_DEFINE(171);
+MUL_x2_DEFINE(172); MUL_x2_DEFINE(173); MUL_x2_DEFINE(174); MUL_x2_DEFINE(175);
+MUL_x2_DEFINE(176); MUL_x2_DEFINE(177); MUL_x2_DEFINE(178); MUL_x2_DEFINE(179);
+MUL_x2_DEFINE(180); MUL_x2_DEFINE(181); MUL_x2_DEFINE(182); MUL_x2_DEFINE(183);
+MUL_x2_DEFINE(184); MUL_x2_DEFINE(185); MUL_x2_DEFINE(186); MUL_x2_DEFINE(187);
+MUL_x2_DEFINE(188); MUL_x2_DEFINE(189); MUL_x2_DEFINE(190); MUL_x2_DEFINE(191);
+MUL_x2_DEFINE(192); MUL_x2_DEFINE(193); MUL_x2_DEFINE(194); MUL_x2_DEFINE(195);
+MUL_x2_DEFINE(196); MUL_x2_DEFINE(197); MUL_x2_DEFINE(198); MUL_x2_DEFINE(199);
+MUL_x2_DEFINE(200); MUL_x2_DEFINE(201); MUL_x2_DEFINE(202); MUL_x2_DEFINE(203);
+MUL_x2_DEFINE(204); MUL_x2_DEFINE(205); MUL_x2_DEFINE(206); MUL_x2_DEFINE(207);
+MUL_x2_DEFINE(208); MUL_x2_DEFINE(209); MUL_x2_DEFINE(210); MUL_x2_DEFINE(211);
+MUL_x2_DEFINE(212); MUL_x2_DEFINE(213); MUL_x2_DEFINE(214); MUL_x2_DEFINE(215);
+MUL_x2_DEFINE(216); MUL_x2_DEFINE(217); MUL_x2_DEFINE(218); MUL_x2_DEFINE(219);
+MUL_x2_DEFINE(220); MUL_x2_DEFINE(221); MUL_x2_DEFINE(222); MUL_x2_DEFINE(223);
+MUL_x2_DEFINE(224); MUL_x2_DEFINE(225); MUL_x2_DEFINE(226); MUL_x2_DEFINE(227);
+MUL_x2_DEFINE(228); MUL_x2_DEFINE(229); MUL_x2_DEFINE(230); MUL_x2_DEFINE(231);
+MUL_x2_DEFINE(232); MUL_x2_DEFINE(233); MUL_x2_DEFINE(234); MUL_x2_DEFINE(235);
+MUL_x2_DEFINE(236); MUL_x2_DEFINE(237); MUL_x2_DEFINE(238); MUL_x2_DEFINE(239);
+MUL_x2_DEFINE(240); MUL_x2_DEFINE(241); MUL_x2_DEFINE(242); MUL_x2_DEFINE(243);
+MUL_x2_DEFINE(244); MUL_x2_DEFINE(245); MUL_x2_DEFINE(246); MUL_x2_DEFINE(247);
+MUL_x2_DEFINE(248); MUL_x2_DEFINE(249); MUL_x2_DEFINE(250); MUL_x2_DEFINE(251);
+MUL_x2_DEFINE(252); MUL_x2_DEFINE(253); MUL_x2_DEFINE(254); MUL_x2_DEFINE(255);
+
+
+typedef void (*mul_fn_ptr_t)(void);
+
+static const mul_fn_ptr_t __attribute__((aligned(256)))
+gf_x2_mul_fns[256] = {
+ mul_x2_0, mul_x2_1, mul_x2_2, mul_x2_3, mul_x2_4, mul_x2_5,
+ mul_x2_6, mul_x2_7, mul_x2_8, mul_x2_9, mul_x2_10, mul_x2_11,
+ mul_x2_12, mul_x2_13, mul_x2_14, mul_x2_15, mul_x2_16, mul_x2_17,
+ mul_x2_18, mul_x2_19, mul_x2_20, mul_x2_21, mul_x2_22, mul_x2_23,
+ mul_x2_24, mul_x2_25, mul_x2_26, mul_x2_27, mul_x2_28, mul_x2_29,
+ mul_x2_30, mul_x2_31, mul_x2_32, mul_x2_33, mul_x2_34, mul_x2_35,
+ mul_x2_36, mul_x2_37, mul_x2_38, mul_x2_39, mul_x2_40, mul_x2_41,
+ mul_x2_42, mul_x2_43, mul_x2_44, mul_x2_45, mul_x2_46, mul_x2_47,
+ mul_x2_48, mul_x2_49, mul_x2_50, mul_x2_51, mul_x2_52, mul_x2_53,
+ mul_x2_54, mul_x2_55, mul_x2_56, mul_x2_57, mul_x2_58, mul_x2_59,
+ mul_x2_60, mul_x2_61, mul_x2_62, mul_x2_63, mul_x2_64, mul_x2_65,
+ mul_x2_66, mul_x2_67, mul_x2_68, mul_x2_69, mul_x2_70, mul_x2_71,
+ mul_x2_72, mul_x2_73, mul_x2_74, mul_x2_75, mul_x2_76, mul_x2_77,
+ mul_x2_78, mul_x2_79, mul_x2_80, mul_x2_81, mul_x2_82, mul_x2_83,
+ mul_x2_84, mul_x2_85, mul_x2_86, mul_x2_87, mul_x2_88, mul_x2_89,
+ mul_x2_90, mul_x2_91, mul_x2_92, mul_x2_93, mul_x2_94, mul_x2_95,
+ mul_x2_96, mul_x2_97, mul_x2_98, mul_x2_99, mul_x2_100, mul_x2_101,
+ mul_x2_102, mul_x2_103, mul_x2_104, mul_x2_105, mul_x2_106, mul_x2_107,
+ mul_x2_108, mul_x2_109, mul_x2_110, mul_x2_111, mul_x2_112, mul_x2_113,
+ mul_x2_114, mul_x2_115, mul_x2_116, mul_x2_117, mul_x2_118, mul_x2_119,
+ mul_x2_120, mul_x2_121, mul_x2_122, mul_x2_123, mul_x2_124, mul_x2_125,
+ mul_x2_126, mul_x2_127, mul_x2_128, mul_x2_129, mul_x2_130, mul_x2_131,
+ mul_x2_132, mul_x2_133, mul_x2_134, mul_x2_135, mul_x2_136, mul_x2_137,
+ mul_x2_138, mul_x2_139, mul_x2_140, mul_x2_141, mul_x2_142, mul_x2_143,
+ mul_x2_144, mul_x2_145, mul_x2_146, mul_x2_147, mul_x2_148, mul_x2_149,
+ mul_x2_150, mul_x2_151, mul_x2_152, mul_x2_153, mul_x2_154, mul_x2_155,
+ mul_x2_156, mul_x2_157, mul_x2_158, mul_x2_159, mul_x2_160, mul_x2_161,
+ mul_x2_162, mul_x2_163, mul_x2_164, mul_x2_165, mul_x2_166, mul_x2_167,
+ mul_x2_168, mul_x2_169, mul_x2_170, mul_x2_171, mul_x2_172, mul_x2_173,
+ mul_x2_174, mul_x2_175, mul_x2_176, mul_x2_177, mul_x2_178, mul_x2_179,
+ mul_x2_180, mul_x2_181, mul_x2_182, mul_x2_183, mul_x2_184, mul_x2_185,
+ mul_x2_186, mul_x2_187, mul_x2_188, mul_x2_189, mul_x2_190, mul_x2_191,
+ mul_x2_192, mul_x2_193, mul_x2_194, mul_x2_195, mul_x2_196, mul_x2_197,
+ mul_x2_198, mul_x2_199, mul_x2_200, mul_x2_201, mul_x2_202, mul_x2_203,
+ mul_x2_204, mul_x2_205, mul_x2_206, mul_x2_207, mul_x2_208, mul_x2_209,
+ mul_x2_210, mul_x2_211, mul_x2_212, mul_x2_213, mul_x2_214, mul_x2_215,
+ mul_x2_216, mul_x2_217, mul_x2_218, mul_x2_219, mul_x2_220, mul_x2_221,
+ mul_x2_222, mul_x2_223, mul_x2_224, mul_x2_225, mul_x2_226, mul_x2_227,
+ mul_x2_228, mul_x2_229, mul_x2_230, mul_x2_231, mul_x2_232, mul_x2_233,
+ mul_x2_234, mul_x2_235, mul_x2_236, mul_x2_237, mul_x2_238, mul_x2_239,
+ mul_x2_240, mul_x2_241, mul_x2_242, mul_x2_243, mul_x2_244, mul_x2_245,
+ mul_x2_246, mul_x2_247, mul_x2_248, mul_x2_249, mul_x2_250, mul_x2_251,
+ mul_x2_252, mul_x2_253, mul_x2_254, mul_x2_255
+};
+
+#define MUL(c, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ COPY(R_01(r), _mul_x2_in); \
+ gf_x2_mul_fns[c](); \
+ COPY(_mul_x2_acc, R_01(r)); \
+ COPY(R_23(r), _mul_x2_in); \
+ gf_x2_mul_fns[c](); \
+ COPY(_mul_x2_acc, R_23(r)); \
+ } \
+}
+
+
+#define raidz_math_begin() kfpu_begin()
+#define raidz_math_end() kfpu_end()
+
+
+#define SYN_STRIDE 4
+
+#define ZERO_STRIDE 4
+#define ZERO_DEFINE() {}
+#define ZERO_D 0, 1, 2, 3
+
+#define COPY_STRIDE 4
+#define COPY_DEFINE() {}
+#define COPY_D 0, 1, 2, 3
+
+#define ADD_STRIDE 4
+#define ADD_DEFINE() {}
+#define ADD_D 0, 1, 2, 3
+
+#define MUL_STRIDE 4
+#define MUL_DEFINE() MUL2_SETUP()
+#define MUL_D 0, 1, 2, 3
+
+#define GEN_P_STRIDE 4
+#define GEN_P_DEFINE() {}
+#define GEN_P_P 0, 1, 2, 3
+
+#define GEN_PQ_STRIDE 4
+#define GEN_PQ_DEFINE() {}
+#define GEN_PQ_D 0, 1, 2, 3
+#define GEN_PQ_C 4, 5, 6, 7
+
+#define GEN_PQR_STRIDE 4
+#define GEN_PQR_DEFINE() {}
+#define GEN_PQR_D 0, 1, 2, 3
+#define GEN_PQR_C 4, 5, 6, 7
+
+#define SYN_Q_DEFINE() {}
+#define SYN_Q_D 0, 1, 2, 3
+#define SYN_Q_X 4, 5, 6, 7
+
+#define SYN_R_DEFINE() {}
+#define SYN_R_D 0, 1, 2, 3
+#define SYN_R_X 4, 5, 6, 7
+
+#define SYN_PQ_DEFINE() {}
+#define SYN_PQ_D 0, 1, 2, 3
+#define SYN_PQ_X 4, 5, 6, 7
+
+#define REC_PQ_STRIDE 4
+#define REC_PQ_DEFINE() MUL2_SETUP()
+#define REC_PQ_X 0, 1, 2, 3
+#define REC_PQ_Y 4, 5, 6, 7
+#define REC_PQ_T 8, 9, 10, 11
+
+#define SYN_PR_DEFINE() {}
+#define SYN_PR_D 0, 1, 2, 3
+#define SYN_PR_X 4, 5, 6, 7
+
+#define REC_PR_STRIDE 4
+#define REC_PR_DEFINE() MUL2_SETUP()
+#define REC_PR_X 0, 1, 2, 3
+#define REC_PR_Y 4, 5, 6, 7
+#define REC_PR_T 8, 9, 10, 11
+
+#define SYN_QR_DEFINE() {}
+#define SYN_QR_D 0, 1, 2, 3
+#define SYN_QR_X 4, 5, 6, 7
+
+#define REC_QR_STRIDE 4
+#define REC_QR_DEFINE() MUL2_SETUP()
+#define REC_QR_X 0, 1, 2, 3
+#define REC_QR_Y 4, 5, 6, 7
+#define REC_QR_T 8, 9, 10, 11
+
+#define SYN_PQR_DEFINE() {}
+#define SYN_PQR_D 0, 1, 2, 3
+#define SYN_PQR_X 4, 5, 6, 7
+
+#define REC_PQR_STRIDE 4
+#define REC_PQR_DEFINE() MUL2_SETUP()
+#define REC_PQR_X 0, 1, 2, 3
+#define REC_PQR_Y 4, 5, 6, 7
+#define REC_PQR_Z 8, 9, 10, 11
+#define REC_PQR_XS 12, 13, 14, 15
+#define REC_PQR_YS 16, 17, 18, 19
+
+
+#include <sys/vdev_raidz_impl.h>
+#include "vdev_raidz_math_impl.h"
+
+DEFINE_GEN_METHODS(avx512f);
+DEFINE_REC_METHODS(avx512f);
+
+static boolean_t
+raidz_will_avx512f_work(void)
+{
+ return (zfs_avx_available() &&
+ zfs_avx2_available() &&
+ zfs_avx512f_available());
+}
+
+const raidz_impl_ops_t vdev_raidz_avx512f_impl = {
+ .init = NULL,
+ .fini = NULL,
+ .gen = RAIDZ_GEN_METHODS(avx512f),
+ .rec = RAIDZ_REC_METHODS(avx512f),
+ .is_supported = &raidz_will_avx512f_work,
+ .name = "avx512f"
+};
+
+#endif /* defined(__x86_64) && defined(HAVE_AVX512F) */
diff --git a/zfs/module/zfs/vdev_raidz_math_impl.h b/zfs/module/zfs/vdev_raidz_math_impl.h
new file mode 100644
index 000000000000..ea592c0f12da
--- /dev/null
+++ b/zfs/module/zfs/vdev_raidz_math_impl.h
@@ -0,0 +1,1477 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#ifndef _VDEV_RAIDZ_MATH_IMPL_H
+#define _VDEV_RAIDZ_MATH_IMPL_H
+
+#include <sys/types.h>
+
+#define raidz_inline inline __attribute__((always_inline))
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
+/*
+ * These functions calculate the multiplication constants used for data
+ * reconstruction. The coefficients depend on the RAIDZ geometry, the indexes
+ * of the failed child vdevs, and the parity columns used for reconstruction.
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ * @coeff output array of coefficients. The array must be provided by
+ * the caller and must hold at least MUL_CNT values.
+ */
+static noinline void
+raidz_rec_q_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+
+ coeff[MUL_Q_X] = gf_exp2(255 - (ncols - x - 1));
+}
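
The exponent arithmetic relies on the multiplicative group of GF(2^8) having
order 255: column x contributes 2^(ncols - x - 1) * Dx to Q, and multiplying
the Q syndrome by gf_exp2(255 - (ncols - x - 1)) cancels that factor because
2^255 == 1. A scalar sketch of the recovery step (hypothetical code, assuming
byte-wide gf_mul()/gf_exp2() helpers):

	#include <stdint.h>

	extern uint8_t gf_mul(uint8_t a, uint8_t b);	/* assumed helpers */
	extern uint8_t gf_exp2(unsigned e);

	static uint8_t
	rec_q_dx(uint8_t qsyn, unsigned ncols, unsigned x)
	{
		unsigned e = ncols - x - 1;

		/* 2^e * 2^(255 - e) == 2^255 == 1 in GF(2^8) */
		return (gf_mul(qsyn, gf_exp2(255 - e)));
	}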
+
+static noinline void
+raidz_rec_r_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+
+ coeff[MUL_R_X] = gf_exp4(255 - (ncols - x - 1));
+}
+
+static noinline void
+raidz_rec_pq_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+ const unsigned y = tgtidx[TARGET_Y];
+ gf_t a, b, e;
+
+ a = gf_exp2(x + 255 - y);
+ b = gf_exp2(255 - (ncols - x - 1));
+ e = a ^ 0x01;
+
+ coeff[MUL_PQ_X] = gf_div(a, e);
+ coeff[MUL_PQ_Y] = gf_div(b, e);
+}
+
+static noinline void
+raidz_rec_pr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+ const unsigned y = tgtidx[TARGET_Y];
+
+ gf_t a, b, e;
+
+ a = gf_exp4(x + 255 - y);
+ b = gf_exp4(255 - (ncols - x - 1));
+ e = a ^ 0x01;
+
+ coeff[MUL_PR_X] = gf_div(a, e);
+ coeff[MUL_PR_Y] = gf_div(b, e);
+}
+
+static noinline void
+raidz_rec_qr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+ const unsigned y = tgtidx[TARGET_Y];
+
+ gf_t nx, ny, nxxy, nxyy, d;
+
+ nx = gf_exp2(ncols - x - 1);
+ ny = gf_exp2(ncols - y - 1);
+ nxxy = gf_mul(gf_mul(nx, nx), ny);
+ nxyy = gf_mul(gf_mul(nx, ny), ny);
+ d = nxxy ^ nxyy;
+
+ coeff[MUL_QR_XQ] = ny;
+ coeff[MUL_QR_X] = gf_div(ny, d);
+ coeff[MUL_QR_YQ] = nx;
+ coeff[MUL_QR_Y] = gf_div(nx, d);
+}
+
+static noinline void
+raidz_rec_pqr_coeff(const raidz_map_t *rm, const int *tgtidx, unsigned *coeff)
+{
+ const unsigned ncols = raidz_ncols(rm);
+ const unsigned x = tgtidx[TARGET_X];
+ const unsigned y = tgtidx[TARGET_Y];
+ const unsigned z = tgtidx[TARGET_Z];
+
+ gf_t nx, ny, nz, nxx, nyy, nzz, nyyz, nyzz, xd, yd;
+
+ nx = gf_exp2(ncols - x - 1);
+ ny = gf_exp2(ncols - y - 1);
+ nz = gf_exp2(ncols - z - 1);
+
+ nxx = gf_exp4(ncols - x - 1);
+ nyy = gf_exp4(ncols - y - 1);
+ nzz = gf_exp4(ncols - z - 1);
+
+ nyyz = gf_mul(gf_mul(ny, nz), ny);
+ nyzz = gf_mul(nzz, ny);
+
+ xd = gf_mul(nxx, ny) ^ gf_mul(nx, nyy) ^ nyyz ^
+ gf_mul(nxx, nz) ^ gf_mul(nzz, nx) ^ nyzz;
+
+ yd = gf_inv(ny ^ nz);
+
+ coeff[MUL_PQR_XP] = gf_div(nyyz ^ nyzz, xd);
+ coeff[MUL_PQR_XQ] = gf_div(nyy ^ nzz, xd);
+ coeff[MUL_PQR_XR] = gf_div(ny ^ nz, xd);
+ coeff[MUL_PQR_YU] = nx;
+ coeff[MUL_PQR_YP] = gf_mul(nz, yd);
+ coeff[MUL_PQR_YQ] = yd;
+}
+
+/*
+ * Method for zeroing a buffer (can be implemented using SIMD).
+ * This method is used by multiple gen/rec functions.
+ *
+ * @dc Destination buffer
+ * @dsize Destination buffer size
+ * @private Unused
+ */
+static int
+raidz_zero_abd_cb(void *dc, size_t dsize, void *private)
+{
+ v_t *dst = (v_t *)dc;
+ size_t i;
+
+ ZERO_DEFINE();
+
+ (void) private; /* unused */
+
+ ZERO(ZERO_D);
+
+ for (i = 0; i < dsize / sizeof (v_t); i += (2 * ZERO_STRIDE)) {
+ STORE(dst + i, ZERO_D);
+ STORE(dst + i + ZERO_STRIDE, ZERO_D);
+ }
+
+ return (0);
+}
+
+#define raidz_zero(dabd, size) \
+{ \
+ abd_iterate_func(dabd, 0, size, raidz_zero_abd_cb, NULL); \
+}
+
+/*
+ * Method for copying one buffer to another (can be implemented using SIMD).
+ * This method is used by multiple gen/rec functions.
+ *
+ * @dc Destination buffer
+ * @sc Source buffer
+ * @size Size of the buffers
+ * @private Unused
+ */
+static int
+raidz_copy_abd_cb(void *dc, void *sc, size_t size, void *private)
+{
+ v_t *dst = (v_t *)dc;
+ const v_t *src = (v_t *)sc;
+ size_t i;
+
+ COPY_DEFINE();
+
+ (void) private; /* unused */
+
+ for (i = 0; i < size / sizeof (v_t); i += (2 * COPY_STRIDE)) {
+ LOAD(src + i, COPY_D);
+ STORE(dst + i, COPY_D);
+
+ LOAD(src + i + COPY_STRIDE, COPY_D);
+ STORE(dst + i + COPY_STRIDE, COPY_D);
+ }
+
+ return (0);
+}
+
+
+#define raidz_copy(dabd, sabd, size) \
+{ \
+ abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_copy_abd_cb, NULL);\
+}
+
+/*
+ * Method for adding (XORing) two buffers.
+ * Source and destination are XORed together and result is stored in
+ * destination buffer. This method is used by multiple gen/rec functions.
+ *
+ * @dc Destination buffer
+ * @sc Source buffer
+ * @size Size of the buffers
+ * @private Unused
+ */
+static int
+raidz_add_abd_cb(void *dc, void *sc, size_t size, void *private)
+{
+ v_t *dst = (v_t *)dc;
+ const v_t *src = (v_t *)sc;
+ size_t i;
+
+ ADD_DEFINE();
+
+ (void) private; /* unused */
+
+ for (i = 0; i < size / sizeof (v_t); i += (2 * ADD_STRIDE)) {
+ LOAD(dst + i, ADD_D);
+ XOR_ACC(src + i, ADD_D);
+ STORE(dst + i, ADD_D);
+
+ LOAD(dst + i + ADD_STRIDE, ADD_D);
+ XOR_ACC(src + i + ADD_STRIDE, ADD_D);
+ STORE(dst + i + ADD_STRIDE, ADD_D);
+ }
+
+ return (0);
+}
+
+#define raidz_add(dabd, sabd, size) \
+{ \
+ abd_iterate_func2(dabd, sabd, 0, 0, size, raidz_add_abd_cb, NULL);\
+}
+
+/*
+ * Method for multiplying a buffer by a constant in GF(2^8).
+ * Symbols from the buffer are multiplied by the constant and the result is
+ * stored back in the same buffer.
+ *
+ * @dc In/Out data buffer.
+ * @size Size of the buffer
+ * @private pointer to the multiplication constant (unsigned)
+ */
+static int
+raidz_mul_abd_cb(void *dc, size_t size, void *private)
+{
+ const unsigned mul = *((unsigned *)private);
+ v_t *d = (v_t *)dc;
+ size_t i;
+
+ MUL_DEFINE();
+
+ for (i = 0; i < size / sizeof (v_t); i += (2 * MUL_STRIDE)) {
+ LOAD(d + i, MUL_D);
+ MUL(mul, MUL_D);
+ STORE(d + i, MUL_D);
+
+ LOAD(d + i + MUL_STRIDE, MUL_D);
+ MUL(mul, MUL_D);
+ STORE(d + i + MUL_STRIDE, MUL_D);
+ }
+
+ return (0);
+}
+
+
+/*
+ * Syndrome generation/update macros
+ *
+ * Require LOAD(), XOR(), STORE(), MUL2(), and MUL4() macros
+ */
+#define P_D_SYNDROME(D, T, t) \
+{ \
+ LOAD((t), T); \
+ XOR(D, T); \
+ STORE((t), T); \
+}
+
+#define Q_D_SYNDROME(D, T, t) \
+{ \
+ LOAD((t), T); \
+ MUL2(T); \
+ XOR(D, T); \
+ STORE((t), T); \
+}
+
+#define Q_SYNDROME(T, t) \
+{ \
+ LOAD((t), T); \
+ MUL2(T); \
+ STORE((t), T); \
+}
+
+#define R_D_SYNDROME(D, T, t) \
+{ \
+ LOAD((t), T); \
+ MUL4(T); \
+ XOR(D, T); \
+ STORE((t), T); \
+}
+
+#define R_SYNDROME(T, t) \
+{ \
+ LOAD((t), T); \
+ MUL4(T); \
+ STORE((t), T); \
+}
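
Q_D_SYNDROME() and Q_SYNDROME() evaluate Q by Horner's rule: the running
syndrome is doubled before each new data column is XORed in, giving
Q = 2^(n-1)*D0 ^ ... ^ 2*D(n-2) ^ D(n-1), and the trailing *_SYNDROME()
passes keep doubling where a short column supplies no data. A scalar sketch
over one byte lane (hypothetical code, assuming a gf_mul2() helper):

	#include <stddef.h>
	#include <stdint.h>

	extern uint8_t gf_mul2(uint8_t a);	/* assumed GF(2^8) doubling */

	static uint8_t
	q_parity(const uint8_t *d, size_t ncols)
	{
		uint8_t q = 0;
		size_t c;

		for (c = 0; c < ncols; c++)
			q = (uint8_t)(gf_mul2(q) ^ d[c]);  /* MUL2; XOR */
		return (q);
	}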
+
+
+/*
+ * PARITY CALCULATION
+ *
+ * Macros *_SYNDROME are used for parity/syndrome calculation.
+ * The *_D_SYNDROME() macros calculate the syndrome from offset 0 to the
+ * length of the data column, while the *_SYNDROME() macros only update the
+ * parity/syndrome where the data column is shorter than the parity column.
+ *
+ * P parity is calculated using raidz_add_abd().
+ */
+
+/*
+ * Generate P parity (RAIDZ1)
+ *
+ * @rm RAIDZ map
+ */
+static raidz_inline void
+raidz_generate_p_impl(raidz_map_t * const rm)
+{
+ size_t c;
+ const size_t ncols = raidz_ncols(rm);
+ const size_t psize = rm->rm_col[CODE_P].rc_size;
+ abd_t *pabd = rm->rm_col[CODE_P].rc_abd;
+ size_t size;
+ abd_t *dabd;
+
+ raidz_math_begin();
+
+ /* start with first data column */
+ raidz_copy(pabd, rm->rm_col[1].rc_abd, psize);
+
+ for (c = 2; c < ncols; c++) {
+ dabd = rm->rm_col[c].rc_abd;
+ size = rm->rm_col[c].rc_size;
+
+ /* add data column */
+ raidz_add(pabd, dabd, size);
+ }
+
+ raidz_math_end();
+}
+
+
+/*
+ * Generate PQ parity (RAIDZ2)
+ * The function is called per data column.
+ *
+ * @c array of pointers to parity (code) columns
+ * @dc pointer to data column
+ * @csize size of parity columns
+ * @dsize size of data column
+ */
+static void
+raidz_gen_pq_add(void **c, const void *dc, const size_t csize,
+ const size_t dsize)
+{
+ v_t *p = (v_t *)c[0];
+ v_t *q = (v_t *)c[1];
+ const v_t *d = (v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const qend = q + (csize / sizeof (v_t));
+
+ GEN_PQ_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += GEN_PQ_STRIDE, p += GEN_PQ_STRIDE,
+ q += GEN_PQ_STRIDE) {
+ LOAD(d, GEN_PQ_D);
+ P_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, p);
+ Q_D_SYNDROME(GEN_PQ_D, GEN_PQ_C, q);
+ }
+ for (; q < qend; q += GEN_PQ_STRIDE) {
+ Q_SYNDROME(GEN_PQ_C, q);
+ }
+}
+
+
+/*
+ * Generate PQ parity (RAIDZ2)
+ *
+ * @rm RAIDZ map
+ */
+static raidz_inline void
+raidz_generate_pq_impl(raidz_map_t * const rm)
+{
+ size_t c;
+ const size_t ncols = raidz_ncols(rm);
+ const size_t csize = rm->rm_col[CODE_P].rc_size;
+ size_t dsize;
+ abd_t *dabd;
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_Q].rc_abd
+ };
+
+ raidz_math_begin();
+
+ raidz_copy(cabds[CODE_P], rm->rm_col[2].rc_abd, csize);
+ raidz_copy(cabds[CODE_Q], rm->rm_col[2].rc_abd, csize);
+
+ for (c = 3; c < ncols; c++) {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+
+ abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 2,
+ raidz_gen_pq_add);
+ }
+
+ raidz_math_end();
+}
+
+
+/*
+ * Generate PQR parity (RAIDZ3)
+ * The function is called per data column.
+ *
+ * @c array of pointers to parity (code) columns
+ * @dc pointer to data column
+ * @csize size of parity columns
+ * @dsize size of data column
+ */
+static void
+raidz_gen_pqr_add(void **c, const void *dc, const size_t csize,
+ const size_t dsize)
+{
+ v_t *p = (v_t *)c[0];
+ v_t *q = (v_t *)c[1];
+ v_t *r = (v_t *)c[CODE_R];
+ const v_t *d = (v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const qend = q + (csize / sizeof (v_t));
+
+ GEN_PQR_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += GEN_PQR_STRIDE, p += GEN_PQR_STRIDE,
+ q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
+ LOAD(d, GEN_PQR_D);
+ P_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, p);
+ Q_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, q);
+ R_D_SYNDROME(GEN_PQR_D, GEN_PQR_C, r);
+ }
+ for (; q < qend; q += GEN_PQR_STRIDE, r += GEN_PQR_STRIDE) {
+ Q_SYNDROME(GEN_PQR_C, q);
+ R_SYNDROME(GEN_PQR_C, r);
+ }
+}
+
+
+/*
+ * Generate PQR parity (RAIDZ3)
+ *
+ * @rm RAIDZ map
+ */
+static raidz_inline void
+raidz_generate_pqr_impl(raidz_map_t * const rm)
+{
+ size_t c;
+ const size_t ncols = raidz_ncols(rm);
+ const size_t csize = rm->rm_col[CODE_P].rc_size;
+ size_t dsize;
+ abd_t *dabd;
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_Q].rc_abd,
+ rm->rm_col[CODE_R].rc_abd
+ };
+
+ raidz_math_begin();
+
+ raidz_copy(cabds[CODE_P], rm->rm_col[3].rc_abd, csize);
+ raidz_copy(cabds[CODE_Q], rm->rm_col[3].rc_abd, csize);
+ raidz_copy(cabds[CODE_R], rm->rm_col[3].rc_abd, csize);
+
+ for (c = 4; c < ncols; c++) {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+
+ abd_raidz_gen_iterate(cabds, dabd, csize, dsize, 3,
+ raidz_gen_pqr_add);
+ }
+
+ raidz_math_end();
+}
+
+
+/*
+ * DATA RECONSTRUCTION
+ *
+ * The data reconstruction process consists of two phases:
+ * - Syndrome calculation
+ * - Data reconstruction
+ *
+ * The syndrome is calculated by generating parity using the available data
+ * columns, with zeros substituted for the erased columns. The existing
+ * parity is then added to the corresponding syndrome value to obtain the
+ * [P|Q|R]syn values from the equations:
+ * P = Psyn + Dx + Dy + Dz
+ * Q = Qsyn + 2^x * Dx + 2^y * Dy + 2^z * Dz
+ * R = Rsyn + 4^x * Dx + 4^y * Dy + 4^z * Dz
+ *
+ * In the data reconstruction phase, these equations are solved for the
+ * missing data (Dx, Dy, Dz). This generally involves multiplying the known
+ * symbols by constant coefficients and adding them together. The
+ * multiplication coefficients are calculated ahead of the operation in the
+ * raidz_rec_[q|r|pq|pr|qr|pqr]_coeff() functions.
+ *
+ * IMPLEMENTATION NOTE: A RAID-Z block can have a complex geometry, with
+ * "big" and "short" columns.
+ * For this reason, reconstruction is performed in a minimum of two steps:
+ * first from offset 0 to short_size, then from short_size to big_size.
+ * The calculation functions REC_[*]_BLOCK() are implemented to work over
+ * both ranges. The split also enables removal of conditional expressions
+ * from the loop bodies, improving the throughput of the SIMD
+ * implementations. For best performance, all functions marked with the
+ * raidz_inline attribute must be inlined by the compiler.
+ *
+ * parity data
+ * columns columns
+ * <----------> <------------------>
+ * x y <----+ missing columns (x, y)
+ * | |
+ * +---+---+---+---+-v-+---+-v-+---+ ^ 0
+ * | | | | | | | | | |
+ * | | | | | | | | | |
+ * | P | Q | R | D | D | D | D | D | |
+ * | | | | 0 | 1 | 2 | 3 | 4 | |
+ * | | | | | | | | | v
+ * | | | | | +---+---+---+ ^ short_size
+ * | | | | | | |
+ * +---+---+---+---+---+ v big_size
+ * <------------------> <---------->
+ * big columns short columns
+ *
+ */
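+
+/*
+ * Worked example (single erasure via Q, illustrative): with data column x
+ * missing, the syndrome pass computes Qsyn from the surviving columns with
+ * zeros substituted for Dx, so from the Q equation above
+ *
+ * Q ^ Qsyn = 2^e * Dx, where e is the exponent assigned to column x,
+ *
+ * and the missing column follows by a single constant multiplication:
+ *
+ * Dx = (Q ^ Qsyn) * 2^(255 - e)
+ *
+ * because the non-zero elements of GF(2^8) form a cyclic group of order
+ * 255, making 2^(255 - e) the multiplicative inverse of 2^e. This inverse
+ * is the kind of coefficient the raidz_rec_*_coeff() helpers precompute.
+ */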
+
+
+
+
+/*
+ * Reconstruct single data column using P parity
+ *
+ * @syn_method raidz_add_abd()
+ * @rec_method not applicable
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_p_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ size_t size;
+ abd_t *dabd;
+
+ raidz_math_begin();
+
+ /* copy P into target */
+ raidz_copy(xabd, rm->rm_col[CODE_P].rc_abd, xsize);
+
+ /* generate p_syndrome */
+ for (c = firstdc; c < ncols; c++) {
+ if (c == x)
+ continue;
+
+ dabd = rm->rm_col[c].rc_abd;
+ size = MIN(rm->rm_col[c].rc_size, xsize);
+
+ raidz_add(xabd, dabd, size);
+ }
+
+ raidz_math_end();
+
+ return (1 << CODE_P);
+}
+
+
+/*
+ * Generate Q syndrome (Qsyn)
+ *
+ * @xc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @xsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_q_abd(void **xc, const void *dc, const size_t xsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *)xc[TARGET_X];
+ const v_t *d = (v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const xend = x + (xsize / sizeof (v_t));
+
+ SYN_Q_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
+ LOAD(d, SYN_Q_D);
+ Q_D_SYNDROME(SYN_Q_D, SYN_Q_X, x);
+ }
+ for (; x < xend; x += SYN_STRIDE) {
+ Q_SYNDROME(SYN_Q_X, x);
+ }
+}
+
+
+/*
+ * Reconstruct single data column using Q parity
+ *
+ * @syn_method raidz_add_abd()
+ * @rec_method raidz_mul_abd_cb()
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_q_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ const size_t xsize = rm->rm_col[x].rc_size;
+ abd_t *tabds[] = { xabd };
+
+ unsigned coeff[MUL_CNT];
+ raidz_rec_q_coeff(rm, tgtidx, coeff);
+
+ raidz_math_begin();
+
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ }
+
+ /* generate q_syndrome */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
+ raidz_syn_q_abd);
+ }
+
+ /* add Q to the syndrome */
+ raidz_add(xabd, rm->rm_col[CODE_Q].rc_abd, xsize);
+
+ /* transform the syndrome */
+ abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void *)coeff);
+
+ raidz_math_end();
+
+ return (1 << CODE_Q);
+}
+
+
+/*
+ * Generate R syndrome (Rsyn)
+ *
+ * @xc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_r_abd(void **xc, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *)xc[TARGET_X];
+ const v_t *d = (v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+
+ SYN_R_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE) {
+ LOAD(d, SYN_R_D);
+ R_D_SYNDROME(SYN_R_D, SYN_R_X, x);
+ }
+ for (; x < xend; x += SYN_STRIDE) {
+ R_SYNDROME(SYN_R_X, x);
+ }
+}
+
+
+/*
+ * Reconstruct single data column using R parity
+ *
+ * @syn_method raidz_add_abd()
+ * @rec_method raidz_mul_abd_cb()
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_r_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *tabds[] = { xabd };
+
+ unsigned coeff[MUL_CNT];
+ raidz_rec_r_coeff(rm, tgtidx, coeff);
+
+ raidz_math_begin();
+
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ }
+
+ /* generate r_syndrome */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 1,
+ raidz_syn_r_abd);
+ }
+
+ /* add R to the syndrome */
+ raidz_add(xabd, rm->rm_col[CODE_R].rc_abd, xsize);
+
+ /* transform the syndrome */
+ abd_iterate_func(xabd, 0, xsize, raidz_mul_abd_cb, (void *)coeff);
+
+ raidz_math_end();
+
+ return (1 << CODE_R);
+}
+
+
+/*
+ * Generate P and Q syndromes
+ *
+ * @tc array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_pq_abd(void **tc, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *)tc[TARGET_X];
+ v_t *y = (v_t *)tc[TARGET_Y];
+ const v_t *d = (v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const yend = y + (tsize / sizeof (v_t));
+
+ SYN_PQ_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
+ LOAD(d, SYN_PQ_D);
+ P_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, x);
+ Q_D_SYNDROME(SYN_PQ_D, SYN_PQ_X, y);
+ }
+ for (; y < yend; y += SYN_STRIDE) {
+ Q_SYNDROME(SYN_PQ_X, y);
+ }
+}
+
+/*
+ * Reconstruct data using PQ parity and PQ syndromes
+ *
+ * @tc syndrome/result columns
+ * @tsize size of syndrome/result columns
+ * @c parity columns
+ * @mul array of multiplication constants
+ */
+static void
+raidz_rec_pq_abd(void **tc, const size_t tsize, void **c,
+ const unsigned *mul)
+{
+ v_t *x = (v_t *)tc[TARGET_X];
+ v_t *y = (v_t *)tc[TARGET_Y];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *p = (v_t *)c[CODE_P];
+ const v_t *q = (v_t *)c[CODE_Q];
+
+ REC_PQ_DEFINE();
+
+ for (; x < xend; x += REC_PQ_STRIDE, y += REC_PQ_STRIDE,
+ p += REC_PQ_STRIDE, q += REC_PQ_STRIDE) {
+ LOAD(x, REC_PQ_X);
+ LOAD(y, REC_PQ_Y);
+
+ XOR_ACC(p, REC_PQ_X);
+ XOR_ACC(q, REC_PQ_Y);
+
+ /* Save Pxy */
+ COPY(REC_PQ_X, REC_PQ_T);
+
+ /* Calc X */
+ MUL(mul[MUL_PQ_X], REC_PQ_X);
+ MUL(mul[MUL_PQ_Y], REC_PQ_Y);
+ XOR(REC_PQ_Y, REC_PQ_X);
+ STORE(x, REC_PQ_X);
+
+ /* Calc Y */
+ XOR(REC_PQ_T, REC_PQ_X);
+ STORE(y, REC_PQ_X);
+ }
+}
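+
+/*
+ * (Illustrative algebra for the loop above: after the XOR_ACC steps,
+ * X = Pxy = Dx ^ Dy and Y = Qxy = 2^x * Dx ^ 2^y * Dy. Solving this 2x2
+ * system gives Dx = mul[MUL_PQ_X] * Pxy ^ mul[MUL_PQ_Y] * Qxy, and since
+ * Pxy was saved in REC_PQ_T, Dy follows for free as Dy = Pxy ^ Dx.)
+ */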
+
+
+/*
+ * Reconstruct two data columns using PQ parity
+ *
+ * @syn_method raidz_syn_pq_abd()
+ * @rec_method raidz_rec_pq_abd()
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_pq_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t y = tgtidx[TARGET_Y];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ const size_t ysize = rm->rm_col[y].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *yabd = rm->rm_col[y].rc_abd;
+ abd_t *tabds[2] = { xabd, yabd };
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_Q].rc_abd
+ };
+
+ unsigned coeff[MUL_CNT];
+ raidz_rec_pq_coeff(rm, tgtidx, coeff);
+
+ /*
+ * Check if some of the targets are shorter than others.
+ * In this case, the shorter target needs to be replaced with a
+ * new buffer so that the syndrome can be calculated on full length.
+ */
+ if (ysize < xsize) {
+ yabd = abd_alloc(xsize, B_FALSE);
+ tabds[1] = yabd;
+ }
+
+ raidz_math_begin();
+
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ raidz_zero(yabd, xsize);
+ }
+
+ /* generate pq_syndrome */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x || c == y) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
+ raidz_syn_pq_abd);
+ }
+
+ abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pq_abd, coeff);
+
+ /* Copy shorter targets back to the original abd buffer */
+ if (ysize < xsize)
+ raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+
+ raidz_math_end();
+
+ if (ysize < xsize)
+ abd_free(yabd);
+
+ return ((1 << CODE_P) | (1 << CODE_Q));
+}
+
+
+/*
+ * Generate P and R syndromes
+ *
+ * @c array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_pr_abd(void **c, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *)c[TARGET_X];
+ v_t *y = (v_t *)c[TARGET_Y];
+ const v_t *d = (v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+ const v_t * const yend = y + (tsize / sizeof (v_t));
+
+ SYN_PR_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
+ LOAD(d, SYN_PR_D);
+ P_D_SYNDROME(SYN_PR_D, SYN_PR_X, x);
+ R_D_SYNDROME(SYN_PR_D, SYN_PR_X, y);
+ }
+ for (; y < yend; y += SYN_STRIDE) {
+ R_SYNDROME(SYN_PR_X, y);
+ }
+}
+
+/*
+ * Reconstruct data using PR parity and PR syndromes
+ *
+ * @t syndrome/result columns
+ * @tsize size of syndrome/result columns
+ * @c parity columns
+ * @mul array of multiplication constants
+ */
+static void
+raidz_rec_pr_abd(void **t, const size_t tsize, void **c,
+ const unsigned *mul)
+{
+ v_t *x = (v_t *)t[TARGET_X];
+ v_t *y = (v_t *)t[TARGET_Y];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *p = (v_t *)c[CODE_P];
+ const v_t *q = (v_t *)c[CODE_Q];
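+ /* NB: cabds[] holds (P, R) for this method, so 'q' walks the R column */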
+
+ REC_PR_DEFINE();
+
+ for (; x < xend; x += REC_PR_STRIDE, y += REC_PR_STRIDE,
+ p += REC_PR_STRIDE, q += REC_PR_STRIDE) {
+ LOAD(x, REC_PR_X);
+ LOAD(y, REC_PR_Y);
+ XOR_ACC(p, REC_PR_X);
+ XOR_ACC(q, REC_PR_Y);
+
+ /* Save Pxy */
+ COPY(REC_PR_X, REC_PR_T);
+
+ /* Calc X */
+ MUL(mul[MUL_PR_X], REC_PR_X);
+ MUL(mul[MUL_PR_Y], REC_PR_Y);
+ XOR(REC_PR_Y, REC_PR_X);
+ STORE(x, REC_PR_X);
+
+ /* Calc Y */
+ XOR(REC_PR_T, REC_PR_X);
+ STORE(y, REC_PR_X);
+ }
+}
+
+
+/*
+ * Reconstruct two data columns using PR parity
+ *
+ * @syn_method raidz_syn_pr_abd()
+ * @rec_method raidz_rec_pr_abd()
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_pr_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[0];
+ const size_t y = tgtidx[1];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ const size_t ysize = rm->rm_col[y].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *yabd = rm->rm_col[y].rc_abd;
+ abd_t *tabds[2] = { xabd, yabd };
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_R].rc_abd
+ };
+ unsigned coeff[MUL_CNT];
+ raidz_rec_pr_coeff(rm, tgtidx, coeff);
+
+ /*
+ * Check if some of the targets are shorter than others.
+ * They need to be replaced with a new buffer so that the syndrome
+ * can be calculated on full length.
+ */
+ if (ysize < xsize) {
+ yabd = abd_alloc(xsize, B_FALSE);
+ tabds[1] = yabd;
+ }
+
+ raidz_math_begin();
+
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ raidz_zero(yabd, xsize);
+ }
+
+ /* generate pr_syndrome */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x || c == y) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
+ raidz_syn_pr_abd);
+ }
+
+ abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_pr_abd, coeff);
+
+ /*
+ * Copy shorter targets back to the original abd buffer
+ */
+ if (ysize < xsize)
+ raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+
+ raidz_math_end();
+
+ if (ysize < xsize)
+ abd_free(yabd);
+
+ return ((1 << CODE_P) | (1 << CODE_R));
+}
+
+
+/*
+ * Generate Q and R syndromes
+ *
+ * @c array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_qr_abd(void **c, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *)c[TARGET_X];
+ v_t *y = (v_t *)c[TARGET_Y];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *d = (v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+
+ SYN_QR_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE) {
+ LOAD(d, SYN_QR_D);
+ Q_D_SYNDROME(SYN_QR_D, SYN_QR_X, x);
+ R_D_SYNDROME(SYN_QR_D, SYN_QR_X, y);
+ }
+ for (; x < xend; x += SYN_STRIDE, y += SYN_STRIDE) {
+ Q_SYNDROME(SYN_QR_X, x);
+ R_SYNDROME(SYN_QR_X, y);
+ }
+}
+
+
+/*
+ * Reconstruct data using QR parity and QR syndromes
+ *
+ * @t syndrome/result columns
+ * @tsize size of syndrome/result columns
+ * @c parity columns
+ * @mul array of multiplication constants
+ */
+static void
+raidz_rec_qr_abd(void **t, const size_t tsize, void **c,
+ const unsigned *mul)
+{
+ v_t *x = (v_t *)t[TARGET_X];
+ v_t *y = (v_t *)t[TARGET_Y];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *p = (v_t *)c[CODE_P];
+ const v_t *q = (v_t *)c[CODE_Q];
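+ /* NB: cabds[] holds (Q, R) for this method: 'p' walks Q and 'q' walks R */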
+
+ REC_QR_DEFINE();
+
+ for (; x < xend; x += REC_QR_STRIDE, y += REC_QR_STRIDE,
+ p += REC_QR_STRIDE, q += REC_QR_STRIDE) {
+ LOAD(x, REC_QR_X);
+ LOAD(y, REC_QR_Y);
+
+ XOR_ACC(p, REC_QR_X);
+ XOR_ACC(q, REC_QR_Y);
+
+ /* Save Qxy */
+ COPY(REC_QR_X, REC_QR_T);
+
+ /* Calc X */
+ MUL(mul[MUL_QR_XQ], REC_QR_X); /* X = Q * xqm */
+ XOR(REC_QR_Y, REC_QR_X); /* X = R ^ X */
+ MUL(mul[MUL_QR_X], REC_QR_X); /* X = X * xm */
+ STORE(x, REC_QR_X);
+
+ /* Calc Y */
+ MUL(mul[MUL_QR_YQ], REC_QR_T); /* Y = Q * yqm */
+ XOR(REC_QR_Y, REC_QR_T); /* Y = R ^ Y */
+ MUL(mul[MUL_QR_Y], REC_QR_T); /* Y = Y * ym */
+ STORE(y, REC_QR_T);
+ }
+}
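+
+/*
+ * (Illustrative algebra for the loop above: after the XOR_ACC steps,
+ * X = Qxy = 2^x * Dx ^ 2^y * Dy and Y = Rxy = 4^x * Dx ^ 4^y * Dy.
+ * Scaling Qxy so that its Dy term matches the Dy term of Rxy and adding
+ * the two eliminates Dy, leaving a multiple of Dx; the final MUL by
+ * mul[MUL_QR_X] (resp. mul[MUL_QR_Y] for the saved copy) normalizes the
+ * result.)
+ */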
+
+
+/*
+ * Reconstruct two data columns using QR parity
+ *
+ * @syn_method raidz_syn_qr_abd()
+ * @rec_method raidz_rec_qr_abd()
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_qr_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t y = tgtidx[TARGET_Y];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ const size_t ysize = rm->rm_col[y].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *yabd = rm->rm_col[y].rc_abd;
+ abd_t *tabds[2] = { xabd, yabd };
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_Q].rc_abd,
+ rm->rm_col[CODE_R].rc_abd
+ };
+ unsigned coeff[MUL_CNT];
+ raidz_rec_qr_coeff(rm, tgtidx, coeff);
+
+ /*
+ * Check if some of the targets are shorter than others.
+ * In this case, the shorter target needs to be replaced with a
+ * new buffer so that the syndrome can be calculated on full length.
+ */
+ if (ysize < xsize) {
+ yabd = abd_alloc(xsize, B_FALSE);
+ tabds[1] = yabd;
+ }
+
+ raidz_math_begin();
+
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ raidz_zero(yabd, xsize);
+ }
+
+ /* generate qr_syndrome */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x || c == y) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 2,
+ raidz_syn_qr_abd);
+ }
+
+ abd_raidz_rec_iterate(cabds, tabds, xsize, 2, raidz_rec_qr_abd, coeff);
+
+ /*
+ * Copy shorter targets back to the original abd buffer
+ */
+ if (ysize < xsize)
+ raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+
+ raidz_math_end();
+
+ if (ysize < xsize)
+ abd_free(yabd);
+
+ return ((1 << CODE_Q) | (1 << CODE_R));
+}
+
+
+/*
+ * Generate P, Q, and R syndromes
+ *
+ * @c array of pointers to syndrome columns
+ * @dc data column (NULL if missing)
+ * @tsize size of syndrome columns
+ * @dsize size of data column (0 if missing)
+ */
+static void
+raidz_syn_pqr_abd(void **c, const void *dc, const size_t tsize,
+ const size_t dsize)
+{
+ v_t *x = (v_t *)c[TARGET_X];
+ v_t *y = (v_t *)c[TARGET_Y];
+ v_t *z = (v_t *)c[TARGET_Z];
+ const v_t * const yend = y + (tsize / sizeof (v_t));
+ const v_t *d = (v_t *)dc;
+ const v_t * const dend = d + (dsize / sizeof (v_t));
+
+ SYN_PQR_DEFINE();
+
+ MUL2_SETUP();
+
+ for (; d < dend; d += SYN_STRIDE, x += SYN_STRIDE, y += SYN_STRIDE,
+ z += SYN_STRIDE) {
+ LOAD(d, SYN_PQR_D);
+ P_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, x);
+ Q_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, y);
+ R_D_SYNDROME(SYN_PQR_D, SYN_PQR_X, z);
+ }
+ for (; y < yend; y += SYN_STRIDE, z += SYN_STRIDE) {
+ Q_SYNDROME(SYN_PQR_X, y);
+ R_SYNDROME(SYN_PQR_X, z);
+ }
+}
+
+
+/*
+ * Reconstruct data using PQR parity and PQR syndromes
+ *
+ * @t syndrome/result columns
+ * @tsize size of syndrome/result columns
+ * @c parity columns
+ * @mul array of multiplication constants
+ */
+static void
+raidz_rec_pqr_abd(void **t, const size_t tsize, void **c,
+ const unsigned * const mul)
+{
+ v_t *x = (v_t *)t[TARGET_X];
+ v_t *y = (v_t *)t[TARGET_Y];
+ v_t *z = (v_t *)t[TARGET_Z];
+ const v_t * const xend = x + (tsize / sizeof (v_t));
+ const v_t *p = (v_t *)c[CODE_P];
+ const v_t *q = (v_t *)c[CODE_Q];
+ const v_t *r = (v_t *)c[CODE_R];
+
+ REC_PQR_DEFINE();
+
+ for (; x < xend; x += REC_PQR_STRIDE, y += REC_PQR_STRIDE,
+ z += REC_PQR_STRIDE, p += REC_PQR_STRIDE, q += REC_PQR_STRIDE,
+ r += REC_PQR_STRIDE) {
+ LOAD(x, REC_PQR_X);
+ LOAD(y, REC_PQR_Y);
+ LOAD(z, REC_PQR_Z);
+
+ XOR_ACC(p, REC_PQR_X);
+ XOR_ACC(q, REC_PQR_Y);
+ XOR_ACC(r, REC_PQR_Z);
+
+ /* Save Pxyz and Qxyz */
+ COPY(REC_PQR_X, REC_PQR_XS);
+ COPY(REC_PQR_Y, REC_PQR_YS);
+
+ /* Calc X */
+ MUL(mul[MUL_PQR_XP], REC_PQR_X); /* Xp = Pxyz * xp */
+ MUL(mul[MUL_PQR_XQ], REC_PQR_Y); /* Xq = Qxyz * xq */
+ XOR(REC_PQR_Y, REC_PQR_X);
+ MUL(mul[MUL_PQR_XR], REC_PQR_Z); /* Xr = Rxyz * xr */
+ XOR(REC_PQR_Z, REC_PQR_X); /* X = Xp + Xq + Xr */
+ STORE(x, REC_PQR_X);
+
+ /* Calc Y */
+ XOR(REC_PQR_X, REC_PQR_XS); /* Pyz = Pxyz + X */
+ MUL(mul[MUL_PQR_YU], REC_PQR_X); /* Xq = X * upd_q */
+ XOR(REC_PQR_X, REC_PQR_YS); /* Qyz = Qxyz + Xq */
+ COPY(REC_PQR_XS, REC_PQR_X); /* restore Pyz */
+ MUL(mul[MUL_PQR_YP], REC_PQR_X); /* Yp = Pyz * yp */
+ MUL(mul[MUL_PQR_YQ], REC_PQR_YS); /* Yq = Qyz * yq */
+ XOR(REC_PQR_X, REC_PQR_YS); /* Y = Yp + Yq */
+ STORE(y, REC_PQR_YS);
+
+ /* Calc Z */
+ XOR(REC_PQR_XS, REC_PQR_YS); /* Z = Pz = Pyz + Y */
+ STORE(z, REC_PQR_YS);
+ }
+}
+
+
+/*
+ * Reconstruct three data columns using PQR parity
+ *
+ * @syn_method raidz_syn_pqr_abd()
+ * @rec_method raidz_rec_pqr_abd()
+ *
+ * @rm RAIDZ map
+ * @tgtidx array of missing data indexes
+ */
+static raidz_inline int
+raidz_reconstruct_pqr_impl(raidz_map_t *rm, const int *tgtidx)
+{
+ size_t c;
+ size_t dsize;
+ abd_t *dabd;
+ const size_t firstdc = raidz_parity(rm);
+ const size_t ncols = raidz_ncols(rm);
+ const size_t x = tgtidx[TARGET_X];
+ const size_t y = tgtidx[TARGET_Y];
+ const size_t z = tgtidx[TARGET_Z];
+ const size_t xsize = rm->rm_col[x].rc_size;
+ const size_t ysize = rm->rm_col[y].rc_size;
+ const size_t zsize = rm->rm_col[z].rc_size;
+ abd_t *xabd = rm->rm_col[x].rc_abd;
+ abd_t *yabd = rm->rm_col[y].rc_abd;
+ abd_t *zabd = rm->rm_col[z].rc_abd;
+ abd_t *tabds[] = { xabd, yabd, zabd };
+ abd_t *cabds[] = {
+ rm->rm_col[CODE_P].rc_abd,
+ rm->rm_col[CODE_Q].rc_abd,
+ rm->rm_col[CODE_R].rc_abd
+ };
+ unsigned coeff[MUL_CNT];
+ raidz_rec_pqr_coeff(rm, tgtidx, coeff);
+
+ /*
+ * Check if some of the targets are shorter than others.
+ * In this case, the shorter targets need to be replaced with
+ * new buffers so that the syndrome can be calculated on full length.
+ */
+ if (ysize < xsize) {
+ yabd = abd_alloc(xsize, B_FALSE);
+ tabds[1] = yabd;
+ }
+ if (zsize < xsize) {
+ zabd = abd_alloc(xsize, B_FALSE);
+ tabds[2] = zabd;
+ }
+
+ raidz_math_begin();
+
+ /* Start with first data column if present */
+ if (firstdc != x) {
+ raidz_copy(xabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(yabd, rm->rm_col[firstdc].rc_abd, xsize);
+ raidz_copy(zabd, rm->rm_col[firstdc].rc_abd, xsize);
+ } else {
+ raidz_zero(xabd, xsize);
+ raidz_zero(yabd, xsize);
+ raidz_zero(zabd, xsize);
+ }
+
+ /* generate pqr_syndrome */
+ for (c = firstdc+1; c < ncols; c++) {
+ if (c == x || c == y || c == z) {
+ dabd = NULL;
+ dsize = 0;
+ } else {
+ dabd = rm->rm_col[c].rc_abd;
+ dsize = rm->rm_col[c].rc_size;
+ }
+
+ abd_raidz_gen_iterate(tabds, dabd, xsize, dsize, 3,
+ raidz_syn_pqr_abd);
+ }
+
+ abd_raidz_rec_iterate(cabds, tabds, xsize, 3, raidz_rec_pqr_abd, coeff);
+
+ /*
+ * Copy shorter targets back to the original abd buffer
+ */
+ if (ysize < xsize)
+ raidz_copy(rm->rm_col[y].rc_abd, yabd, ysize);
+ if (zsize < xsize)
+ raidz_copy(rm->rm_col[z].rc_abd, zabd, zsize);
+
+ raidz_math_end();
+
+ if (ysize < xsize)
+ abd_free(yabd);
+ if (zsize < xsize)
+ abd_free(zabd);
+
+ return ((1 << CODE_P) | (1 << CODE_Q) | (1 << CODE_R));
+}
+
+#endif /* _VDEV_RAIDZ_MATH_IMPL_H */
diff --git a/zfs/module/zfs/vdev_raidz_math_scalar.c b/zfs/module/zfs/vdev_raidz_math_scalar.c
new file mode 100644
index 000000000000..a693bff63ffb
--- /dev/null
+++ b/zfs/module/zfs/vdev_raidz_math_scalar.c
@@ -0,0 +1,336 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#include <sys/vdev_raidz_impl.h>
+
+/*
+ * Provide native CPU scalar routines.
+ * Supports 32-bit and 64-bit CPUs.
+ */
+#if ((~(0x0ULL)) >> 24) == 0xffULL
+#define ELEM_SIZE 4
+typedef uint32_t iv_t;
+#elif ((~(0x0ULL)) >> 56) == 0xffULL
+#define ELEM_SIZE 8
+typedef uint64_t iv_t;
+#endif
+
+/*
+ * Vector type used in scalar implementation
+ *
+ * The union is expected to be of native CPU register size. Since addition
+ * uses the XOR operation, it can be performed on all byte elements at once.
+ * Multiplication requires per-byte access.
+ */
+typedef union {
+ iv_t e;
+ uint8_t b[ELEM_SIZE];
+} v_t;
+
+/*
+ * Precomputed lookup tables for multiplication by a constant
+ *
+ * The reconstruction path requires multiplication by constant factors.
+ * Instead of performing a two-step lookup (log & exp tables), a direct
+ * lookup can be used. Multiplication of element 'a' by a constant 'c' is
+ * obtained as:
+ *
+ * r = vdev_raidz_mul_lt[c][a];
+ *
+ * where the table is indexed directly by the coefficient, as it is filled
+ * in by raidz_init_scalar() below.
+ *
+ * PERFORMANCE NOTE:
+ * Even though the complete lookup table uses 64KiB, only a relatively small
+ * portion of it is used at any one time. The number of bytes accessed for
+ * the different cases is:
+ * - 1 failed disk: 256B (1 mul. coefficient)
+ * - 2 failed disks: 512B (2 mul. coefficients)
+ * - 3 failed disks: 1536B (6 mul. coefficients)
+ *
+ * The size of the accessed lookup table region is only larger for the
+ * reconstruction of 3 failed disks when compared to the traditional log/exp
+ * method, but since the result is obtained in a single lookup step the
+ * performance is doubled.
+ */
+static uint8_t vdev_raidz_mul_lt[256][256] __attribute__((aligned(256)));
+
+static void
+raidz_init_scalar(void)
+{
+ int c, i;
+
+ for (c = 0; c < 256; c++)
+ for (i = 0; i < 256; i++)
+ vdev_raidz_mul_lt[c][i] = gf_mul(c, i);
+}
+
+#define PREFETCHNTA(ptr, offset) {}
+#define PREFETCH(ptr, offset) {}
+
+#define XOR_ACC(src, acc) acc.e ^= ((v_t *)src)[0].e
+#define XOR(src, acc) acc.e ^= src.e
+#define ZERO(acc) acc.e = 0
+#define COPY(src, dst) dst = src
+#define LOAD(src, val) val = ((v_t *)src)[0]
+#define STORE(dst, val) ((v_t *)dst)[0] = val
+
+/*
+ * Constants used for optimized multiplication by 2.
+ */
+static const struct {
+ iv_t mod;
+ iv_t mask;
+ iv_t msb;
+} scalar_mul2_consts = {
+#if ELEM_SIZE == 8
+ .mod = 0x1d1d1d1d1d1d1d1dULL,
+ .mask = 0xfefefefefefefefeULL,
+ .msb = 0x8080808080808080ULL,
+#else
+ .mod = 0x1d1d1d1dULL,
+ .mask = 0xfefefefeULL,
+ .msb = 0x80808080ULL,
+#endif
+};
+
+#define MUL2_SETUP() {}
+
+#define MUL2(a) \
+{ \
+ iv_t _mask; \
+ \
+ _mask = (a).e & scalar_mul2_consts.msb; \
+ _mask = (_mask << 1) - (_mask >> 7); \
+ (a).e = ((a).e << 1) & scalar_mul2_consts.mask; \
+ (a).e = (a).e ^ (_mask & scalar_mul2_consts.mod); \
+}
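+
+/*
+ * Illustrative standalone form of the SWAR trick above (the 64-bit variant
+ * of the same expression; the helper name is hypothetical): every byte is
+ * doubled in GF(2^8), applying the reduction polynomial (0x11d, low byte
+ * 0x1d) to exactly those bytes whose MSB was set before the shift. The
+ * mask step turns each 0x80 byte of msb into 0xff and each 0x00 byte into
+ * 0x00:
+ *
+ * static inline uint64_t gf_mul2x8(uint64_t a)
+ * {
+ * uint64_t msb = a & 0x8080808080808080ULL;
+ * uint64_t mask = (msb << 1) - (msb >> 7);
+ * return (((a << 1) & 0xfefefefefefefefeULL) ^
+ * (mask & 0x1d1d1d1d1d1d1d1dULL));
+ * }
+ */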
+
+#define MUL4(a) \
+{ \
+ MUL2(a); \
+ MUL2(a); \
+}
+
+#define MUL(c, a) \
+{ \
+ const uint8_t *mul_lt = vdev_raidz_mul_lt[c]; \
+ switch (ELEM_SIZE) { \
+ case 8: \
+ a.b[7] = mul_lt[a.b[7]]; \
+ a.b[6] = mul_lt[a.b[6]]; \
+ a.b[5] = mul_lt[a.b[5]]; \
+ a.b[4] = mul_lt[a.b[4]]; \
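+ /* FALLTHROUGH */ \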
+ case 4: \
+ a.b[3] = mul_lt[a.b[3]]; \
+ a.b[2] = mul_lt[a.b[2]]; \
+ a.b[1] = mul_lt[a.b[1]]; \
+ a.b[0] = mul_lt[a.b[0]]; \
+ break; \
+ } \
+}
+
+#define raidz_math_begin() {}
+#define raidz_math_end() {}
+
+#define SYN_STRIDE 1
+
+#define ZERO_DEFINE() v_t d0
+#define ZERO_STRIDE 1
+#define ZERO_D d0
+
+#define COPY_DEFINE() v_t d0
+#define COPY_STRIDE 1
+#define COPY_D d0
+
+#define ADD_DEFINE() v_t d0
+#define ADD_STRIDE 1
+#define ADD_D d0
+
+#define MUL_DEFINE() v_t d0
+#define MUL_STRIDE 1
+#define MUL_D d0
+
+#define GEN_P_STRIDE 1
+#define GEN_P_DEFINE() v_t p0
+#define GEN_P_P p0
+
+#define GEN_PQ_STRIDE 1
+#define GEN_PQ_DEFINE() v_t d0, c0
+#define GEN_PQ_D d0
+#define GEN_PQ_C c0
+
+#define GEN_PQR_STRIDE 1
+#define GEN_PQR_DEFINE() v_t d0, c0
+#define GEN_PQR_D d0
+#define GEN_PQR_C c0
+
+#define SYN_Q_DEFINE() v_t d0, x0
+#define SYN_Q_D d0
+#define SYN_Q_X x0
+
+
+#define SYN_R_DEFINE() v_t d0, x0
+#define SYN_R_D d0
+#define SYN_R_X x0
+
+
+#define SYN_PQ_DEFINE() v_t d0, x0
+#define SYN_PQ_D d0
+#define SYN_PQ_X x0
+
+
+#define REC_PQ_STRIDE 1
+#define REC_PQ_DEFINE() v_t x0, y0, t0
+#define REC_PQ_X x0
+#define REC_PQ_Y y0
+#define REC_PQ_T t0
+
+
+#define SYN_PR_DEFINE() v_t d0, x0
+#define SYN_PR_D d0
+#define SYN_PR_X x0
+
+#define REC_PR_STRIDE 1
+#define REC_PR_DEFINE() v_t x0, y0, t0
+#define REC_PR_X x0
+#define REC_PR_Y y0
+#define REC_PR_T t0
+
+
+#define SYN_QR_DEFINE() v_t d0, x0
+#define SYN_QR_D d0
+#define SYN_QR_X x0
+
+
+#define REC_QR_STRIDE 1
+#define REC_QR_DEFINE() v_t x0, y0, t0
+#define REC_QR_X x0
+#define REC_QR_Y y0
+#define REC_QR_T t0
+
+
+#define SYN_PQR_DEFINE() v_t d0, x0
+#define SYN_PQR_D d0
+#define SYN_PQR_X x0
+
+#define REC_PQR_STRIDE 1
+#define REC_PQR_DEFINE() v_t x0, y0, z0, xs0, ys0
+#define REC_PQR_X x0
+#define REC_PQR_Y y0
+#define REC_PQR_Z z0
+#define REC_PQR_XS xs0
+#define REC_PQR_YS ys0
+
+#include "vdev_raidz_math_impl.h"
+
+DEFINE_GEN_METHODS(scalar);
+DEFINE_REC_METHODS(scalar);
+
+boolean_t
+raidz_will_scalar_work(void)
+{
+ return (B_TRUE); /* always */
+}
+
+const raidz_impl_ops_t vdev_raidz_scalar_impl = {
+ .init = raidz_init_scalar,
+ .fini = NULL,
+ .gen = RAIDZ_GEN_METHODS(scalar),
+ .rec = RAIDZ_REC_METHODS(scalar),
+ .is_supported = &raidz_will_scalar_work,
+ .name = "scalar"
+};
+
+/* Powers of 2 in the RAID-Z Galois field. */
+const uint8_t vdev_raidz_pow2[256] __attribute__((aligned(256))) = {
+ 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
+ 0x1d, 0x3a, 0x74, 0xe8, 0xcd, 0x87, 0x13, 0x26,
+ 0x4c, 0x98, 0x2d, 0x5a, 0xb4, 0x75, 0xea, 0xc9,
+ 0x8f, 0x03, 0x06, 0x0c, 0x18, 0x30, 0x60, 0xc0,
+ 0x9d, 0x27, 0x4e, 0x9c, 0x25, 0x4a, 0x94, 0x35,
+ 0x6a, 0xd4, 0xb5, 0x77, 0xee, 0xc1, 0x9f, 0x23,
+ 0x46, 0x8c, 0x05, 0x0a, 0x14, 0x28, 0x50, 0xa0,
+ 0x5d, 0xba, 0x69, 0xd2, 0xb9, 0x6f, 0xde, 0xa1,
+ 0x5f, 0xbe, 0x61, 0xc2, 0x99, 0x2f, 0x5e, 0xbc,
+ 0x65, 0xca, 0x89, 0x0f, 0x1e, 0x3c, 0x78, 0xf0,
+ 0xfd, 0xe7, 0xd3, 0xbb, 0x6b, 0xd6, 0xb1, 0x7f,
+ 0xfe, 0xe1, 0xdf, 0xa3, 0x5b, 0xb6, 0x71, 0xe2,
+ 0xd9, 0xaf, 0x43, 0x86, 0x11, 0x22, 0x44, 0x88,
+ 0x0d, 0x1a, 0x34, 0x68, 0xd0, 0xbd, 0x67, 0xce,
+ 0x81, 0x1f, 0x3e, 0x7c, 0xf8, 0xed, 0xc7, 0x93,
+ 0x3b, 0x76, 0xec, 0xc5, 0x97, 0x33, 0x66, 0xcc,
+ 0x85, 0x17, 0x2e, 0x5c, 0xb8, 0x6d, 0xda, 0xa9,
+ 0x4f, 0x9e, 0x21, 0x42, 0x84, 0x15, 0x2a, 0x54,
+ 0xa8, 0x4d, 0x9a, 0x29, 0x52, 0xa4, 0x55, 0xaa,
+ 0x49, 0x92, 0x39, 0x72, 0xe4, 0xd5, 0xb7, 0x73,
+ 0xe6, 0xd1, 0xbf, 0x63, 0xc6, 0x91, 0x3f, 0x7e,
+ 0xfc, 0xe5, 0xd7, 0xb3, 0x7b, 0xf6, 0xf1, 0xff,
+ 0xe3, 0xdb, 0xab, 0x4b, 0x96, 0x31, 0x62, 0xc4,
+ 0x95, 0x37, 0x6e, 0xdc, 0xa5, 0x57, 0xae, 0x41,
+ 0x82, 0x19, 0x32, 0x64, 0xc8, 0x8d, 0x07, 0x0e,
+ 0x1c, 0x38, 0x70, 0xe0, 0xdd, 0xa7, 0x53, 0xa6,
+ 0x51, 0xa2, 0x59, 0xb2, 0x79, 0xf2, 0xf9, 0xef,
+ 0xc3, 0x9b, 0x2b, 0x56, 0xac, 0x45, 0x8a, 0x09,
+ 0x12, 0x24, 0x48, 0x90, 0x3d, 0x7a, 0xf4, 0xf5,
+ 0xf7, 0xf3, 0xfb, 0xeb, 0xcb, 0x8b, 0x0b, 0x16,
+ 0x2c, 0x58, 0xb0, 0x7d, 0xfa, 0xe9, 0xcf, 0x83,
+ 0x1b, 0x36, 0x6c, 0xd8, 0xad, 0x47, 0x8e, 0x01
+};
+
+/* Logs of 2 in the RAID-Z Galois field. */
+const uint8_t vdev_raidz_log2[256] __attribute__((aligned(256))) = {
+ 0x00, 0x00, 0x01, 0x19, 0x02, 0x32, 0x1a, 0xc6,
+ 0x03, 0xdf, 0x33, 0xee, 0x1b, 0x68, 0xc7, 0x4b,
+ 0x04, 0x64, 0xe0, 0x0e, 0x34, 0x8d, 0xef, 0x81,
+ 0x1c, 0xc1, 0x69, 0xf8, 0xc8, 0x08, 0x4c, 0x71,
+ 0x05, 0x8a, 0x65, 0x2f, 0xe1, 0x24, 0x0f, 0x21,
+ 0x35, 0x93, 0x8e, 0xda, 0xf0, 0x12, 0x82, 0x45,
+ 0x1d, 0xb5, 0xc2, 0x7d, 0x6a, 0x27, 0xf9, 0xb9,
+ 0xc9, 0x9a, 0x09, 0x78, 0x4d, 0xe4, 0x72, 0xa6,
+ 0x06, 0xbf, 0x8b, 0x62, 0x66, 0xdd, 0x30, 0xfd,
+ 0xe2, 0x98, 0x25, 0xb3, 0x10, 0x91, 0x22, 0x88,
+ 0x36, 0xd0, 0x94, 0xce, 0x8f, 0x96, 0xdb, 0xbd,
+ 0xf1, 0xd2, 0x13, 0x5c, 0x83, 0x38, 0x46, 0x40,
+ 0x1e, 0x42, 0xb6, 0xa3, 0xc3, 0x48, 0x7e, 0x6e,
+ 0x6b, 0x3a, 0x28, 0x54, 0xfa, 0x85, 0xba, 0x3d,
+ 0xca, 0x5e, 0x9b, 0x9f, 0x0a, 0x15, 0x79, 0x2b,
+ 0x4e, 0xd4, 0xe5, 0xac, 0x73, 0xf3, 0xa7, 0x57,
+ 0x07, 0x70, 0xc0, 0xf7, 0x8c, 0x80, 0x63, 0x0d,
+ 0x67, 0x4a, 0xde, 0xed, 0x31, 0xc5, 0xfe, 0x18,
+ 0xe3, 0xa5, 0x99, 0x77, 0x26, 0xb8, 0xb4, 0x7c,
+ 0x11, 0x44, 0x92, 0xd9, 0x23, 0x20, 0x89, 0x2e,
+ 0x37, 0x3f, 0xd1, 0x5b, 0x95, 0xbc, 0xcf, 0xcd,
+ 0x90, 0x87, 0x97, 0xb2, 0xdc, 0xfc, 0xbe, 0x61,
+ 0xf2, 0x56, 0xd3, 0xab, 0x14, 0x2a, 0x5d, 0x9e,
+ 0x84, 0x3c, 0x39, 0x53, 0x47, 0x6d, 0x41, 0xa2,
+ 0x1f, 0x2d, 0x43, 0xd8, 0xb7, 0x7b, 0xa4, 0x76,
+ 0xc4, 0x17, 0x49, 0xec, 0x7f, 0x0c, 0x6f, 0xf6,
+ 0x6c, 0xa1, 0x3b, 0x52, 0x29, 0x9d, 0x55, 0xaa,
+ 0xfb, 0x60, 0x86, 0xb1, 0xbb, 0xcc, 0x3e, 0x5a,
+ 0xcb, 0x59, 0x5f, 0xb0, 0x9c, 0xa9, 0xa0, 0x51,
+ 0x0b, 0xf5, 0x16, 0xeb, 0x7a, 0x75, 0x2c, 0xd7,
+ 0x4f, 0xae, 0xd5, 0xe9, 0xe6, 0xe7, 0xad, 0xe8,
+ 0x74, 0xd6, 0xf4, 0xea, 0xa8, 0x50, 0x58, 0xaf,
+};
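+
+/*
+ * The two tables above support the usual GF(2^8) log/exp multiplication
+ * (illustrative):
+ *
+ * gf_mul(a, b) = vdev_raidz_pow2[(vdev_raidz_log2[a] +
+ * vdev_raidz_log2[b]) % 255], for non-zero a, b
+ *
+ * which is the identity raidz_init_scalar() relies on to fill
+ * vdev_raidz_mul_lt[] once at module init.
+ */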
diff --git a/zfs/module/zfs/vdev_raidz_math_sse2.c b/zfs/module/zfs/vdev_raidz_math_sse2.c
new file mode 100644
index 000000000000..9985da273643
--- /dev/null
+++ b/zfs/module/zfs/vdev_raidz_math_sse2.c
@@ -0,0 +1,622 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#include <sys/isa_defs.h>
+
+#if defined(__x86_64) && defined(HAVE_SSE2)
+
+#include <sys/types.h>
+#include <linux/simd_x86.h>
+
+#define __asm __asm__ __volatile__
+
+#define _REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
+#define REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)
+
+#define VR0_(REG, ...) "xmm"#REG
+#define VR1_(_1, REG, ...) "xmm"#REG
+#define VR2_(_1, _2, REG, ...) "xmm"#REG
+#define VR3_(_1, _2, _3, REG, ...) "xmm"#REG
+#define VR4_(_1, _2, _3, _4, REG, ...) "xmm"#REG
+#define VR5_(_1, _2, _3, _4, _5, REG, ...) "xmm"#REG
+#define VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "xmm"#REG
+#define VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "xmm"#REG
+
+#define VR0(r...) VR0_(r, 1, 2, 3, 4, 5, 6)
+#define VR1(r...) VR1_(r, 1, 2, 3, 4, 5, 6)
+#define VR2(r...) VR2_(r, 1, 2, 3, 4, 5, 6)
+#define VR3(r...) VR3_(r, 1, 2, 3, 4, 5, 6)
+#define VR4(r...) VR4_(r, 1, 2, 3, 4, 5, 6)
+#define VR5(r...) VR5_(r, 1, 2, 3, 4, 5, 6)
+#define VR6(r...) VR6_(r, 1, 2, 3, 4, 5, 6)
+#define VR7(r...) VR7_(r, 1, 2, 3, 4, 5, 6)
+
+#define ELEM_SIZE 16
+
+typedef struct v {
+ uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
+} v_t;
+
+#define XOR_ACC(src, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ __asm( \
+ "pxor 0x00(%[SRC]), %%" VR0(r) "\n" \
+ "pxor 0x10(%[SRC]), %%" VR1(r) "\n" \
+ "pxor 0x20(%[SRC]), %%" VR2(r) "\n" \
+ "pxor 0x30(%[SRC]), %%" VR3(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ case 2: \
+ __asm( \
+ "pxor 0x00(%[SRC]), %%" VR0(r) "\n" \
+ "pxor 0x10(%[SRC]), %%" VR1(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ case 1: \
+ __asm("pxor 0x00(%[SRC]), %%" VR0(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ } \
+}
+
+#define XOR(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 8: \
+ __asm( \
+ "pxor %" VR0(r) ", %" VR4(r) "\n" \
+ "pxor %" VR1(r) ", %" VR5(r) "\n" \
+ "pxor %" VR2(r) ", %" VR6(r) "\n" \
+ "pxor %" VR3(r) ", %" VR7(r)); \
+ break; \
+ case 4: \
+ __asm( \
+ "pxor %" VR0(r) ", %" VR2(r) "\n" \
+ "pxor %" VR1(r) ", %" VR3(r)); \
+ break; \
+ case 2: \
+ __asm( \
+ "pxor %" VR0(r) ", %" VR1(r)); \
+ break; \
+ } \
+}
+
+#define ZERO(r...) XOR(r, r)
+
+#define COPY(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 8: \
+ __asm( \
+ "movdqa %" VR0(r) ", %" VR4(r) "\n" \
+ "movdqa %" VR1(r) ", %" VR5(r) "\n" \
+ "movdqa %" VR2(r) ", %" VR6(r) "\n" \
+ "movdqa %" VR3(r) ", %" VR7(r)); \
+ break; \
+ case 4: \
+ __asm( \
+ "movdqa %" VR0(r) ", %" VR2(r) "\n" \
+ "movdqa %" VR1(r) ", %" VR3(r)); \
+ break; \
+ case 2: \
+ __asm( \
+ "movdqa %" VR0(r) ", %" VR1(r)); \
+ break; \
+ } \
+}
+
+#define LOAD(src, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ __asm( \
+ "movdqa 0x00(%[SRC]), %%" VR0(r) "\n" \
+ "movdqa 0x10(%[SRC]), %%" VR1(r) "\n" \
+ "movdqa 0x20(%[SRC]), %%" VR2(r) "\n" \
+ "movdqa 0x30(%[SRC]), %%" VR3(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ case 2: \
+ __asm( \
+ "movdqa 0x00(%[SRC]), %%" VR0(r) "\n" \
+ "movdqa 0x10(%[SRC]), %%" VR1(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ case 1: \
+ __asm( \
+ "movdqa 0x00(%[SRC]), %%" VR0(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ } \
+}
+
+#define STORE(dst, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ __asm( \
+ "movdqa %%" VR0(r)", 0x00(%[DST])\n" \
+ "movdqa %%" VR1(r)", 0x10(%[DST])\n" \
+ "movdqa %%" VR2(r)", 0x20(%[DST])\n" \
+ "movdqa %%" VR3(r)", 0x30(%[DST])\n" \
+ : : [DST] "r" (dst)); \
+ break; \
+ case 2: \
+ __asm( \
+ "movdqa %%" VR0(r)", 0x00(%[DST])\n" \
+ "movdqa %%" VR1(r)", 0x10(%[DST])\n" \
+ : : [DST] "r" (dst)); \
+ break; \
+ case 1: \
+ __asm( \
+ "movdqa %%" VR0(r)", 0x00(%[DST])\n" \
+ : : [DST] "r" (dst)); \
+ break; \
+ } \
+}
+
+#define MUL2_SETUP() \
+{ \
+ __asm( \
+ "movd %[mask], %%xmm15\n" \
+ "pshufd $0x0, %%xmm15, %%xmm15\n" \
+ : : [mask] "r" (0x1d1d1d1d)); \
+}
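+
+/*
+ * Note (illustrative): MUL2_SETUP() broadcasts the reduction constant 0x1d
+ * into every byte of %xmm15 (movd + pshufd $0x0). _MUL2_x1/_MUL2_x2 then
+ * use %xmm14 (and %xmm13) as scratch: pcmpgtb against zero yields 0xff in
+ * each byte whose MSB is set, selecting the bytes that get reduced by 0x1d
+ * after the paddb doubling. Registers %xmm13-%xmm15 are therefore reserved
+ * and never named in the operation templates below.
+ */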
+
+#define _MUL2_x1(a0) \
+{ \
+ __asm( \
+ "pxor %xmm14, %xmm14\n" \
+ "pcmpgtb %" a0", %xmm14\n" \
+ "pand %xmm15, %xmm14\n" \
+ "paddb %" a0", %" a0 "\n" \
+ "pxor %xmm14, %" a0); \
+}
+
+#define _MUL2_x2(a0, a1) \
+{ \
+ __asm( \
+ "pxor %xmm14, %xmm14\n" \
+ "pxor %xmm13, %xmm13\n" \
+ "pcmpgtb %" a0", %xmm14\n" \
+ "pcmpgtb %" a1", %xmm13\n" \
+ "pand %xmm15, %xmm14\n" \
+ "pand %xmm15, %xmm13\n" \
+ "paddb %" a0", %" a0 "\n" \
+ "paddb %" a1", %" a1 "\n" \
+ "pxor %xmm14, %" a0 "\n" \
+ "pxor %xmm13, %" a1); \
+}
+
+#define MUL2(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ _MUL2_x2(VR0(r), VR1(r)); \
+ _MUL2_x2(VR2(r), VR3(r)); \
+ break; \
+ case 2: \
+ _MUL2_x2(VR0(r), VR1(r)); \
+ break; \
+ case 1: \
+ _MUL2_x1(VR0(r)); \
+ break; \
+ } \
+}
+
+#define MUL4(r...) \
+{ \
+ MUL2(r); \
+ MUL2(r); \
+}
+
+/* General multiplication by adding powers of two */
+
+#define _MUL_PARAM(x, in, acc) \
+{ \
+ if (x & 0x01) { COPY(in, acc); } else { ZERO(acc); } \
+ if (x & 0xfe) { MUL2(in); } \
+ if (x & 0x02) { XOR(in, acc); } \
+ if (x & 0xfc) { MUL2(in); } \
+ if (x & 0x04) { XOR(in, acc); } \
+ if (x & 0xf8) { MUL2(in); } \
+ if (x & 0x08) { XOR(in, acc); } \
+ if (x & 0xf0) { MUL2(in); } \
+ if (x & 0x10) { XOR(in, acc); } \
+ if (x & 0xe0) { MUL2(in); } \
+ if (x & 0x20) { XOR(in, acc); } \
+ if (x & 0xc0) { MUL2(in); } \
+ if (x & 0x40) { XOR(in, acc); } \
+ if (x & 0x80) { MUL2(in); XOR(in, acc); } \
+}
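+
+/*
+ * Example expansion (illustrative): for x = 5 (binary 101), compile-time
+ * constant folding leaves only
+ *
+ * COPY(in, acc); bit 0: acc = a
+ * MUL2(in); in = 2a
+ * MUL2(in); in = 4a
+ * XOR(in, acc); bit 2: acc = a ^ 4a = 5a
+ *
+ * i.e. GF(2^8) double-and-add, so each generated mul_x*_N routine contains
+ * exactly the doublings and additions its constant requires.
+ */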
+
+#define _mul_x1_in 11
+#define _mul_x1_acc 12
+
+#define MUL_x1_DEFINE(x) \
+static void \
+mul_x1_ ## x(void) { _MUL_PARAM(x, _mul_x1_in, _mul_x1_acc); }
+
+#define _mul_x2_in 9, 10
+#define _mul_x2_acc 11, 12
+
+#define MUL_x2_DEFINE(x) \
+static void \
+mul_x2_ ## x(void) { _MUL_PARAM(x, _mul_x2_in, _mul_x2_acc); }
+
+MUL_x1_DEFINE(0); MUL_x1_DEFINE(1); MUL_x1_DEFINE(2); MUL_x1_DEFINE(3);
+MUL_x1_DEFINE(4); MUL_x1_DEFINE(5); MUL_x1_DEFINE(6); MUL_x1_DEFINE(7);
+MUL_x1_DEFINE(8); MUL_x1_DEFINE(9); MUL_x1_DEFINE(10); MUL_x1_DEFINE(11);
+MUL_x1_DEFINE(12); MUL_x1_DEFINE(13); MUL_x1_DEFINE(14); MUL_x1_DEFINE(15);
+MUL_x1_DEFINE(16); MUL_x1_DEFINE(17); MUL_x1_DEFINE(18); MUL_x1_DEFINE(19);
+MUL_x1_DEFINE(20); MUL_x1_DEFINE(21); MUL_x1_DEFINE(22); MUL_x1_DEFINE(23);
+MUL_x1_DEFINE(24); MUL_x1_DEFINE(25); MUL_x1_DEFINE(26); MUL_x1_DEFINE(27);
+MUL_x1_DEFINE(28); MUL_x1_DEFINE(29); MUL_x1_DEFINE(30); MUL_x1_DEFINE(31);
+MUL_x1_DEFINE(32); MUL_x1_DEFINE(33); MUL_x1_DEFINE(34); MUL_x1_DEFINE(35);
+MUL_x1_DEFINE(36); MUL_x1_DEFINE(37); MUL_x1_DEFINE(38); MUL_x1_DEFINE(39);
+MUL_x1_DEFINE(40); MUL_x1_DEFINE(41); MUL_x1_DEFINE(42); MUL_x1_DEFINE(43);
+MUL_x1_DEFINE(44); MUL_x1_DEFINE(45); MUL_x1_DEFINE(46); MUL_x1_DEFINE(47);
+MUL_x1_DEFINE(48); MUL_x1_DEFINE(49); MUL_x1_DEFINE(50); MUL_x1_DEFINE(51);
+MUL_x1_DEFINE(52); MUL_x1_DEFINE(53); MUL_x1_DEFINE(54); MUL_x1_DEFINE(55);
+MUL_x1_DEFINE(56); MUL_x1_DEFINE(57); MUL_x1_DEFINE(58); MUL_x1_DEFINE(59);
+MUL_x1_DEFINE(60); MUL_x1_DEFINE(61); MUL_x1_DEFINE(62); MUL_x1_DEFINE(63);
+MUL_x1_DEFINE(64); MUL_x1_DEFINE(65); MUL_x1_DEFINE(66); MUL_x1_DEFINE(67);
+MUL_x1_DEFINE(68); MUL_x1_DEFINE(69); MUL_x1_DEFINE(70); MUL_x1_DEFINE(71);
+MUL_x1_DEFINE(72); MUL_x1_DEFINE(73); MUL_x1_DEFINE(74); MUL_x1_DEFINE(75);
+MUL_x1_DEFINE(76); MUL_x1_DEFINE(77); MUL_x1_DEFINE(78); MUL_x1_DEFINE(79);
+MUL_x1_DEFINE(80); MUL_x1_DEFINE(81); MUL_x1_DEFINE(82); MUL_x1_DEFINE(83);
+MUL_x1_DEFINE(84); MUL_x1_DEFINE(85); MUL_x1_DEFINE(86); MUL_x1_DEFINE(87);
+MUL_x1_DEFINE(88); MUL_x1_DEFINE(89); MUL_x1_DEFINE(90); MUL_x1_DEFINE(91);
+MUL_x1_DEFINE(92); MUL_x1_DEFINE(93); MUL_x1_DEFINE(94); MUL_x1_DEFINE(95);
+MUL_x1_DEFINE(96); MUL_x1_DEFINE(97); MUL_x1_DEFINE(98); MUL_x1_DEFINE(99);
+MUL_x1_DEFINE(100); MUL_x1_DEFINE(101); MUL_x1_DEFINE(102); MUL_x1_DEFINE(103);
+MUL_x1_DEFINE(104); MUL_x1_DEFINE(105); MUL_x1_DEFINE(106); MUL_x1_DEFINE(107);
+MUL_x1_DEFINE(108); MUL_x1_DEFINE(109); MUL_x1_DEFINE(110); MUL_x1_DEFINE(111);
+MUL_x1_DEFINE(112); MUL_x1_DEFINE(113); MUL_x1_DEFINE(114); MUL_x1_DEFINE(115);
+MUL_x1_DEFINE(116); MUL_x1_DEFINE(117); MUL_x1_DEFINE(118); MUL_x1_DEFINE(119);
+MUL_x1_DEFINE(120); MUL_x1_DEFINE(121); MUL_x1_DEFINE(122); MUL_x1_DEFINE(123);
+MUL_x1_DEFINE(124); MUL_x1_DEFINE(125); MUL_x1_DEFINE(126); MUL_x1_DEFINE(127);
+MUL_x1_DEFINE(128); MUL_x1_DEFINE(129); MUL_x1_DEFINE(130); MUL_x1_DEFINE(131);
+MUL_x1_DEFINE(132); MUL_x1_DEFINE(133); MUL_x1_DEFINE(134); MUL_x1_DEFINE(135);
+MUL_x1_DEFINE(136); MUL_x1_DEFINE(137); MUL_x1_DEFINE(138); MUL_x1_DEFINE(139);
+MUL_x1_DEFINE(140); MUL_x1_DEFINE(141); MUL_x1_DEFINE(142); MUL_x1_DEFINE(143);
+MUL_x1_DEFINE(144); MUL_x1_DEFINE(145); MUL_x1_DEFINE(146); MUL_x1_DEFINE(147);
+MUL_x1_DEFINE(148); MUL_x1_DEFINE(149); MUL_x1_DEFINE(150); MUL_x1_DEFINE(151);
+MUL_x1_DEFINE(152); MUL_x1_DEFINE(153); MUL_x1_DEFINE(154); MUL_x1_DEFINE(155);
+MUL_x1_DEFINE(156); MUL_x1_DEFINE(157); MUL_x1_DEFINE(158); MUL_x1_DEFINE(159);
+MUL_x1_DEFINE(160); MUL_x1_DEFINE(161); MUL_x1_DEFINE(162); MUL_x1_DEFINE(163);
+MUL_x1_DEFINE(164); MUL_x1_DEFINE(165); MUL_x1_DEFINE(166); MUL_x1_DEFINE(167);
+MUL_x1_DEFINE(168); MUL_x1_DEFINE(169); MUL_x1_DEFINE(170); MUL_x1_DEFINE(171);
+MUL_x1_DEFINE(172); MUL_x1_DEFINE(173); MUL_x1_DEFINE(174); MUL_x1_DEFINE(175);
+MUL_x1_DEFINE(176); MUL_x1_DEFINE(177); MUL_x1_DEFINE(178); MUL_x1_DEFINE(179);
+MUL_x1_DEFINE(180); MUL_x1_DEFINE(181); MUL_x1_DEFINE(182); MUL_x1_DEFINE(183);
+MUL_x1_DEFINE(184); MUL_x1_DEFINE(185); MUL_x1_DEFINE(186); MUL_x1_DEFINE(187);
+MUL_x1_DEFINE(188); MUL_x1_DEFINE(189); MUL_x1_DEFINE(190); MUL_x1_DEFINE(191);
+MUL_x1_DEFINE(192); MUL_x1_DEFINE(193); MUL_x1_DEFINE(194); MUL_x1_DEFINE(195);
+MUL_x1_DEFINE(196); MUL_x1_DEFINE(197); MUL_x1_DEFINE(198); MUL_x1_DEFINE(199);
+MUL_x1_DEFINE(200); MUL_x1_DEFINE(201); MUL_x1_DEFINE(202); MUL_x1_DEFINE(203);
+MUL_x1_DEFINE(204); MUL_x1_DEFINE(205); MUL_x1_DEFINE(206); MUL_x1_DEFINE(207);
+MUL_x1_DEFINE(208); MUL_x1_DEFINE(209); MUL_x1_DEFINE(210); MUL_x1_DEFINE(211);
+MUL_x1_DEFINE(212); MUL_x1_DEFINE(213); MUL_x1_DEFINE(214); MUL_x1_DEFINE(215);
+MUL_x1_DEFINE(216); MUL_x1_DEFINE(217); MUL_x1_DEFINE(218); MUL_x1_DEFINE(219);
+MUL_x1_DEFINE(220); MUL_x1_DEFINE(221); MUL_x1_DEFINE(222); MUL_x1_DEFINE(223);
+MUL_x1_DEFINE(224); MUL_x1_DEFINE(225); MUL_x1_DEFINE(226); MUL_x1_DEFINE(227);
+MUL_x1_DEFINE(228); MUL_x1_DEFINE(229); MUL_x1_DEFINE(230); MUL_x1_DEFINE(231);
+MUL_x1_DEFINE(232); MUL_x1_DEFINE(233); MUL_x1_DEFINE(234); MUL_x1_DEFINE(235);
+MUL_x1_DEFINE(236); MUL_x1_DEFINE(237); MUL_x1_DEFINE(238); MUL_x1_DEFINE(239);
+MUL_x1_DEFINE(240); MUL_x1_DEFINE(241); MUL_x1_DEFINE(242); MUL_x1_DEFINE(243);
+MUL_x1_DEFINE(244); MUL_x1_DEFINE(245); MUL_x1_DEFINE(246); MUL_x1_DEFINE(247);
+MUL_x1_DEFINE(248); MUL_x1_DEFINE(249); MUL_x1_DEFINE(250); MUL_x1_DEFINE(251);
+MUL_x1_DEFINE(252); MUL_x1_DEFINE(253); MUL_x1_DEFINE(254); MUL_x1_DEFINE(255);
+
+MUL_x2_DEFINE(0); MUL_x2_DEFINE(1); MUL_x2_DEFINE(2); MUL_x2_DEFINE(3);
+MUL_x2_DEFINE(4); MUL_x2_DEFINE(5); MUL_x2_DEFINE(6); MUL_x2_DEFINE(7);
+MUL_x2_DEFINE(8); MUL_x2_DEFINE(9); MUL_x2_DEFINE(10); MUL_x2_DEFINE(11);
+MUL_x2_DEFINE(12); MUL_x2_DEFINE(13); MUL_x2_DEFINE(14); MUL_x2_DEFINE(15);
+MUL_x2_DEFINE(16); MUL_x2_DEFINE(17); MUL_x2_DEFINE(18); MUL_x2_DEFINE(19);
+MUL_x2_DEFINE(20); MUL_x2_DEFINE(21); MUL_x2_DEFINE(22); MUL_x2_DEFINE(23);
+MUL_x2_DEFINE(24); MUL_x2_DEFINE(25); MUL_x2_DEFINE(26); MUL_x2_DEFINE(27);
+MUL_x2_DEFINE(28); MUL_x2_DEFINE(29); MUL_x2_DEFINE(30); MUL_x2_DEFINE(31);
+MUL_x2_DEFINE(32); MUL_x2_DEFINE(33); MUL_x2_DEFINE(34); MUL_x2_DEFINE(35);
+MUL_x2_DEFINE(36); MUL_x2_DEFINE(37); MUL_x2_DEFINE(38); MUL_x2_DEFINE(39);
+MUL_x2_DEFINE(40); MUL_x2_DEFINE(41); MUL_x2_DEFINE(42); MUL_x2_DEFINE(43);
+MUL_x2_DEFINE(44); MUL_x2_DEFINE(45); MUL_x2_DEFINE(46); MUL_x2_DEFINE(47);
+MUL_x2_DEFINE(48); MUL_x2_DEFINE(49); MUL_x2_DEFINE(50); MUL_x2_DEFINE(51);
+MUL_x2_DEFINE(52); MUL_x2_DEFINE(53); MUL_x2_DEFINE(54); MUL_x2_DEFINE(55);
+MUL_x2_DEFINE(56); MUL_x2_DEFINE(57); MUL_x2_DEFINE(58); MUL_x2_DEFINE(59);
+MUL_x2_DEFINE(60); MUL_x2_DEFINE(61); MUL_x2_DEFINE(62); MUL_x2_DEFINE(63);
+MUL_x2_DEFINE(64); MUL_x2_DEFINE(65); MUL_x2_DEFINE(66); MUL_x2_DEFINE(67);
+MUL_x2_DEFINE(68); MUL_x2_DEFINE(69); MUL_x2_DEFINE(70); MUL_x2_DEFINE(71);
+MUL_x2_DEFINE(72); MUL_x2_DEFINE(73); MUL_x2_DEFINE(74); MUL_x2_DEFINE(75);
+MUL_x2_DEFINE(76); MUL_x2_DEFINE(77); MUL_x2_DEFINE(78); MUL_x2_DEFINE(79);
+MUL_x2_DEFINE(80); MUL_x2_DEFINE(81); MUL_x2_DEFINE(82); MUL_x2_DEFINE(83);
+MUL_x2_DEFINE(84); MUL_x2_DEFINE(85); MUL_x2_DEFINE(86); MUL_x2_DEFINE(87);
+MUL_x2_DEFINE(88); MUL_x2_DEFINE(89); MUL_x2_DEFINE(90); MUL_x2_DEFINE(91);
+MUL_x2_DEFINE(92); MUL_x2_DEFINE(93); MUL_x2_DEFINE(94); MUL_x2_DEFINE(95);
+MUL_x2_DEFINE(96); MUL_x2_DEFINE(97); MUL_x2_DEFINE(98); MUL_x2_DEFINE(99);
+MUL_x2_DEFINE(100); MUL_x2_DEFINE(101); MUL_x2_DEFINE(102); MUL_x2_DEFINE(103);
+MUL_x2_DEFINE(104); MUL_x2_DEFINE(105); MUL_x2_DEFINE(106); MUL_x2_DEFINE(107);
+MUL_x2_DEFINE(108); MUL_x2_DEFINE(109); MUL_x2_DEFINE(110); MUL_x2_DEFINE(111);
+MUL_x2_DEFINE(112); MUL_x2_DEFINE(113); MUL_x2_DEFINE(114); MUL_x2_DEFINE(115);
+MUL_x2_DEFINE(116); MUL_x2_DEFINE(117); MUL_x2_DEFINE(118); MUL_x2_DEFINE(119);
+MUL_x2_DEFINE(120); MUL_x2_DEFINE(121); MUL_x2_DEFINE(122); MUL_x2_DEFINE(123);
+MUL_x2_DEFINE(124); MUL_x2_DEFINE(125); MUL_x2_DEFINE(126); MUL_x2_DEFINE(127);
+MUL_x2_DEFINE(128); MUL_x2_DEFINE(129); MUL_x2_DEFINE(130); MUL_x2_DEFINE(131);
+MUL_x2_DEFINE(132); MUL_x2_DEFINE(133); MUL_x2_DEFINE(134); MUL_x2_DEFINE(135);
+MUL_x2_DEFINE(136); MUL_x2_DEFINE(137); MUL_x2_DEFINE(138); MUL_x2_DEFINE(139);
+MUL_x2_DEFINE(140); MUL_x2_DEFINE(141); MUL_x2_DEFINE(142); MUL_x2_DEFINE(143);
+MUL_x2_DEFINE(144); MUL_x2_DEFINE(145); MUL_x2_DEFINE(146); MUL_x2_DEFINE(147);
+MUL_x2_DEFINE(148); MUL_x2_DEFINE(149); MUL_x2_DEFINE(150); MUL_x2_DEFINE(151);
+MUL_x2_DEFINE(152); MUL_x2_DEFINE(153); MUL_x2_DEFINE(154); MUL_x2_DEFINE(155);
+MUL_x2_DEFINE(156); MUL_x2_DEFINE(157); MUL_x2_DEFINE(158); MUL_x2_DEFINE(159);
+MUL_x2_DEFINE(160); MUL_x2_DEFINE(161); MUL_x2_DEFINE(162); MUL_x2_DEFINE(163);
+MUL_x2_DEFINE(164); MUL_x2_DEFINE(165); MUL_x2_DEFINE(166); MUL_x2_DEFINE(167);
+MUL_x2_DEFINE(168); MUL_x2_DEFINE(169); MUL_x2_DEFINE(170); MUL_x2_DEFINE(171);
+MUL_x2_DEFINE(172); MUL_x2_DEFINE(173); MUL_x2_DEFINE(174); MUL_x2_DEFINE(175);
+MUL_x2_DEFINE(176); MUL_x2_DEFINE(177); MUL_x2_DEFINE(178); MUL_x2_DEFINE(179);
+MUL_x2_DEFINE(180); MUL_x2_DEFINE(181); MUL_x2_DEFINE(182); MUL_x2_DEFINE(183);
+MUL_x2_DEFINE(184); MUL_x2_DEFINE(185); MUL_x2_DEFINE(186); MUL_x2_DEFINE(187);
+MUL_x2_DEFINE(188); MUL_x2_DEFINE(189); MUL_x2_DEFINE(190); MUL_x2_DEFINE(191);
+MUL_x2_DEFINE(192); MUL_x2_DEFINE(193); MUL_x2_DEFINE(194); MUL_x2_DEFINE(195);
+MUL_x2_DEFINE(196); MUL_x2_DEFINE(197); MUL_x2_DEFINE(198); MUL_x2_DEFINE(199);
+MUL_x2_DEFINE(200); MUL_x2_DEFINE(201); MUL_x2_DEFINE(202); MUL_x2_DEFINE(203);
+MUL_x2_DEFINE(204); MUL_x2_DEFINE(205); MUL_x2_DEFINE(206); MUL_x2_DEFINE(207);
+MUL_x2_DEFINE(208); MUL_x2_DEFINE(209); MUL_x2_DEFINE(210); MUL_x2_DEFINE(211);
+MUL_x2_DEFINE(212); MUL_x2_DEFINE(213); MUL_x2_DEFINE(214); MUL_x2_DEFINE(215);
+MUL_x2_DEFINE(216); MUL_x2_DEFINE(217); MUL_x2_DEFINE(218); MUL_x2_DEFINE(219);
+MUL_x2_DEFINE(220); MUL_x2_DEFINE(221); MUL_x2_DEFINE(222); MUL_x2_DEFINE(223);
+MUL_x2_DEFINE(224); MUL_x2_DEFINE(225); MUL_x2_DEFINE(226); MUL_x2_DEFINE(227);
+MUL_x2_DEFINE(228); MUL_x2_DEFINE(229); MUL_x2_DEFINE(230); MUL_x2_DEFINE(231);
+MUL_x2_DEFINE(232); MUL_x2_DEFINE(233); MUL_x2_DEFINE(234); MUL_x2_DEFINE(235);
+MUL_x2_DEFINE(236); MUL_x2_DEFINE(237); MUL_x2_DEFINE(238); MUL_x2_DEFINE(239);
+MUL_x2_DEFINE(240); MUL_x2_DEFINE(241); MUL_x2_DEFINE(242); MUL_x2_DEFINE(243);
+MUL_x2_DEFINE(244); MUL_x2_DEFINE(245); MUL_x2_DEFINE(246); MUL_x2_DEFINE(247);
+MUL_x2_DEFINE(248); MUL_x2_DEFINE(249); MUL_x2_DEFINE(250); MUL_x2_DEFINE(251);
+MUL_x2_DEFINE(252); MUL_x2_DEFINE(253); MUL_x2_DEFINE(254); MUL_x2_DEFINE(255);
+
+
+typedef void (*mul_fn_ptr_t)(void);
+
+static const mul_fn_ptr_t __attribute__((aligned(256)))
+gf_x1_mul_fns[256] = {
+ mul_x1_0, mul_x1_1, mul_x1_2, mul_x1_3, mul_x1_4, mul_x1_5,
+ mul_x1_6, mul_x1_7, mul_x1_8, mul_x1_9, mul_x1_10, mul_x1_11,
+ mul_x1_12, mul_x1_13, mul_x1_14, mul_x1_15, mul_x1_16, mul_x1_17,
+ mul_x1_18, mul_x1_19, mul_x1_20, mul_x1_21, mul_x1_22, mul_x1_23,
+ mul_x1_24, mul_x1_25, mul_x1_26, mul_x1_27, mul_x1_28, mul_x1_29,
+ mul_x1_30, mul_x1_31, mul_x1_32, mul_x1_33, mul_x1_34, mul_x1_35,
+ mul_x1_36, mul_x1_37, mul_x1_38, mul_x1_39, mul_x1_40, mul_x1_41,
+ mul_x1_42, mul_x1_43, mul_x1_44, mul_x1_45, mul_x1_46, mul_x1_47,
+ mul_x1_48, mul_x1_49, mul_x1_50, mul_x1_51, mul_x1_52, mul_x1_53,
+ mul_x1_54, mul_x1_55, mul_x1_56, mul_x1_57, mul_x1_58, mul_x1_59,
+ mul_x1_60, mul_x1_61, mul_x1_62, mul_x1_63, mul_x1_64, mul_x1_65,
+ mul_x1_66, mul_x1_67, mul_x1_68, mul_x1_69, mul_x1_70, mul_x1_71,
+ mul_x1_72, mul_x1_73, mul_x1_74, mul_x1_75, mul_x1_76, mul_x1_77,
+ mul_x1_78, mul_x1_79, mul_x1_80, mul_x1_81, mul_x1_82, mul_x1_83,
+ mul_x1_84, mul_x1_85, mul_x1_86, mul_x1_87, mul_x1_88, mul_x1_89,
+ mul_x1_90, mul_x1_91, mul_x1_92, mul_x1_93, mul_x1_94, mul_x1_95,
+ mul_x1_96, mul_x1_97, mul_x1_98, mul_x1_99, mul_x1_100, mul_x1_101,
+ mul_x1_102, mul_x1_103, mul_x1_104, mul_x1_105, mul_x1_106, mul_x1_107,
+ mul_x1_108, mul_x1_109, mul_x1_110, mul_x1_111, mul_x1_112, mul_x1_113,
+ mul_x1_114, mul_x1_115, mul_x1_116, mul_x1_117, mul_x1_118, mul_x1_119,
+ mul_x1_120, mul_x1_121, mul_x1_122, mul_x1_123, mul_x1_124, mul_x1_125,
+ mul_x1_126, mul_x1_127, mul_x1_128, mul_x1_129, mul_x1_130, mul_x1_131,
+ mul_x1_132, mul_x1_133, mul_x1_134, mul_x1_135, mul_x1_136, mul_x1_137,
+ mul_x1_138, mul_x1_139, mul_x1_140, mul_x1_141, mul_x1_142, mul_x1_143,
+ mul_x1_144, mul_x1_145, mul_x1_146, mul_x1_147, mul_x1_148, mul_x1_149,
+ mul_x1_150, mul_x1_151, mul_x1_152, mul_x1_153, mul_x1_154, mul_x1_155,
+ mul_x1_156, mul_x1_157, mul_x1_158, mul_x1_159, mul_x1_160, mul_x1_161,
+ mul_x1_162, mul_x1_163, mul_x1_164, mul_x1_165, mul_x1_166, mul_x1_167,
+ mul_x1_168, mul_x1_169, mul_x1_170, mul_x1_171, mul_x1_172, mul_x1_173,
+ mul_x1_174, mul_x1_175, mul_x1_176, mul_x1_177, mul_x1_178, mul_x1_179,
+ mul_x1_180, mul_x1_181, mul_x1_182, mul_x1_183, mul_x1_184, mul_x1_185,
+ mul_x1_186, mul_x1_187, mul_x1_188, mul_x1_189, mul_x1_190, mul_x1_191,
+ mul_x1_192, mul_x1_193, mul_x1_194, mul_x1_195, mul_x1_196, mul_x1_197,
+ mul_x1_198, mul_x1_199, mul_x1_200, mul_x1_201, mul_x1_202, mul_x1_203,
+ mul_x1_204, mul_x1_205, mul_x1_206, mul_x1_207, mul_x1_208, mul_x1_209,
+ mul_x1_210, mul_x1_211, mul_x1_212, mul_x1_213, mul_x1_214, mul_x1_215,
+ mul_x1_216, mul_x1_217, mul_x1_218, mul_x1_219, mul_x1_220, mul_x1_221,
+ mul_x1_222, mul_x1_223, mul_x1_224, mul_x1_225, mul_x1_226, mul_x1_227,
+ mul_x1_228, mul_x1_229, mul_x1_230, mul_x1_231, mul_x1_232, mul_x1_233,
+ mul_x1_234, mul_x1_235, mul_x1_236, mul_x1_237, mul_x1_238, mul_x1_239,
+ mul_x1_240, mul_x1_241, mul_x1_242, mul_x1_243, mul_x1_244, mul_x1_245,
+ mul_x1_246, mul_x1_247, mul_x1_248, mul_x1_249, mul_x1_250, mul_x1_251,
+ mul_x1_252, mul_x1_253, mul_x1_254, mul_x1_255
+};
+
+static const mul_fn_ptr_t __attribute__((aligned(256)))
+gf_x2_mul_fns[256] = {
+ mul_x2_0, mul_x2_1, mul_x2_2, mul_x2_3, mul_x2_4, mul_x2_5,
+ mul_x2_6, mul_x2_7, mul_x2_8, mul_x2_9, mul_x2_10, mul_x2_11,
+ mul_x2_12, mul_x2_13, mul_x2_14, mul_x2_15, mul_x2_16, mul_x2_17,
+ mul_x2_18, mul_x2_19, mul_x2_20, mul_x2_21, mul_x2_22, mul_x2_23,
+ mul_x2_24, mul_x2_25, mul_x2_26, mul_x2_27, mul_x2_28, mul_x2_29,
+ mul_x2_30, mul_x2_31, mul_x2_32, mul_x2_33, mul_x2_34, mul_x2_35,
+ mul_x2_36, mul_x2_37, mul_x2_38, mul_x2_39, mul_x2_40, mul_x2_41,
+ mul_x2_42, mul_x2_43, mul_x2_44, mul_x2_45, mul_x2_46, mul_x2_47,
+ mul_x2_48, mul_x2_49, mul_x2_50, mul_x2_51, mul_x2_52, mul_x2_53,
+ mul_x2_54, mul_x2_55, mul_x2_56, mul_x2_57, mul_x2_58, mul_x2_59,
+ mul_x2_60, mul_x2_61, mul_x2_62, mul_x2_63, mul_x2_64, mul_x2_65,
+ mul_x2_66, mul_x2_67, mul_x2_68, mul_x2_69, mul_x2_70, mul_x2_71,
+ mul_x2_72, mul_x2_73, mul_x2_74, mul_x2_75, mul_x2_76, mul_x2_77,
+ mul_x2_78, mul_x2_79, mul_x2_80, mul_x2_81, mul_x2_82, mul_x2_83,
+ mul_x2_84, mul_x2_85, mul_x2_86, mul_x2_87, mul_x2_88, mul_x2_89,
+ mul_x2_90, mul_x2_91, mul_x2_92, mul_x2_93, mul_x2_94, mul_x2_95,
+ mul_x2_96, mul_x2_97, mul_x2_98, mul_x2_99, mul_x2_100, mul_x2_101,
+ mul_x2_102, mul_x2_103, mul_x2_104, mul_x2_105, mul_x2_106, mul_x2_107,
+ mul_x2_108, mul_x2_109, mul_x2_110, mul_x2_111, mul_x2_112, mul_x2_113,
+ mul_x2_114, mul_x2_115, mul_x2_116, mul_x2_117, mul_x2_118, mul_x2_119,
+ mul_x2_120, mul_x2_121, mul_x2_122, mul_x2_123, mul_x2_124, mul_x2_125,
+ mul_x2_126, mul_x2_127, mul_x2_128, mul_x2_129, mul_x2_130, mul_x2_131,
+ mul_x2_132, mul_x2_133, mul_x2_134, mul_x2_135, mul_x2_136, mul_x2_137,
+ mul_x2_138, mul_x2_139, mul_x2_140, mul_x2_141, mul_x2_142, mul_x2_143,
+ mul_x2_144, mul_x2_145, mul_x2_146, mul_x2_147, mul_x2_148, mul_x2_149,
+ mul_x2_150, mul_x2_151, mul_x2_152, mul_x2_153, mul_x2_154, mul_x2_155,
+ mul_x2_156, mul_x2_157, mul_x2_158, mul_x2_159, mul_x2_160, mul_x2_161,
+ mul_x2_162, mul_x2_163, mul_x2_164, mul_x2_165, mul_x2_166, mul_x2_167,
+ mul_x2_168, mul_x2_169, mul_x2_170, mul_x2_171, mul_x2_172, mul_x2_173,
+ mul_x2_174, mul_x2_175, mul_x2_176, mul_x2_177, mul_x2_178, mul_x2_179,
+ mul_x2_180, mul_x2_181, mul_x2_182, mul_x2_183, mul_x2_184, mul_x2_185,
+ mul_x2_186, mul_x2_187, mul_x2_188, mul_x2_189, mul_x2_190, mul_x2_191,
+ mul_x2_192, mul_x2_193, mul_x2_194, mul_x2_195, mul_x2_196, mul_x2_197,
+ mul_x2_198, mul_x2_199, mul_x2_200, mul_x2_201, mul_x2_202, mul_x2_203,
+ mul_x2_204, mul_x2_205, mul_x2_206, mul_x2_207, mul_x2_208, mul_x2_209,
+ mul_x2_210, mul_x2_211, mul_x2_212, mul_x2_213, mul_x2_214, mul_x2_215,
+ mul_x2_216, mul_x2_217, mul_x2_218, mul_x2_219, mul_x2_220, mul_x2_221,
+ mul_x2_222, mul_x2_223, mul_x2_224, mul_x2_225, mul_x2_226, mul_x2_227,
+ mul_x2_228, mul_x2_229, mul_x2_230, mul_x2_231, mul_x2_232, mul_x2_233,
+ mul_x2_234, mul_x2_235, mul_x2_236, mul_x2_237, mul_x2_238, mul_x2_239,
+ mul_x2_240, mul_x2_241, mul_x2_242, mul_x2_243, mul_x2_244, mul_x2_245,
+ mul_x2_246, mul_x2_247, mul_x2_248, mul_x2_249, mul_x2_250, mul_x2_251,
+ mul_x2_252, mul_x2_253, mul_x2_254, mul_x2_255
+};
+
+#define MUL(c, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 2: \
+ COPY(r, _mul_x2_in); \
+ gf_x2_mul_fns[c](); \
+ COPY(_mul_x2_acc, r); \
+ break; \
+ case 1: \
+ COPY(r, _mul_x1_in); \
+ gf_x1_mul_fns[c](); \
+ COPY(_mul_x1_acc, r); \
+ break; \
+ } \
+}
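+
+/*
+ * (MUL dispatches through the tables above: the operand registers are
+ * copied into the fixed in/acc registers, the per-constant routine is
+ * invoked through the function pointer, and the accumulator is copied
+ * back. The indirect call keeps the coefficient a run-time value while
+ * the GF arithmetic itself stays branch-free.)
+ */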
+
+
+#define raidz_math_begin() kfpu_begin()
+#define raidz_math_end() kfpu_end()
+
+#define SYN_STRIDE 4
+
+#define ZERO_STRIDE 4
+#define ZERO_DEFINE() {}
+#define ZERO_D 0, 1, 2, 3
+
+#define COPY_STRIDE 4
+#define COPY_DEFINE() {}
+#define COPY_D 0, 1, 2, 3
+
+#define ADD_STRIDE 4
+#define ADD_DEFINE() {}
+#define ADD_D 0, 1, 2, 3
+
+#define MUL_STRIDE 2
+#define MUL_DEFINE() MUL2_SETUP()
+#define MUL_D 0, 1
+
+#define GEN_P_STRIDE 4
+#define GEN_P_DEFINE() {}
+#define GEN_P_P 0, 1, 2, 3
+
+#define GEN_PQ_STRIDE 4
+#define GEN_PQ_DEFINE() {}
+#define GEN_PQ_D 0, 1, 2, 3
+#define GEN_PQ_C 4, 5, 6, 7
+
+#define GEN_PQR_STRIDE 4
+#define GEN_PQR_DEFINE() {}
+#define GEN_PQR_D 0, 1, 2, 3
+#define GEN_PQR_C 4, 5, 6, 7
+
+#define SYN_Q_DEFINE() {}
+#define SYN_Q_D 0, 1, 2, 3
+#define SYN_Q_X 4, 5, 6, 7
+
+#define SYN_R_DEFINE() {}
+#define SYN_R_D 0, 1, 2, 3
+#define SYN_R_X 4, 5, 6, 7
+
+#define SYN_PQ_DEFINE() {}
+#define SYN_PQ_D 0, 1, 2, 3
+#define SYN_PQ_X 4, 5, 6, 7
+
+#define REC_PQ_STRIDE 2
+#define REC_PQ_DEFINE() MUL2_SETUP()
+#define REC_PQ_X 0, 1
+#define REC_PQ_Y 2, 3
+#define REC_PQ_T 4, 5
+
+#define SYN_PR_DEFINE() {}
+#define SYN_PR_D 0, 1, 2, 3
+#define SYN_PR_X 4, 5, 6, 7
+
+#define REC_PR_STRIDE 2
+#define REC_PR_DEFINE() MUL2_SETUP()
+#define REC_PR_X 0, 1
+#define REC_PR_Y 2, 3
+#define REC_PR_T 4, 5
+
+#define SYN_QR_DEFINE() {}
+#define SYN_QR_D 0, 1, 2, 3
+#define SYN_QR_X 4, 5, 6, 7
+
+#define REC_QR_STRIDE 2
+#define REC_QR_DEFINE() MUL2_SETUP()
+#define REC_QR_X 0, 1
+#define REC_QR_Y 2, 3
+#define REC_QR_T 4, 5
+
+#define SYN_PQR_DEFINE() {}
+#define SYN_PQR_D 0, 1, 2, 3
+#define SYN_PQR_X 4, 5, 6, 7
+
+#define REC_PQR_STRIDE 1
+#define REC_PQR_DEFINE() MUL2_SETUP()
+#define REC_PQR_X 0
+#define REC_PQR_Y 1
+#define REC_PQR_Z 2
+#define REC_PQR_XS 3
+#define REC_PQR_YS 4
+
+
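+/*
+ * Instantiate the parity generation and reconstruction methods from the
+ * common templates using the SSE2 primitives defined above.
+ */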
+#include <sys/vdev_raidz_impl.h>
+#include "vdev_raidz_math_impl.h"
+
+DEFINE_GEN_METHODS(sse2);
+DEFINE_REC_METHODS(sse2);
+
+static boolean_t
+raidz_will_sse2_work(void)
+{
+ return (zfs_sse_available() && zfs_sse2_available());
+}
+
+const raidz_impl_ops_t vdev_raidz_sse2_impl = {
+ .init = NULL,
+ .fini = NULL,
+ .gen = RAIDZ_GEN_METHODS(sse2),
+ .rec = RAIDZ_REC_METHODS(sse2),
+ .is_supported = &raidz_will_sse2_work,
+ .name = "sse2"
+};
+
+#endif /* defined(__x86_64) && defined(HAVE_SSE2) */
diff --git a/zfs/module/zfs/vdev_raidz_math_ssse3.c b/zfs/module/zfs/vdev_raidz_math_ssse3.c
new file mode 100644
index 000000000000..a015baab2d83
--- /dev/null
+++ b/zfs/module/zfs/vdev_raidz_math_ssse3.c
@@ -0,0 +1,2475 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (C) 2016 Gvozden Nešković. All rights reserved.
+ */
+
+#include <sys/isa_defs.h>
+
+#if defined(__x86_64) && defined(HAVE_SSSE3)
+
+#include <sys/types.h>
+#include <linux/simd_x86.h>
+
+#define __asm __asm__ __volatile__
+
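+/*
+ * REG_CNT(r...) evaluates to the number of register arguments (1-8), and
+ * the VRn() macros select the n-th xmm register name from such a list.
+ */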
+#define _REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
+#define REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)
+
+#define VR0_(REG, ...) "xmm"#REG
+#define VR1_(_1, REG, ...) "xmm"#REG
+#define VR2_(_1, _2, REG, ...) "xmm"#REG
+#define VR3_(_1, _2, _3, REG, ...) "xmm"#REG
+#define VR4_(_1, _2, _3, _4, REG, ...) "xmm"#REG
+#define VR5_(_1, _2, _3, _4, _5, REG, ...) "xmm"#REG
+#define VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "xmm"#REG
+#define VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "xmm"#REG
+
+#define VR0(r...) VR0_(r)
+#define VR1(r...) VR1_(r)
+#define VR2(r...) VR2_(r, 1)
+#define VR3(r...) VR3_(r, 1, 2)
+#define VR4(r...) VR4_(r, 1, 2)
+#define VR5(r...) VR5_(r, 1, 2, 3)
+#define VR6(r...) VR6_(r, 1, 2, 3, 4)
+#define VR7(r...) VR7_(r, 1, 2, 3, 4, 5)
+
+#define R_01(REG1, REG2, ...) REG1, REG2
+#define _R_23(_0, _1, REG2, REG3, ...) REG2, REG3
+#define R_23(REG...) _R_23(REG, 1, 2, 3)
+
+#define ASM_BUG() ASSERT(0)
+
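+/* Multiplication lookup tables for _MULx2(); defined at the end of this file. */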
+const uint8_t gf_clmul_mod_lt[4*256][16];
+
+#define ELEM_SIZE 16
+
+typedef struct v {
+ uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
+} v_t;
+
+
+#define XOR_ACC(src, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ __asm( \
+ "pxor 0x00(%[SRC]), %%" VR0(r) "\n" \
+ "pxor 0x10(%[SRC]), %%" VR1(r) "\n" \
+ "pxor 0x20(%[SRC]), %%" VR2(r) "\n" \
+ "pxor 0x30(%[SRC]), %%" VR3(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ case 2: \
+ __asm( \
+ "pxor 0x00(%[SRC]), %%" VR0(r) "\n" \
+ "pxor 0x10(%[SRC]), %%" VR1(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define XOR(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 8: \
+ __asm( \
+ "pxor %" VR0(r) ", %" VR4(r) "\n" \
+ "pxor %" VR1(r) ", %" VR5(r) "\n" \
+ "pxor %" VR2(r) ", %" VR6(r) "\n" \
+ "pxor %" VR3(r) ", %" VR7(r)); \
+ break; \
+ case 4: \
+ __asm( \
+ "pxor %" VR0(r) ", %" VR2(r) "\n" \
+ "pxor %" VR1(r) ", %" VR3(r)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define ZERO(r...) XOR(r, r)
+
+#define COPY(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 8: \
+ __asm( \
+ "movdqa %" VR0(r) ", %" VR4(r) "\n" \
+ "movdqa %" VR1(r) ", %" VR5(r) "\n" \
+ "movdqa %" VR2(r) ", %" VR6(r) "\n" \
+ "movdqa %" VR3(r) ", %" VR7(r)); \
+ break; \
+ case 4: \
+ __asm( \
+ "movdqa %" VR0(r) ", %" VR2(r) "\n" \
+ "movdqa %" VR1(r) ", %" VR3(r)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define LOAD(src, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ __asm( \
+ "movdqa 0x00(%[SRC]), %%" VR0(r) "\n" \
+ "movdqa 0x10(%[SRC]), %%" VR1(r) "\n" \
+ "movdqa 0x20(%[SRC]), %%" VR2(r) "\n" \
+ "movdqa 0x30(%[SRC]), %%" VR3(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ case 2: \
+ __asm( \
+ "movdqa 0x00(%[SRC]), %%" VR0(r) "\n" \
+ "movdqa 0x10(%[SRC]), %%" VR1(r) "\n" \
+ : : [SRC] "r" (src)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define STORE(dst, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ __asm( \
+ "movdqa %%" VR0(r)", 0x00(%[DST])\n" \
+ "movdqa %%" VR1(r)", 0x10(%[DST])\n" \
+ "movdqa %%" VR2(r)", 0x20(%[DST])\n" \
+ "movdqa %%" VR3(r)", 0x30(%[DST])\n" \
+ : : [DST] "r" (dst)); \
+ break; \
+ case 2: \
+ __asm( \
+ "movdqa %%" VR0(r)", 0x00(%[DST])\n" \
+ "movdqa %%" VR1(r)", 0x10(%[DST])\n" \
+ : : [DST] "r" (dst)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
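+/*
+ * Broadcast the GF(2^8) reduction byte 0x1d (the low byte of the raidz
+ * generator polynomial 0x11d) to all lanes of xmm15 for use by MUL2().
+ */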
+#define MUL2_SETUP() \
+{ \
+ __asm( \
+ "movd %[mask], %%xmm15\n" \
+ "pshufd $0x0, %%xmm15, %%xmm15\n" \
+ : : [mask] "r" (0x1d1d1d1d)); \
+}
+
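+/*
+ * Multiply each byte by 2 in GF(2^8). pcmpgtb against zero yields 0xff
+ * in every lane whose top bit is set, selecting the 0x1d reduction mask;
+ * paddb doubles the lane and the final pxor applies the reduction. The
+ * scalar equivalent per byte is:
+ *
+ *	p = (a << 1) ^ ((a & 0x80) ? 0x1d : 0);
+ */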
+#define _MUL2_x2(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 2: \
+ __asm( \
+ "pxor %xmm14, %xmm14\n" \
+ "pxor %xmm13, %xmm13\n" \
+ "pcmpgtb %" VR0(r)", %xmm14\n" \
+ "pcmpgtb %" VR1(r)", %xmm13\n" \
+ "pand %xmm15, %xmm14\n" \
+ "pand %xmm15, %xmm13\n" \
+ "paddb %" VR0(r)", %" VR0(r) "\n" \
+ "paddb %" VR1(r)", %" VR1(r) "\n" \
+ "pxor %xmm14, %" VR0(r) "\n" \
+ "pxor %xmm13, %" VR1(r)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define MUL2(r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ _MUL2_x2(R_01(r)); \
+ _MUL2_x2(R_23(r)); \
+ break; \
+ case 2: \
+ _MUL2_x2(r); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define MUL4(r...) \
+{ \
+ MUL2(r); \
+ MUL2(r); \
+}
+
+#define _0f "xmm15"
+#define _a_save "xmm14"
+#define _b_save "xmm13"
+#define _lt_mod_a "xmm12"
+#define _lt_clmul_a "xmm11"
+#define _lt_mod_b "xmm10"
+#define _lt_clmul_b "xmm15"
+
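+/*
+ * Multiply by an arbitrary constant c with pshufb: each byte is split
+ * into its high and low nibble, both nibbles index the four 16-entry
+ * rows at gf_clmul_mod_lt[4*c] (carry-less product and modular reduction
+ * for each half), and the four lookups are XOR-ed into the final GF(2^8)
+ * product.
+ */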
+#define _MULx2(c, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 2: \
+ __asm( \
+ /* lts for upper part */ \
+ "movd %[mask], %%" _0f "\n" \
+ "pshufd $0x0, %%" _0f ", %%" _0f "\n" \
+ "movdqa 0x00(%[lt]), %%" _lt_mod_a "\n" \
+ "movdqa 0x10(%[lt]), %%" _lt_clmul_a "\n" \
+ /* upper part */ \
+ "movdqa %%" VR0(r) ", %%" _a_save "\n" \
+ "movdqa %%" VR1(r) ", %%" _b_save "\n" \
+ "psraw $0x4, %%" VR0(r) "\n" \
+ "psraw $0x4, %%" VR1(r) "\n" \
+ "pand %%" _0f ", %%" _a_save "\n" \
+ "pand %%" _0f ", %%" _b_save "\n" \
+ "pand %%" _0f ", %%" VR0(r) "\n" \
+ "pand %%" _0f ", %%" VR1(r) "\n" \
+ \
+ "movdqa %%" _lt_mod_a ", %%" _lt_mod_b "\n" \
+ "movdqa %%" _lt_clmul_a ", %%" _lt_clmul_b "\n" \
+ \
+ "pshufb %%" VR0(r) ",%%" _lt_mod_a "\n" \
+ "pshufb %%" VR1(r) ",%%" _lt_mod_b "\n" \
+ "pshufb %%" VR0(r) ",%%" _lt_clmul_a "\n" \
+ "pshufb %%" VR1(r) ",%%" _lt_clmul_b "\n" \
+ \
+ "pxor %%" _lt_mod_a ",%%" _lt_clmul_a "\n" \
+ "pxor %%" _lt_mod_b ",%%" _lt_clmul_b "\n" \
+ "movdqa %%" _lt_clmul_a ",%%" VR0(r) "\n" \
+ "movdqa %%" _lt_clmul_b ",%%" VR1(r) "\n" \
+ /* lts for lower part */ \
+ "movdqa 0x20(%[lt]), %%" _lt_mod_a "\n" \
+ "movdqa 0x30(%[lt]), %%" _lt_clmul_a "\n" \
+ "movdqa %%" _lt_mod_a ", %%" _lt_mod_b "\n" \
+ "movdqa %%" _lt_clmul_a ", %%" _lt_clmul_b "\n" \
+ /* lower part */ \
+ "pshufb %%" _a_save ",%%" _lt_mod_a "\n" \
+ "pshufb %%" _b_save ",%%" _lt_mod_b "\n" \
+ "pshufb %%" _a_save ",%%" _lt_clmul_a "\n" \
+ "pshufb %%" _b_save ",%%" _lt_clmul_b "\n" \
+ \
+ "pxor %%" _lt_mod_a ",%%" VR0(r) "\n" \
+ "pxor %%" _lt_mod_b ",%%" VR1(r) "\n" \
+ "pxor %%" _lt_clmul_a ",%%" VR0(r) "\n" \
+ "pxor %%" _lt_clmul_b ",%%" VR1(r) "\n" \
+ : : [mask] "r" (0x0f0f0f0f), \
+ [lt] "r" (gf_clmul_mod_lt[4*(c)])); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
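+/*
+ * _MULx2() uses xmm10-xmm15 as scratch, so a 4-register multiply is
+ * issued as two independent 2-register passes rather than in one go.
+ */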
+#define MUL(c, r...) \
+{ \
+ switch (REG_CNT(r)) { \
+ case 4: \
+ _MULx2(c, R_23(r)); \
+ _MULx2(c, R_01(r)); \
+ break; \
+ case 2: \
+ _MULx2(c, R_01(r)); \
+ break; \
+ default: \
+ ASM_BUG(); \
+ } \
+}
+
+#define raidz_math_begin() kfpu_begin()
+#define raidz_math_end() kfpu_end()
+
+
+#define SYN_STRIDE 4
+
+#define ZERO_STRIDE 4
+#define ZERO_DEFINE() {}
+#define ZERO_D 0, 1, 2, 3
+
+#define COPY_STRIDE 4
+#define COPY_DEFINE() {}
+#define COPY_D 0, 1, 2, 3
+
+#define ADD_STRIDE 4
+#define ADD_DEFINE() {}
+#define ADD_D 0, 1, 2, 3
+
+#define MUL_STRIDE 4
+#define MUL_DEFINE() {}
+#define MUL_D 0, 1, 2, 3
+
+#define GEN_P_STRIDE 4
+#define GEN_P_DEFINE() {}
+#define GEN_P_P 0, 1, 2, 3
+
+#define GEN_PQ_STRIDE 4
+#define GEN_PQ_DEFINE() {}
+#define GEN_PQ_D 0, 1, 2, 3
+#define GEN_PQ_C 4, 5, 6, 7
+
+#define GEN_PQR_STRIDE 4
+#define GEN_PQR_DEFINE() {}
+#define GEN_PQR_D 0, 1, 2, 3
+#define GEN_PQR_C 4, 5, 6, 7
+
+#define SYN_Q_DEFINE() {}
+#define SYN_Q_D 0, 1, 2, 3
+#define SYN_Q_X 4, 5, 6, 7
+
+#define SYN_R_DEFINE() {}
+#define SYN_R_D 0, 1, 2, 3
+#define SYN_R_X 4, 5, 6, 7
+
+#define SYN_PQ_DEFINE() {}
+#define SYN_PQ_D 0, 1, 2, 3
+#define SYN_PQ_X 4, 5, 6, 7
+
+#define REC_PQ_STRIDE 2
+#define REC_PQ_DEFINE() {}
+#define REC_PQ_X 0, 1
+#define REC_PQ_Y 2, 3
+#define REC_PQ_T 4, 5
+
+#define SYN_PR_DEFINE() {}
+#define SYN_PR_D 0, 1, 2, 3
+#define SYN_PR_X 4, 5, 6, 7
+
+#define REC_PR_STRIDE 2
+#define REC_PR_DEFINE() {}
+#define REC_PR_X 0, 1
+#define REC_PR_Y 2, 3
+#define REC_PR_T 4, 5
+
+#define SYN_QR_DEFINE() {}
+#define SYN_QR_D 0, 1, 2, 3
+#define SYN_QR_X 4, 5, 6, 7
+
+#define REC_QR_STRIDE 2
+#define REC_QR_DEFINE() {}
+#define REC_QR_X 0, 1
+#define REC_QR_Y 2, 3
+#define REC_QR_T 4, 5
+
+#define SYN_PQR_DEFINE() {}
+#define SYN_PQR_D 0, 1, 2, 3
+#define SYN_PQR_X 4, 5, 6, 7
+
+#define REC_PQR_STRIDE 2
+#define REC_PQR_DEFINE() {}
+#define REC_PQR_X 0, 1
+#define REC_PQR_Y 2, 3
+#define REC_PQR_Z 4, 5
+#define REC_PQR_XS 6, 7
+#define REC_PQR_YS 8, 9
+
+
+#include <sys/vdev_raidz_impl.h>
+#include "vdev_raidz_math_impl.h"
+
+DEFINE_GEN_METHODS(ssse3);
+DEFINE_REC_METHODS(ssse3);
+
+static boolean_t
+raidz_will_ssse3_work(void)
+{
+ return (zfs_sse_available() && zfs_sse2_available() &&
+ zfs_ssse3_available());
+}
+
+const raidz_impl_ops_t vdev_raidz_ssse3_impl = {
+ .init = NULL,
+ .fini = NULL,
+ .gen = RAIDZ_GEN_METHODS(ssse3),
+ .rec = RAIDZ_REC_METHODS(ssse3),
+ .is_supported = &raidz_will_ssse3_work,
+ .name = "ssse3"
+};
+
+#endif /* defined(__x86_64) && defined(HAVE_SSSE3) */
+
+
+#if defined(__x86_64)
+#if defined(HAVE_SSSE3) || defined(HAVE_AVX2) || defined(HAVE_AVX512BW)
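+/*
+ * Shared by the SSSE3, AVX2 and AVX-512 implementations. For each
+ * constant c, rows 4c+0/4c+1 hold the reduction and carry-less product
+ * lookups for the high nibble of a byte, and rows 4c+2/4c+3 the same for
+ * the low nibble; see _MULx2() above.
+ */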
+/* BEGIN CSTYLED */
+const uint8_t
+__attribute__((aligned(256))) gf_clmul_mod_lt[4*256][16] =
+{
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e,
+ 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09,
+ 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x04, 0x08, 0x0c, 0x10, 0x14, 0x18, 0x1c,
+ 0x20, 0x24, 0x28, 0x2c, 0x30, 0x34, 0x38, 0x3c },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x05, 0x0a, 0x0f, 0x14, 0x11, 0x1e, 0x1b,
+ 0x28, 0x2d, 0x22, 0x27, 0x3c, 0x39, 0x36, 0x33 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x06, 0x0c, 0x0a, 0x18, 0x1e, 0x14, 0x12,
+ 0x30, 0x36, 0x3c, 0x3a, 0x28, 0x2e, 0x24, 0x22 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x07, 0x0e, 0x09, 0x1c, 0x1b, 0x12, 0x15,
+ 0x38, 0x3f, 0x36, 0x31, 0x24, 0x23, 0x2a, 0x2d },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x08, 0x10, 0x18, 0x20, 0x28, 0x30, 0x38,
+ 0x40, 0x48, 0x50, 0x58, 0x60, 0x68, 0x70, 0x78 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f,
+ 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x0a, 0x14, 0x1e, 0x28, 0x22, 0x3c, 0x36,
+ 0x50, 0x5a, 0x44, 0x4e, 0x78, 0x72, 0x6c, 0x66 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31,
+ 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x0c, 0x18, 0x14, 0x30, 0x3c, 0x28, 0x24,
+ 0x60, 0x6c, 0x78, 0x74, 0x50, 0x5c, 0x48, 0x44 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23,
+ 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a,
+ 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x0f, 0x1e, 0x11, 0x3c, 0x33, 0x22, 0x2d,
+ 0x78, 0x77, 0x66, 0x69, 0x44, 0x4b, 0x5a, 0x55 },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53,
+ 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53,
+ 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77,
+ 0x88, 0x99, 0xaa, 0xbb, 0xcc, 0xdd, 0xee, 0xff },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53,
+ 0xf5, 0xe8, 0xcf, 0xd2, 0x81, 0x9c, 0xbb, 0xa6 },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x12, 0x24, 0x36, 0x48, 0x5a, 0x6c, 0x7e,
+ 0x90, 0x82, 0xb4, 0xa6, 0xd8, 0xca, 0xfc, 0xee },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53,
+ 0xf5, 0xe8, 0xcf, 0xd2, 0x81, 0x9c, 0xbb, 0xa6 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x13, 0x26, 0x35, 0x4c, 0x5f, 0x6a, 0x79,
+ 0x98, 0x8b, 0xbe, 0xad, 0xd4, 0xc7, 0xf2, 0xe1 },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x69, 0x74, 0x53, 0x4e,
+ 0xd2, 0xcf, 0xe8, 0xf5, 0xbb, 0xa6, 0x81, 0x9c },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x14, 0x28, 0x3c, 0x50, 0x44, 0x78, 0x6c,
+ 0xa0, 0xb4, 0x88, 0x9c, 0xf0, 0xe4, 0xd8, 0xcc },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x69, 0x74, 0x53, 0x4e,
+ 0xd2, 0xcf, 0xe8, 0xf5, 0xbb, 0xa6, 0x81, 0x9c },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x15, 0x2a, 0x3f, 0x54, 0x41, 0x7e, 0x6b,
+ 0xa8, 0xbd, 0x82, 0x97, 0xfc, 0xe9, 0xd6, 0xc3 },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x69, 0x74, 0x53, 0x4e,
+ 0xcf, 0xd2, 0xf5, 0xe8, 0xa6, 0xbb, 0x9c, 0x81 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x16, 0x2c, 0x3a, 0x58, 0x4e, 0x74, 0x62,
+ 0xb0, 0xa6, 0x9c, 0x8a, 0xe8, 0xfe, 0xc4, 0xd2 },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x69, 0x74, 0x53, 0x4e,
+ 0xcf, 0xd2, 0xf5, 0xe8, 0xa6, 0xbb, 0x9c, 0x81 },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x17, 0x2e, 0x39, 0x5c, 0x4b, 0x72, 0x65,
+ 0xb8, 0xaf, 0x96, 0x81, 0xe4, 0xf3, 0xca, 0xdd },
+ { 0x00, 0x1d, 0x27, 0x3a, 0x4e, 0x53, 0x69, 0x74,
+ 0x9c, 0x81, 0xbb, 0xa6, 0xd2, 0xcf, 0xf5, 0xe8 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x18, 0x30, 0x28, 0x60, 0x78, 0x50, 0x48,
+ 0xc0, 0xd8, 0xf0, 0xe8, 0xa0, 0xb8, 0x90, 0x88 },
+ { 0x00, 0x1d, 0x27, 0x3a, 0x4e, 0x53, 0x69, 0x74,
+ 0x9c, 0x81, 0xbb, 0xa6, 0xd2, 0xcf, 0xf5, 0xe8 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0x4f,
+ 0xc8, 0xd1, 0xfa, 0xe3, 0xac, 0xb5, 0x9e, 0x87 },
+ { 0x00, 0x1d, 0x27, 0x3a, 0x4e, 0x53, 0x69, 0x74,
+ 0x81, 0x9c, 0xa6, 0xbb, 0xcf, 0xd2, 0xe8, 0xf5 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x1a, 0x34, 0x2e, 0x68, 0x72, 0x5c, 0x46,
+ 0xd0, 0xca, 0xe4, 0xfe, 0xb8, 0xa2, 0x8c, 0x96 },
+ { 0x00, 0x1d, 0x27, 0x3a, 0x4e, 0x53, 0x69, 0x74,
+ 0x81, 0x9c, 0xa6, 0xbb, 0xcf, 0xd2, 0xe8, 0xf5 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x1b, 0x36, 0x2d, 0x6c, 0x77, 0x5a, 0x41,
+ 0xd8, 0xc3, 0xee, 0xf5, 0xb4, 0xaf, 0x82, 0x99 },
+ { 0x00, 0x1d, 0x27, 0x3a, 0x53, 0x4e, 0x74, 0x69,
+ 0xa6, 0xbb, 0x81, 0x9c, 0xf5, 0xe8, 0xd2, 0xcf },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x1c, 0x38, 0x24, 0x70, 0x6c, 0x48, 0x54,
+ 0xe0, 0xfc, 0xd8, 0xc4, 0x90, 0x8c, 0xa8, 0xb4 },
+ { 0x00, 0x1d, 0x27, 0x3a, 0x53, 0x4e, 0x74, 0x69,
+ 0xa6, 0xbb, 0x81, 0x9c, 0xf5, 0xe8, 0xd2, 0xcf },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x1d, 0x3a, 0x27, 0x74, 0x69, 0x4e, 0x53,
+ 0xe8, 0xf5, 0xd2, 0xcf, 0x9c, 0x81, 0xa6, 0xbb },
+ { 0x00, 0x1d, 0x27, 0x3a, 0x53, 0x4e, 0x74, 0x69,
+ 0xbb, 0xa6, 0x9c, 0x81, 0xe8, 0xf5, 0xcf, 0xd2 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x1e, 0x3c, 0x22, 0x78, 0x66, 0x44, 0x5a,
+ 0xf0, 0xee, 0xcc, 0xd2, 0x88, 0x96, 0xb4, 0xaa },
+ { 0x00, 0x1d, 0x27, 0x3a, 0x53, 0x4e, 0x74, 0x69,
+ 0xbb, 0xa6, 0x9c, 0x81, 0xe8, 0xf5, 0xcf, 0xd2 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x1f, 0x3e, 0x21, 0x7c, 0x63, 0x42, 0x5d,
+ 0xf8, 0xe7, 0xc6, 0xd9, 0x84, 0x9b, 0xba, 0xa5 },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6,
+ 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6,
+ 0xcd, 0xf7, 0xb9, 0x83, 0x25, 0x1f, 0x51, 0x6b },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x21, 0x42, 0x63, 0x84, 0xa5, 0xc6, 0xe7,
+ 0x08, 0x29, 0x4a, 0x6b, 0x8c, 0xad, 0xce, 0xef },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6,
+ 0xd0, 0xea, 0xa4, 0x9e, 0x38, 0x02, 0x4c, 0x76 },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x22, 0x44, 0x66, 0x88, 0xaa, 0xcc, 0xee,
+ 0x10, 0x32, 0x54, 0x76, 0x98, 0xba, 0xdc, 0xfe },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6,
+ 0xd0, 0xea, 0xa4, 0x9e, 0x38, 0x02, 0x4c, 0x76 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x23, 0x46, 0x65, 0x8c, 0xaf, 0xca, 0xe9,
+ 0x18, 0x3b, 0x5e, 0x7d, 0x94, 0xb7, 0xd2, 0xf1 },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xf5, 0xcf, 0x81, 0xbb,
+ 0xf7, 0xcd, 0x83, 0xb9, 0x02, 0x38, 0x76, 0x4c },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x24, 0x48, 0x6c, 0x90, 0xb4, 0xd8, 0xfc,
+ 0x20, 0x04, 0x68, 0x4c, 0xb0, 0x94, 0xf8, 0xdc },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xf5, 0xcf, 0x81, 0xbb,
+ 0xf7, 0xcd, 0x83, 0xb9, 0x02, 0x38, 0x76, 0x4c },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x25, 0x4a, 0x6f, 0x94, 0xb1, 0xde, 0xfb,
+ 0x28, 0x0d, 0x62, 0x47, 0xbc, 0x99, 0xf6, 0xd3 },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xf5, 0xcf, 0x81, 0xbb,
+ 0xea, 0xd0, 0x9e, 0xa4, 0x1f, 0x25, 0x6b, 0x51 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x26, 0x4c, 0x6a, 0x98, 0xbe, 0xd4, 0xf2,
+ 0x30, 0x16, 0x7c, 0x5a, 0xa8, 0x8e, 0xe4, 0xc2 },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xf5, 0xcf, 0x81, 0xbb,
+ 0xea, 0xd0, 0x9e, 0xa4, 0x1f, 0x25, 0x6b, 0x51 },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5,
+ 0x38, 0x1f, 0x76, 0x51, 0xa4, 0x83, 0xea, 0xcd },
+ { 0x00, 0x3a, 0x69, 0x53, 0xd2, 0xe8, 0xbb, 0x81,
+ 0xb9, 0x83, 0xd0, 0xea, 0x6b, 0x51, 0x02, 0x38 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x28, 0x50, 0x78, 0xa0, 0x88, 0xf0, 0xd8,
+ 0x40, 0x68, 0x10, 0x38, 0xe0, 0xc8, 0xb0, 0x98 },
+ { 0x00, 0x3a, 0x69, 0x53, 0xd2, 0xe8, 0xbb, 0x81,
+ 0xb9, 0x83, 0xd0, 0xea, 0x6b, 0x51, 0x02, 0x38 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x29, 0x52, 0x7b, 0xa4, 0x8d, 0xf6, 0xdf,
+ 0x48, 0x61, 0x1a, 0x33, 0xec, 0xc5, 0xbe, 0x97 },
+ { 0x00, 0x3a, 0x69, 0x53, 0xd2, 0xe8, 0xbb, 0x81,
+ 0xa4, 0x9e, 0xcd, 0xf7, 0x76, 0x4c, 0x1f, 0x25 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x2a, 0x54, 0x7e, 0xa8, 0x82, 0xfc, 0xd6,
+ 0x50, 0x7a, 0x04, 0x2e, 0xf8, 0xd2, 0xac, 0x86 },
+ { 0x00, 0x3a, 0x69, 0x53, 0xd2, 0xe8, 0xbb, 0x81,
+ 0xa4, 0x9e, 0xcd, 0xf7, 0x76, 0x4c, 0x1f, 0x25 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x2b, 0x56, 0x7d, 0xac, 0x87, 0xfa, 0xd1,
+ 0x58, 0x73, 0x0e, 0x25, 0xf4, 0xdf, 0xa2, 0x89 },
+ { 0x00, 0x3a, 0x69, 0x53, 0xcf, 0xf5, 0xa6, 0x9c,
+ 0x83, 0xb9, 0xea, 0xd0, 0x4c, 0x76, 0x25, 0x1f },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x2c, 0x58, 0x74, 0xb0, 0x9c, 0xe8, 0xc4,
+ 0x60, 0x4c, 0x38, 0x14, 0xd0, 0xfc, 0x88, 0xa4 },
+ { 0x00, 0x3a, 0x69, 0x53, 0xcf, 0xf5, 0xa6, 0x9c,
+ 0x83, 0xb9, 0xea, 0xd0, 0x4c, 0x76, 0x25, 0x1f },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x2d, 0x5a, 0x77, 0xb4, 0x99, 0xee, 0xc3,
+ 0x68, 0x45, 0x32, 0x1f, 0xdc, 0xf1, 0x86, 0xab },
+ { 0x00, 0x3a, 0x69, 0x53, 0xcf, 0xf5, 0xa6, 0x9c,
+ 0x9e, 0xa4, 0xf7, 0xcd, 0x51, 0x6b, 0x38, 0x02 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x2e, 0x5c, 0x72, 0xb8, 0x96, 0xe4, 0xca,
+ 0x70, 0x5e, 0x2c, 0x02, 0xc8, 0xe6, 0x94, 0xba },
+ { 0x00, 0x3a, 0x69, 0x53, 0xcf, 0xf5, 0xa6, 0x9c,
+ 0x9e, 0xa4, 0xf7, 0xcd, 0x51, 0x6b, 0x38, 0x02 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x2f, 0x5e, 0x71, 0xbc, 0x93, 0xe2, 0xcd,
+ 0x78, 0x57, 0x26, 0x09, 0xc4, 0xeb, 0x9a, 0xb5 },
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5,
+ 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5,
+ 0x25, 0x02, 0x6b, 0x4c, 0xb9, 0x9e, 0xf7, 0xd0 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x31, 0x62, 0x53, 0xc4, 0xf5, 0xa6, 0x97,
+ 0x88, 0xb9, 0xea, 0xdb, 0x4c, 0x7d, 0x2e, 0x1f },
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5,
+ 0x38, 0x1f, 0x76, 0x51, 0xa4, 0x83, 0xea, 0xcd },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x32, 0x64, 0x56, 0xc8, 0xfa, 0xac, 0x9e,
+ 0x90, 0xa2, 0xf4, 0xc6, 0x58, 0x6a, 0x3c, 0x0e },
+ { 0x00, 0x27, 0x4e, 0x69, 0x9c, 0xbb, 0xd2, 0xf5,
+ 0x38, 0x1f, 0x76, 0x51, 0xa4, 0x83, 0xea, 0xcd },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x33, 0x66, 0x55, 0xcc, 0xff, 0xaa, 0x99,
+ 0x98, 0xab, 0xfe, 0xcd, 0x54, 0x67, 0x32, 0x01 },
+ { 0x00, 0x27, 0x4e, 0x69, 0x81, 0xa6, 0xcf, 0xe8,
+ 0x1f, 0x38, 0x51, 0x76, 0x9e, 0xb9, 0xd0, 0xf7 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x34, 0x68, 0x5c, 0xd0, 0xe4, 0xb8, 0x8c,
+ 0xa0, 0x94, 0xc8, 0xfc, 0x70, 0x44, 0x18, 0x2c },
+ { 0x00, 0x27, 0x4e, 0x69, 0x81, 0xa6, 0xcf, 0xe8,
+ 0x1f, 0x38, 0x51, 0x76, 0x9e, 0xb9, 0xd0, 0xf7 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x35, 0x6a, 0x5f, 0xd4, 0xe1, 0xbe, 0x8b,
+ 0xa8, 0x9d, 0xc2, 0xf7, 0x7c, 0x49, 0x16, 0x23 },
+ { 0x00, 0x27, 0x4e, 0x69, 0x81, 0xa6, 0xcf, 0xe8,
+ 0x02, 0x25, 0x4c, 0x6b, 0x83, 0xa4, 0xcd, 0xea },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x36, 0x6c, 0x5a, 0xd8, 0xee, 0xb4, 0x82,
+ 0xb0, 0x86, 0xdc, 0xea, 0x68, 0x5e, 0x04, 0x32 },
+ { 0x00, 0x27, 0x4e, 0x69, 0x81, 0xa6, 0xcf, 0xe8,
+ 0x02, 0x25, 0x4c, 0x6b, 0x83, 0xa4, 0xcd, 0xea },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x37, 0x6e, 0x59, 0xdc, 0xeb, 0xb2, 0x85,
+ 0xb8, 0x8f, 0xd6, 0xe1, 0x64, 0x53, 0x0a, 0x3d },
+ { 0x00, 0x27, 0x53, 0x74, 0xa6, 0x81, 0xf5, 0xd2,
+ 0x51, 0x76, 0x02, 0x25, 0xf7, 0xd0, 0xa4, 0x83 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x38, 0x70, 0x48, 0xe0, 0xd8, 0x90, 0xa8,
+ 0xc0, 0xf8, 0xb0, 0x88, 0x20, 0x18, 0x50, 0x68 },
+ { 0x00, 0x27, 0x53, 0x74, 0xa6, 0x81, 0xf5, 0xd2,
+ 0x51, 0x76, 0x02, 0x25, 0xf7, 0xd0, 0xa4, 0x83 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x39, 0x72, 0x4b, 0xe4, 0xdd, 0x96, 0xaf,
+ 0xc8, 0xf1, 0xba, 0x83, 0x2c, 0x15, 0x5e, 0x67 },
+ { 0x00, 0x27, 0x53, 0x74, 0xa6, 0x81, 0xf5, 0xd2,
+ 0x4c, 0x6b, 0x1f, 0x38, 0xea, 0xcd, 0xb9, 0x9e },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x3a, 0x74, 0x4e, 0xe8, 0xd2, 0x9c, 0xa6,
+ 0xd0, 0xea, 0xa4, 0x9e, 0x38, 0x02, 0x4c, 0x76 },
+ { 0x00, 0x27, 0x53, 0x74, 0xa6, 0x81, 0xf5, 0xd2,
+ 0x4c, 0x6b, 0x1f, 0x38, 0xea, 0xcd, 0xb9, 0x9e },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x3b, 0x76, 0x4d, 0xec, 0xd7, 0x9a, 0xa1,
+ 0xd8, 0xe3, 0xae, 0x95, 0x34, 0x0f, 0x42, 0x79 },
+ { 0x00, 0x27, 0x53, 0x74, 0xbb, 0x9c, 0xe8, 0xcf,
+ 0x6b, 0x4c, 0x38, 0x1f, 0xd0, 0xf7, 0x83, 0xa4 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x3c, 0x78, 0x44, 0xf0, 0xcc, 0x88, 0xb4,
+ 0xe0, 0xdc, 0x98, 0xa4, 0x10, 0x2c, 0x68, 0x54 },
+ { 0x00, 0x27, 0x53, 0x74, 0xbb, 0x9c, 0xe8, 0xcf,
+ 0x6b, 0x4c, 0x38, 0x1f, 0xd0, 0xf7, 0x83, 0xa4 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x3d, 0x7a, 0x47, 0xf4, 0xc9, 0x8e, 0xb3,
+ 0xe8, 0xd5, 0x92, 0xaf, 0x1c, 0x21, 0x66, 0x5b },
+ { 0x00, 0x27, 0x53, 0x74, 0xbb, 0x9c, 0xe8, 0xcf,
+ 0x76, 0x51, 0x25, 0x02, 0xcd, 0xea, 0x9e, 0xb9 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x3e, 0x7c, 0x42, 0xf8, 0xc6, 0x84, 0xba,
+ 0xf0, 0xce, 0x8c, 0xb2, 0x08, 0x36, 0x74, 0x4a },
+ { 0x00, 0x27, 0x53, 0x74, 0xbb, 0x9c, 0xe8, 0xcf,
+ 0x76, 0x51, 0x25, 0x02, 0xcd, 0xea, 0x9e, 0xb9 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d, 0x1d },
+ { 0x00, 0x3f, 0x7e, 0x41, 0xfc, 0xc3, 0x82, 0xbd,
+ 0xf8, 0xc7, 0x86, 0xb9, 0x04, 0x3b, 0x7a, 0x45 },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51,
+ 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51,
+ 0x87, 0xf3, 0x6f, 0x1b, 0x4a, 0x3e, 0xa2, 0xd6 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x41, 0x82, 0xc3, 0x04, 0x45, 0x86, 0xc7,
+ 0x08, 0x49, 0x8a, 0xcb, 0x0c, 0x4d, 0x8e, 0xcf },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51,
+ 0x9a, 0xee, 0x72, 0x06, 0x57, 0x23, 0xbf, 0xcb },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x42, 0x84, 0xc6, 0x08, 0x4a, 0x8c, 0xce,
+ 0x10, 0x52, 0x94, 0xd6, 0x18, 0x5a, 0x9c, 0xde },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xcd, 0xb9, 0x25, 0x51,
+ 0x9a, 0xee, 0x72, 0x06, 0x57, 0x23, 0xbf, 0xcb },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x43, 0x86, 0xc5, 0x0c, 0x4f, 0x8a, 0xc9,
+ 0x18, 0x5b, 0x9e, 0xdd, 0x14, 0x57, 0x92, 0xd1 },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xd0, 0xa4, 0x38, 0x4c,
+ 0xbd, 0xc9, 0x55, 0x21, 0x6d, 0x19, 0x85, 0xf1 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x44, 0x88, 0xcc, 0x10, 0x54, 0x98, 0xdc,
+ 0x20, 0x64, 0xa8, 0xec, 0x30, 0x74, 0xb8, 0xfc },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xd0, 0xa4, 0x38, 0x4c,
+ 0xbd, 0xc9, 0x55, 0x21, 0x6d, 0x19, 0x85, 0xf1 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x45, 0x8a, 0xcf, 0x14, 0x51, 0x9e, 0xdb,
+ 0x28, 0x6d, 0xa2, 0xe7, 0x3c, 0x79, 0xb6, 0xf3 },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xd0, 0xa4, 0x38, 0x4c,
+ 0xa0, 0xd4, 0x48, 0x3c, 0x70, 0x04, 0x98, 0xec },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x46, 0x8c, 0xca, 0x18, 0x5e, 0x94, 0xd2,
+ 0x30, 0x76, 0xbc, 0xfa, 0x28, 0x6e, 0xa4, 0xe2 },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xd0, 0xa4, 0x38, 0x4c,
+ 0xa0, 0xd4, 0x48, 0x3c, 0x70, 0x04, 0x98, 0xec },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x47, 0x8e, 0xc9, 0x1c, 0x5b, 0x92, 0xd5,
+ 0x38, 0x7f, 0xb6, 0xf1, 0x24, 0x63, 0xaa, 0xed },
+ { 0x00, 0x74, 0xf5, 0x81, 0xf7, 0x83, 0x02, 0x76,
+ 0xf3, 0x87, 0x06, 0x72, 0x04, 0x70, 0xf1, 0x85 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x48, 0x90, 0xd8, 0x20, 0x68, 0xb0, 0xf8,
+ 0x40, 0x08, 0xd0, 0x98, 0x60, 0x28, 0xf0, 0xb8 },
+ { 0x00, 0x74, 0xf5, 0x81, 0xf7, 0x83, 0x02, 0x76,
+ 0xf3, 0x87, 0x06, 0x72, 0x04, 0x70, 0xf1, 0x85 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x49, 0x92, 0xdb, 0x24, 0x6d, 0xb6, 0xff,
+ 0x48, 0x01, 0xda, 0x93, 0x6c, 0x25, 0xfe, 0xb7 },
+ { 0x00, 0x74, 0xf5, 0x81, 0xf7, 0x83, 0x02, 0x76,
+ 0xee, 0x9a, 0x1b, 0x6f, 0x19, 0x6d, 0xec, 0x98 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x4a, 0x94, 0xde, 0x28, 0x62, 0xbc, 0xf6,
+ 0x50, 0x1a, 0xc4, 0x8e, 0x78, 0x32, 0xec, 0xa6 },
+ { 0x00, 0x74, 0xf5, 0x81, 0xf7, 0x83, 0x02, 0x76,
+ 0xee, 0x9a, 0x1b, 0x6f, 0x19, 0x6d, 0xec, 0x98 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x4b, 0x96, 0xdd, 0x2c, 0x67, 0xba, 0xf1,
+ 0x58, 0x13, 0xce, 0x85, 0x74, 0x3f, 0xe2, 0xa9 },
+ { 0x00, 0x74, 0xf5, 0x81, 0xea, 0x9e, 0x1f, 0x6b,
+ 0xc9, 0xbd, 0x3c, 0x48, 0x23, 0x57, 0xd6, 0xa2 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x4c, 0x98, 0xd4, 0x30, 0x7c, 0xa8, 0xe4,
+ 0x60, 0x2c, 0xf8, 0xb4, 0x50, 0x1c, 0xc8, 0x84 },
+ { 0x00, 0x74, 0xf5, 0x81, 0xea, 0x9e, 0x1f, 0x6b,
+ 0xc9, 0xbd, 0x3c, 0x48, 0x23, 0x57, 0xd6, 0xa2 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x4d, 0x9a, 0xd7, 0x34, 0x79, 0xae, 0xe3,
+ 0x68, 0x25, 0xf2, 0xbf, 0x5c, 0x11, 0xc6, 0x8b },
+ { 0x00, 0x74, 0xf5, 0x81, 0xea, 0x9e, 0x1f, 0x6b,
+ 0xd4, 0xa0, 0x21, 0x55, 0x3e, 0x4a, 0xcb, 0xbf },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x38, 0x76, 0xa4, 0xea,
+ 0x70, 0x3e, 0xec, 0xa2, 0x48, 0x06, 0xd4, 0x9a },
+ { 0x00, 0x74, 0xf5, 0x81, 0xea, 0x9e, 0x1f, 0x6b,
+ 0xd4, 0xa0, 0x21, 0x55, 0x3e, 0x4a, 0xcb, 0xbf },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x4f, 0x9e, 0xd1, 0x3c, 0x73, 0xa2, 0xed,
+ 0x78, 0x37, 0xe6, 0xa9, 0x44, 0x0b, 0xda, 0x95 },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02,
+ 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02,
+ 0x6f, 0x06, 0xbd, 0xd4, 0xd6, 0xbf, 0x04, 0x6d },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x51, 0xa2, 0xf3, 0x44, 0x15, 0xe6, 0xb7,
+ 0x88, 0xd9, 0x2a, 0x7b, 0xcc, 0x9d, 0x6e, 0x3f },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02,
+ 0x72, 0x1b, 0xa0, 0xc9, 0xcb, 0xa2, 0x19, 0x70 },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x52, 0xa4, 0xf6, 0x48, 0x1a, 0xec, 0xbe,
+ 0x90, 0xc2, 0x34, 0x66, 0xd8, 0x8a, 0x7c, 0x2e },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xb9, 0xd0, 0x6b, 0x02,
+ 0x72, 0x1b, 0xa0, 0xc9, 0xcb, 0xa2, 0x19, 0x70 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x4c, 0x1f, 0xea, 0xb9,
+ 0x98, 0xcb, 0x3e, 0x6d, 0xd4, 0x87, 0x72, 0x21 },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xa4, 0xcd, 0x76, 0x1f,
+ 0x55, 0x3c, 0x87, 0xee, 0xf1, 0x98, 0x23, 0x4a },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x54, 0xa8, 0xfc, 0x50, 0x04, 0xf8, 0xac,
+ 0xa0, 0xf4, 0x08, 0x5c, 0xf0, 0xa4, 0x58, 0x0c },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xa4, 0xcd, 0x76, 0x1f,
+ 0x55, 0x3c, 0x87, 0xee, 0xf1, 0x98, 0x23, 0x4a },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x55, 0xaa, 0xff, 0x54, 0x01, 0xfe, 0xab,
+ 0xa8, 0xfd, 0x02, 0x57, 0xfc, 0xa9, 0x56, 0x03 },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xa4, 0xcd, 0x76, 0x1f,
+ 0x48, 0x21, 0x9a, 0xf3, 0xec, 0x85, 0x3e, 0x57 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x56, 0xac, 0xfa, 0x58, 0x0e, 0xf4, 0xa2,
+ 0xb0, 0xe6, 0x1c, 0x4a, 0xe8, 0xbe, 0x44, 0x12 },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xa4, 0xcd, 0x76, 0x1f,
+ 0x48, 0x21, 0x9a, 0xf3, 0xec, 0x85, 0x3e, 0x57 },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x57, 0xae, 0xf9, 0x5c, 0x0b, 0xf2, 0xa5,
+ 0xb8, 0xef, 0x16, 0x41, 0xe4, 0xb3, 0x4a, 0x1d },
+ { 0x00, 0x69, 0xcf, 0xa6, 0x83, 0xea, 0x4c, 0x25,
+ 0x1b, 0x72, 0xd4, 0xbd, 0x98, 0xf1, 0x57, 0x3e },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x58, 0xb0, 0xe8, 0x60, 0x38, 0xd0, 0x88,
+ 0xc0, 0x98, 0x70, 0x28, 0xa0, 0xf8, 0x10, 0x48 },
+ { 0x00, 0x69, 0xcf, 0xa6, 0x83, 0xea, 0x4c, 0x25,
+ 0x1b, 0x72, 0xd4, 0xbd, 0x98, 0xf1, 0x57, 0x3e },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x59, 0xb2, 0xeb, 0x64, 0x3d, 0xd6, 0x8f,
+ 0xc8, 0x91, 0x7a, 0x23, 0xac, 0xf5, 0x1e, 0x47 },
+ { 0x00, 0x69, 0xcf, 0xa6, 0x83, 0xea, 0x4c, 0x25,
+ 0x06, 0x6f, 0xc9, 0xa0, 0x85, 0xec, 0x4a, 0x23 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x5a, 0xb4, 0xee, 0x68, 0x32, 0xdc, 0x86,
+ 0xd0, 0x8a, 0x64, 0x3e, 0xb8, 0xe2, 0x0c, 0x56 },
+ { 0x00, 0x69, 0xcf, 0xa6, 0x83, 0xea, 0x4c, 0x25,
+ 0x06, 0x6f, 0xc9, 0xa0, 0x85, 0xec, 0x4a, 0x23 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x5b, 0xb6, 0xed, 0x6c, 0x37, 0xda, 0x81,
+ 0xd8, 0x83, 0x6e, 0x35, 0xb4, 0xef, 0x02, 0x59 },
+ { 0x00, 0x69, 0xcf, 0xa6, 0x9e, 0xf7, 0x51, 0x38,
+ 0x21, 0x48, 0xee, 0x87, 0xbf, 0xd6, 0x70, 0x19 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x5c, 0xb8, 0xe4, 0x70, 0x2c, 0xc8, 0x94,
+ 0xe0, 0xbc, 0x58, 0x04, 0x90, 0xcc, 0x28, 0x74 },
+ { 0x00, 0x69, 0xcf, 0xa6, 0x9e, 0xf7, 0x51, 0x38,
+ 0x21, 0x48, 0xee, 0x87, 0xbf, 0xd6, 0x70, 0x19 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x5d, 0xba, 0xe7, 0x74, 0x29, 0xce, 0x93,
+ 0xe8, 0xb5, 0x52, 0x0f, 0x9c, 0xc1, 0x26, 0x7b },
+ { 0x00, 0x69, 0xcf, 0xa6, 0x9e, 0xf7, 0x51, 0x38,
+ 0x3c, 0x55, 0xf3, 0x9a, 0xa2, 0xcb, 0x6d, 0x04 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x5e, 0xbc, 0xe2, 0x78, 0x26, 0xc4, 0x9a,
+ 0xf0, 0xae, 0x4c, 0x12, 0x88, 0xd6, 0x34, 0x6a },
+ { 0x00, 0x69, 0xcf, 0xa6, 0x9e, 0xf7, 0x51, 0x38,
+ 0x3c, 0x55, 0xf3, 0x9a, 0xa2, 0xcb, 0x6d, 0x04 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x3a, 0x3a, 0x3a, 0x3a, 0x27, 0x27, 0x27, 0x27 },
+ { 0x00, 0x5f, 0xbe, 0xe1, 0x7c, 0x23, 0xc2, 0x9d,
+ 0xf8, 0xa7, 0x46, 0x19, 0x84, 0xdb, 0x3a, 0x65 },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7,
+ 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7,
+ 0x4a, 0x04, 0xd6, 0x98, 0x6f, 0x21, 0xf3, 0xbd },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x61, 0xc2, 0xa3, 0x84, 0xe5, 0x46, 0x27,
+ 0x08, 0x69, 0xca, 0xab, 0x8c, 0xed, 0x4e, 0x2f },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7,
+ 0x57, 0x19, 0xcb, 0x85, 0x72, 0x3c, 0xee, 0xa0 },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x62, 0xc4, 0xa6, 0x88, 0xea, 0x4c, 0x2e,
+ 0x10, 0x72, 0xd4, 0xb6, 0x98, 0xfa, 0x5c, 0x3e },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x25, 0x6b, 0xb9, 0xf7,
+ 0x57, 0x19, 0xcb, 0x85, 0x72, 0x3c, 0xee, 0xa0 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x63, 0xc6, 0xa5, 0x8c, 0xef, 0x4a, 0x29,
+ 0x18, 0x7b, 0xde, 0xbd, 0x94, 0xf7, 0x52, 0x31 },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x38, 0x76, 0xa4, 0xea,
+ 0x70, 0x3e, 0xec, 0xa2, 0x48, 0x06, 0xd4, 0x9a },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x64, 0xc8, 0xac, 0x90, 0xf4, 0x58, 0x3c,
+ 0x20, 0x44, 0xe8, 0x8c, 0xb0, 0xd4, 0x78, 0x1c },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x38, 0x76, 0xa4, 0xea,
+ 0x70, 0x3e, 0xec, 0xa2, 0x48, 0x06, 0xd4, 0x9a },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x65, 0xca, 0xaf, 0x94, 0xf1, 0x5e, 0x3b,
+ 0x28, 0x4d, 0xe2, 0x87, 0xbc, 0xd9, 0x76, 0x13 },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x38, 0x76, 0xa4, 0xea,
+ 0x6d, 0x23, 0xf1, 0xbf, 0x55, 0x1b, 0xc9, 0x87 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x66, 0xcc, 0xaa, 0x98, 0xfe, 0x54, 0x32,
+ 0x30, 0x56, 0xfc, 0x9a, 0xa8, 0xce, 0x64, 0x02 },
+ { 0x00, 0x4e, 0x9c, 0xd2, 0x38, 0x76, 0xa4, 0xea,
+ 0x6d, 0x23, 0xf1, 0xbf, 0x55, 0x1b, 0xc9, 0x87 },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x67, 0xce, 0xa9, 0x9c, 0xfb, 0x52, 0x35,
+ 0x38, 0x5f, 0xf6, 0x91, 0xa4, 0xc3, 0x6a, 0x0d },
+ { 0x00, 0x4e, 0x81, 0xcf, 0x1f, 0x51, 0x9e, 0xd0,
+ 0x3e, 0x70, 0xbf, 0xf1, 0x21, 0x6f, 0xa0, 0xee },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x68, 0xd0, 0xb8, 0xa0, 0xc8, 0x70, 0x18,
+ 0x40, 0x28, 0x90, 0xf8, 0xe0, 0x88, 0x30, 0x58 },
+ { 0x00, 0x4e, 0x81, 0xcf, 0x1f, 0x51, 0x9e, 0xd0,
+ 0x3e, 0x70, 0xbf, 0xf1, 0x21, 0x6f, 0xa0, 0xee },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x69, 0xd2, 0xbb, 0xa4, 0xcd, 0x76, 0x1f,
+ 0x48, 0x21, 0x9a, 0xf3, 0xec, 0x85, 0x3e, 0x57 },
+ { 0x00, 0x4e, 0x81, 0xcf, 0x1f, 0x51, 0x9e, 0xd0,
+ 0x23, 0x6d, 0xa2, 0xec, 0x3c, 0x72, 0xbd, 0xf3 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x6a, 0xd4, 0xbe, 0xa8, 0xc2, 0x7c, 0x16,
+ 0x50, 0x3a, 0x84, 0xee, 0xf8, 0x92, 0x2c, 0x46 },
+ { 0x00, 0x4e, 0x81, 0xcf, 0x1f, 0x51, 0x9e, 0xd0,
+ 0x23, 0x6d, 0xa2, 0xec, 0x3c, 0x72, 0xbd, 0xf3 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x6b, 0xd6, 0xbd, 0xac, 0xc7, 0x7a, 0x11,
+ 0x58, 0x33, 0x8e, 0xe5, 0xf4, 0x9f, 0x22, 0x49 },
+ { 0x00, 0x4e, 0x81, 0xcf, 0x02, 0x4c, 0x83, 0xcd,
+ 0x04, 0x4a, 0x85, 0xcb, 0x06, 0x48, 0x87, 0xc9 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x6c, 0xd8, 0xb4, 0xb0, 0xdc, 0x68, 0x04,
+ 0x60, 0x0c, 0xb8, 0xd4, 0xd0, 0xbc, 0x08, 0x64 },
+ { 0x00, 0x4e, 0x81, 0xcf, 0x02, 0x4c, 0x83, 0xcd,
+ 0x04, 0x4a, 0x85, 0xcb, 0x06, 0x48, 0x87, 0xc9 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x6d, 0xda, 0xb7, 0xb4, 0xd9, 0x6e, 0x03,
+ 0x68, 0x05, 0xb2, 0xdf, 0xdc, 0xb1, 0x06, 0x6b },
+ { 0x00, 0x4e, 0x81, 0xcf, 0x02, 0x4c, 0x83, 0xcd,
+ 0x19, 0x57, 0x98, 0xd6, 0x1b, 0x55, 0x9a, 0xd4 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x6e, 0xdc, 0xb2, 0xb8, 0xd6, 0x64, 0x0a,
+ 0x70, 0x1e, 0xac, 0xc2, 0xc8, 0xa6, 0x14, 0x7a },
+ { 0x00, 0x4e, 0x81, 0xcf, 0x02, 0x4c, 0x83, 0xcd,
+ 0x19, 0x57, 0x98, 0xd6, 0x1b, 0x55, 0x9a, 0xd4 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x6f, 0xde, 0xb1, 0xbc, 0xd3, 0x62, 0x0d,
+ 0x78, 0x17, 0xa6, 0xc9, 0xc4, 0xab, 0x1a, 0x75 },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4,
+ 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4,
+ 0xa2, 0xf1, 0x04, 0x57, 0xf3, 0xa0, 0x55, 0x06 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x71, 0xe2, 0x93, 0xc4, 0xb5, 0x26, 0x57,
+ 0x88, 0xf9, 0x6a, 0x1b, 0x4c, 0x3d, 0xae, 0xdf },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4,
+ 0xbf, 0xec, 0x19, 0x4a, 0xee, 0xbd, 0x48, 0x1b },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x72, 0xe4, 0x96, 0xc8, 0xba, 0x2c, 0x5e,
+ 0x90, 0xe2, 0x74, 0x06, 0x58, 0x2a, 0xbc, 0xce },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x51, 0x02, 0xf7, 0xa4,
+ 0xbf, 0xec, 0x19, 0x4a, 0xee, 0xbd, 0x48, 0x1b },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x73, 0xe6, 0x95, 0xcc, 0xbf, 0x2a, 0x59,
+ 0x98, 0xeb, 0x7e, 0x0d, 0x54, 0x27, 0xb2, 0xc1 },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x4c, 0x1f, 0xea, 0xb9,
+ 0x98, 0xcb, 0x3e, 0x6d, 0xd4, 0x87, 0x72, 0x21 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x74, 0xe8, 0x9c, 0xd0, 0xa4, 0x38, 0x4c,
+ 0xa0, 0xd4, 0x48, 0x3c, 0x70, 0x04, 0x98, 0xec },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x4c, 0x1f, 0xea, 0xb9,
+ 0x98, 0xcb, 0x3e, 0x6d, 0xd4, 0x87, 0x72, 0x21 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x75, 0xea, 0x9f, 0xd4, 0xa1, 0x3e, 0x4b,
+ 0xa8, 0xdd, 0x42, 0x37, 0x7c, 0x09, 0x96, 0xe3 },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x4c, 0x1f, 0xea, 0xb9,
+ 0x85, 0xd6, 0x23, 0x70, 0xc9, 0x9a, 0x6f, 0x3c },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x76, 0xec, 0x9a, 0xd8, 0xae, 0x34, 0x42,
+ 0xb0, 0xc6, 0x5c, 0x2a, 0x68, 0x1e, 0x84, 0xf2 },
+ { 0x00, 0x53, 0xa6, 0xf5, 0x4c, 0x1f, 0xea, 0xb9,
+ 0x85, 0xd6, 0x23, 0x70, 0xc9, 0x9a, 0x6f, 0x3c },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x77, 0xee, 0x99, 0xdc, 0xab, 0x32, 0x45,
+ 0xb8, 0xcf, 0x56, 0x21, 0x64, 0x13, 0x8a, 0xfd },
+ { 0x00, 0x53, 0xbb, 0xe8, 0x6b, 0x38, 0xd0, 0x83,
+ 0xd6, 0x85, 0x6d, 0x3e, 0xbd, 0xee, 0x06, 0x55 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x78, 0xf0, 0x88, 0xe0, 0x98, 0x10, 0x68,
+ 0xc0, 0xb8, 0x30, 0x48, 0x20, 0x58, 0xd0, 0xa8 },
+ { 0x00, 0x53, 0xbb, 0xe8, 0x6b, 0x38, 0xd0, 0x83,
+ 0xd6, 0x85, 0x6d, 0x3e, 0xbd, 0xee, 0x06, 0x55 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x79, 0xf2, 0x8b, 0xe4, 0x9d, 0x16, 0x6f,
+ 0xc8, 0xb1, 0x3a, 0x43, 0x2c, 0x55, 0xde, 0xa7 },
+ { 0x00, 0x53, 0xbb, 0xe8, 0x6b, 0x38, 0xd0, 0x83,
+ 0xcb, 0x98, 0x70, 0x23, 0xa0, 0xf3, 0x1b, 0x48 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x7a, 0xf4, 0x8e, 0xe8, 0x92, 0x1c, 0x66,
+ 0xd0, 0xaa, 0x24, 0x5e, 0x38, 0x42, 0xcc, 0xb6 },
+ { 0x00, 0x53, 0xbb, 0xe8, 0x6b, 0x38, 0xd0, 0x83,
+ 0xcb, 0x98, 0x70, 0x23, 0xa0, 0xf3, 0x1b, 0x48 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x7b, 0xf6, 0x8d, 0xec, 0x97, 0x1a, 0x61,
+ 0xd8, 0xa3, 0x2e, 0x55, 0x34, 0x4f, 0xc2, 0xb9 },
+ { 0x00, 0x53, 0xbb, 0xe8, 0x76, 0x25, 0xcd, 0x9e,
+ 0xec, 0xbf, 0x57, 0x04, 0x9a, 0xc9, 0x21, 0x72 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x7c, 0xf8, 0x84, 0xf0, 0x8c, 0x08, 0x74,
+ 0xe0, 0x9c, 0x18, 0x64, 0x10, 0x6c, 0xe8, 0x94 },
+ { 0x00, 0x53, 0xbb, 0xe8, 0x76, 0x25, 0xcd, 0x9e,
+ 0xec, 0xbf, 0x57, 0x04, 0x9a, 0xc9, 0x21, 0x72 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x7d, 0xfa, 0x87, 0xf4, 0x89, 0x0e, 0x73,
+ 0xe8, 0x95, 0x12, 0x6f, 0x1c, 0x61, 0xe6, 0x9b },
+ { 0x00, 0x53, 0xbb, 0xe8, 0x76, 0x25, 0xcd, 0x9e,
+ 0xf1, 0xa2, 0x4a, 0x19, 0x87, 0xd4, 0x3c, 0x6f },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x7e, 0xfc, 0x82, 0xf8, 0x86, 0x04, 0x7a,
+ 0xf0, 0x8e, 0x0c, 0x72, 0x08, 0x76, 0xf4, 0x8a },
+ { 0x00, 0x53, 0xbb, 0xe8, 0x76, 0x25, 0xcd, 0x9e,
+ 0xf1, 0xa2, 0x4a, 0x19, 0x87, 0xd4, 0x3c, 0x6f },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x00, 0x00, 0x1d, 0x1d, 0x1d, 0x1d,
+ 0x27, 0x27, 0x27, 0x27, 0x3a, 0x3a, 0x3a, 0x3a },
+ { 0x00, 0x7f, 0xfe, 0x81, 0xfc, 0x83, 0x02, 0x7d,
+ 0xf8, 0x87, 0x06, 0x79, 0x04, 0x7b, 0xfa, 0x85 },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2,
+ 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2,
+ 0x13, 0xfb, 0xde, 0x36, 0x94, 0x7c, 0x59, 0xb1 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x81, 0x02, 0x83, 0x04, 0x85, 0x06, 0x87,
+ 0x08, 0x89, 0x0a, 0x8b, 0x0c, 0x8d, 0x0e, 0x8f },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2,
+ 0x0e, 0xe6, 0xc3, 0x2b, 0x89, 0x61, 0x44, 0xac },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x82, 0x04, 0x86, 0x08, 0x8a, 0x0c, 0x8e,
+ 0x10, 0x92, 0x14, 0x96, 0x18, 0x9a, 0x1c, 0x9e },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x87, 0x6f, 0x4a, 0xa2,
+ 0x0e, 0xe6, 0xc3, 0x2b, 0x89, 0x61, 0x44, 0xac },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x83, 0x06, 0x85, 0x0c, 0x8f, 0x0a, 0x89,
+ 0x18, 0x9b, 0x1e, 0x9d, 0x14, 0x97, 0x12, 0x91 },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x9a, 0x72, 0x57, 0xbf,
+ 0x29, 0xc1, 0xe4, 0x0c, 0xb3, 0x5b, 0x7e, 0x96 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x84, 0x08, 0x8c, 0x10, 0x94, 0x18, 0x9c,
+ 0x20, 0xa4, 0x28, 0xac, 0x30, 0xb4, 0x38, 0xbc },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x9a, 0x72, 0x57, 0xbf,
+ 0x29, 0xc1, 0xe4, 0x0c, 0xb3, 0x5b, 0x7e, 0x96 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x85, 0x0a, 0x8f, 0x14, 0x91, 0x1e, 0x9b,
+ 0x28, 0xad, 0x22, 0xa7, 0x3c, 0xb9, 0x36, 0xb3 },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x9a, 0x72, 0x57, 0xbf,
+ 0x34, 0xdc, 0xf9, 0x11, 0xae, 0x46, 0x63, 0x8b },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x86, 0x0c, 0x8a, 0x18, 0x9e, 0x14, 0x92,
+ 0x30, 0xb6, 0x3c, 0xba, 0x28, 0xae, 0x24, 0xa2 },
+ { 0x00, 0xe8, 0xcd, 0x25, 0x9a, 0x72, 0x57, 0xbf,
+ 0x34, 0xdc, 0xf9, 0x11, 0xae, 0x46, 0x63, 0x8b },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x87, 0x0e, 0x89, 0x1c, 0x9b, 0x12, 0x95,
+ 0x38, 0xbf, 0x36, 0xb1, 0x24, 0xa3, 0x2a, 0xad },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xbd, 0x55, 0x6d, 0x85,
+ 0x67, 0x8f, 0xb7, 0x5f, 0xda, 0x32, 0x0a, 0xe2 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x88, 0x10, 0x98, 0x20, 0xa8, 0x30, 0xb8,
+ 0x40, 0xc8, 0x50, 0xd8, 0x60, 0xe8, 0x70, 0xf8 },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xbd, 0x55, 0x6d, 0x85,
+ 0x67, 0x8f, 0xb7, 0x5f, 0xda, 0x32, 0x0a, 0xe2 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x89, 0x12, 0x9b, 0x24, 0xad, 0x36, 0xbf,
+ 0x48, 0xc1, 0x5a, 0xd3, 0x6c, 0xe5, 0x7e, 0xf7 },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xbd, 0x55, 0x6d, 0x85,
+ 0x7a, 0x92, 0xaa, 0x42, 0xc7, 0x2f, 0x17, 0xff },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x8a, 0x14, 0x9e, 0x28, 0xa2, 0x3c, 0xb6,
+ 0x50, 0xda, 0x44, 0xce, 0x78, 0xf2, 0x6c, 0xe6 },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xbd, 0x55, 0x6d, 0x85,
+ 0x7a, 0x92, 0xaa, 0x42, 0xc7, 0x2f, 0x17, 0xff },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x8b, 0x16, 0x9d, 0x2c, 0xa7, 0x3a, 0xb1,
+ 0x58, 0xd3, 0x4e, 0xc5, 0x74, 0xff, 0x62, 0xe9 },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xa0, 0x48, 0x70, 0x98,
+ 0x5d, 0xb5, 0x8d, 0x65, 0xfd, 0x15, 0x2d, 0xc5 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x8c, 0x18, 0x94, 0x30, 0xbc, 0x28, 0xa4,
+ 0x60, 0xec, 0x78, 0xf4, 0x50, 0xdc, 0x48, 0xc4 },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xa0, 0x48, 0x70, 0x98,
+ 0x5d, 0xb5, 0x8d, 0x65, 0xfd, 0x15, 0x2d, 0xc5 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x8d, 0x1a, 0x97, 0x34, 0xb9, 0x2e, 0xa3,
+ 0x68, 0xe5, 0x72, 0xff, 0x5c, 0xd1, 0x46, 0xcb },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xa0, 0x48, 0x70, 0x98,
+ 0x40, 0xa8, 0x90, 0x78, 0xe0, 0x08, 0x30, 0xd8 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x8e, 0x1c, 0x92, 0x38, 0xb6, 0x24, 0xaa,
+ 0x70, 0xfe, 0x6c, 0xe2, 0x48, 0xc6, 0x54, 0xda },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xa0, 0x48, 0x70, 0x98,
+ 0x40, 0xa8, 0x90, 0x78, 0xe0, 0x08, 0x30, 0xd8 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x8f, 0x1e, 0x91, 0x3c, 0xb3, 0x22, 0xad,
+ 0x78, 0xf7, 0x66, 0xe9, 0x44, 0xcb, 0x5a, 0xd5 },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1,
+ 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1,
+ 0xfb, 0x0e, 0x0c, 0xf9, 0x08, 0xfd, 0xff, 0x0a },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x91, 0x22, 0xb3, 0x44, 0xd5, 0x66, 0xf7,
+ 0x88, 0x19, 0xaa, 0x3b, 0xcc, 0x5d, 0xee, 0x7f },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1,
+ 0xe6, 0x13, 0x11, 0xe4, 0x15, 0xe0, 0xe2, 0x17 },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x92, 0x24, 0xb6, 0x48, 0xda, 0x6c, 0xfe,
+ 0x90, 0x02, 0xb4, 0x26, 0xd8, 0x4a, 0xfc, 0x6e },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xf3, 0x06, 0x04, 0xf1,
+ 0xe6, 0x13, 0x11, 0xe4, 0x15, 0xe0, 0xe2, 0x17 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x93, 0x26, 0xb5, 0x4c, 0xdf, 0x6a, 0xf9,
+ 0x98, 0x0b, 0xbe, 0x2d, 0xd4, 0x47, 0xf2, 0x61 },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xee, 0x1b, 0x19, 0xec,
+ 0xc1, 0x34, 0x36, 0xc3, 0x2f, 0xda, 0xd8, 0x2d },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x94, 0x28, 0xbc, 0x50, 0xc4, 0x78, 0xec,
+ 0xa0, 0x34, 0x88, 0x1c, 0xf0, 0x64, 0xd8, 0x4c },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xee, 0x1b, 0x19, 0xec,
+ 0xc1, 0x34, 0x36, 0xc3, 0x2f, 0xda, 0xd8, 0x2d },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x95, 0x2a, 0xbf, 0x54, 0xc1, 0x7e, 0xeb,
+ 0xa8, 0x3d, 0x82, 0x17, 0xfc, 0x69, 0xd6, 0x43 },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xee, 0x1b, 0x19, 0xec,
+ 0xdc, 0x29, 0x2b, 0xde, 0x32, 0xc7, 0xc5, 0x30 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x96, 0x2c, 0xba, 0x58, 0xce, 0x74, 0xe2,
+ 0xb0, 0x26, 0x9c, 0x0a, 0xe8, 0x7e, 0xc4, 0x52 },
+ { 0x00, 0xf5, 0xf7, 0x02, 0xee, 0x1b, 0x19, 0xec,
+ 0xdc, 0x29, 0x2b, 0xde, 0x32, 0xc7, 0xc5, 0x30 },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x97, 0x2e, 0xb9, 0x5c, 0xcb, 0x72, 0xe5,
+ 0xb8, 0x2f, 0x96, 0x01, 0xe4, 0x73, 0xca, 0x5d },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xc9, 0x3c, 0x23, 0xd6,
+ 0x8f, 0x7a, 0x65, 0x90, 0x46, 0xb3, 0xac, 0x59 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x98, 0x30, 0xa8, 0x60, 0xf8, 0x50, 0xc8,
+ 0xc0, 0x58, 0xf0, 0x68, 0xa0, 0x38, 0x90, 0x08 },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xc9, 0x3c, 0x23, 0xd6,
+ 0x8f, 0x7a, 0x65, 0x90, 0x46, 0xb3, 0xac, 0x59 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x99, 0x32, 0xab, 0x64, 0xfd, 0x56, 0xcf,
+ 0xc8, 0x51, 0xfa, 0x63, 0xac, 0x35, 0x9e, 0x07 },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xc9, 0x3c, 0x23, 0xd6,
+ 0x92, 0x67, 0x78, 0x8d, 0x5b, 0xae, 0xb1, 0x44 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x9a, 0x34, 0xae, 0x68, 0xf2, 0x5c, 0xc6,
+ 0xd0, 0x4a, 0xe4, 0x7e, 0xb8, 0x22, 0x8c, 0x16 },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xc9, 0x3c, 0x23, 0xd6,
+ 0x92, 0x67, 0x78, 0x8d, 0x5b, 0xae, 0xb1, 0x44 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x9b, 0x36, 0xad, 0x6c, 0xf7, 0x5a, 0xc1,
+ 0xd8, 0x43, 0xee, 0x75, 0xb4, 0x2f, 0x82, 0x19 },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xd4, 0x21, 0x3e, 0xcb,
+ 0xb5, 0x40, 0x5f, 0xaa, 0x61, 0x94, 0x8b, 0x7e },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x70, 0xec, 0x48, 0xd4,
+ 0xe0, 0x7c, 0xd8, 0x44, 0x90, 0x0c, 0xa8, 0x34 },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xd4, 0x21, 0x3e, 0xcb,
+ 0xb5, 0x40, 0x5f, 0xaa, 0x61, 0x94, 0x8b, 0x7e },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x9d, 0x3a, 0xa7, 0x74, 0xe9, 0x4e, 0xd3,
+ 0xe8, 0x75, 0xd2, 0x4f, 0x9c, 0x01, 0xa6, 0x3b },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xd4, 0x21, 0x3e, 0xcb,
+ 0xa8, 0x5d, 0x42, 0xb7, 0x7c, 0x89, 0x96, 0x63 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x9e, 0x3c, 0xa2, 0x78, 0xe6, 0x44, 0xda,
+ 0xf0, 0x6e, 0xcc, 0x52, 0x88, 0x16, 0xb4, 0x2a },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xd4, 0x21, 0x3e, 0xcb,
+ 0xa8, 0x5d, 0x42, 0xb7, 0x7c, 0x89, 0x96, 0x63 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x74, 0x74, 0x69, 0x69, 0x4e, 0x4e, 0x53, 0x53 },
+ { 0x00, 0x9f, 0x3e, 0xa1, 0x7c, 0xe3, 0x42, 0xdd,
+ 0xf8, 0x67, 0xc6, 0x59, 0x84, 0x1b, 0xba, 0x25 },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04,
+ 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04,
+ 0xde, 0x0c, 0x67, 0xb5, 0xb1, 0x63, 0x08, 0xda },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa1, 0x42, 0xe3, 0x84, 0x25, 0xc6, 0x67,
+ 0x08, 0xa9, 0x4a, 0xeb, 0x8c, 0x2d, 0xce, 0x6f },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04,
+ 0xc3, 0x11, 0x7a, 0xa8, 0xac, 0x7e, 0x15, 0xc7 },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa2, 0x44, 0xe6, 0x88, 0x2a, 0xcc, 0x6e,
+ 0x10, 0xb2, 0x54, 0xf6, 0x98, 0x3a, 0xdc, 0x7e },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x6f, 0xbd, 0xd6, 0x04,
+ 0xc3, 0x11, 0x7a, 0xa8, 0xac, 0x7e, 0x15, 0xc7 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa3, 0x46, 0xe5, 0x8c, 0x2f, 0xca, 0x69,
+ 0x18, 0xbb, 0x5e, 0xfd, 0x94, 0x37, 0xd2, 0x71 },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x72, 0xa0, 0xcb, 0x19,
+ 0xe4, 0x36, 0x5d, 0x8f, 0x96, 0x44, 0x2f, 0xfd },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa4, 0x48, 0xec, 0x90, 0x34, 0xd8, 0x7c,
+ 0x20, 0x84, 0x68, 0xcc, 0xb0, 0x14, 0xf8, 0x5c },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x72, 0xa0, 0xcb, 0x19,
+ 0xe4, 0x36, 0x5d, 0x8f, 0x96, 0x44, 0x2f, 0xfd },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa5, 0x4a, 0xef, 0x94, 0x31, 0xde, 0x7b,
+ 0x28, 0x8d, 0x62, 0xc7, 0xbc, 0x19, 0xf6, 0x53 },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x72, 0xa0, 0xcb, 0x19,
+ 0xf9, 0x2b, 0x40, 0x92, 0x8b, 0x59, 0x32, 0xe0 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x98, 0x3e, 0xd4, 0x72,
+ 0x30, 0x96, 0x7c, 0xda, 0xa8, 0x0e, 0xe4, 0x42 },
+ { 0x00, 0xd2, 0xb9, 0x6b, 0x72, 0xa0, 0xcb, 0x19,
+ 0xf9, 0x2b, 0x40, 0x92, 0x8b, 0x59, 0x32, 0xe0 },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa7, 0x4e, 0xe9, 0x9c, 0x3b, 0xd2, 0x75,
+ 0x38, 0x9f, 0x76, 0xd1, 0xa4, 0x03, 0xea, 0x4d },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x55, 0x87, 0xf1, 0x23,
+ 0xaa, 0x78, 0x0e, 0xdc, 0xff, 0x2d, 0x5b, 0x89 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa8, 0x50, 0xf8, 0xa0, 0x08, 0xf0, 0x58,
+ 0x40, 0xe8, 0x10, 0xb8, 0xe0, 0x48, 0xb0, 0x18 },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x55, 0x87, 0xf1, 0x23,
+ 0xaa, 0x78, 0x0e, 0xdc, 0xff, 0x2d, 0x5b, 0x89 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xa9, 0x52, 0xfb, 0xa4, 0x0d, 0xf6, 0x5f,
+ 0x48, 0xe1, 0x1a, 0xb3, 0xec, 0x45, 0xbe, 0x17 },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x55, 0x87, 0xf1, 0x23,
+ 0xb7, 0x65, 0x13, 0xc1, 0xe2, 0x30, 0x46, 0x94 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xaa, 0x54, 0xfe, 0xa8, 0x02, 0xfc, 0x56,
+ 0x50, 0xfa, 0x04, 0xae, 0xf8, 0x52, 0xac, 0x06 },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x55, 0x87, 0xf1, 0x23,
+ 0xb7, 0x65, 0x13, 0xc1, 0xe2, 0x30, 0x46, 0x94 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xab, 0x56, 0xfd, 0xac, 0x07, 0xfa, 0x51,
+ 0x58, 0xf3, 0x0e, 0xa5, 0xf4, 0x5f, 0xa2, 0x09 },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x48, 0x9a, 0xec, 0x3e,
+ 0x90, 0x42, 0x34, 0xe6, 0xd8, 0x0a, 0x7c, 0xae },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xac, 0x58, 0xf4, 0xb0, 0x1c, 0xe8, 0x44,
+ 0x60, 0xcc, 0x38, 0x94, 0xd0, 0x7c, 0x88, 0x24 },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x48, 0x9a, 0xec, 0x3e,
+ 0x90, 0x42, 0x34, 0xe6, 0xd8, 0x0a, 0x7c, 0xae },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xad, 0x5a, 0xf7, 0xb4, 0x19, 0xee, 0x43,
+ 0x68, 0xc5, 0x32, 0x9f, 0xdc, 0x71, 0x86, 0x2b },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x48, 0x9a, 0xec, 0x3e,
+ 0x8d, 0x5f, 0x29, 0xfb, 0xc5, 0x17, 0x61, 0xb3 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xae, 0x5c, 0xf2, 0xb8, 0x16, 0xe4, 0x4a,
+ 0x70, 0xde, 0x2c, 0x82, 0xc8, 0x66, 0x94, 0x3a },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x48, 0x9a, 0xec, 0x3e,
+ 0x8d, 0x5f, 0x29, 0xfb, 0xc5, 0x17, 0x61, 0xb3 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xaf, 0x5e, 0xf1, 0xbc, 0x13, 0xe2, 0x4d,
+ 0x78, 0xd7, 0x26, 0x89, 0xc4, 0x6b, 0x9a, 0x35 },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57,
+ 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57,
+ 0x36, 0xf9, 0xb5, 0x7a, 0x2d, 0xe2, 0xae, 0x61 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb1, 0x62, 0xd3, 0xc4, 0x75, 0xa6, 0x17,
+ 0x88, 0x39, 0xea, 0x5b, 0x4c, 0xfd, 0x2e, 0x9f },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57,
+ 0x2b, 0xe4, 0xa8, 0x67, 0x30, 0xff, 0xb3, 0x7c },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb2, 0x64, 0xd6, 0xc8, 0x7a, 0xac, 0x1e,
+ 0x90, 0x22, 0xf4, 0x46, 0x58, 0xea, 0x3c, 0x8e },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x1b, 0xd4, 0x98, 0x57,
+ 0x2b, 0xe4, 0xa8, 0x67, 0x30, 0xff, 0xb3, 0x7c },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb3, 0x66, 0xd5, 0xcc, 0x7f, 0xaa, 0x19,
+ 0x98, 0x2b, 0xfe, 0x4d, 0x54, 0xe7, 0x32, 0x81 },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x06, 0xc9, 0x85, 0x4a,
+ 0x0c, 0xc3, 0x8f, 0x40, 0x0a, 0xc5, 0x89, 0x46 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb4, 0x68, 0xdc, 0xd0, 0x64, 0xb8, 0x0c,
+ 0xa0, 0x14, 0xc8, 0x7c, 0x70, 0xc4, 0x18, 0xac },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x06, 0xc9, 0x85, 0x4a,
+ 0x0c, 0xc3, 0x8f, 0x40, 0x0a, 0xc5, 0x89, 0x46 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb5, 0x6a, 0xdf, 0xd4, 0x61, 0xbe, 0x0b,
+ 0xa8, 0x1d, 0xc2, 0x77, 0x7c, 0xc9, 0x16, 0xa3 },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x06, 0xc9, 0x85, 0x4a,
+ 0x11, 0xde, 0x92, 0x5d, 0x17, 0xd8, 0x94, 0x5b },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb6, 0x6c, 0xda, 0xd8, 0x6e, 0xb4, 0x02,
+ 0xb0, 0x06, 0xdc, 0x6a, 0x68, 0xde, 0x04, 0xb2 },
+ { 0x00, 0xcf, 0x83, 0x4c, 0x06, 0xc9, 0x85, 0x4a,
+ 0x11, 0xde, 0x92, 0x5d, 0x17, 0xd8, 0x94, 0x5b },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb7, 0x6e, 0xd9, 0xdc, 0x6b, 0xb2, 0x05,
+ 0xb8, 0x0f, 0xd6, 0x61, 0x64, 0xd3, 0x0a, 0xbd },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x21, 0xee, 0xbf, 0x70,
+ 0x42, 0x8d, 0xdc, 0x13, 0x63, 0xac, 0xfd, 0x32 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb8, 0x70, 0xc8, 0xe0, 0x58, 0x90, 0x28,
+ 0xc0, 0x78, 0xb0, 0x08, 0x20, 0x98, 0x50, 0xe8 },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x21, 0xee, 0xbf, 0x70,
+ 0x42, 0x8d, 0xdc, 0x13, 0x63, 0xac, 0xfd, 0x32 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xb9, 0x72, 0xcb, 0xe4, 0x5d, 0x96, 0x2f,
+ 0xc8, 0x71, 0xba, 0x03, 0x2c, 0x95, 0x5e, 0xe7 },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x21, 0xee, 0xbf, 0x70,
+ 0x5f, 0x90, 0xc1, 0x0e, 0x7e, 0xb1, 0xe0, 0x2f },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xba, 0x74, 0xce, 0xe8, 0x52, 0x9c, 0x26,
+ 0xd0, 0x6a, 0xa4, 0x1e, 0x38, 0x82, 0x4c, 0xf6 },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x21, 0xee, 0xbf, 0x70,
+ 0x5f, 0x90, 0xc1, 0x0e, 0x7e, 0xb1, 0xe0, 0x2f },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xec, 0x57, 0x9a, 0x21,
+ 0xd8, 0x63, 0xae, 0x15, 0x34, 0x8f, 0x42, 0xf9 },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x3c, 0xf3, 0xa2, 0x6d,
+ 0x78, 0xb7, 0xe6, 0x29, 0x44, 0x8b, 0xda, 0x15 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xbc, 0x78, 0xc4, 0xf0, 0x4c, 0x88, 0x34,
+ 0xe0, 0x5c, 0x98, 0x24, 0x10, 0xac, 0x68, 0xd4 },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x3c, 0xf3, 0xa2, 0x6d,
+ 0x78, 0xb7, 0xe6, 0x29, 0x44, 0x8b, 0xda, 0x15 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xbd, 0x7a, 0xc7, 0xf4, 0x49, 0x8e, 0x33,
+ 0xe8, 0x55, 0x92, 0x2f, 0x1c, 0xa1, 0x66, 0xdb },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x3c, 0xf3, 0xa2, 0x6d,
+ 0x65, 0xaa, 0xfb, 0x34, 0x59, 0x96, 0xc7, 0x08 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xbe, 0x7c, 0xc2, 0xf8, 0x46, 0x84, 0x3a,
+ 0xf0, 0x4e, 0x8c, 0x32, 0x08, 0xb6, 0x74, 0xca },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x3c, 0xf3, 0xa2, 0x6d,
+ 0x65, 0xaa, 0xfb, 0x34, 0x59, 0x96, 0xc7, 0x08 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x3a, 0x3a, 0x27, 0x27,
+ 0x69, 0x69, 0x74, 0x74, 0x53, 0x53, 0x4e, 0x4e },
+ { 0x00, 0xbf, 0x7e, 0xc1, 0xfc, 0x43, 0x82, 0x3d,
+ 0xf8, 0x47, 0x86, 0x39, 0x04, 0xbb, 0x7a, 0xc5 },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3,
+ 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3,
+ 0x94, 0x08, 0xb1, 0x2d, 0xde, 0x42, 0xfb, 0x67 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc1, 0x82, 0x43, 0x04, 0xc5, 0x86, 0x47,
+ 0x08, 0xc9, 0x8a, 0x4b, 0x0c, 0xcd, 0x8e, 0x4f },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3,
+ 0x89, 0x15, 0xac, 0x30, 0xc3, 0x5f, 0xe6, 0x7a },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc2, 0x84, 0x46, 0x08, 0xca, 0x8c, 0x4e,
+ 0x10, 0xd2, 0x94, 0x56, 0x18, 0xda, 0x9c, 0x5e },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x4a, 0xd6, 0x6f, 0xf3,
+ 0x89, 0x15, 0xac, 0x30, 0xc3, 0x5f, 0xe6, 0x7a },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc3, 0x86, 0x45, 0x0c, 0xcf, 0x8a, 0x49,
+ 0x18, 0xdb, 0x9e, 0x5d, 0x14, 0xd7, 0x92, 0x51 },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x57, 0xcb, 0x72, 0xee,
+ 0xae, 0x32, 0x8b, 0x17, 0xf9, 0x65, 0xdc, 0x40 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc4, 0x88, 0x4c, 0x10, 0xd4, 0x98, 0x5c,
+ 0x20, 0xe4, 0xa8, 0x6c, 0x30, 0xf4, 0xb8, 0x7c },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x57, 0xcb, 0x72, 0xee,
+ 0xae, 0x32, 0x8b, 0x17, 0xf9, 0x65, 0xdc, 0x40 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc5, 0x8a, 0x4f, 0x14, 0xd1, 0x9e, 0x5b,
+ 0x28, 0xed, 0xa2, 0x67, 0x3c, 0xf9, 0xb6, 0x73 },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x57, 0xcb, 0x72, 0xee,
+ 0xb3, 0x2f, 0x96, 0x0a, 0xe4, 0x78, 0xc1, 0x5d },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc6, 0x8c, 0x4a, 0x18, 0xde, 0x94, 0x52,
+ 0x30, 0xf6, 0xbc, 0x7a, 0x28, 0xee, 0xa4, 0x62 },
+ { 0x00, 0x9c, 0x25, 0xb9, 0x57, 0xcb, 0x72, 0xee,
+ 0xb3, 0x2f, 0x96, 0x0a, 0xe4, 0x78, 0xc1, 0x5d },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc7, 0x8e, 0x49, 0x1c, 0xdb, 0x92, 0x55,
+ 0x38, 0xff, 0xb6, 0x71, 0x24, 0xe3, 0xaa, 0x6d },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x70, 0xec, 0x48, 0xd4,
+ 0xe0, 0x7c, 0xd8, 0x44, 0x90, 0x0c, 0xa8, 0x34 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc8, 0x90, 0x58, 0x20, 0xe8, 0xb0, 0x78,
+ 0x40, 0x88, 0xd0, 0x18, 0x60, 0xa8, 0xf0, 0x38 },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x70, 0xec, 0x48, 0xd4,
+ 0xe0, 0x7c, 0xd8, 0x44, 0x90, 0x0c, 0xa8, 0x34 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xc9, 0x92, 0x5b, 0x24, 0xed, 0xb6, 0x7f,
+ 0x48, 0x81, 0xda, 0x13, 0x6c, 0xa5, 0xfe, 0x37 },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x70, 0xec, 0x48, 0xd4,
+ 0xfd, 0x61, 0xc5, 0x59, 0x8d, 0x11, 0xb5, 0x29 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xca, 0x94, 0x5e, 0x28, 0xe2, 0xbc, 0x76,
+ 0x50, 0x9a, 0xc4, 0x0e, 0x78, 0xb2, 0xec, 0x26 },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x70, 0xec, 0x48, 0xd4,
+ 0xfd, 0x61, 0xc5, 0x59, 0x8d, 0x11, 0xb5, 0x29 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xcb, 0x96, 0x5d, 0x2c, 0xe7, 0xba, 0x71,
+ 0x58, 0x93, 0xce, 0x05, 0x74, 0xbf, 0xe2, 0x29 },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x6d, 0xf1, 0x55, 0xc9,
+ 0xda, 0x46, 0xe2, 0x7e, 0xb7, 0x2b, 0x8f, 0x13 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xcc, 0x98, 0x54, 0x30, 0xfc, 0xa8, 0x64,
+ 0x60, 0xac, 0xf8, 0x34, 0x50, 0x9c, 0xc8, 0x04 },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x6d, 0xf1, 0x55, 0xc9,
+ 0xda, 0x46, 0xe2, 0x7e, 0xb7, 0x2b, 0x8f, 0x13 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xcd, 0x9a, 0x57, 0x34, 0xf9, 0xae, 0x63,
+ 0x68, 0xa5, 0xf2, 0x3f, 0x5c, 0x91, 0xc6, 0x0b },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x6d, 0xf1, 0x55, 0xc9,
+ 0xc7, 0x5b, 0xff, 0x63, 0xaa, 0x36, 0x92, 0x0e },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xce, 0x9c, 0x52, 0x38, 0xf6, 0xa4, 0x6a,
+ 0x70, 0xbe, 0xec, 0x22, 0x48, 0x86, 0xd4, 0x1a },
+ { 0x00, 0x9c, 0x38, 0xa4, 0x6d, 0xf1, 0x55, 0xc9,
+ 0xc7, 0x5b, 0xff, 0x63, 0xaa, 0x36, 0x92, 0x0e },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xcf, 0x9e, 0x51, 0x3c, 0xf3, 0xa2, 0x6d,
+ 0x78, 0xb7, 0xe6, 0x29, 0x44, 0x8b, 0xda, 0x15 },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0,
+ 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0,
+ 0x7c, 0xfd, 0x63, 0xe2, 0x42, 0xc3, 0x5d, 0xdc },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd1, 0xa2, 0x73, 0x44, 0x95, 0xe6, 0x37,
+ 0x88, 0x59, 0x2a, 0xfb, 0xcc, 0x1d, 0x6e, 0xbf },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0,
+ 0x61, 0xe0, 0x7e, 0xff, 0x5f, 0xde, 0x40, 0xc1 },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd2, 0xa4, 0x76, 0x48, 0x9a, 0xec, 0x3e,
+ 0x90, 0x42, 0x34, 0xe6, 0xd8, 0x0a, 0x7c, 0xae },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x3e, 0xbf, 0x21, 0xa0,
+ 0x61, 0xe0, 0x7e, 0xff, 0x5f, 0xde, 0x40, 0xc1 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd3, 0xa6, 0x75, 0x4c, 0x9f, 0xea, 0x39,
+ 0x98, 0x4b, 0x3e, 0xed, 0xd4, 0x07, 0x72, 0xa1 },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x23, 0xa2, 0x3c, 0xbd,
+ 0x46, 0xc7, 0x59, 0xd8, 0x65, 0xe4, 0x7a, 0xfb },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd4, 0xa8, 0x7c, 0x50, 0x84, 0xf8, 0x2c,
+ 0xa0, 0x74, 0x08, 0xdc, 0xf0, 0x24, 0x58, 0x8c },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x23, 0xa2, 0x3c, 0xbd,
+ 0x46, 0xc7, 0x59, 0xd8, 0x65, 0xe4, 0x7a, 0xfb },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd5, 0xaa, 0x7f, 0x54, 0x81, 0xfe, 0x2b,
+ 0xa8, 0x7d, 0x02, 0xd7, 0xfc, 0x29, 0x56, 0x83 },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x23, 0xa2, 0x3c, 0xbd,
+ 0x5b, 0xda, 0x44, 0xc5, 0x78, 0xf9, 0x67, 0xe6 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd6, 0xac, 0x7a, 0x58, 0x8e, 0xf4, 0x22,
+ 0xb0, 0x66, 0x1c, 0xca, 0xe8, 0x3e, 0x44, 0x92 },
+ { 0x00, 0x81, 0x1f, 0x9e, 0x23, 0xa2, 0x3c, 0xbd,
+ 0x5b, 0xda, 0x44, 0xc5, 0x78, 0xf9, 0x67, 0xe6 },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd7, 0xae, 0x79, 0x5c, 0x8b, 0xf2, 0x25,
+ 0xb8, 0x6f, 0x16, 0xc1, 0xe4, 0x33, 0x4a, 0x9d },
+ { 0x00, 0x81, 0x02, 0x83, 0x04, 0x85, 0x06, 0x87,
+ 0x08, 0x89, 0x0a, 0x8b, 0x0c, 0x8d, 0x0e, 0x8f },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd8, 0xb0, 0x68, 0x60, 0xb8, 0xd0, 0x08,
+ 0xc0, 0x18, 0x70, 0xa8, 0xa0, 0x78, 0x10, 0xc8 },
+ { 0x00, 0x81, 0x02, 0x83, 0x04, 0x85, 0x06, 0x87,
+ 0x08, 0x89, 0x0a, 0x8b, 0x0c, 0x8d, 0x0e, 0x8f },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xd9, 0xb2, 0x6b, 0x64, 0xbd, 0xd6, 0x0f,
+ 0xc8, 0x11, 0x7a, 0xa3, 0xac, 0x75, 0x1e, 0xc7 },
+ { 0x00, 0x81, 0x02, 0x83, 0x04, 0x85, 0x06, 0x87,
+ 0x15, 0x94, 0x17, 0x96, 0x11, 0x90, 0x13, 0x92 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xda, 0xb4, 0x6e, 0x68, 0xb2, 0xdc, 0x06,
+ 0xd0, 0x0a, 0x64, 0xbe, 0xb8, 0x62, 0x0c, 0xd6 },
+ { 0x00, 0x81, 0x02, 0x83, 0x04, 0x85, 0x06, 0x87,
+ 0x15, 0x94, 0x17, 0x96, 0x11, 0x90, 0x13, 0x92 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xdb, 0xb6, 0x6d, 0x6c, 0xb7, 0xda, 0x01,
+ 0xd8, 0x03, 0x6e, 0xb5, 0xb4, 0x6f, 0x02, 0xd9 },
+ { 0x00, 0x81, 0x02, 0x83, 0x19, 0x98, 0x1b, 0x9a,
+ 0x32, 0xb3, 0x30, 0xb1, 0x2b, 0xaa, 0x29, 0xa8 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xdc, 0xb8, 0x64, 0x70, 0xac, 0xc8, 0x14,
+ 0xe0, 0x3c, 0x58, 0x84, 0x90, 0x4c, 0x28, 0xf4 },
+ { 0x00, 0x81, 0x02, 0x83, 0x19, 0x98, 0x1b, 0x9a,
+ 0x32, 0xb3, 0x30, 0xb1, 0x2b, 0xaa, 0x29, 0xa8 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xdd, 0xba, 0x67, 0x74, 0xa9, 0xce, 0x13,
+ 0xe8, 0x35, 0x52, 0x8f, 0x9c, 0x41, 0x26, 0xfb },
+ { 0x00, 0x81, 0x02, 0x83, 0x19, 0x98, 0x1b, 0x9a,
+ 0x2f, 0xae, 0x2d, 0xac, 0x36, 0xb7, 0x34, 0xb5 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xde, 0xbc, 0x62, 0x78, 0xa6, 0xc4, 0x1a,
+ 0xf0, 0x2e, 0x4c, 0x92, 0x88, 0x56, 0x34, 0xea },
+ { 0x00, 0x81, 0x02, 0x83, 0x19, 0x98, 0x1b, 0x9a,
+ 0x2f, 0xae, 0x2d, 0xac, 0x36, 0xb7, 0x34, 0xb5 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x4e, 0x4e, 0x53, 0x53, 0x69, 0x69, 0x74, 0x74 },
+ { 0x00, 0xdf, 0xbe, 0x61, 0x7c, 0xa3, 0xc2, 0x1d,
+ 0xf8, 0x27, 0x46, 0x99, 0x84, 0x5b, 0x3a, 0xe5 },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55,
+ 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55,
+ 0x59, 0xff, 0x08, 0xae, 0xfb, 0x5d, 0xaa, 0x0c },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe1, 0xc2, 0x23, 0x84, 0x65, 0x46, 0xa7,
+ 0x08, 0xe9, 0xca, 0x2b, 0x8c, 0x6d, 0x4e, 0xaf },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55,
+ 0x44, 0xe2, 0x15, 0xb3, 0xe6, 0x40, 0xb7, 0x11 },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe2, 0xc4, 0x26, 0x88, 0x6a, 0x4c, 0xae,
+ 0x10, 0xf2, 0xd4, 0x36, 0x98, 0x7a, 0x5c, 0xbe },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xa2, 0x04, 0xf3, 0x55,
+ 0x44, 0xe2, 0x15, 0xb3, 0xe6, 0x40, 0xb7, 0x11 },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe3, 0xc6, 0x25, 0x8c, 0x6f, 0x4a, 0xa9,
+ 0x18, 0xfb, 0xde, 0x3d, 0x94, 0x77, 0x52, 0xb1 },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xbf, 0x19, 0xee, 0x48,
+ 0x63, 0xc5, 0x32, 0x94, 0xdc, 0x7a, 0x8d, 0x2b },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe4, 0xc8, 0x2c, 0x90, 0x74, 0x58, 0xbc,
+ 0x20, 0xc4, 0xe8, 0x0c, 0xb0, 0x54, 0x78, 0x9c },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xbf, 0x19, 0xee, 0x48,
+ 0x63, 0xc5, 0x32, 0x94, 0xdc, 0x7a, 0x8d, 0x2b },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe5, 0xca, 0x2f, 0x94, 0x71, 0x5e, 0xbb,
+ 0x28, 0xcd, 0xe2, 0x07, 0xbc, 0x59, 0x76, 0x93 },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xbf, 0x19, 0xee, 0x48,
+ 0x7e, 0xd8, 0x2f, 0x89, 0xc1, 0x67, 0x90, 0x36 },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe6, 0xcc, 0x2a, 0x98, 0x7e, 0x54, 0xb2,
+ 0x30, 0xd6, 0xfc, 0x1a, 0xa8, 0x4e, 0x64, 0x82 },
+ { 0x00, 0xa6, 0x51, 0xf7, 0xbf, 0x19, 0xee, 0x48,
+ 0x7e, 0xd8, 0x2f, 0x89, 0xc1, 0x67, 0x90, 0x36 },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe7, 0xce, 0x29, 0x9c, 0x7b, 0x52, 0xb5,
+ 0x38, 0xdf, 0xf6, 0x11, 0xa4, 0x43, 0x6a, 0x8d },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x98, 0x3e, 0xd4, 0x72,
+ 0x2d, 0x8b, 0x61, 0xc7, 0xb5, 0x13, 0xf9, 0x5f },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe8, 0xd0, 0x38, 0xa0, 0x48, 0x70, 0x98,
+ 0x40, 0xa8, 0x90, 0x78, 0xe0, 0x08, 0x30, 0xd8 },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x98, 0x3e, 0xd4, 0x72,
+ 0x2d, 0x8b, 0x61, 0xc7, 0xb5, 0x13, 0xf9, 0x5f },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xe9, 0xd2, 0x3b, 0xa4, 0x4d, 0x76, 0x9f,
+ 0x48, 0xa1, 0x9a, 0x73, 0xec, 0x05, 0x3e, 0xd7 },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x98, 0x3e, 0xd4, 0x72,
+ 0x30, 0x96, 0x7c, 0xda, 0xa8, 0x0e, 0xe4, 0x42 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xea, 0xd4, 0x3e, 0xa8, 0x42, 0x7c, 0x96,
+ 0x50, 0xba, 0x84, 0x6e, 0xf8, 0x12, 0x2c, 0xc6 },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x98, 0x3e, 0xd4, 0x72,
+ 0x30, 0x96, 0x7c, 0xda, 0xa8, 0x0e, 0xe4, 0x42 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xeb, 0xd6, 0x3d, 0xac, 0x47, 0x7a, 0x91,
+ 0x58, 0xb3, 0x8e, 0x65, 0xf4, 0x1f, 0x22, 0xc9 },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x85, 0x23, 0xc9, 0x6f,
+ 0x17, 0xb1, 0x5b, 0xfd, 0x92, 0x34, 0xde, 0x78 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xec, 0xd8, 0x34, 0xb0, 0x5c, 0x68, 0x84,
+ 0x60, 0x8c, 0xb8, 0x54, 0xd0, 0x3c, 0x08, 0xe4 },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x85, 0x23, 0xc9, 0x6f,
+ 0x17, 0xb1, 0x5b, 0xfd, 0x92, 0x34, 0xde, 0x78 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xed, 0xda, 0x37, 0xb4, 0x59, 0x6e, 0x83,
+ 0x68, 0x85, 0xb2, 0x5f, 0xdc, 0x31, 0x06, 0xeb },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x85, 0x23, 0xc9, 0x6f,
+ 0x0a, 0xac, 0x46, 0xe0, 0x8f, 0x29, 0xc3, 0x65 },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xee, 0xdc, 0x32, 0xb8, 0x56, 0x64, 0x8a,
+ 0x70, 0x9e, 0xac, 0x42, 0xc8, 0x26, 0x14, 0xfa },
+ { 0x00, 0xa6, 0x4c, 0xea, 0x85, 0x23, 0xc9, 0x6f,
+ 0x0a, 0xac, 0x46, 0xe0, 0x8f, 0x29, 0xc3, 0x65 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xef, 0xde, 0x31, 0xbc, 0x53, 0x62, 0x8d,
+ 0x78, 0x97, 0xa6, 0x49, 0xc4, 0x2b, 0x1a, 0xf5 },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06,
+ 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06,
+ 0xb1, 0x0a, 0xda, 0x61, 0x67, 0xdc, 0x0c, 0xb7 },
+ { 0x00, 0x10, 0x20, 0x30, 0x40, 0x50, 0x60, 0x70,
+ 0x80, 0x90, 0xa0, 0xb0, 0xc0, 0xd0, 0xe0, 0xf0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf1, 0xe2, 0x13, 0xc4, 0x35, 0x26, 0xd7,
+ 0x88, 0x79, 0x6a, 0x9b, 0x4c, 0xbd, 0xae, 0x5f },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06,
+ 0xac, 0x17, 0xc7, 0x7c, 0x7a, 0xc1, 0x11, 0xaa },
+ { 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0,
+ 0x00, 0x20, 0x40, 0x60, 0x80, 0xa0, 0xc0, 0xe0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf2, 0xe4, 0x16, 0xc8, 0x3a, 0x2c, 0xde,
+ 0x90, 0x62, 0x74, 0x86, 0x58, 0xaa, 0xbc, 0x4e },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xd6, 0x6d, 0xbd, 0x06,
+ 0xac, 0x17, 0xc7, 0x7c, 0x7a, 0xc1, 0x11, 0xaa },
+ { 0x00, 0x30, 0x60, 0x50, 0xc0, 0xf0, 0xa0, 0x90,
+ 0x80, 0xb0, 0xe0, 0xd0, 0x40, 0x70, 0x20, 0x10 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf3, 0xe6, 0x15, 0xcc, 0x3f, 0x2a, 0xd9,
+ 0x98, 0x6b, 0x7e, 0x8d, 0x54, 0xa7, 0xb2, 0x41 },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xcb, 0x70, 0xa0, 0x1b,
+ 0x8b, 0x30, 0xe0, 0x5b, 0x40, 0xfb, 0x2b, 0x90 },
+ { 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0,
+ 0x00, 0x40, 0x80, 0xc0, 0x00, 0x40, 0x80, 0xc0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf4, 0xe8, 0x1c, 0xd0, 0x24, 0x38, 0xcc,
+ 0xa0, 0x54, 0x48, 0xbc, 0x70, 0x84, 0x98, 0x6c },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xcb, 0x70, 0xa0, 0x1b,
+ 0x8b, 0x30, 0xe0, 0x5b, 0x40, 0xfb, 0x2b, 0x90 },
+ { 0x00, 0x50, 0xa0, 0xf0, 0x40, 0x10, 0xe0, 0xb0,
+ 0x80, 0xd0, 0x20, 0x70, 0xc0, 0x90, 0x60, 0x30 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf5, 0xea, 0x1f, 0xd4, 0x21, 0x3e, 0xcb,
+ 0xa8, 0x5d, 0x42, 0xb7, 0x7c, 0x89, 0x96, 0x63 },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xcb, 0x70, 0xa0, 0x1b,
+ 0x96, 0x2d, 0xfd, 0x46, 0x5d, 0xe6, 0x36, 0x8d },
+ { 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20,
+ 0x00, 0x60, 0xc0, 0xa0, 0x80, 0xe0, 0x40, 0x20 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf6, 0xec, 0x1a, 0xd8, 0x2e, 0x34, 0xc2,
+ 0xb0, 0x46, 0x5c, 0xaa, 0x68, 0x9e, 0x84, 0x72 },
+ { 0x00, 0xbb, 0x6b, 0xd0, 0xcb, 0x70, 0xa0, 0x1b,
+ 0x96, 0x2d, 0xfd, 0x46, 0x5d, 0xe6, 0x36, 0x8d },
+ { 0x00, 0x70, 0xe0, 0x90, 0xc0, 0xb0, 0x20, 0x50,
+ 0x80, 0xf0, 0x60, 0x10, 0x40, 0x30, 0xa0, 0xd0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf7, 0xee, 0x19, 0xdc, 0x2b, 0x32, 0xc5,
+ 0xb8, 0x4f, 0x56, 0xa1, 0x64, 0x93, 0x8a, 0x7d },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xec, 0x57, 0x9a, 0x21,
+ 0xc5, 0x7e, 0xb3, 0x08, 0x29, 0x92, 0x5f, 0xe4 },
+ { 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80,
+ 0x00, 0x80, 0x00, 0x80, 0x00, 0x80, 0x00, 0x80 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf8, 0xf0, 0x08, 0xe0, 0x18, 0x10, 0xe8,
+ 0xc0, 0x38, 0x30, 0xc8, 0x20, 0xd8, 0xd0, 0x28 },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xec, 0x57, 0x9a, 0x21,
+ 0xc5, 0x7e, 0xb3, 0x08, 0x29, 0x92, 0x5f, 0xe4 },
+ { 0x00, 0x90, 0x20, 0xb0, 0x40, 0xd0, 0x60, 0xf0,
+ 0x80, 0x10, 0xa0, 0x30, 0xc0, 0x50, 0xe0, 0x70 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xf9, 0xf2, 0x0b, 0xe4, 0x1d, 0x16, 0xef,
+ 0xc8, 0x31, 0x3a, 0xc3, 0x2c, 0xd5, 0xde, 0x27 },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xec, 0x57, 0x9a, 0x21,
+ 0xd8, 0x63, 0xae, 0x15, 0x34, 0x8f, 0x42, 0xf9 },
+ { 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60,
+ 0x00, 0xa0, 0x40, 0xe0, 0x80, 0x20, 0xc0, 0x60 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xfa, 0xf4, 0x0e, 0xe8, 0x12, 0x1c, 0xe6,
+ 0xd0, 0x2a, 0x24, 0xde, 0x38, 0xc2, 0xcc, 0x36 },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xec, 0x57, 0x9a, 0x21,
+ 0xd8, 0x63, 0xae, 0x15, 0x34, 0x8f, 0x42, 0xf9 },
+ { 0x00, 0xb0, 0x60, 0xd0, 0xc0, 0x70, 0xa0, 0x10,
+ 0x80, 0x30, 0xe0, 0x50, 0x40, 0xf0, 0x20, 0x90 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xfb, 0xf6, 0x0d, 0xec, 0x17, 0x1a, 0xe1,
+ 0xd8, 0x23, 0x2e, 0xd5, 0x34, 0xcf, 0xc2, 0x39 },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xf1, 0x4a, 0x87, 0x3c,
+ 0xff, 0x44, 0x89, 0x32, 0x0e, 0xb5, 0x78, 0xc3 },
+ { 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40,
+ 0x00, 0xc0, 0x80, 0x40, 0x00, 0xc0, 0x80, 0x40 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xfc, 0xf8, 0x04, 0xf0, 0x0c, 0x08, 0xf4,
+ 0xe0, 0x1c, 0x18, 0xe4, 0x10, 0xec, 0xe8, 0x14 },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xf1, 0x4a, 0x87, 0x3c,
+ 0xff, 0x44, 0x89, 0x32, 0x0e, 0xb5, 0x78, 0xc3 },
+ { 0x00, 0xd0, 0xa0, 0x70, 0x40, 0x90, 0xe0, 0x30,
+ 0x80, 0x50, 0x20, 0xf0, 0xc0, 0x10, 0x60, 0xb0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xfd, 0xfa, 0x07, 0xf4, 0x09, 0x0e, 0xf3,
+ 0xe8, 0x15, 0x12, 0xef, 0x1c, 0xe1, 0xe6, 0x1b },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xf1, 0x4a, 0x87, 0x3c,
+ 0xe2, 0x59, 0x94, 0x2f, 0x13, 0xa8, 0x65, 0xde },
+ { 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0,
+ 0x00, 0xe0, 0xc0, 0x20, 0x80, 0x60, 0x40, 0xa0 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xfe, 0xfc, 0x02, 0xf8, 0x06, 0x04, 0xfa,
+ 0xf0, 0x0e, 0x0c, 0xf2, 0x08, 0xf6, 0xf4, 0x0a },
+ { 0x00, 0xbb, 0x76, 0xcd, 0xf1, 0x4a, 0x87, 0x3c,
+ 0xe2, 0x59, 0x94, 0x2f, 0x13, 0xa8, 0x65, 0xde },
+ { 0x00, 0xf0, 0xe0, 0x10, 0xc0, 0x30, 0x20, 0xd0,
+ 0x80, 0x70, 0x60, 0x90, 0x40, 0xb0, 0xa0, 0x50 },
+ { 0x00, 0x00, 0x1d, 0x1d, 0x27, 0x27, 0x3a, 0x3a,
+ 0x53, 0x53, 0x4e, 0x4e, 0x74, 0x74, 0x69, 0x69 },
+ { 0x00, 0xff, 0xfe, 0x01, 0xfc, 0x03, 0x02, 0xfd,
+ 0xf8, 0x07, 0x06, 0xf9, 0x04, 0xfb, 0xfa, 0x05 }
+};
+/* END CSTYLED */
+#endif /* defined(HAVE_SSSE3) || defined(HAVE_AVX2) || defined(HAVE_AVX512BW) */
+#endif /* defined(__x86_64) */
diff --git a/zfs/module/zfs/vdev_root.c b/zfs/module/zfs/vdev_root.c
index 90250b0fb99c..6b456dd2bde0 100644
--- a/zfs/module/zfs/vdev_root.c
+++ b/zfs/module/zfs/vdev_root.c
@@ -120,6 +120,7 @@ vdev_ops_t vdev_root_ops = {
vdev_root_state_change,
NULL,
NULL,
+ NULL,
VDEV_TYPE_ROOT, /* name of this vdev type */
B_FALSE /* not a leaf vdev */
};
diff --git a/zfs/module/zfs/zap.c b/zfs/module/zfs/zap.c
index c9398e885fda..ee9962bff394 100644
--- a/zfs/module/zfs/zap.c
+++ b/zfs/module/zfs/zap.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@@ -81,9 +81,10 @@ fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags)
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
zap->zap_ismicro = FALSE;
- zap->zap_dbu.dbu_evict_func = zap_evict;
+ zap->zap_dbu.dbu_evict_func_sync = zap_evict_sync;
+ zap->zap_dbu.dbu_evict_func_async = NULL;
- mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
+ mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, MUTEX_DEFAULT, 0);
zap->zap_f.zap_block_shift = highbit64(zap->zap_dbuf->db_size) - 1;
zp = zap_f_phys(zap);
@@ -162,8 +163,9 @@ zap_table_grow(zap_t *zap, zap_table_phys_t *tbl,
newblk = zap_allocate_blocks(zap, tbl->zt_numblks * 2);
tbl->zt_nextblk = newblk;
ASSERT0(tbl->zt_blks_copied);
- dmu_prefetch(zap->zap_objset, zap->zap_object,
- tbl->zt_blk << bs, tbl->zt_numblks << bs);
+ dmu_prefetch(zap->zap_objset, zap->zap_object, 0,
+ tbl->zt_blk << bs, tbl->zt_numblks << bs,
+ ZIO_PRIORITY_SYNC_READ);
}
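
For reviewers: dmu_prefetch() gained two parameters in this update, an
indirection level and a ZIO priority. A minimal sketch of the new call
shape (object/blkid/nblks are illustrative names):

	/* Prefetch nblks level-0 blocks at synchronous-read priority. */
	dmu_prefetch(os, object, 0 /* level: leaf blocks */,
	    blkid << bs, nblks << bs, ZIO_PRIORITY_SYNC_READ);
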
/*
@@ -269,6 +271,7 @@ zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
uint64_t blk, off;
int err;
dmu_buf_t *db;
+ dnode_t *dn;
int bs = FZAP_BLOCK_SHIFT(zap);
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
@@ -276,8 +279,15 @@ zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
blk = idx >> (bs-3);
off = idx & ((1<<(bs-3))-1);
- err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
+ /*
+ * Note: this is equivalent to dmu_buf_hold(), but we use
+ * _dnode_enter / _by_dnode because it's faster, since we don't
+ * have to hold the dnode.
+ */
+ dn = dmu_buf_dnode_enter(zap->zap_dbuf);
+ err = dmu_buf_hold_by_dnode(dn,
(tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
+ dmu_buf_dnode_exit(zap->zap_dbuf);
if (err)
return (err);
*valp = ((uint64_t *)db->db_data)[off];
@@ -291,9 +301,11 @@ zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
*/
blk = (idx*2) >> (bs-3);
- err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
+ dn = dmu_buf_dnode_enter(zap->zap_dbuf);
+ err = dmu_buf_hold_by_dnode(dn,
(tbl->zt_nextblk + blk) << bs, FTAG, &db,
DMU_READ_NO_PREFETCH);
+ dmu_buf_dnode_exit(zap->zap_dbuf);
if (err == 0)
dmu_buf_rele(db, FTAG);
}
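
The hold pattern introduced above pins the dnode once rather than
re-resolving the object number inside dmu_buf_hold(). A hedged sketch of
a complete read built on these interfaces (the helper name is
illustrative, not part of the patch):

	static int
	zap_table_read_u64(zap_t *zap, uint64_t off, uint64_t idx,
	    uint64_t *valp)
	{
		dnode_t *dn;
		dmu_buf_t *db;
		int err;

		dn = dmu_buf_dnode_enter(zap->zap_dbuf);	/* pin dnode */
		err = dmu_buf_hold_by_dnode(dn, off, FTAG, &db,
		    DMU_READ_NO_PREFETCH);
		dmu_buf_dnode_exit(zap->zap_dbuf);		/* unpin */
		if (err != 0)
			return (err);
		*valp = ((uint64_t *)db->db_data)[idx];
		dmu_buf_rele(db, FTAG);
		return (0);
	}
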
@@ -388,7 +400,7 @@ zap_allocate_blocks(zap_t *zap, int nblocks)
}
static void
-zap_leaf_pageout(void *dbu)
+zap_leaf_evict_sync(void *dbu)
{
zap_leaf_t *l = dbu;
@@ -404,7 +416,7 @@ zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
- rw_init(&l->l_rwlock, NULL, RW_DEFAULT, NULL);
+ rw_init(&l->l_rwlock, NULL, RW_NOLOCKDEP, NULL);
rw_enter(&l->l_rwlock, RW_WRITER);
l->l_blkid = zap_allocate_blocks(zap, 1);
l->l_dbuf = NULL;
@@ -412,7 +424,7 @@ zap_create_leaf(zap_t *zap, dmu_tx_t *tx)
VERIFY(0 == dmu_buf_hold(zap->zap_objset, zap->zap_object,
l->l_blkid << FZAP_BLOCK_SHIFT(zap), NULL, &l->l_dbuf,
DMU_READ_NO_PREFETCH));
- dmu_buf_init_user(&l->l_dbu, zap_leaf_pageout, &l->l_dbuf);
+ dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, &l->l_dbuf);
winner = dmu_buf_set_user(l->l_dbuf, &l->l_dbu);
ASSERT(winner == NULL);
dmu_buf_will_dirty(l->l_dbuf, tx);
@@ -459,13 +471,13 @@ zap_open_leaf(uint64_t blkid, dmu_buf_t *db)
l->l_bs = highbit64(db->db_size) - 1;
l->l_dbuf = db;
- dmu_buf_init_user(&l->l_dbu, zap_leaf_pageout, &l->l_dbuf);
+ dmu_buf_init_user(&l->l_dbu, zap_leaf_evict_sync, NULL, &l->l_dbuf);
winner = dmu_buf_set_user(db, &l->l_dbu);
rw_exit(&l->l_rwlock);
if (winner != NULL) {
/* someone else set it first */
- zap_leaf_pageout(&l->l_dbu);
+ zap_leaf_evict_sync(&l->l_dbu);
l = winner;
}
@@ -501,6 +513,7 @@ zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt,
zap_leaf_t *l;
int bs = FZAP_BLOCK_SHIFT(zap);
int err;
+ dnode_t *dn;
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
@@ -514,8 +527,10 @@ zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt,
if (blkid == 0)
return (ENOENT);
- err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
+ dn = dmu_buf_dnode_enter(zap->zap_dbuf);
+ err = dmu_buf_hold_by_dnode(dn,
blkid << bs, NULL, &db, DMU_READ_NO_PREFETCH);
+ dmu_buf_dnode_exit(zap->zap_dbuf);
if (err)
return (err);
@@ -584,7 +599,13 @@ zap_deref_leaf(zap_t *zap, uint64_t h, dmu_tx_t *tx, krw_t lt, zap_leaf_t **lp)
ASSERT(zap->zap_dbuf == NULL ||
zap_f_phys(zap) == zap->zap_dbuf->db_data);
- ASSERT3U(zap_f_phys(zap)->zap_magic, ==, ZAP_MAGIC);
+
+ /* Reality check for corrupt zap objects (leaf or header). */
+ if ((zap_f_phys(zap)->zap_block_type != ZBT_LEAF &&
+ zap_f_phys(zap)->zap_block_type != ZBT_HEADER) ||
+ zap_f_phys(zap)->zap_magic != ZAP_MAGIC) {
+ return (SET_ERROR(EIO));
+ }
idx = ZAP_HASH_IDX(h, zap_f_phys(zap)->zap_ptrtbl.zt_shift);
err = zap_idx_to_blk(zap, idx, &blk);
if (err != 0)
@@ -598,7 +619,8 @@ zap_deref_leaf(zap_t *zap, uint64_t h, dmu_tx_t *tx, krw_t lt, zap_leaf_t **lp)
}
static int
-zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx, zap_leaf_t **lp)
+zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l,
+ void *tag, dmu_tx_t *tx, zap_leaf_t **lp)
{
zap_t *zap = zn->zn_zap;
uint64_t hash = zn->zn_hash;
@@ -620,9 +642,9 @@ zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx, zap_leaf_t **lp)
uint64_t object = zap->zap_object;
zap_put_leaf(l);
- zap_unlockdir(zap);
+ zap_unlockdir(zap, tag);
err = zap_lockdir(os, object, tx, RW_WRITER,
- FALSE, FALSE, &zn->zn_zap);
+ FALSE, FALSE, tag, &zn->zn_zap);
zap = zn->zn_zap;
if (err)
return (err);
@@ -672,6 +694,8 @@ zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx, zap_leaf_t **lp)
ASSERT0(err); /* we checked for i/o errors above */
}
+ ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_prefix_len, >, 0);
+
if (hash & (1ULL << (64 - zap_leaf_phys(l)->l_hdr.lh_prefix_len))) {
/* we want the sibling */
zap_put_leaf(l);
@@ -685,7 +709,8 @@ zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx, zap_leaf_t **lp)
}
static void
-zap_put_leaf_maybe_grow_ptrtbl(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx)
+zap_put_leaf_maybe_grow_ptrtbl(zap_name_t *zn, zap_leaf_t *l,
+ void *tag, dmu_tx_t *tx)
{
zap_t *zap = zn->zn_zap;
int shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift;
@@ -705,9 +730,9 @@ zap_put_leaf_maybe_grow_ptrtbl(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx)
objset_t *os = zap->zap_objset;
uint64_t zapobj = zap->zap_object;
- zap_unlockdir(zap);
+ zap_unlockdir(zap, tag);
err = zap_lockdir(os, zapobj, tx,
- RW_WRITER, FALSE, FALSE, &zn->zn_zap);
+ RW_WRITER, FALSE, FALSE, tag, &zn->zn_zap);
zap = zn->zn_zap;
if (err)
return;
@@ -797,7 +822,7 @@ fzap_lookup(zap_name_t *zn,
int
fzap_add_cd(zap_name_t *zn,
uint64_t integer_size, uint64_t num_integers,
- const void *val, uint32_t cd, dmu_tx_t *tx)
+ const void *val, uint32_t cd, void *tag, dmu_tx_t *tx)
{
zap_leaf_t *l;
int err;
@@ -826,7 +851,7 @@ fzap_add_cd(zap_name_t *zn,
if (err == 0) {
zap_increment_num_entries(zap, 1, tx);
} else if (err == EAGAIN) {
- err = zap_expand_leaf(zn, l, tx, &l);
+ err = zap_expand_leaf(zn, l, tag, tx, &l);
zap = zn->zn_zap; /* zap_expand_leaf() may change zap */
if (err == 0)
goto retry;
@@ -834,26 +859,27 @@ fzap_add_cd(zap_name_t *zn,
out:
if (zap != NULL)
- zap_put_leaf_maybe_grow_ptrtbl(zn, l, tx);
+ zap_put_leaf_maybe_grow_ptrtbl(zn, l, tag, tx);
return (err);
}
int
fzap_add(zap_name_t *zn,
uint64_t integer_size, uint64_t num_integers,
- const void *val, dmu_tx_t *tx)
+ const void *val, void *tag, dmu_tx_t *tx)
{
int err = fzap_check(zn, integer_size, num_integers);
if (err != 0)
return (err);
return (fzap_add_cd(zn, integer_size, num_integers,
- val, ZAP_NEED_CD, tx));
+ val, ZAP_NEED_CD, tag, tx));
}
int
fzap_update(zap_name_t *zn,
- int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
+ int integer_size, uint64_t num_integers, const void *val,
+ void *tag, dmu_tx_t *tx)
{
zap_leaf_t *l;
int err, create;
@@ -883,14 +909,14 @@ fzap_update(zap_name_t *zn,
}
if (err == EAGAIN) {
- err = zap_expand_leaf(zn, l, tx, &l);
+ err = zap_expand_leaf(zn, l, tag, tx, &l);
zap = zn->zn_zap; /* zap_expand_leaf() may change zap */
if (err == 0)
goto retry;
}
if (zap != NULL)
- zap_put_leaf_maybe_grow_ptrtbl(zn, l, tx);
+ zap_put_leaf_maybe_grow_ptrtbl(zn, l, tag, tx);
return (err);
}
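
Both fzap_add_cd() and fzap_update() funnel the caller's tag through the
same EAGAIN retry loop. A condensed sketch of that control flow, using
the names from these hunks (the zap_entry_create() signature is assumed
from context):

	retry:
		err = zap_entry_create(l, zn, cd, integer_size,
		    num_integers, val, &zeh);
		if (err == EAGAIN) {
			/* The target leaf is full: split it, try again. */
			err = zap_expand_leaf(zn, l, tag, tx, &l);
			zap = zn->zn_zap; /* zap_expand_leaf() may change zap */
			if (err == 0)
				goto retry;
		}
		if (zap != NULL)
			zap_put_leaf_maybe_grow_ptrtbl(zn, l, tag, tx);
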
@@ -949,7 +975,8 @@ fzap_prefetch(zap_name_t *zn)
if (zap_idx_to_blk(zap, idx, &blk) != 0)
return;
bs = FZAP_BLOCK_SHIFT(zap);
- dmu_prefetch(zap->zap_objset, zap->zap_object, blk << bs, 1 << bs);
+ dmu_prefetch(zap->zap_objset, zap->zap_object, 0, blk << bs, 1 << bs,
+ ZIO_PRIORITY_SYNC_READ);
}
/*
@@ -959,12 +986,20 @@ fzap_prefetch(zap_name_t *zn)
uint64_t
zap_create_link(objset_t *os, dmu_object_type_t ot, uint64_t parent_obj,
const char *name, dmu_tx_t *tx)
+{
+ return (zap_create_link_dnsize(os, ot, parent_obj, name, 0, tx));
+}
+
+uint64_t
+zap_create_link_dnsize(objset_t *os, dmu_object_type_t ot, uint64_t parent_obj,
+ const char *name, int dnodesize, dmu_tx_t *tx)
{
uint64_t new_obj;
- VERIFY((new_obj = zap_create(os, ot, DMU_OT_NONE, 0, tx)) > 0);
- VERIFY(zap_add(os, parent_obj, name, sizeof (uint64_t), 1, &new_obj,
- tx) == 0);
+ VERIFY((new_obj = zap_create_dnsize(os, ot, DMU_OT_NONE, 0,
+ dnodesize, tx)) > 0);
+ VERIFY0(zap_add(os, parent_obj, name, sizeof (uint64_t), 1, &new_obj,
+ tx));
return (new_obj);
}
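
The _dnsize plumbing in this file follows one pattern: each legacy entry
point delegates with dnodesize 0, preserving the old minimum-size dnode
behavior. A hedged caller-side sketch (parent_obj and the name are
illustrative):

	/* Create a linked ZAP with an explicit 1K dnode; 0 keeps legacy. */
	uint64_t obj = zap_create_link_dnsize(os, DMU_OT_DIRECTORY_CONTENTS,
	    parent_obj, "subdir", 1024, tx);
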
@@ -1198,17 +1233,23 @@ fzap_cursor_retrieve(zap_t *zap, zap_cursor_t *zc, zap_attribute_t *za)
err = zap_leaf_lookup_closest(l, zc->zc_hash, zc->zc_cd, &zeh);
if (err == ENOENT) {
- uint64_t nocare =
- (1ULL << (64 - zap_leaf_phys(l)->l_hdr.lh_prefix_len)) - 1;
- zc->zc_hash = (zc->zc_hash & ~nocare) + nocare + 1;
- zc->zc_cd = 0;
- if (zap_leaf_phys(l)->l_hdr.lh_prefix_len == 0 ||
- zc->zc_hash == 0) {
+ if (zap_leaf_phys(l)->l_hdr.lh_prefix_len == 0) {
zc->zc_hash = -1ULL;
+ zc->zc_cd = 0;
} else {
- zap_put_leaf(zc->zc_leaf);
- zc->zc_leaf = NULL;
- goto again;
+ uint64_t nocare = (1ULL <<
+ (64 - zap_leaf_phys(l)->l_hdr.lh_prefix_len)) - 1;
+
+ zc->zc_hash = (zc->zc_hash & ~nocare) + nocare + 1;
+ zc->zc_cd = 0;
+
+ if (zc->zc_hash == 0) {
+ zc->zc_hash = -1ULL;
+ } else {
+ zap_put_leaf(zc->zc_leaf);
+ zc->zc_leaf = NULL;
+ goto again;
+ }
}
}
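
The restructured ENOENT path is easier to verify with a concrete value.
With lh_prefix_len == 4 the leaf covers a 2^60-wide slice of the hash
space; the cursor is rounded up to the start of the next slice, and a
wrap to zero means the iteration is complete:

	uint64_t prefix_len = 4;				/* example */
	uint64_t nocare = (1ULL << (64 - prefix_len)) - 1;	/* low 60 bits */
	uint64_t next = (hash & ~nocare) + nocare + 1;

	if (next == 0)
		next = -1ULL;	/* hash space exhausted: end of iteration */
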
@@ -1295,9 +1336,10 @@ fzap_get_stats(zap_t *zap, zap_stats_t *zs)
} else {
int b;
- dmu_prefetch(zap->zap_objset, zap->zap_object,
+ dmu_prefetch(zap->zap_objset, zap->zap_object, 0,
zap_f_phys(zap)->zap_ptrtbl.zt_blk << bs,
- zap_f_phys(zap)->zap_ptrtbl.zt_numblks << bs);
+ zap_f_phys(zap)->zap_ptrtbl.zt_numblks << bs,
+ ZIO_PRIORITY_SYNC_READ);
for (b = 0; b < zap_f_phys(zap)->zap_ptrtbl.zt_numblks;
b++) {
@@ -1315,58 +1357,3 @@ fzap_get_stats(zap_t *zap, zap_stats_t *zs)
}
}
}
-
-int
-fzap_count_write(zap_name_t *zn, int add, uint64_t *towrite,
- uint64_t *tooverwrite)
-{
- zap_t *zap = zn->zn_zap;
- zap_leaf_t *l;
- int err;
-
- /*
- * Account for the header block of the fatzap.
- */
- if (!add && dmu_buf_freeable(zap->zap_dbuf)) {
- *tooverwrite += zap->zap_dbuf->db_size;
- } else {
- *towrite += zap->zap_dbuf->db_size;
- }
-
- /*
- * Account for the pointer table blocks.
- * If we are adding, we need to account for the following cases:
- * - If the pointer table is embedded, this operation could force an
- * external pointer table.
- * - If this already has an external pointer table this operation
- * could extend the table.
- */
- if (add) {
- if (zap_f_phys(zap)->zap_ptrtbl.zt_blk == 0)
- *towrite += zap->zap_dbuf->db_size;
- else
- *towrite += (zap->zap_dbuf->db_size * 3);
- }
-
- /*
- * Now, check if the block containing leaf is freeable
- * and account accordingly.
- */
- err = zap_deref_leaf(zap, zn->zn_hash, NULL, RW_READER, &l);
- if (err != 0) {
- return (err);
- }
-
- if (!add && dmu_buf_freeable(l->l_dbuf)) {
- *tooverwrite += l->l_dbuf->db_size;
- } else {
- /*
- * If this an add operation, the leaf block could split.
- * Hence, we need to account for an additional leaf block.
- */
- *towrite += (add ? 2 : 1) * l->l_dbuf->db_size;
- }
-
- zap_put_leaf(l);
- return (0);
-}
diff --git a/zfs/module/zfs/zap_leaf.c b/zfs/module/zfs/zap_leaf.c
index 3abc08cff476..c342695c7f42 100644
--- a/zfs/module/zfs/zap_leaf.c
+++ b/zfs/module/zfs/zap_leaf.c
@@ -18,9 +18,11 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
*/
/*
@@ -364,7 +366,7 @@ zap_leaf_array_match(zap_leaf_t *l, zap_name_t *zn,
}
ASSERT(zn->zn_key_intlen == 1);
- if (zn->zn_matchtype == MT_FIRST) {
+ if (zn->zn_matchtype & MT_NORMALIZE) {
char *thisname = kmem_alloc(array_numints, KM_SLEEP);
boolean_t match;
@@ -406,7 +408,6 @@ zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh)
ASSERT3U(zap_leaf_phys(l)->l_hdr.lh_magic, ==, ZAP_LEAF_MAGIC);
-again:
for (chunkp = LEAF_HASH_ENTPTR(l, zn->zn_hash);
*chunkp != CHAIN_END; chunkp = &le->le_next) {
uint16_t chunk = *chunkp;
@@ -421,9 +422,9 @@ zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh)
/*
* NB: the entry chain is always sorted by cd on
* normalized zap objects, so this will find the
- * lowest-cd match for MT_FIRST.
+ * lowest-cd match for MT_NORMALIZE.
*/
- ASSERT(zn->zn_matchtype == MT_EXACT ||
+ ASSERT((zn->zn_matchtype == 0) ||
(zap_leaf_phys(l)->l_hdr.lh_flags & ZLF_ENTRIES_CDSORTED));
if (zap_leaf_array_match(l, zn, le->le_name_chunk,
le->le_name_numints)) {
@@ -437,15 +438,6 @@ zap_leaf_lookup(zap_leaf_t *l, zap_name_t *zn, zap_entry_handle_t *zeh)
}
}
- /*
- * NB: we could of course do this in one pass, but that would be
- * a pain. We'll see if MT_BEST is even used much.
- */
- if (zn->zn_matchtype == MT_BEST) {
- zn->zn_matchtype = MT_FIRST;
- goto again;
- }
-
return (SET_ERROR(ENOENT));
}
@@ -538,7 +530,7 @@ zap_entry_read_name(zap_t *zap, const zap_entry_handle_t *zeh, uint16_t buflen,
int
zap_entry_update(zap_entry_handle_t *zeh,
- uint8_t integer_size, uint64_t num_integers, const void *buf)
+ uint8_t integer_size, uint64_t num_integers, const void *buf)
{
int delta_chunks;
zap_leaf_t *l = zeh->zeh_leaf;
@@ -700,7 +692,7 @@ zap_entry_normalization_conflict(zap_entry_handle_t *zeh, zap_name_t *zn,
continue;
if (zn == NULL) {
- zn = zap_name_alloc(zap, name, MT_FIRST);
+ zn = zap_name_alloc(zap, name, MT_NORMALIZE);
allocdzn = B_TRUE;
}
if (zap_leaf_array_match(zeh->zeh_leaf, zn,
diff --git a/zfs/module/zfs/zap_micro.c b/zfs/module/zfs/zap_micro.c
index 85b465b05284..3ebf995c6780 100644
--- a/zfs/module/zfs/zap_micro.c
+++ b/zfs/module/zfs/zap_micro.c
@@ -18,10 +18,12 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
*/
#include <sys/zio.h>
@@ -42,7 +44,8 @@
extern inline mzap_phys_t *zap_m_phys(zap_t *zap);
-static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags);
+static int mzap_upgrade(zap_t **zapp,
+ void *tag, dmu_tx_t *tx, zap_flags_t flags);
uint64_t
zap_getflags(zap_t *zap)
@@ -123,7 +126,7 @@ zap_hash(zap_name_t *zn)
* Don't use all 64 bits, since we need some in the cookie for
* the collision differentiator. We MUST use the high bits,
* since those are the ones that we first pay attention to when
- * chosing the bucket.
+ * choosing the bucket.
*/
h &= ~((1ULL << (64 - zap_hashbits(zap))) - 1);
@@ -131,7 +134,7 @@ zap_hash(zap_name_t *zn)
}
static int
-zap_normalize(zap_t *zap, const char *name, char *namenorm)
+zap_normalize(zap_t *zap, const char *name, char *namenorm, int normflags)
{
size_t inlen, outlen;
int err;
@@ -143,8 +146,8 @@ zap_normalize(zap_t *zap, const char *name, char *namenorm)
err = 0;
(void) u8_textprep_str((char *)name, &inlen, namenorm, &outlen,
- zap->zap_normflags | U8_TEXTPREP_IGNORE_NULL |
- U8_TEXTPREP_IGNORE_INVALID, U8_UNICODE_LATEST, &err);
+ normflags | U8_TEXTPREP_IGNORE_NULL | U8_TEXTPREP_IGNORE_INVALID,
+ U8_UNICODE_LATEST, &err);
return (err);
}
@@ -154,15 +157,15 @@ zap_match(zap_name_t *zn, const char *matchname)
{
ASSERT(!(zap_getflags(zn->zn_zap) & ZAP_FLAG_UINT64_KEY));
- if (zn->zn_matchtype == MT_FIRST) {
+ if (zn->zn_matchtype & MT_NORMALIZE) {
char norm[ZAP_MAXNAMELEN];
- if (zap_normalize(zn->zn_zap, matchname, norm) != 0)
+ if (zap_normalize(zn->zn_zap, matchname, norm,
+ zn->zn_normflags) != 0)
return (B_FALSE);
return (strcmp(zn->zn_key_norm, norm) == 0);
} else {
- /* MT_BEST or MT_EXACT */
return (strcmp(zn->zn_key_orig, matchname) == 0);
}
}
@@ -183,15 +186,30 @@ zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
zn->zn_key_orig = key;
zn->zn_key_orig_numints = strlen(zn->zn_key_orig) + 1;
zn->zn_matchtype = mt;
+ zn->zn_normflags = zap->zap_normflags;
+
+ /*
+ * If we're dealing with a case-sensitive lookup on a mixed or
+ * insensitive fs, remove U8_TEXTPREP_TOUPPER or the lookup
+ * will fold case to all caps, overriding the lookup request.
+ */
+ if (mt & MT_MATCH_CASE)
+ zn->zn_normflags &= ~U8_TEXTPREP_TOUPPER;
+
if (zap->zap_normflags) {
- if (zap_normalize(zap, key, zn->zn_normbuf) != 0) {
+ /*
+ * We *must* use zap_normflags because this normalization is
+ * what the hash is computed from.
+ */
+ if (zap_normalize(zap, key, zn->zn_normbuf,
+ zap->zap_normflags) != 0) {
zap_name_free(zn);
return (NULL);
}
zn->zn_key_norm = zn->zn_normbuf;
zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
} else {
- if (mt != MT_EXACT) {
+ if (mt != 0) {
zap_name_free(zn);
return (NULL);
}
@@ -200,6 +218,20 @@ zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt)
}
zn->zn_hash = zap_hash(zn);
+
+ if (zap->zap_normflags != zn->zn_normflags) {
+ /*
+ * We *must* use zn_normflags because this normalization is
+ * what the matching is based on. (Not the hash!)
+ */
+ if (zap_normalize(zap, key, zn->zn_normbuf,
+ zn->zn_normflags) != 0) {
+ zap_name_free(zn);
+ return (NULL);
+ }
+ zn->zn_key_norm_numints = strlen(zn->zn_key_norm) + 1;
+ }
+
return (zn);
}
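
With the matchtype rework, callers compose lookup behavior from flag
bits instead of choosing one of MT_EXACT/MT_FIRST/MT_BEST. A hedged
sketch of a normalization-insensitive but case-sensitive lookup (the
name and value width are illustrative):

	uint64_t value;
	int err = zap_lookup_norm(os, zapobj, "ReadMe", 8, 1, &value,
	    MT_NORMALIZE | MT_MATCH_CASE, NULL, 0, NULL);
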
@@ -213,7 +245,7 @@ zap_name_alloc_uint64(zap_t *zap, const uint64_t *key, int numints)
zn->zn_key_intlen = sizeof (*key);
zn->zn_key_orig = zn->zn_key_norm = key;
zn->zn_key_orig_numints = zn->zn_key_norm_numints = numints;
- zn->zn_matchtype = MT_EXACT;
+ zn->zn_matchtype = 0;
zn->zn_hash = zap_hash(zn);
return (zn);
@@ -256,15 +288,11 @@ mze_compare(const void *arg1, const void *arg2)
const mzap_ent_t *mze1 = arg1;
const mzap_ent_t *mze2 = arg2;
- if (mze1->mze_hash > mze2->mze_hash)
- return (+1);
- if (mze1->mze_hash < mze2->mze_hash)
- return (-1);
- if (mze1->mze_cd > mze2->mze_cd)
- return (+1);
- if (mze1->mze_cd < mze2->mze_cd)
- return (-1);
- return (0);
+ int cmp = AVL_CMP(mze1->mze_hash, mze2->mze_hash);
+ if (likely(cmp))
+ return (cmp);
+
+ return (AVL_CMP(mze1->mze_cd, mze2->mze_cd));
}
static void
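
The rewritten comparator relies on the AVL_CMP() helper; for reference,
its definition (as found in sys/avl.h in this tree, quoted here on the
assumption it is unchanged) reduces the three-way compare to branchless
arithmetic:

	#define	AVL_CMP(a, b)	(((a) > (b)) - ((a) < (b)))

It evaluates to -1, 0, or +1, and the likely() hint marks the cd
tiebreak as the cold path, since two entries rarely share a hash.
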
@@ -297,7 +325,6 @@ mze_find(zap_name_t *zn)
mze_tofind.mze_hash = zn->zn_hash;
mze_tofind.mze_cd = 0;
-again:
mze = avl_find(avl, &mze_tofind, &idx);
if (mze == NULL)
mze = avl_nearest(avl, idx, AVL_AFTER);
@@ -306,10 +333,7 @@ mze_find(zap_name_t *zn)
if (zap_match(zn, MZE_PHYS(zn->zn_zap, mze)->mze_name))
return (mze);
}
- if (zn->zn_matchtype == MT_BEST) {
- zn->zn_matchtype = MT_FIRST;
- goto again;
- }
+
return (NULL);
}
@@ -366,6 +390,9 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
zap_t *winner;
zap_t *zap;
int i;
+ uint64_t *zap_hdr = (uint64_t *)db->db_data;
+ uint64_t zap_block_type = zap_hdr[0];
+ uint64_t zap_magic = zap_hdr[1];
ASSERT3U(MZAP_ENT_LEN, ==, sizeof (mzap_ent_phys_t));
@@ -376,9 +403,14 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
zap->zap_object = obj;
zap->zap_dbuf = db;
- if (*(uint64_t *)db->db_data != ZBT_MICRO) {
- mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, 0, 0);
+ if (zap_block_type != ZBT_MICRO) {
+ mutex_init(&zap->zap_f.zap_num_entries_mtx, 0, MUTEX_DEFAULT,
+ 0);
zap->zap_f.zap_block_shift = highbit64(db->db_size) - 1;
+ if (zap_block_type != ZBT_HEADER || zap_magic != ZAP_MAGIC) {
+ winner = NULL; /* No actual winner here... */
+ goto handle_winner;
+ }
} else {
zap->zap_ismicro = TRUE;
}
@@ -388,17 +420,11 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
* it, because zap_lockdir() checks zap_ismicro without the lock
* held.
*/
- dmu_buf_init_user(&zap->zap_dbu, zap_evict, &zap->zap_dbuf);
+ dmu_buf_init_user(&zap->zap_dbu, zap_evict_sync, NULL, &zap->zap_dbuf);
winner = dmu_buf_set_user(db, &zap->zap_dbu);
- if (winner != NULL) {
- rw_exit(&zap->zap_rwlock);
- rw_destroy(&zap->zap_rwlock);
- if (!zap->zap_ismicro)
- mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
- kmem_free(zap, sizeof (zap_t));
- return (winner);
- }
+ if (winner != NULL)
+ goto handle_winner;
if (zap->zap_ismicro) {
zap->zap_salt = zap_m_phys(zap)->mz_salt;
@@ -414,8 +440,7 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
zap_name_t *zn;
zap->zap_m.zap_num_entries++;
- zn = zap_name_alloc(zap, mze->mze_name,
- MT_EXACT);
+ zn = zap_name_alloc(zap, mze->mze_name, 0);
mze_insert(zap, i, zn->zn_hash);
zap_name_free(zn);
}
@@ -445,31 +470,45 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
}
rw_exit(&zap->zap_rwlock);
return (zap);
+
+handle_winner:
+ rw_exit(&zap->zap_rwlock);
+ rw_destroy(&zap->zap_rwlock);
+ if (!zap->zap_ismicro)
+ mutex_destroy(&zap->zap_f.zap_num_entries_mtx);
+ kmem_free(zap, sizeof (zap_t));
+ return (winner);
}
-int
-zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
+static int
+zap_lockdir_impl(dmu_buf_t *db, void *tag, dmu_tx_t *tx,
krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
{
dmu_object_info_t doi;
zap_t *zap;
- dmu_buf_t *db;
krw_t lt;
- int err;
- *zapp = NULL;
+ objset_t *os = dmu_buf_get_objset(db);
+ uint64_t obj = db->db_object;
- err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH);
- if (err)
- return (err);
+ ASSERT0(db->db_offset);
+ *zapp = NULL;
dmu_object_info_from_db(db, &doi);
if (DMU_OT_BYTESWAP(doi.doi_type) != DMU_BSWAP_ZAP)
return (SET_ERROR(EINVAL));
zap = dmu_buf_get_user(db);
- if (zap == NULL)
+ if (zap == NULL) {
zap = mzap_open(os, obj, db);
+ if (zap == NULL) {
+ /*
+ * mzap_open() didn't like what it saw on-disk.
+ * Check for corruption!
+ */
+ return (SET_ERROR(EIO));
+ }
+ }
/*
* We're checking zap_ismicro without the lock held, in order to
@@ -502,13 +541,16 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
zap->zap_m.zap_num_entries == zap->zap_m.zap_num_chunks) {
uint64_t newsz = db->db_size + SPA_MINBLOCKSIZE;
if (newsz > MZAP_MAX_BLKSZ) {
+ int err;
dprintf("upgrading obj %llu: num_entries=%u\n",
obj, zap->zap_m.zap_num_entries);
*zapp = zap;
- return (mzap_upgrade(zapp, tx, 0));
+ err = mzap_upgrade(zapp, tag, tx, 0);
+ if (err != 0)
+ rw_exit(&zap->zap_rwlock);
+ return (err);
}
- err = dmu_object_set_blocksize(os, obj, newsz, 0, tx);
- ASSERT0(err);
+ VERIFY0(dmu_object_set_blocksize(os, obj, newsz, 0, tx));
zap->zap_m.zap_num_chunks =
db->db_size / MZAP_ENT_LEN - 1;
}
@@ -517,15 +559,49 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
return (0);
}
+static int
+zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx,
+ krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp)
+{
+ dmu_buf_t *db;
+ int err;
+
+ err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
+ if (err != 0) {
+ return (err);
+ }
+ err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
+ if (err != 0) {
+ dmu_buf_rele(db, tag);
+ }
+ return (err);
+}
+
+int
+zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
+ krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp)
+{
+ dmu_buf_t *db;
+ int err;
+
+ err = dmu_buf_hold(os, obj, 0, tag, &db, DMU_READ_NO_PREFETCH);
+ if (err != 0)
+ return (err);
+ err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
+ if (err != 0)
+ dmu_buf_rele(db, tag);
+ return (err);
+}
+
void
-zap_unlockdir(zap_t *zap)
+zap_unlockdir(zap_t *zap, void *tag)
{
rw_exit(&zap->zap_rwlock);
- dmu_buf_rele(zap->zap_dbuf, NULL);
+ dmu_buf_rele(zap->zap_dbuf, tag);
}
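
Every zap_lockdir() variant now threads a tag through to the underlying
dbuf hold, so the hold is attributable to its caller; the matching
zap_unlockdir() must pass the same tag. A hedged usage sketch:

	zap_t *zap;
	int err;

	err = zap_lockdir(os, obj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
	if (err != 0)
		return (err);
	/* ... operate on zap ... */
	zap_unlockdir(zap, FTAG);	/* release with the tag used to lock */
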
static int
-mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags)
+mzap_upgrade(zap_t **zapp, void *tag, dmu_tx_t *tx, zap_flags_t flags)
{
mzap_phys_t *mzp;
int i, sz, nchunks;
@@ -535,7 +611,7 @@ mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags)
ASSERT(RW_WRITE_HELD(&zap->zap_rwlock));
sz = zap->zap_dbuf->db_size;
- mzp = zio_buf_alloc(sz);
+ mzp = vmem_alloc(sz, KM_SLEEP);
bcopy(zap->zap_dbuf->db_data, mzp, sz);
nchunks = zap->zap_m.zap_num_chunks;
@@ -543,7 +619,7 @@ mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags)
err = dmu_object_set_blocksize(zap->zap_objset, zap->zap_object,
1ULL << fzap_default_block_shift, 0, tx);
if (err) {
- zio_buf_free(mzp, sz);
+ vmem_free(mzp, sz);
return (err);
}
}
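
Moving the scratch copy from zio_buf_alloc() to vmem_alloc() keeps this
short-lived, CPU-only snapshot of the microzap out of the I/O buffer
caches, and tolerates whatever db_size happens to be. The pattern, in
brief:

	sz = zap->zap_dbuf->db_size;
	mzp = vmem_alloc(sz, KM_SLEEP);		/* plain sleeping allocation */
	bcopy(zap->zap_dbuf->db_data, mzp, sz);
	/* ... replay the microzap entries into the new fatzap ... */
	vmem_free(mzp, sz);
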
@@ -562,18 +638,36 @@ mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags)
continue;
dprintf("adding %s=%llu\n",
mze->mze_name, mze->mze_value);
- zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT);
- err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tx);
+ zn = zap_name_alloc(zap, mze->mze_name, 0);
+ err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
+ tag, tx);
zap = zn->zn_zap; /* fzap_add_cd() may change zap */
zap_name_free(zn);
if (err)
break;
}
- zio_buf_free(mzp, sz);
+ vmem_free(mzp, sz);
*zapp = zap;
return (err);
}
+/*
+ * The "normflags" determine the behavior of the matchtype_t which is
+ * passed to zap_lookup_norm(). Names which have the same normalized
+ * version will be stored with the same hash value, and therefore we can
+ * perform normalization-insensitive lookups. We can be Unicode form-
+ * insensitive and/or case-insensitive. The following flags are valid for
+ * "normflags":
+ *
+ * U8_TEXTPREP_NFC
+ * U8_TEXTPREP_NFD
+ * U8_TEXTPREP_NFKC
+ * U8_TEXTPREP_NFKD
+ * U8_TEXTPREP_TOUPPER
+ *
+ * The *_NF* (Normalization Form) flags are mutually exclusive; at most one
+ * of them may be supplied.
+ */
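
As a hedged example of the rules above: a case- and form-insensitive ZAP
picks exactly one normalization form, optionally combined with the case
fold (object type and tx are illustrative):

	int normflags = U8_TEXTPREP_NFC | U8_TEXTPREP_TOUPPER;
	uint64_t obj = zap_create_norm(os, normflags,
	    DMU_OT_DIRECTORY_CONTENTS, DMU_OT_NONE, 0, tx);
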
void
mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags,
dmu_tx_t *tx)
@@ -581,7 +675,7 @@ mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags,
dmu_buf_t *db;
mzap_phys_t *zp;
- VERIFY(0 == dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH));
+ VERIFY0(dmu_buf_hold(os, obj, 0, FTAG, &db, DMU_READ_NO_PREFETCH));
#ifdef ZFS_DEBUG
{
@@ -602,9 +696,9 @@ mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags,
zap_t *zap;
/* Only fat zap supports flags; upgrade immediately. */
VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER,
- B_FALSE, B_FALSE, &zap));
- VERIFY3U(0, ==, mzap_upgrade(&zap, tx, flags));
- zap_unlockdir(zap);
+ B_FALSE, B_FALSE, FTAG, &zap));
+ VERIFY3U(0, ==, mzap_upgrade(&zap, FTAG, tx, flags));
+ zap_unlockdir(zap, FTAG);
}
}
@@ -612,18 +706,36 @@ int
zap_create_claim(objset_t *os, uint64_t obj, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
- return (zap_create_claim_norm(os, obj,
- 0, ot, bonustype, bonuslen, tx));
+ return (zap_create_claim_dnsize(os, obj, ot, bonustype, bonuslen,
+ 0, tx));
+}
+
+int
+zap_create_claim_dnsize(objset_t *os, uint64_t obj, dmu_object_type_t ot,
+ dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
+{
+ return (zap_create_claim_norm_dnsize(os, obj,
+ 0, ot, bonustype, bonuslen, dnodesize, tx));
}
int
zap_create_claim_norm(objset_t *os, uint64_t obj, int normflags,
dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
+{
+ return (zap_create_claim_norm_dnsize(os, obj, normflags, ot, bonustype,
+ bonuslen, 0, tx));
+}
+
+int
+zap_create_claim_norm_dnsize(objset_t *os, uint64_t obj, int normflags,
+ dmu_object_type_t ot, dmu_object_type_t bonustype, int bonuslen,
+ int dnodesize, dmu_tx_t *tx)
{
int err;
- err = dmu_object_claim(os, obj, ot, 0, bonustype, bonuslen, tx);
+ err = dmu_object_claim_dnsize(os, obj, ot, 0, bonustype, bonuslen,
+ dnodesize, tx);
if (err != 0)
return (err);
mzap_create_impl(os, obj, normflags, 0, tx);
@@ -637,11 +749,28 @@ zap_create(objset_t *os, dmu_object_type_t ot,
return (zap_create_norm(os, 0, ot, bonustype, bonuslen, tx));
}
+uint64_t
+zap_create_dnsize(objset_t *os, dmu_object_type_t ot,
+ dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
+{
+ return (zap_create_norm_dnsize(os, 0, ot, bonustype, bonuslen,
+ dnodesize, tx));
+}
+
uint64_t
zap_create_norm(objset_t *os, int normflags, dmu_object_type_t ot,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
- uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);
+ return (zap_create_norm_dnsize(os, normflags, ot, bonustype, bonuslen,
+ 0, tx));
+}
+
+uint64_t
+zap_create_norm_dnsize(objset_t *os, int normflags, dmu_object_type_t ot,
+ dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
+{
+ uint64_t obj = dmu_object_alloc_dnsize(os, ot, 0, bonustype, bonuslen,
+ dnodesize, tx);
mzap_create_impl(os, obj, normflags, 0, tx);
return (obj);
@@ -652,7 +781,17 @@ zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx)
{
- uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);
+ return (zap_create_flags_dnsize(os, normflags, flags, ot,
+ leaf_blockshift, indirect_blockshift, bonustype, bonuslen, 0, tx));
+}
+
+uint64_t
+zap_create_flags_dnsize(objset_t *os, int normflags, zap_flags_t flags,
+ dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
+ dmu_object_type_t bonustype, int bonuslen, int dnodesize, dmu_tx_t *tx)
+{
+ uint64_t obj = dmu_object_alloc_dnsize(os, ot, 0, bonustype, bonuslen,
+ dnodesize, tx);
ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT &&
leaf_blockshift <= SPA_OLD_MAXBLOCKSHIFT &&
@@ -679,7 +818,7 @@ zap_destroy(objset_t *os, uint64_t zapobj, dmu_tx_t *tx)
}
void
-zap_evict(void *dbu)
+zap_evict_sync(void *dbu)
{
zap_t *zap = dbu;
@@ -699,7 +838,7 @@ zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
zap_t *zap;
int err;
- err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
+ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
if (err)
return (err);
if (!zap->zap_ismicro) {
@@ -707,7 +846,7 @@ zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
} else {
*count = zap->zap_m.zap_num_entries;
}
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (err);
}
@@ -732,7 +871,7 @@ mzap_normalization_conflict(zap_t *zap, zap_name_t *zn, mzap_ent_t *mze)
if (zn == NULL) {
zn = zap_name_alloc(zap, MZE_PHYS(zap, mze)->mze_name,
- MT_FIRST);
+ MT_NORMALIZE);
allocdzn = B_TRUE;
}
if (zap_match(zn, MZE_PHYS(zap, other)->mze_name)) {
@@ -761,28 +900,22 @@ zap_lookup(objset_t *os, uint64_t zapobj, const char *name,
uint64_t integer_size, uint64_t num_integers, void *buf)
{
return (zap_lookup_norm(os, zapobj, name, integer_size,
- num_integers, buf, MT_EXACT, NULL, 0, NULL));
+ num_integers, buf, 0, NULL, 0, NULL));
}
-int
-zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
+static int
+zap_lookup_impl(zap_t *zap, const char *name,
uint64_t integer_size, uint64_t num_integers, void *buf,
matchtype_t mt, char *realname, int rn_len,
boolean_t *ncp)
{
- zap_t *zap;
- int err;
+ int err = 0;
mzap_ent_t *mze;
zap_name_t *zn;
- err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
- if (err)
- return (err);
zn = zap_name_alloc(zap, name, mt);
- if (zn == NULL) {
- zap_unlockdir(zap);
+ if (zn == NULL)
return (SET_ERROR(ENOTSUP));
- }
if (!zap->zap_ismicro) {
err = fzap_lookup(zn, integer_size, num_integers, buf,
@@ -809,7 +942,24 @@ zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
}
}
zap_name_free(zn);
- zap_unlockdir(zap);
+ return (err);
+}
+
+int
+zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
+ uint64_t integer_size, uint64_t num_integers, void *buf,
+ matchtype_t mt, char *realname, int rn_len,
+ boolean_t *ncp)
+{
+ zap_t *zap;
+ int err;
+
+ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
+ if (err != 0)
+ return (err);
+ err = zap_lookup_impl(zap, name, integer_size,
+ num_integers, buf, mt, realname, rn_len, ncp);
+ zap_unlockdir(zap, FTAG);
return (err);
}
@@ -820,18 +970,45 @@ zap_prefetch(objset_t *os, uint64_t zapobj, const char *name)
int err;
zap_name_t *zn;
- err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
+ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
if (err)
return (err);
- zn = zap_name_alloc(zap, name, MT_EXACT);
+ zn = zap_name_alloc(zap, name, 0);
if (zn == NULL) {
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
}
fzap_prefetch(zn);
zap_name_free(zn);
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
+ return (err);
+}
+
+int
+zap_lookup_by_dnode(dnode_t *dn, const char *name,
+ uint64_t integer_size, uint64_t num_integers, void *buf)
+{
+ return (zap_lookup_norm_by_dnode(dn, name, integer_size,
+ num_integers, buf, 0, NULL, 0, NULL));
+}
+
+int
+zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
+ uint64_t integer_size, uint64_t num_integers, void *buf,
+ matchtype_t mt, char *realname, int rn_len,
+ boolean_t *ncp)
+{
+ zap_t *zap;
+ int err;
+
+ err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
+ FTAG, &zap);
+ if (err != 0)
+ return (err);
+ err = zap_lookup_impl(zap, name, integer_size,
+ num_integers, buf, mt, realname, rn_len, ncp);
+ zap_unlockdir(zap, FTAG);
return (err);
}
@@ -843,18 +1020,18 @@ zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int err;
zap_name_t *zn;
- err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
+ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
if (err)
return (err);
zn = zap_name_alloc_uint64(zap, key, key_numints);
if (zn == NULL) {
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
}
fzap_prefetch(zn);
zap_name_free(zn);
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (err);
}
@@ -866,19 +1043,19 @@ zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int err;
zap_name_t *zn;
- err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
+ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
if (err)
return (err);
zn = zap_name_alloc_uint64(zap, key, key_numints);
if (zn == NULL) {
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
}
err = fzap_lookup(zn, integer_size, num_integers, buf,
NULL, 0, NULL);
zap_name_free(zn);
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (err);
}
@@ -886,7 +1063,7 @@ int
zap_contains(objset_t *os, uint64_t zapobj, const char *name)
{
int err = zap_lookup_norm(os, zapobj, name, 0,
- 0, NULL, MT_EXACT, NULL, 0, NULL);
+ 0, NULL, 0, NULL, 0, NULL);
if (err == EOVERFLOW || err == EINVAL)
err = 0; /* found, but skipped reading the value */
return (err);
@@ -901,12 +1078,12 @@ zap_length(objset_t *os, uint64_t zapobj, const char *name,
mzap_ent_t *mze;
zap_name_t *zn;
- err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
+ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
if (err)
return (err);
- zn = zap_name_alloc(zap, name, MT_EXACT);
+ zn = zap_name_alloc(zap, name, 0);
if (zn == NULL) {
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
}
if (!zap->zap_ismicro) {
@@ -923,7 +1100,7 @@ zap_length(objset_t *os, uint64_t zapobj, const char *name,
}
}
zap_name_free(zn);
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (err);
}
@@ -935,17 +1112,17 @@ zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int err;
zap_name_t *zn;
- err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
+ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
if (err)
return (err);
zn = zap_name_alloc_uint64(zap, key, key_numints);
if (zn == NULL) {
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
}
err = fzap_length(zn, integer_size, num_integers);
zap_name_free(zn);
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (err);
}
@@ -977,7 +1154,8 @@ mzap_addent(zap_name_t *zn, uint64_t value)
if (mze->mze_name[0] == 0) {
mze->mze_value = value;
mze->mze_cd = cd;
- (void) strcpy(mze->mze_name, zn->zn_key_orig);
+ (void) strlcpy(mze->mze_name, zn->zn_key_orig,
+ sizeof (mze->mze_name));
zap->zap_m.zap_num_entries++;
zap->zap_m.zap_alloc_next = i+1;
if (zap->zap_m.zap_alloc_next ==
@@ -994,33 +1172,31 @@ mzap_addent(zap_name_t *zn, uint64_t value)
cmn_err(CE_PANIC, "out of entries!");
}
-int
-zap_add(objset_t *os, uint64_t zapobj, const char *key,
+static int
+zap_add_impl(zap_t *zap, const char *key,
int integer_size, uint64_t num_integers,
- const void *val, dmu_tx_t *tx)
+ const void *val, dmu_tx_t *tx, void *tag)
{
- zap_t *zap;
- int err;
+ int err = 0;
mzap_ent_t *mze;
const uint64_t *intval = val;
zap_name_t *zn;
- err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
- if (err)
- return (err);
- zn = zap_name_alloc(zap, key, MT_EXACT);
+ zn = zap_name_alloc(zap, key, 0);
if (zn == NULL) {
- zap_unlockdir(zap);
+ zap_unlockdir(zap, tag);
return (SET_ERROR(ENOTSUP));
}
if (!zap->zap_ismicro) {
- err = fzap_add(zn, integer_size, num_integers, val, tx);
+ err = fzap_add(zn, integer_size, num_integers, val, tag, tx);
zap = zn->zn_zap; /* fzap_add() may change zap */
} else if (integer_size != 8 || num_integers != 1 ||
strlen(key) >= MZAP_NAME_LEN) {
- err = mzap_upgrade(&zn->zn_zap, tx, 0);
- if (err == 0)
- err = fzap_add(zn, integer_size, num_integers, val, tx);
+ err = mzap_upgrade(&zn->zn_zap, tag, tx, 0);
+ if (err == 0) {
+ err = fzap_add(zn, integer_size, num_integers, val,
+ tag, tx);
+ }
zap = zn->zn_zap; /* fzap_add() may change zap */
} else {
mze = mze_find(zn);
@@ -1033,7 +1209,39 @@ zap_add(objset_t *os, uint64_t zapobj, const char *key,
ASSERT(zap == zn->zn_zap);
zap_name_free(zn);
if (zap != NULL) /* may be NULL if fzap_add() failed */
- zap_unlockdir(zap);
+ zap_unlockdir(zap, tag);
+ return (err);
+}
+
+int
+zap_add(objset_t *os, uint64_t zapobj, const char *key,
+ int integer_size, uint64_t num_integers,
+ const void *val, dmu_tx_t *tx)
+{
+ zap_t *zap;
+ int err;
+
+ err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
+ if (err != 0)
+ return (err);
+ err = zap_add_impl(zap, key, integer_size, num_integers, val, tx, FTAG);
+ /* zap_add_impl() calls zap_unlockdir() */
+ return (err);
+}
+
+int
+zap_add_by_dnode(dnode_t *dn, const char *key,
+ int integer_size, uint64_t num_integers,
+ const void *val, dmu_tx_t *tx)
+{
+ zap_t *zap;
+ int err;
+
+ err = zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
+ if (err != 0)
+ return (err);
+ err = zap_add_impl(zap, key, integer_size, num_integers, val, tx, FTAG);
+ /* zap_add_impl() calls zap_unlockdir() */
return (err);
}
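
zap_add() and the new zap_add_by_dnode() now share zap_add_impl(), which
consumes the lock on every path (hence the "calls zap_unlockdir()"
comments). A hedged caller-side sketch of the dnode variant, for a
caller that already holds the dnode:

	uint64_t value = 1;
	/* No objset/object-number lookup needed; dn is already held. */
	int err = zap_add_by_dnode(dn, "example-key", 8, 1, &value, tx);
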
@@ -1046,19 +1254,19 @@ zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int err;
zap_name_t *zn;
- err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
+ err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
if (err)
return (err);
zn = zap_name_alloc_uint64(zap, key, key_numints);
if (zn == NULL) {
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
}
- err = fzap_add(zn, integer_size, num_integers, val, tx);
+ err = fzap_add(zn, integer_size, num_integers, val, FTAG, tx);
zap = zn->zn_zap; /* fzap_add() may change zap */
zap_name_free(zn);
if (zap != NULL) /* may be NULL if fzap_add() failed */
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (err);
}
@@ -1083,25 +1291,27 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name,
(void) zap_lookup(os, zapobj, name, 8, 1, &oldval);
#endif
- err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
+ err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
if (err)
return (err);
- zn = zap_name_alloc(zap, name, MT_EXACT);
+ zn = zap_name_alloc(zap, name, 0);
if (zn == NULL) {
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
}
if (!zap->zap_ismicro) {
- err = fzap_update(zn, integer_size, num_integers, val, tx);
+ err = fzap_update(zn, integer_size, num_integers, val,
+ FTAG, tx);
zap = zn->zn_zap; /* fzap_update() may change zap */
} else if (integer_size != 8 || num_integers != 1 ||
strlen(name) >= MZAP_NAME_LEN) {
dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
zapobj, integer_size, num_integers, name);
- err = mzap_upgrade(&zn->zn_zap, tx, 0);
- if (err == 0)
+ err = mzap_upgrade(&zn->zn_zap, FTAG, tx, 0);
+ if (err == 0) {
err = fzap_update(zn, integer_size, num_integers,
- val, tx);
+ val, FTAG, tx);
+ }
zap = zn->zn_zap; /* fzap_update() may change zap */
} else {
mze = mze_find(zn);
@@ -1115,7 +1325,7 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name,
ASSERT(zap == zn->zn_zap);
zap_name_free(zn);
if (zap != NULL) /* may be NULL if fzap_upgrade() failed */
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (err);
}
@@ -1128,45 +1338,39 @@ zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
zap_name_t *zn;
int err;
- err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
+ err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
if (err)
return (err);
zn = zap_name_alloc_uint64(zap, key, key_numints);
if (zn == NULL) {
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
}
- err = fzap_update(zn, integer_size, num_integers, val, tx);
+ err = fzap_update(zn, integer_size, num_integers, val, FTAG, tx);
zap = zn->zn_zap; /* fzap_update() may change zap */
zap_name_free(zn);
if (zap != NULL) /* may be NULL if fzap_upgrade() failed */
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (err);
}
int
zap_remove(objset_t *os, uint64_t zapobj, const char *name, dmu_tx_t *tx)
{
- return (zap_remove_norm(os, zapobj, name, MT_EXACT, tx));
+ return (zap_remove_norm(os, zapobj, name, 0, tx));
}
-int
-zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
+static int
+zap_remove_impl(zap_t *zap, const char *name,
matchtype_t mt, dmu_tx_t *tx)
{
- zap_t *zap;
- int err;
mzap_ent_t *mze;
zap_name_t *zn;
+ int err = 0;
- err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap);
- if (err)
- return (err);
zn = zap_name_alloc(zap, name, mt);
- if (zn == NULL) {
- zap_unlockdir(zap);
+ if (zn == NULL)
return (SET_ERROR(ENOTSUP));
- }
if (!zap->zap_ismicro) {
err = fzap_remove(zn, tx);
} else {
@@ -1181,7 +1385,35 @@ zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
}
}
zap_name_free(zn);
- zap_unlockdir(zap);
+ return (err);
+}
+
+int
+zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
+ matchtype_t mt, dmu_tx_t *tx)
+{
+ zap_t *zap;
+ int err;
+
+ err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
+ if (err)
+ return (err);
+ err = zap_remove_impl(zap, name, mt, tx);
+ zap_unlockdir(zap, FTAG);
+ return (err);
+}
+
+int
+zap_remove_by_dnode(dnode_t *dn, const char *name, dmu_tx_t *tx)
+{
+ zap_t *zap;
+ int err;
+
+ err = zap_lockdir_by_dnode(dn, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
+ if (err)
+ return (err);
+ err = zap_remove_impl(zap, name, 0, tx);
+ zap_unlockdir(zap, FTAG);
return (err);
}
@@ -1193,17 +1425,17 @@ zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
int err;
zap_name_t *zn;
- err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap);
+ err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
if (err)
return (err);
zn = zap_name_alloc_uint64(zap, key, key_numints);
if (zn == NULL) {
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (SET_ERROR(ENOTSUP));
}
err = fzap_remove(zn, tx);
zap_name_free(zn);
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (err);
}
@@ -1235,7 +1467,7 @@ zap_cursor_fini(zap_cursor_t *zc)
{
if (zc->zc_zap) {
rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
- zap_unlockdir(zc->zc_zap);
+ zap_unlockdir(zc->zc_zap, NULL);
zc->zc_zap = NULL;
}
if (zc->zc_leaf) {
@@ -1282,7 +1514,7 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
if (zc->zc_zap == NULL) {
int hb;
err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
- RW_READER, TRUE, FALSE, &zc->zc_zap);
+ RW_READER, TRUE, FALSE, NULL, &zc->zc_zap);
if (err)
return (err);
@@ -1346,7 +1578,7 @@ zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
int err;
zap_t *zap;
- err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
+ err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
if (err)
return (err);
@@ -1359,106 +1591,37 @@ zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
} else {
fzap_get_stats(zap, zs);
}
- zap_unlockdir(zap);
+ zap_unlockdir(zap, FTAG);
return (0);
}
-int
-zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
- uint64_t *towrite, uint64_t *tooverwrite)
-{
- zap_t *zap;
- int err = 0;
-
- /*
- * Since we don't have a name, we cannot figure out which blocks will
- * be affected in this operation. So, account for the worst case:
- * - 3 blocks overwritten: target leaf, ptrtbl block, header block
- * - 4 new blocks written if adding:
- * - 2 blocks for possibly split leaves,
- * - 2 grown ptrtbl blocks
- *
- * This also accommodates the case where an add operation to a fairly
- * large microzap results in a promotion to fatzap.
- */
- if (name == NULL) {
- *towrite += (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE;
- return (err);
- }
-
- /*
- * We lock the zap with adding == FALSE because, if we pass
- * the actual value of add, it could trigger a mzap_upgrade().
- * At present we are just evaluating the possibility of this operation
- * and hence we do not want to trigger an upgrade.
- */
- err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
- if (err)
- return (err);
-
- if (!zap->zap_ismicro) {
- zap_name_t *zn = zap_name_alloc(zap, name, MT_EXACT);
- if (zn) {
- err = fzap_count_write(zn, add, towrite,
- tooverwrite);
- zap_name_free(zn);
- } else {
- /*
- * We treat this case as similar to (name == NULL)
- */
- *towrite += (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE;
- }
- } else {
- /*
- * We are here if (name != NULL) and this is a micro-zap.
- * We account for the header block depending on whether it
- * is freeable.
- *
- * In case of an add operation it is hard to find out
- * if this add will promote this microzap to fatzap.
- * Hence, we consider the worst case and account for the
- * blocks assuming this microzap would be promoted to a
- * fatzap.
- *
- * 1 block overwritten : header block
- * 4 new blocks written : 2 new split leaf, 2 grown
- * ptrtbl blocks
- */
- if (dmu_buf_freeable(zap->zap_dbuf))
- *tooverwrite += MZAP_MAX_BLKSZ;
- else
- *towrite += MZAP_MAX_BLKSZ;
-
- if (add) {
- *towrite += 4 * MZAP_MAX_BLKSZ;
- }
- }
-
- zap_unlockdir(zap);
- return (err);
-}
-
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(zap_create);
+EXPORT_SYMBOL(zap_create_dnsize);
EXPORT_SYMBOL(zap_create_norm);
+EXPORT_SYMBOL(zap_create_norm_dnsize);
EXPORT_SYMBOL(zap_create_flags);
+EXPORT_SYMBOL(zap_create_flags_dnsize);
EXPORT_SYMBOL(zap_create_claim);
EXPORT_SYMBOL(zap_create_claim_norm);
+EXPORT_SYMBOL(zap_create_claim_norm_dnsize);
EXPORT_SYMBOL(zap_destroy);
EXPORT_SYMBOL(zap_lookup);
+EXPORT_SYMBOL(zap_lookup_by_dnode);
EXPORT_SYMBOL(zap_lookup_norm);
EXPORT_SYMBOL(zap_lookup_uint64);
EXPORT_SYMBOL(zap_contains);
EXPORT_SYMBOL(zap_prefetch);
EXPORT_SYMBOL(zap_prefetch_uint64);
-EXPORT_SYMBOL(zap_count_write);
EXPORT_SYMBOL(zap_add);
+EXPORT_SYMBOL(zap_add_by_dnode);
EXPORT_SYMBOL(zap_add_uint64);
EXPORT_SYMBOL(zap_update);
EXPORT_SYMBOL(zap_update_uint64);
EXPORT_SYMBOL(zap_length);
EXPORT_SYMBOL(zap_length_uint64);
EXPORT_SYMBOL(zap_remove);
+EXPORT_SYMBOL(zap_remove_by_dnode);
EXPORT_SYMBOL(zap_remove_norm);
EXPORT_SYMBOL(zap_remove_uint64);
EXPORT_SYMBOL(zap_count);
diff --git a/zfs/module/zfs/zfeature.c b/zfs/module/zfs/zfeature.c
index 352376f22b9e..d8220aa235a8 100644
--- a/zfs/module/zfs/zfeature.c
+++ b/zfs/module/zfs/zfeature.c
@@ -20,7 +20,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -253,7 +253,7 @@ feature_get_refcount_from_disk(spa_t *spa, zfeature_info_t *feature,
{
int err;
uint64_t refcount;
- uint64_t zapobj = feature->fi_can_readonly ?
+ uint64_t zapobj = (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ?
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj;
/*
@@ -277,7 +277,8 @@ feature_get_refcount_from_disk(spa_t *spa, zfeature_info_t *feature,
static int
-feature_get_enabled_txg(spa_t *spa, zfeature_info_t *feature, uint64_t *res) {
+feature_get_enabled_txg(spa_t *spa, zfeature_info_t *feature, uint64_t *res)
+{
ASSERTV(uint64_t enabled_txg_obj = spa->spa_feat_enabled_txg_obj);
ASSERT(zfeature_depends_on(feature->fi_feature,
@@ -306,7 +307,7 @@ feature_sync(spa_t *spa, zfeature_info_t *feature, uint64_t refcount,
uint64_t zapobj;
ASSERT(VALID_FEATURE_OR_NONE(feature->fi_feature));
- zapobj = feature->fi_can_readonly ?
+ zapobj = (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ?
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj;
VERIFY0(zap_update(spa->spa_meta_objset, zapobj, feature->fi_guid,
sizeof (uint64_t), 1, &refcount, tx));
@@ -327,7 +328,7 @@ feature_sync(spa_t *spa, zfeature_info_t *feature, uint64_t refcount,
if (refcount == 0)
spa_deactivate_mos_feature(spa, feature->fi_guid);
- else if (feature->fi_mos)
+ else if (feature->fi_flags & ZFEATURE_FLAG_MOS)
spa_activate_mos_feature(spa, feature->fi_guid, tx);
}
@@ -338,8 +339,9 @@ feature_sync(spa_t *spa, zfeature_info_t *feature, uint64_t refcount,
void
feature_enable_sync(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
{
- uint64_t initial_refcount = feature->fi_activate_on_enable ? 1 : 0;
- uint64_t zapobj = feature->fi_can_readonly ?
+ uint64_t initial_refcount =
+ (feature->fi_flags & ZFEATURE_FLAG_ACTIVATE_ON_ENABLE) ? 1 : 0;
+ uint64_t zapobj = (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ?
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj;
int i;
@@ -385,7 +387,8 @@ feature_do_action(spa_t *spa, spa_feature_t fid, feature_action_t action,
{
uint64_t refcount = 0;
zfeature_info_t *feature = &spa_feature_table[fid];
- ASSERTV(uint64_t zapobj = feature->fi_can_readonly ?
+ ASSERTV(uint64_t zapobj =
+ (feature->fi_flags & ZFEATURE_FLAG_READONLY_COMPAT) ?
spa->spa_feat_for_write_obj : spa->spa_feat_for_read_obj);
ASSERT(VALID_FEATURE_FID(fid));
@@ -498,7 +501,8 @@ spa_feature_is_active(spa_t *spa, spa_feature_t fid)
* Returns B_FALSE otherwise (i.e. if the feature is not enabled).
*/
boolean_t
-spa_feature_enabled_txg(spa_t *spa, spa_feature_t fid, uint64_t *txg) {
+spa_feature_enabled_txg(spa_t *spa, spa_feature_t fid, uint64_t *txg)
+{
int err;
ASSERT(VALID_FEATURE_FID(fid));
diff --git a/zfs/module/zfs/zfeature_common.c b/zfs/module/zfs/zfeature_common.c
index 609a72ab301a..73abcb236c55 100644
--- a/zfs/module/zfs/zfeature_common.c
+++ b/zfs/module/zfs/zfeature_common.c
@@ -20,8 +20,9 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
+ * Copyright (c) 2013, Joyent, Inc. All rights reserved.
* Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
*/
@@ -122,7 +123,8 @@ zfeature_lookup_name(const char *name, spa_feature_t *res)
}
boolean_t
-zfeature_depends_on(spa_feature_t fid, spa_feature_t check) {
+zfeature_depends_on(spa_feature_t fid, spa_feature_t check)
+{
zfeature_info_t *feature = &spa_feature_table[fid];
int i;
@@ -133,30 +135,43 @@ zfeature_depends_on(spa_feature_t fid, spa_feature_t check) {
return (B_FALSE);
}
+static boolean_t
+deps_contains_feature(const spa_feature_t *deps, const spa_feature_t feature)
+{
+ int i;
+
+ for (i = 0; deps[i] != SPA_FEATURE_NONE; i++)
+ if (deps[i] == feature)
+ return (B_TRUE);
+
+ return (B_FALSE);
+}
+
static void
zfeature_register(spa_feature_t fid, const char *guid, const char *name,
- const char *desc, boolean_t readonly, boolean_t mos,
- boolean_t activate_on_enable, const spa_feature_t *deps)
+ const char *desc, zfeature_flags_t flags, const spa_feature_t *deps)
{
zfeature_info_t *feature = &spa_feature_table[fid];
static spa_feature_t nodeps[] = { SPA_FEATURE_NONE };
ASSERT(name != NULL);
ASSERT(desc != NULL);
- ASSERT(!readonly || !mos);
+ ASSERT((flags & ZFEATURE_FLAG_READONLY_COMPAT) == 0 ||
+ (flags & ZFEATURE_FLAG_MOS) == 0);
ASSERT3U(fid, <, SPA_FEATURES);
ASSERT(zfeature_is_valid_guid(guid));
if (deps == NULL)
deps = nodeps;
+ VERIFY(((flags & ZFEATURE_FLAG_PER_DATASET) == 0) ||
+ (deps_contains_feature(deps, SPA_FEATURE_EXTENSIBLE_DATASET)));
+
feature->fi_feature = fid;
feature->fi_guid = guid;
feature->fi_uname = name;
feature->fi_desc = desc;
- feature->fi_can_readonly = readonly;
- feature->fi_mos = mos;
- feature->fi_activate_on_enable = activate_on_enable;
+ feature->fi_flags = flags;
feature->fi_depends = deps;
}
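The refactor above collapses the three boolean parameters (readonly, mos,
activate_on_enable) into a single zfeature_flags_t bitmask, and the new
VERIFY enforces that any per-dataset feature declares a dependency on
extensible_dataset. The flag definitions live in zfeature_common.h; that
header hunk is not in this excerpt, but it is expected to read roughly:

    typedef enum zfeature_flags {
        /* Can open pool readonly even if this feature is not supported. */
        ZFEATURE_FLAG_READONLY_COMPAT = (1 << 0),
        /* Is this feature necessary to read the MOS? */
        ZFEATURE_FLAG_MOS = (1 << 1),
        /* Activate this feature at the same time it is enabled. */
        ZFEATURE_FLAG_ACTIVATE_ON_ENABLE = (1 << 2),
        /* Each dataset has a field set if it has ever used this feature. */
        ZFEATURE_FLAG_PER_DATASET = (1 << 3)
    } zfeature_flags_t;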
@@ -165,28 +180,33 @@ zpool_feature_init(void)
{
zfeature_register(SPA_FEATURE_ASYNC_DESTROY,
"com.delphix:async_destroy", "async_destroy",
- "Destroy filesystems asynchronously.", B_TRUE, B_FALSE,
- B_FALSE, NULL);
+ "Destroy filesystems asynchronously.",
+ ZFEATURE_FLAG_READONLY_COMPAT, NULL);
zfeature_register(SPA_FEATURE_EMPTY_BPOBJ,
"com.delphix:empty_bpobj", "empty_bpobj",
- "Snapshots use less space.", B_TRUE, B_FALSE,
- B_FALSE, NULL);
+ "Snapshots use less space.",
+ ZFEATURE_FLAG_READONLY_COMPAT, NULL);
zfeature_register(SPA_FEATURE_LZ4_COMPRESS,
"org.illumos:lz4_compress", "lz4_compress",
- "LZ4 compression algorithm support.", B_FALSE, B_FALSE,
- B_TRUE, NULL);
+ "LZ4 compression algorithm support.",
+ ZFEATURE_FLAG_ACTIVATE_ON_ENABLE, NULL);
+
+ zfeature_register(SPA_FEATURE_MULTI_VDEV_CRASH_DUMP,
+ "com.joyent:multi_vdev_crash_dump", "multi_vdev_crash_dump",
+ "Crash dumps to multiple vdev pools.",
+ 0, NULL);
zfeature_register(SPA_FEATURE_SPACEMAP_HISTOGRAM,
"com.delphix:spacemap_histogram", "spacemap_histogram",
- "Spacemaps maintain space histograms.", B_TRUE, B_FALSE,
- B_FALSE, NULL);
+ "Spacemaps maintain space histograms.",
+ ZFEATURE_FLAG_READONLY_COMPAT, NULL);
zfeature_register(SPA_FEATURE_ENABLED_TXG,
"com.delphix:enabled_txg", "enabled_txg",
- "Record txg at which a feature is enabled", B_TRUE, B_FALSE,
- B_FALSE, NULL);
+ "Record txg at which a feature is enabled",
+ ZFEATURE_FLAG_READONLY_COMPAT, NULL);
{
static const spa_feature_t hole_birth_deps[] = {
@@ -196,13 +216,14 @@ zpool_feature_init(void)
zfeature_register(SPA_FEATURE_HOLE_BIRTH,
"com.delphix:hole_birth", "hole_birth",
"Retain hole birth txg for more precise zfs send",
- B_FALSE, B_TRUE, B_TRUE, hole_birth_deps);
+ ZFEATURE_FLAG_MOS | ZFEATURE_FLAG_ACTIVATE_ON_ENABLE,
+ hole_birth_deps);
}
zfeature_register(SPA_FEATURE_EXTENSIBLE_DATASET,
"com.delphix:extensible_dataset", "extensible_dataset",
"Enhanced dataset functionality, used by other features.",
- B_FALSE, B_FALSE, B_FALSE, NULL);
+ 0, NULL);
{
static const spa_feature_t bookmarks_deps[] = {
@@ -213,24 +234,25 @@ zpool_feature_init(void)
zfeature_register(SPA_FEATURE_BOOKMARKS,
"com.delphix:bookmarks", "bookmarks",
"\"zfs bookmark\" command",
- B_TRUE, B_FALSE, B_FALSE, bookmarks_deps);
+ ZFEATURE_FLAG_READONLY_COMPAT, bookmarks_deps);
}
{
static const spa_feature_t filesystem_limits_deps[] = {
- SPA_FEATURE_EXTENSIBLE_DATASET,
- SPA_FEATURE_NONE
+ SPA_FEATURE_EXTENSIBLE_DATASET,
+ SPA_FEATURE_NONE
};
zfeature_register(SPA_FEATURE_FS_SS_LIMIT,
"com.joyent:filesystem_limits", "filesystem_limits",
- "Filesystem and snapshot limits.", B_TRUE, B_FALSE, B_FALSE,
- filesystem_limits_deps);
+ "Filesystem and snapshot limits.",
+ ZFEATURE_FLAG_READONLY_COMPAT, filesystem_limits_deps);
}
zfeature_register(SPA_FEATURE_EMBEDDED_DATA,
"com.delphix:embedded_data", "embedded_data",
"Blocks which compress very well use even less space.",
- B_FALSE, B_TRUE, B_TRUE, NULL);
+ ZFEATURE_FLAG_MOS | ZFEATURE_FLAG_ACTIVATE_ON_ENABLE,
+ NULL);
{
static const spa_feature_t large_blocks_deps[] = {
@@ -239,7 +261,61 @@ zpool_feature_init(void)
};
zfeature_register(SPA_FEATURE_LARGE_BLOCKS,
"org.open-zfs:large_blocks", "large_blocks",
- "Support for blocks larger than 128KB.", B_FALSE, B_FALSE, B_FALSE,
- large_blocks_deps);
+ "Support for blocks larger than 128KB.",
+ ZFEATURE_FLAG_PER_DATASET, large_blocks_deps);
+ }
+
+ {
+ static const spa_feature_t large_dnode_deps[] = {
+ SPA_FEATURE_EXTENSIBLE_DATASET,
+ SPA_FEATURE_NONE
+ };
+ zfeature_register(SPA_FEATURE_LARGE_DNODE,
+ "org.zfsonlinux:large_dnode", "large_dnode",
+ "Variable on-disk size of dnodes.",
+ ZFEATURE_FLAG_PER_DATASET, large_dnode_deps);
+ }
+
+ {
+ static const spa_feature_t sha512_deps[] = {
+ SPA_FEATURE_EXTENSIBLE_DATASET,
+ SPA_FEATURE_NONE
+ };
+ zfeature_register(SPA_FEATURE_SHA512,
+ "org.illumos:sha512", "sha512",
+ "SHA-512/256 hash algorithm.",
+ ZFEATURE_FLAG_PER_DATASET, sha512_deps);
+ }
+ {
+ static const spa_feature_t skein_deps[] = {
+ SPA_FEATURE_EXTENSIBLE_DATASET,
+ SPA_FEATURE_NONE
+ };
+ zfeature_register(SPA_FEATURE_SKEIN,
+ "org.illumos:skein", "skein",
+ "Skein hash algorithm.",
+ ZFEATURE_FLAG_PER_DATASET, skein_deps);
+ }
+
+ {
+ static const spa_feature_t edonr_deps[] = {
+ SPA_FEATURE_EXTENSIBLE_DATASET,
+ SPA_FEATURE_NONE
+ };
+ zfeature_register(SPA_FEATURE_EDONR,
+ "org.illumos:edonr", "edonr",
+ "Edon-R hash algorithm.",
+ ZFEATURE_FLAG_PER_DATASET, edonr_deps);
+ }
+ {
+ static const spa_feature_t userobj_accounting_deps[] = {
+ SPA_FEATURE_EXTENSIBLE_DATASET,
+ SPA_FEATURE_NONE
+ };
+ zfeature_register(SPA_FEATURE_USEROBJ_ACCOUNTING,
+ "org.zfsonlinux:userobj_accounting", "userobj_accounting",
+ "User/Group object accounting.",
+ ZFEATURE_FLAG_READONLY_COMPAT | ZFEATURE_FLAG_PER_DATASET,
+ userobj_accounting_deps);
}
}
diff --git a/zfs/module/zfs/zfs_acl.c b/zfs/module/zfs/zfs_acl.c
index ea8c16ed4f9b..0e7203ea6672 100644
--- a/zfs/module/zfs/zfs_acl.c
+++ b/zfs/module/zfs/zfs_acl.c
@@ -53,6 +53,7 @@
#include <sys/zap.h>
#include <sys/sa.h>
#include <sys/trace_acl.h>
+#include <sys/zpl.h>
#include "fs/fs_subr.h"
#define ALLOW ACE_ACCESS_ALLOWED_ACE_TYPE
@@ -370,23 +371,23 @@ static int
zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount,
zfs_acl_phys_t *aclphys)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
uint64_t acl_count;
int size;
int error;
ASSERT(MUTEX_HELD(&zp->z_acl_lock));
if (zp->z_is_sa) {
- if ((error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zsb),
+ if ((error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zfsvfs),
&size)) != 0)
return (error);
*aclsize = size;
- if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zsb),
+ if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zfsvfs),
&acl_count, sizeof (acl_count))) != 0)
return (error);
*aclcount = acl_count;
} else {
- if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zsb),
+ if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
aclphys, sizeof (*aclphys))) != 0)
return (error);
@@ -650,7 +651,7 @@ zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt,
* ACE FUIDs will be created later.
*/
int
-zfs_copy_ace_2_fuid(zfs_sb_t *zsb, umode_t obj_mode, zfs_acl_t *aclp,
+zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, umode_t obj_mode, zfs_acl_t *aclp,
void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size,
zfs_fuid_info_t **fuidp, cred_t *cr)
{
@@ -668,7 +669,7 @@ zfs_copy_ace_2_fuid(zfs_sb_t *zsb, umode_t obj_mode, zfs_acl_t *aclp,
entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS;
if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP &&
entry_type != ACE_EVERYONE) {
- aceptr->z_fuid = zfs_fuid_create(zsb, acep->a_who,
+ aceptr->z_fuid = zfs_fuid_create(zfsvfs, acep->a_who,
cr, (entry_type == 0) ?
ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp);
}
@@ -712,7 +713,7 @@ zfs_copy_ace_2_fuid(zfs_sb_t *zsb, umode_t obj_mode, zfs_acl_t *aclp,
* Copy ZFS ACEs to fixed size ace_t layout
*/
static void
-zfs_copy_fuid_2_ace(zfs_sb_t *zsb, zfs_acl_t *aclp, cred_t *cr,
+zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr,
void *datap, int filter)
{
uint64_t who;
@@ -755,7 +756,7 @@ zfs_copy_fuid_2_ace(zfs_sb_t *zsb, zfs_acl_t *aclp, cred_t *cr,
if ((entry_type != ACE_OWNER &&
entry_type != OWNING_GROUP &&
entry_type != ACE_EVERYONE)) {
- acep->a_who = zfs_fuid_map_id(zsb, who,
+ acep->a_who = zfs_fuid_map_id(zfsvfs, who,
cr, (entry_type & ACE_IDENTIFIER_GROUP) ?
ZFS_ACE_GROUP : ZFS_ACE_USER);
} else {
@@ -1165,8 +1166,10 @@ zfs_acl_chown_setattr(znode_t *zp)
error = zfs_acl_node_read(zp, B_TRUE, &aclp, B_FALSE);
if (error == 0 && aclp->z_acl_count > 0)
- zp->z_mode = zfs_mode_compute(zp->z_mode, aclp,
- &zp->z_pflags, zp->z_uid, zp->z_gid);
+ zp->z_mode = ZTOI(zp)->i_mode =
+ zfs_mode_compute(zp->z_mode, aclp,
+ &zp->z_pflags, KUID_TO_SUID(ZTOI(zp)->i_uid),
+ KGID_TO_SGID(ZTOI(zp)->i_gid));
/*
* Some ZFS implementations (ZEVO) create neither a ZNODE_ACL
@@ -1313,7 +1316,7 @@ int
zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
{
int error;
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
dmu_object_type_t otype;
zfs_acl_locator_cb_t locate = { 0 };
uint64_t mode;
@@ -1324,14 +1327,14 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
mode = zp->z_mode;
mode = zfs_mode_compute(mode, aclp, &zp->z_pflags,
- zp->z_uid, zp->z_gid);
+ KUID_TO_SUID(ZTOI(zp)->i_uid), KGID_TO_SGID(ZTOI(zp)->i_gid));
- zp->z_mode = mode;
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL,
+ zp->z_mode = ZTOI(zp)->i_mode = mode;
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
&mode, sizeof (mode));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
&zp->z_pflags, sizeof (zp->z_pflags));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
&ctime, sizeof (ctime));
if (zp->z_acl_cached) {
@@ -1342,11 +1345,11 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
/*
* Upgrade needed?
*/
- if (!zsb->z_use_fuids) {
+ if (!zfsvfs->z_use_fuids) {
otype = DMU_OT_OLDACL;
} else {
if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) &&
- (zsb->z_version >= ZPL_VERSION_FUID))
+ (zfsvfs->z_version >= ZPL_VERSION_FUID))
zfs_acl_xform(zp, aclp, cr);
ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID);
otype = DMU_OT_ACL;
@@ -1359,9 +1362,9 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */
locate.cb_aclp = aclp;
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zsb),
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zfsvfs),
zfs_acl_data_locator, &locate, aclp->z_acl_bytes);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zsb),
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zfsvfs),
NULL, &aclp->z_acl_count, sizeof (uint64_t));
} else { /* Painful legacy way */
zfs_acl_node_t *aclnode;
@@ -1369,7 +1372,7 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
zfs_acl_phys_t acl_phys;
uint64_t aoid;
- if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zsb),
+ if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
&acl_phys, sizeof (acl_phys))) != 0)
return (error);
@@ -1383,20 +1386,20 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
*/
if (aoid &&
aclp->z_version != acl_phys.z_acl_version) {
- error = dmu_object_free(zsb->z_os, aoid, tx);
+ error = dmu_object_free(zfsvfs->z_os, aoid, tx);
if (error)
return (error);
aoid = 0;
}
if (aoid == 0) {
- aoid = dmu_object_alloc(zsb->z_os,
+ aoid = dmu_object_alloc(zfsvfs->z_os,
otype, aclp->z_acl_bytes,
otype == DMU_OT_ACL ?
DMU_OT_SYSACL : DMU_OT_NONE,
otype == DMU_OT_ACL ?
- DN_MAX_BONUSLEN : 0, tx);
+ DN_OLD_MAX_BONUSLEN : 0, tx);
} else {
- (void) dmu_object_set_blocksize(zsb->z_os,
+ (void) dmu_object_set_blocksize(zfsvfs->z_os,
aoid, aclp->z_acl_bytes, 0, tx);
}
acl_phys.z_acl_extern_obj = aoid;
@@ -1404,7 +1407,7 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
aclnode = list_next(&aclp->z_acl, aclnode)) {
if (aclnode->z_ace_count == 0)
continue;
- dmu_write(zsb->z_os, aoid, off,
+ dmu_write(zfsvfs->z_os, aoid, off,
aclnode->z_size, aclnode->z_acldata, tx);
off += aclnode->z_size;
}
@@ -1414,7 +1417,7 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
* Migrating back embedded?
*/
if (acl_phys.z_acl_extern_obj) {
- error = dmu_object_free(zsb->z_os,
+ error = dmu_object_free(zfsvfs->z_os,
acl_phys.z_acl_extern_obj, tx);
if (error)
return (error);
@@ -1443,7 +1446,7 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
}
acl_phys.z_acl_version = aclp->z_version;
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
&acl_phys, sizeof (acl_phys));
}
@@ -1462,7 +1465,7 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
}
static void
-zfs_acl_chmod(zfs_sb_t *zsb, uint64_t mode, zfs_acl_t *aclp)
+zfs_acl_chmod(zfsvfs_t *zfsvfs, uint64_t mode, zfs_acl_t *aclp)
{
void *acep = NULL;
uint64_t who;
@@ -1535,7 +1538,7 @@ zfs_acl_chmod(zfs_sb_t *zsb, uint64_t mode, zfs_acl_t *aclp)
* Limit permissions to be no greater than
* group permissions
*/
- if (zsb->z_acl_inherit == ZFS_ACL_RESTRICTED) {
+ if (zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED) {
if (!(mode & S_IRGRP))
access_mask &= ~ACE_READ_DATA;
if (!(mode & S_IWGRP))
@@ -1587,11 +1590,11 @@ zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode)
* strip off write_owner and write_acl
*/
static void
-zfs_restricted_update(zfs_sb_t *zsb, zfs_acl_t *aclp, void *acep)
+zfs_restricted_update(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, void *acep)
{
uint32_t mask = aclp->z_ops->ace_mask_get(acep);
- if ((zsb->z_acl_inherit == ZFS_ACL_RESTRICTED) &&
+ if ((zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED) &&
(aclp->z_ops->ace_type_get(acep) == ALLOW)) {
mask &= ~RESTRICTED_CLEAR;
aclp->z_ops->ace_mask_set(acep, mask);
@@ -1618,7 +1621,7 @@ zfs_ace_can_use(umode_t obj_mode, uint16_t acep_flags)
* inherit inheritable ACEs from parent
*/
static zfs_acl_t *
-zfs_acl_inherit(zfs_sb_t *zsb, umode_t obj_mode, zfs_acl_t *paclp,
+zfs_acl_inherit(zfsvfs_t *zfsvfs, umode_t obj_mode, zfs_acl_t *paclp,
uint64_t mode, boolean_t *need_chmod)
{
void *pacep;
@@ -1636,16 +1639,16 @@ zfs_acl_inherit(zfs_sb_t *zsb, umode_t obj_mode, zfs_acl_t *paclp,
boolean_t passthrough, passthrough_x, noallow;
passthrough_x =
- zsb->z_acl_inherit == ZFS_ACL_PASSTHROUGH_X;
+ zfsvfs->z_acl_inherit == ZFS_ACL_PASSTHROUGH_X;
passthrough = passthrough_x ||
- zsb->z_acl_inherit == ZFS_ACL_PASSTHROUGH;
+ zfsvfs->z_acl_inherit == ZFS_ACL_PASSTHROUGH;
noallow =
- zsb->z_acl_inherit == ZFS_ACL_NOALLOW;
+ zfsvfs->z_acl_inherit == ZFS_ACL_NOALLOW;
*need_chmod = B_TRUE;
pacep = NULL;
aclp = zfs_acl_alloc(paclp->z_version);
- if (zsb->z_acl_inherit == ZFS_ACL_DISCARD || S_ISLNK(obj_mode))
+ if (zfsvfs->z_acl_inherit == ZFS_ACL_DISCARD || S_ISLNK(obj_mode))
return (aclp);
while ((pacep = zfs_acl_next_ace(paclp, pacep, &who,
&access_mask, &iflags, &type))) {
@@ -1709,7 +1712,7 @@ zfs_acl_inherit(zfs_sb_t *zsb, umode_t obj_mode, zfs_acl_t *paclp,
newflags &= ~ALL_INHERIT;
aclp->z_ops->ace_flags_set(acep,
newflags|ACE_INHERITED_ACE);
- zfs_restricted_update(zsb, aclp, acep);
+ zfs_restricted_update(zfsvfs, aclp, acep);
continue;
}
@@ -1742,11 +1745,9 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids)
{
int error;
- zfs_sb_t *zsb = ZTOZSB(dzp);
+ zfsvfs_t *zfsvfs = ZTOZSB(dzp);
zfs_acl_t *paclp;
-#ifdef HAVE_KSID
- gid_t gid;
-#endif /* HAVE_KSID */
+ gid_t gid = vap->va_gid;
boolean_t need_chmod = B_TRUE;
boolean_t inherited = B_FALSE;
@@ -1754,7 +1755,7 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
acl_ids->z_mode = vap->va_mode;
if (vsecp)
- if ((error = zfs_vsec_2_aclp(zsb, vap->va_mode, vsecp,
+ if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_mode, vsecp,
cr, &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0)
return (error);
@@ -1764,23 +1765,23 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
/*
* Determine uid and gid.
*/
- if ((flag & IS_ROOT_NODE) || zsb->z_replay ||
+ if ((flag & IS_ROOT_NODE) || zfsvfs->z_replay ||
((flag & IS_XATTR) && (S_ISDIR(vap->va_mode)))) {
- acl_ids->z_fuid = zfs_fuid_create(zsb, (uint64_t)vap->va_uid,
+ acl_ids->z_fuid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_uid,
cr, ZFS_OWNER, &acl_ids->z_fuidp);
- acl_ids->z_fgid = zfs_fuid_create(zsb, (uint64_t)vap->va_gid,
+ acl_ids->z_fgid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid,
cr, ZFS_GROUP, &acl_ids->z_fuidp);
gid = vap->va_gid;
} else {
- acl_ids->z_fuid = zfs_fuid_create_cred(zsb, ZFS_OWNER,
+ acl_ids->z_fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER,
cr, &acl_ids->z_fuidp);
acl_ids->z_fgid = 0;
if (vap->va_mask & AT_GID) {
- acl_ids->z_fgid = zfs_fuid_create(zsb,
+ acl_ids->z_fgid = zfs_fuid_create(zfsvfs,
(uint64_t)vap->va_gid,
cr, ZFS_GROUP, &acl_ids->z_fuidp);
gid = vap->va_gid;
- if (acl_ids->z_fgid != dzp->z_gid &&
+ if (acl_ids->z_fgid != KGID_TO_SGID(ZTOI(dzp)->i_gid) &&
!groupmember(vap->va_gid, cr) &&
secpolicy_vnode_create_gid(cr) != 0)
acl_ids->z_fgid = 0;
@@ -1790,14 +1791,15 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
char *domain;
uint32_t rid;
- acl_ids->z_fgid = dzp->z_gid;
- gid = zfs_fuid_map_id(zsb, acl_ids->z_fgid,
+ acl_ids->z_fgid = KGID_TO_SGID(
+ ZTOI(dzp)->i_gid);
+ gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid,
cr, ZFS_GROUP);
- if (zsb->z_use_fuids &&
+ if (zfsvfs->z_use_fuids &&
IS_EPHEMERAL(acl_ids->z_fgid)) {
domain = zfs_fuid_idx_domain(
- &zsb->z_fuid_idx,
+ &zfsvfs->z_fuid_idx,
FUID_INDEX(acl_ids->z_fgid));
rid = FUID_RID(acl_ids->z_fgid);
zfs_fuid_node_add(&acl_ids->z_fuidp,
@@ -1806,7 +1808,7 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
acl_ids->z_fgid, ZFS_GROUP);
}
} else {
- acl_ids->z_fgid = zfs_fuid_create_cred(zsb,
+ acl_ids->z_fgid = zfs_fuid_create_cred(zfsvfs,
ZFS_GROUP, cr, &acl_ids->z_fuidp);
gid = crgetgid(cr);
}
@@ -1838,7 +1840,7 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
!(dzp->z_pflags & ZFS_XATTR)) {
VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE,
&paclp, B_FALSE));
- acl_ids->z_aclp = zfs_acl_inherit(zsb,
+ acl_ids->z_aclp = zfs_acl_inherit(zfsvfs,
vap->va_mode, paclp, acl_ids->z_mode, &need_chmod);
inherited = B_TRUE;
} else {
@@ -1851,7 +1853,7 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
if (need_chmod) {
acl_ids->z_aclp->z_hints |= S_ISDIR(vap->va_mode) ?
ZFS_ACL_AUTO_INHERIT : 0;
- zfs_acl_chmod(zsb, acl_ids->z_mode, acl_ids->z_aclp);
+ zfs_acl_chmod(zfsvfs, acl_ids->z_mode, acl_ids->z_aclp);
}
}
@@ -1881,10 +1883,12 @@ zfs_acl_ids_free(zfs_acl_ids_t *acl_ids)
}
boolean_t
-zfs_acl_ids_overquota(zfs_sb_t *zsb, zfs_acl_ids_t *acl_ids)
+zfs_acl_ids_overquota(zfsvfs_t *zfsvfs, zfs_acl_ids_t *acl_ids)
{
- return (zfs_fuid_overquota(zsb, B_FALSE, acl_ids->z_fuid) ||
- zfs_fuid_overquota(zsb, B_TRUE, acl_ids->z_fgid));
+ return (zfs_fuid_overquota(zfsvfs, B_FALSE, acl_ids->z_fuid) ||
+ zfs_fuid_overquota(zfsvfs, B_TRUE, acl_ids->z_fgid) ||
+ zfs_fuid_overobjquota(zfsvfs, B_FALSE, acl_ids->z_fuid) ||
+ zfs_fuid_overobjquota(zfsvfs, B_TRUE, acl_ids->z_fgid));
}
/*
@@ -1988,7 +1992,7 @@ zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
}
int
-zfs_vsec_2_aclp(zfs_sb_t *zsb, umode_t obj_mode,
+zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, umode_t obj_mode,
vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp)
{
zfs_acl_t *aclp;
@@ -1999,7 +2003,7 @@ zfs_vsec_2_aclp(zfs_sb_t *zsb, umode_t obj_mode,
if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0)
return (SET_ERROR(EINVAL));
- aclp = zfs_acl_alloc(zfs_acl_version(zsb->z_version));
+ aclp = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version));
aclp->z_hints = 0;
aclnode = zfs_acl_node_alloc(aclcnt * sizeof (zfs_object_ace_t));
@@ -2012,7 +2016,7 @@ zfs_vsec_2_aclp(zfs_sb_t *zsb, umode_t obj_mode,
return (error);
}
} else {
- if ((error = zfs_copy_ace_2_fuid(zsb, obj_mode, aclp,
+ if ((error = zfs_copy_ace_2_fuid(zfsvfs, obj_mode, aclp,
vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt,
&aclnode->z_size, fuidp, cr)) != 0) {
zfs_acl_free(aclp);
@@ -2048,8 +2052,8 @@ zfs_vsec_2_aclp(zfs_sb_t *zsb, umode_t obj_mode,
int
zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
- zilog_t *zilog = zsb->z_log;
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
+ zilog_t *zilog = zfsvfs->z_log;
ulong_t mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT);
dmu_tx_t *tx;
int error;
@@ -2067,7 +2071,7 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
if ((error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr)))
return (error);
- error = zfs_vsec_2_aclp(zsb, ZTOI(zp)->i_mode, vsecp, cr, &fuidp,
+ error = zfs_vsec_2_aclp(zfsvfs, ZTOI(zp)->i_mode, vsecp, cr, &fuidp,
&aclp);
if (error)
return (error);
@@ -2084,13 +2088,13 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
mutex_enter(&zp->z_acl_lock);
mutex_enter(&zp->z_lock);
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
- fuid_dirtied = zsb->z_fuid_dirty;
+ fuid_dirtied = zfsvfs->z_fuid_dirty;
if (fuid_dirtied)
- zfs_fuid_txhold(zsb, tx);
+ zfs_fuid_txhold(zfsvfs, tx);
/*
* If old version and ACL won't fit in bonus and we aren't
@@ -2098,7 +2102,7 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
*/
if ((acl_obj = zfs_external_acl(zp)) != 0) {
- if (zsb->z_version >= ZPL_VERSION_FUID &&
+ if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
zfs_znode_acl_version(zp) <= ZFS_ACL_VERSION_INITIAL) {
dmu_tx_hold_free(tx, acl_obj, 0,
DMU_OBJECT_END);
@@ -2133,7 +2137,7 @@ zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr)
zp->z_acl_cached = aclp;
if (fuid_dirtied)
- zfs_fuid_sync(zsb, tx);
+ zfs_fuid_sync(zfsvfs, tx);
zfs_log_acl(zilog, tx, zp, vsecp, fuidp);
@@ -2214,7 +2218,7 @@ static int
zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
boolean_t anyaccess, cred_t *cr)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
zfs_acl_t *aclp;
int error;
uid_t uid = crgetuid(cr);
@@ -2269,7 +2273,7 @@ zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
who = gowner;
/*FALLTHROUGH*/
case ACE_IDENTIFIER_GROUP:
- checkit = zfs_groupmember(zsb, who, cr);
+ checkit = zfs_groupmember(zfsvfs, who, cr);
break;
case ACE_EVERYONE:
checkit = B_TRUE;
@@ -2280,7 +2284,7 @@ zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
if (entry_type == 0) {
uid_t newid;
- newid = zfs_fuid_map_id(zsb, who, cr,
+ newid = zfs_fuid_map_id(zfsvfs, who, cr,
ZFS_ACE_USER);
if (newid != IDMAP_WK_CREATOR_OWNER_UID &&
uid == newid)
@@ -2342,7 +2346,8 @@ zfs_has_access(znode_t *zp, cred_t *cr)
if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) {
uid_t owner;
- owner = zfs_fuid_map_id(ZTOZSB(zp), zp->z_uid, cr, ZFS_OWNER);
+ owner = zfs_fuid_map_id(ZTOZSB(zp),
+ KUID_TO_SUID(ZTOI(zp)->i_uid), cr, ZFS_OWNER);
return (secpolicy_vnode_any_access(cr, ZTOI(zp), owner) == 0);
}
return (B_TRUE);
@@ -2352,7 +2357,7 @@ static int
zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
int err;
*working_mode = v4_mode;
@@ -2361,7 +2366,7 @@ zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
/*
* Short circuit empty requests
*/
- if (v4_mode == 0 || zsb->z_replay) {
+ if (v4_mode == 0 || zfsvfs->z_replay) {
*working_mode = 0;
return (0);
}
@@ -2420,12 +2425,13 @@ zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
return (0);
}
- if (FUID_INDEX(zdp->z_uid) != 0 || FUID_INDEX(zdp->z_gid) != 0) {
+ if (KUID_TO_SUID(ZTOI(zdp)->i_uid) != 0 ||
+ KGID_TO_SGID(ZTOI(zdp)->i_gid) != 0) {
mutex_exit(&zdp->z_acl_lock);
goto slow;
}
- if (uid == zdp->z_uid) {
+ if (uid == KUID_TO_SUID(ZTOI(zdp)->i_uid)) {
owner = B_TRUE;
if (zdp->z_mode & S_IXUSR) {
mutex_exit(&zdp->z_acl_lock);
@@ -2435,7 +2441,7 @@ zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
goto slow;
}
}
- if (groupmember(zdp->z_gid, cr)) {
+ if (groupmember(KGID_TO_SGID(ZTOI(zdp)->i_gid), cr)) {
groupmbr = B_TRUE;
if (zdp->z_mode & S_IXGRP) {
mutex_exit(&zdp->z_acl_lock);
@@ -2486,15 +2492,8 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
* If attribute then validate against base file
*/
if (is_attr) {
- uint64_t parent;
-
- if ((error = sa_lookup(zp->z_sa_hdl,
- SA_ZPL_PARENT(ZTOZSB(zp)), &parent,
- sizeof (parent))) != 0)
- return (error);
-
if ((error = zfs_zget(ZTOZSB(zp),
- parent, &xzp)) != 0) {
+ zp->z_xattr_parent, &xzp)) != 0) {
return (error);
}
@@ -2515,7 +2514,8 @@ zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
}
}
- owner = zfs_fuid_map_id(ZTOZSB(zp), zp->z_uid, cr, ZFS_OWNER);
+ owner = zfs_fuid_map_id(ZTOZSB(zp), KUID_TO_SUID(ZTOI(zp)->i_uid),
+ cr, ZFS_OWNER);
/*
* Map the bits required to the standard inode flags
* S_IRUSR|S_IWUSR|S_IXUSR in the needed_bits. Map the bits
@@ -2644,7 +2644,8 @@ zfs_delete_final_check(znode_t *zp, znode_t *dzp,
int error;
uid_t downer;
- downer = zfs_fuid_map_id(ZTOZSB(dzp), dzp->z_uid, cr, ZFS_OWNER);
+ downer = zfs_fuid_map_id(ZTOZSB(dzp), KUID_TO_SUID(ZTOI(dzp)->i_uid),
+ cr, ZFS_OWNER);
error = secpolicy_vnode_access2(cr, ZTOI(dzp),
downer, available_perms, S_IWUSR|S_IXUSR);
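Beyond the mechanical zsb -> zfsvfs rename, the hunks above stop using the
znode's cached z_uid/z_gid and read ownership back from the generic inode via
the KUID_TO_SUID()/KGID_TO_SGID() compat macros. A minimal sketch of those
macros, assuming the kuid_t/kgid_t wrappers from linux/uidgid.h:

    /* Convert kernel-internal ids to raw ids and back (init_user_ns view). */
    #define KUID_TO_SUID(x)  (__kuid_val(x))
    #define KGID_TO_SGID(x)  (__kgid_val(x))
    #define SUID_TO_KUID(x)  (KUIDT_INIT(x))
    #define SGID_TO_KGID(x)  (KGIDT_INIT(x))

Keeping the inode as the single source of truth avoids the stale-copy bugs
that the duplicated z_uid/z_gid fields invited.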
diff --git a/zfs/module/zfs/zfs_ctldir.c b/zfs/module/zfs/zfs_ctldir.c
index 2e1aa75fe332..3b5fb196f1d4 100644
--- a/zfs/module/zfs/zfs_ctldir.c
+++ b/zfs/module/zfs/zfs_ctldir.c
@@ -28,6 +28,7 @@
* Rohan Puri <rohan.puri15 at gmail.com>
* Brian Behlendorf <behlendorf1 at llnl.gov>
* Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
*/
/*
@@ -59,12 +60,12 @@
*
* The '.zfs', '.zfs/snapshot', and all directories created under
* '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same
- * share the same zfs_sb_t as the head filesystem (what '.zfs' lives under).
+ * zfsvfs_t as the head filesystem (what '.zfs' lives under).

*
* File systems mounted on top of the '.zfs/snapshot/<snapname>' paths
* (ie: snapshots) are complete ZFS filesystems and have their own unique
- * zfs_sb_t. However, the fsid reported by these mounts will be the same
- * as that used by the parent zfs_sb_t to make NFS happy.
+ * zfsvfs_t. However, the fsid reported by these mounts will be the same
+ * as that used by the parent zfsvfs_t to make NFS happy.
*/
#include <sys/types.h>
@@ -108,12 +109,7 @@ static krwlock_t zfs_snapshot_lock;
* Control Directory Tunables (.zfs)
*/
int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT;
-int zfs_admin_snapshot = 0;
-
-/*
- * Dedicated task queue for unmounting snapshots.
- */
-static taskq_t *zfs_expire_taskq;
+int zfs_admin_snapshot = 1;
typedef struct {
char *se_name; /* full snapshot name */
@@ -146,7 +142,7 @@ zfsctl_snapshot_alloc(char *full_name, char *full_path, spa_t *spa,
se->se_spa = spa;
se->se_objsetid = objsetid;
se->se_root_dentry = root_dentry;
- se->se_taskqid = -1;
+ se->se_taskqid = TASKQID_INVALID;
refcount_create(&se->se_refcount);
@@ -338,7 +334,7 @@ snapentry_expire(void *data)
return;
}
- se->se_taskqid = -1;
+ se->se_taskqid = TASKQID_INVALID;
(void) zfsctl_snapshot_unmount(se->se_name, MNT_EXPIRE);
zfsctl_snapshot_rele(se);
@@ -364,8 +360,8 @@ zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se)
{
ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
- if (taskq_cancel_id(zfs_expire_taskq, se->se_taskqid) == 0) {
- se->se_taskqid = -1;
+ if (taskq_cancel_id(system_delay_taskq, se->se_taskqid) == 0) {
+ se->se_taskqid = TASKQID_INVALID;
zfsctl_snapshot_rele(se);
}
}
@@ -376,13 +372,13 @@ zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se)
static void
zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay)
{
- ASSERT3S(se->se_taskqid, ==, -1);
+ ASSERT3S(se->se_taskqid, ==, TASKQID_INVALID);
if (delay <= 0)
return;
zfsctl_snapshot_hold(se);
- se->se_taskqid = taskq_dispatch_delay(zfs_expire_taskq,
+ se->se_taskqid = taskq_dispatch_delay(system_delay_taskq,
snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ);
}
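Two things change in the unmount-expiry path above: the private z_unmount
taskq is dropped in favour of the shared system_delay_taskq, and the
pending-dispatch sentinel moves from -1 to TASKQID_INVALID. The latter
matters because taskqid_t is unsigned in the SPL; a sketch of the assumed
SPL definitions (the header is not part of this hunk):

    typedef unsigned long taskqid_t;
    #define TASKQID_INVALID  ((taskqid_t)0)

    extern taskq_t *system_delay_taskq;  /* shared queue for delayed work */

Since a successful taskq_dispatch_delay() never returns 0, zero is an
unambiguous "nothing pending" marker, unlike -1 converted to unsigned.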
@@ -452,14 +448,14 @@ zfsctl_is_snapdir(struct inode *ip)
* Allocate a new inode with the passed id and ops.
*/
static struct inode *
-zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id,
+zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
const struct file_operations *fops, const struct inode_operations *ops)
{
struct timespec now;
struct inode *ip;
znode_t *zp;
- ip = new_inode(zsb->z_sb);
+ ip = new_inode(zfsvfs->z_sb);
if (ip == NULL)
return (NULL);
@@ -477,20 +473,17 @@ zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id,
zp->z_blksz = 0;
zp->z_seq = 0;
zp->z_mapcnt = 0;
- zp->z_gen = 0;
zp->z_size = 0;
- zp->z_links = 0;
zp->z_pflags = 0;
- zp->z_uid = 0;
- zp->z_gid = 0;
zp->z_mode = 0;
zp->z_sync_cnt = 0;
zp->z_is_mapped = B_FALSE;
zp->z_is_ctldir = B_TRUE;
zp->z_is_sa = B_FALSE;
zp->z_is_stale = B_FALSE;
+ ip->i_generation = 0;
ip->i_ino = id;
- ip->i_mode = (S_IFDIR | S_IRUGO | S_IXUGO);
+ ip->i_mode = (S_IFDIR | S_IRWXUGO);
ip->i_uid = SUID_TO_KUID(0);
ip->i_gid = SGID_TO_KGID(0);
ip->i_blkbits = SPA_MINBLOCKSHIFT;
@@ -509,11 +502,11 @@ zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id,
return (NULL);
}
- mutex_enter(&zsb->z_znodes_lock);
- list_insert_tail(&zsb->z_all_znodes, zp);
- zsb->z_nr_znodes++;
+ mutex_enter(&zfsvfs->z_znodes_lock);
+ list_insert_tail(&zfsvfs->z_all_znodes, zp);
+ zfsvfs->z_nr_znodes++;
membar_producer();
- mutex_exit(&zsb->z_znodes_lock);
+ mutex_exit(&zfsvfs->z_znodes_lock);
unlock_new_inode(ip);
@@ -524,18 +517,18 @@ zfsctl_inode_alloc(zfs_sb_t *zsb, uint64_t id,
* Lookup the inode with given id, it will be allocated if needed.
*/
static struct inode *
-zfsctl_inode_lookup(zfs_sb_t *zsb, uint64_t id,
+zfsctl_inode_lookup(zfsvfs_t *zfsvfs, uint64_t id,
const struct file_operations *fops, const struct inode_operations *ops)
{
struct inode *ip = NULL;
while (ip == NULL) {
- ip = ilookup(zsb->z_sb, (unsigned long)id);
+ ip = ilookup(zfsvfs->z_sb, (unsigned long)id);
if (ip)
break;
/* May fail due to concurrent zfsctl_inode_alloc() */
- ip = zfsctl_inode_alloc(zsb, id, fops, ops);
+ ip = zfsctl_inode_alloc(zfsvfs, id, fops, ops);
}
return (ip);
@@ -543,7 +536,7 @@ zfsctl_inode_lookup(zfs_sb_t *zsb, uint64_t id,
/*
* Create the '.zfs' directory. This directory is cached as part of the VFS
- * structure. This results in a hold on the zfs_sb_t. The code in zfs_umount()
+ * structure. This results in a hold on the zfsvfs_t. The code in zfs_umount()
* therefore checks against a vfs_count of 2 instead of 1. This reference
* is removed when the ctldir is destroyed in the unmount. All other entities
* under the '.zfs' directory are created dynamically as needed.
@@ -552,20 +545,16 @@ zfsctl_inode_lookup(zfs_sb_t *zsb, uint64_t id,
* of 64-bit inode numbers this support must be disabled on 32-bit systems.
*/
int
-zfsctl_create(zfs_sb_t *zsb)
+zfsctl_create(zfsvfs_t *zfsvfs)
{
-#if defined(CONFIG_64BIT)
- ASSERT(zsb->z_ctldir == NULL);
+ ASSERT(zfsvfs->z_ctldir == NULL);
- zsb->z_ctldir = zfsctl_inode_alloc(zsb, ZFSCTL_INO_ROOT,
+ zfsvfs->z_ctldir = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT,
&zpl_fops_root, &zpl_ops_root);
- if (zsb->z_ctldir == NULL)
+ if (zfsvfs->z_ctldir == NULL)
return (SET_ERROR(ENOENT));
return (0);
-#else
- return (SET_ERROR(EOPNOTSUPP));
-#endif /* CONFIG_64BIT */
}
/*
@@ -573,12 +562,12 @@ zfsctl_create(zfs_sb_t *zsb)
* Only called when the filesystem is unmounted.
*/
void
-zfsctl_destroy(zfs_sb_t *zsb)
+zfsctl_destroy(zfsvfs_t *zfsvfs)
{
- if (zsb->z_issnap) {
+ if (zfsvfs->z_issnap) {
zfs_snapentry_t *se;
- spa_t *spa = zsb->z_os->os_spa;
- uint64_t objsetid = dmu_objset_id(zsb->z_os);
+ spa_t *spa = zfsvfs->z_os->os_spa;
+ uint64_t objsetid = dmu_objset_id(zfsvfs->z_os);
rw_enter(&zfs_snapshot_lock, RW_WRITER);
if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid))
@@ -588,9 +577,9 @@ zfsctl_destroy(zfs_sb_t *zsb)
zfsctl_snapshot_rele(se);
}
rw_exit(&zfs_snapshot_lock);
- } else if (zsb->z_ctldir) {
- iput(zsb->z_ctldir);
- zsb->z_ctldir = NULL;
+ } else if (zfsvfs->z_ctldir) {
+ iput(zfsvfs->z_ctldir);
+ zfsvfs->z_ctldir = NULL;
}
}
@@ -605,27 +594,40 @@ zfsctl_root(znode_t *zp)
igrab(ZTOZSB(zp)->z_ctldir);
return (ZTOZSB(zp)->z_ctldir);
}
+
/*
- * Generate a long fid which includes the root object and objset of a
- * snapshot but not the generation number. For the root object the
- * generation number is ignored when zero to avoid needing to open
- * the dataset when generating fids for the snapshot names.
+ * Generate a long fid to indicate a snapdir. We encode whether the snapdir
+ * is already mounted in the gen field. We do this because an nfsd lookup
+ * will not trigger the automount. The next time nfsd does fh_to_dentry, we
+ * will notice this, perform the automount, and return ESTALE to force nfsd
+ * to revalidate and follow the mount.
*/
static int
zfsctl_snapdir_fid(struct inode *ip, fid_t *fidp)
{
- zfs_sb_t *zsb = ITOZSB(ip);
zfid_short_t *zfid = (zfid_short_t *)fidp;
zfid_long_t *zlfid = (zfid_long_t *)fidp;
uint32_t gen = 0;
uint64_t object;
uint64_t objsetid;
int i;
+ struct dentry *dentry;
+
+ if (fidp->fid_len < LONG_FID_LEN) {
+ fidp->fid_len = LONG_FID_LEN;
+ return (SET_ERROR(ENOSPC));
+ }
- object = zsb->z_root;
+ object = ip->i_ino;
objsetid = ZFSCTL_INO_SNAPDIRS - ip->i_ino;
zfid->zf_len = LONG_FID_LEN;
+ dentry = d_obtain_alias(igrab(ip));
+ if (!IS_ERR(dentry)) {
+ gen = !!d_mountpoint(dentry);
+ dput(dentry);
+ }
+
for (i = 0; i < sizeof (zfid->zf_object); i++)
zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
@@ -648,24 +650,24 @@ int
zfsctl_fid(struct inode *ip, fid_t *fidp)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
uint64_t object = zp->z_id;
zfid_short_t *zfid;
int i;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
+
+ if (zfsctl_is_snapdir(ip)) {
+ ZFS_EXIT(zfsvfs);
+ return (zfsctl_snapdir_fid(ip, fidp));
+ }
if (fidp->fid_len < SHORT_FID_LEN) {
fidp->fid_len = SHORT_FID_LEN;
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(ENOSPC));
}
- if (zfsctl_is_snapdir(ip)) {
- ZFS_EXIT(zsb);
- return (zfsctl_snapdir_fid(ip, fidp));
- }
-
zfid = (zfid_short_t *)fidp;
zfid->zf_len = SHORT_FID_LEN;
@@ -677,7 +679,7 @@ zfsctl_fid(struct inode *ip, fid_t *fidp)
for (i = 0; i < sizeof (zfid->zf_gen); i++)
zfid->zf_gen[i] = 0;
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
@@ -685,10 +687,10 @@ zfsctl_fid(struct inode *ip, fid_t *fidp)
* Construct a full dataset name in full_name: "pool/dataset at snap_name"
*/
static int
-zfsctl_snapshot_name(zfs_sb_t *zsb, const char *snap_name, int len,
+zfsctl_snapshot_name(zfsvfs_t *zfsvfs, const char *snap_name, int len,
char *full_name)
{
- objset_t *os = zsb->z_os;
+ objset_t *os = zfsvfs->z_os;
if (zfs_component_namecheck(snap_name, NULL, NULL) != 0)
return (SET_ERROR(EILSEQ));
@@ -738,26 +740,27 @@ zfsctl_snapshot_path(struct path *path, int len, char *full_path)
* Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/"
*/
static int
-zfsctl_snapshot_path_objset(zfs_sb_t *zsb, uint64_t objsetid,
+zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid,
int path_len, char *full_path)
{
- objset_t *os = zsb->z_os;
+ objset_t *os = zfsvfs->z_os;
fstrans_cookie_t cookie;
char *snapname;
boolean_t case_conflict;
uint64_t id, pos = 0;
int error = 0;
- if (zsb->z_mntopts->z_mntpoint == NULL)
+ if (zfsvfs->z_vfs->vfs_mntpoint == NULL)
return (ENOENT);
cookie = spl_fstrans_mark();
- snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+ snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
while (error == 0) {
dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
- error = dmu_snapshot_list_next(zsb->z_os, MAXNAMELEN,
- snapname, &id, &pos, &case_conflict);
+ error = dmu_snapshot_list_next(zfsvfs->z_os,
+ ZFS_MAX_DATASET_NAME_LEN, snapname, &id, &pos,
+ &case_conflict);
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
if (error)
goto out;
@@ -768,9 +771,9 @@ zfsctl_snapshot_path_objset(zfs_sb_t *zsb, uint64_t objsetid,
memset(full_path, 0, path_len);
snprintf(full_path, path_len - 1, "%s/.zfs/snapshot/%s",
- zsb->z_mntopts->z_mntpoint, snapname);
+ zfsvfs->z_vfs->vfs_mntpoint, snapname);
out:
- kmem_free(snapname, MAXNAMELEN);
+ kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
spl_fstrans_unmark(cookie);
return (error);
@@ -783,18 +786,18 @@ int
zfsctl_root_lookup(struct inode *dip, char *name, struct inode **ipp,
int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
{
- zfs_sb_t *zsb = ITOZSB(dip);
+ zfsvfs_t *zfsvfs = ITOZSB(dip);
int error = 0;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
if (strcmp(name, "..") == 0) {
*ipp = dip->i_sb->s_root->d_inode;
} else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) {
- *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIR,
+ *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIR,
&zpl_fops_snapdir, &zpl_ops_snapdir);
} else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) {
- *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SHARES,
+ *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SHARES,
&zpl_fops_shares, &zpl_ops_shares);
} else {
*ipp = NULL;
@@ -803,7 +806,7 @@ zfsctl_root_lookup(struct inode *dip, char *name, struct inode **ipp,
if (*ipp == NULL)
error = SET_ERROR(ENOENT);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
@@ -817,24 +820,24 @@ int
zfsctl_snapdir_lookup(struct inode *dip, char *name, struct inode **ipp,
int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
{
- zfs_sb_t *zsb = ITOZSB(dip);
+ zfsvfs_t *zfsvfs = ITOZSB(dip);
uint64_t id;
int error;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
- error = dmu_snapshot_lookup(zsb->z_os, name, &id);
+ error = dmu_snapshot_lookup(zfsvfs->z_os, name, &id);
if (error) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
- *ipp = zfsctl_inode_lookup(zsb, ZFSCTL_INO_SNAPDIRS - id,
+ *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIRS - id,
&simple_dir_operations, &simple_dir_inode_operations);
if (*ipp == NULL)
error = SET_ERROR(ENOENT);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
@@ -848,23 +851,23 @@ int
zfsctl_snapdir_rename(struct inode *sdip, char *snm,
struct inode *tdip, char *tnm, cred_t *cr, int flags)
{
- zfs_sb_t *zsb = ITOZSB(sdip);
+ zfsvfs_t *zfsvfs = ITOZSB(sdip);
char *to, *from, *real, *fsname;
int error;
if (!zfs_admin_snapshot)
return (EACCES);
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
- to = kmem_alloc(MAXNAMELEN, KM_SLEEP);
- from = kmem_alloc(MAXNAMELEN, KM_SLEEP);
- real = kmem_alloc(MAXNAMELEN, KM_SLEEP);
- fsname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+ to = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+ from = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+ real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+ fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
- if (zsb->z_case == ZFS_CASE_INSENSITIVE) {
- error = dmu_snapshot_realname(zsb->z_os, snm, real,
- MAXNAMELEN, NULL);
+ if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
+ error = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
+ ZFS_MAX_DATASET_NAME_LEN, NULL);
if (error == 0) {
snm = real;
} else if (error != ENOTSUP) {
@@ -872,11 +875,13 @@ zfsctl_snapdir_rename(struct inode *sdip, char *snm,
}
}
- dmu_objset_name(zsb->z_os, fsname);
+ dmu_objset_name(zfsvfs->z_os, fsname);
- error = zfsctl_snapshot_name(ITOZSB(sdip), snm, MAXNAMELEN, from);
+ error = zfsctl_snapshot_name(ITOZSB(sdip), snm,
+ ZFS_MAX_DATASET_NAME_LEN, from);
if (error == 0)
- error = zfsctl_snapshot_name(ITOZSB(tdip), tnm, MAXNAMELEN, to);
+ error = zfsctl_snapshot_name(ITOZSB(tdip), tnm,
+ ZFS_MAX_DATASET_NAME_LEN, to);
if (error == 0)
error = zfs_secpolicy_rename_perms(from, to, cr);
if (error != 0)
@@ -906,12 +911,12 @@ zfsctl_snapdir_rename(struct inode *sdip, char *snm,
rw_exit(&zfs_snapshot_lock);
out:
- kmem_free(from, MAXNAMELEN);
- kmem_free(to, MAXNAMELEN);
- kmem_free(real, MAXNAMELEN);
- kmem_free(fsname, MAXNAMELEN);
+ kmem_free(from, ZFS_MAX_DATASET_NAME_LEN);
+ kmem_free(to, ZFS_MAX_DATASET_NAME_LEN);
+ kmem_free(real, ZFS_MAX_DATASET_NAME_LEN);
+ kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
@@ -923,21 +928,21 @@ zfsctl_snapdir_rename(struct inode *sdip, char *snm,
int
zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags)
{
- zfs_sb_t *zsb = ITOZSB(dip);
+ zfsvfs_t *zfsvfs = ITOZSB(dip);
char *snapname, *real;
int error;
if (!zfs_admin_snapshot)
return (EACCES);
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
- snapname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
- real = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+ snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
+ real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
- if (zsb->z_case == ZFS_CASE_INSENSITIVE) {
- error = dmu_snapshot_realname(zsb->z_os, name, real,
- MAXNAMELEN, NULL);
+ if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
+ error = dmu_snapshot_realname(zfsvfs->z_os, name, real,
+ ZFS_MAX_DATASET_NAME_LEN, NULL);
if (error == 0) {
name = real;
} else if (error != ENOTSUP) {
@@ -945,7 +950,8 @@ zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags)
}
}
- error = zfsctl_snapshot_name(ITOZSB(dip), name, MAXNAMELEN, snapname);
+ error = zfsctl_snapshot_name(ITOZSB(dip), name,
+ ZFS_MAX_DATASET_NAME_LEN, snapname);
if (error == 0)
error = zfs_secpolicy_destroy_perms(snapname, cr);
if (error != 0)
@@ -955,10 +961,10 @@ zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags)
if ((error == 0) || (error == ENOENT))
error = dsl_destroy_snapshot(snapname, B_FALSE);
out:
- kmem_free(snapname, MAXNAMELEN);
- kmem_free(real, MAXNAMELEN);
+ kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
+ kmem_free(real, ZFS_MAX_DATASET_NAME_LEN);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
@@ -969,23 +975,23 @@ zfsctl_snapdir_remove(struct inode *dip, char *name, cred_t *cr, int flags)
*/
int
zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap,
- struct inode **ipp, cred_t *cr, int flags)
+ struct inode **ipp, cred_t *cr, int flags)
{
- zfs_sb_t *zsb = ITOZSB(dip);
+ zfsvfs_t *zfsvfs = ITOZSB(dip);
char *dsname;
int error;
if (!zfs_admin_snapshot)
return (EACCES);
- dsname = kmem_alloc(MAXNAMELEN, KM_SLEEP);
+ dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
if (zfs_component_namecheck(dirname, NULL, NULL) != 0) {
error = SET_ERROR(EILSEQ);
goto out;
}
- dmu_objset_name(zsb->z_os, dsname);
+ dmu_objset_name(zfsvfs->z_os, dsname);
error = zfs_secpolicy_snapshot_perms(dsname, cr);
if (error != 0)
@@ -1000,7 +1006,7 @@ zfsctl_snapdir_mkdir(struct inode *dip, char *dirname, vattr_t *vap,
0, cr, NULL, NULL);
}
out:
- kmem_free(dsname, MAXNAMELEN);
+ kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN);
return (error);
}
@@ -1053,8 +1059,8 @@ zfsctl_snapshot_mount(struct path *path, int flags)
{
struct dentry *dentry = path->dentry;
struct inode *ip = dentry->d_inode;
- zfs_sb_t *zsb;
- zfs_sb_t *snap_zsb;
+ zfsvfs_t *zfsvfs;
+ zfsvfs_t *snap_zfsvfs;
zfs_snapentry_t *se;
char *full_name, *full_path;
char *argv[] = { "/usr/bin/env", "mount", "-t", "zfs", "-n", NULL, NULL,
@@ -1066,14 +1072,14 @@ zfsctl_snapshot_mount(struct path *path, int flags)
if (ip == NULL)
return (EISDIR);
- zsb = ITOZSB(ip);
- ZFS_ENTER(zsb);
+ zfsvfs = ITOZSB(ip);
+ ZFS_ENTER(zfsvfs);
- full_name = kmem_zalloc(MAXNAMELEN, KM_SLEEP);
+ full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
- error = zfsctl_snapshot_name(zsb, dname(dentry),
- MAXNAMELEN, full_name);
+ error = zfsctl_snapshot_name(zfsvfs, dname(dentry),
+ ZFS_MAX_DATASET_NAME_LEN, full_name);
if (error)
goto error;
@@ -1132,14 +1138,14 @@ zfsctl_snapshot_mount(struct path *path, int flags)
spath = *path;
path_get(&spath);
if (zpl_follow_down_one(&spath)) {
- snap_zsb = ITOZSB(spath.dentry->d_inode);
- snap_zsb->z_parent = zsb;
+ snap_zfsvfs = ITOZSB(spath.dentry->d_inode);
+ snap_zfsvfs->z_parent = zfsvfs;
dentry = spath.dentry;
spath.mnt->mnt_flags |= MNT_SHRINKABLE;
rw_enter(&zfs_snapshot_lock, RW_WRITER);
se = zfsctl_snapshot_alloc(full_name, full_path,
- snap_zsb->z_os->os_spa, dmu_objset_id(snap_zsb->z_os),
+ snap_zfsvfs->z_os->os_spa, dmu_objset_id(snap_zfsvfs->z_os),
dentry);
zfsctl_snapshot_add(se);
zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);
@@ -1147,79 +1153,61 @@ zfsctl_snapshot_mount(struct path *path, int flags)
}
path_put(&spath);
error:
- kmem_free(full_name, MAXNAMELEN);
+ kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN);
kmem_free(full_path, MAXPATHLEN);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
/*
- * Given the objset id of the snapshot return its zfs_sb_t as zsbp.
+ * Get the snapdir inode from the fid
*/
int
-zfsctl_lookup_objset(struct super_block *sb, uint64_t objsetid, zfs_sb_t **zsbp)
+zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen,
+ struct inode **ipp)
{
- zfs_snapentry_t *se;
int error;
- spa_t *spa = ((zfs_sb_t *)(sb->s_fs_info))->z_os->os_spa;
+ struct path path;
+ char *mnt;
+ struct dentry *dentry;
- /*
- * Verify that the snapshot is mounted then lookup the mounted root
- * rather than the covered mount point. This may fail if the
- * snapshot has just been unmounted by an unrelated user space
- * process. This race cannot occur to an expired mount point
- * because we hold the zfs_snapshot_lock to prevent the race.
- */
- rw_enter(&zfs_snapshot_lock, RW_READER);
- if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) {
- zfs_sb_t *zsb;
-
- zsb = ITOZSB(se->se_root_dentry->d_inode);
- ASSERT3U(dmu_objset_id(zsb->z_os), ==, objsetid);
+ mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- if (time_after(jiffies, zsb->z_snap_defer_time +
- MAX(zfs_expire_snapshot * HZ / 2, HZ))) {
- zsb->z_snap_defer_time = jiffies;
- zfsctl_snapshot_unmount_cancel(se);
- zfsctl_snapshot_unmount_delay_impl(se,
- zfs_expire_snapshot);
- }
+ error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid,
+ MAXPATHLEN, mnt);
+ if (error)
+ goto out;
- *zsbp = zsb;
- zfsctl_snapshot_rele(se);
- error = SET_ERROR(0);
- } else {
- error = SET_ERROR(ENOENT);
- }
- rw_exit(&zfs_snapshot_lock);
+ /* Trigger automount */
+ error = kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path);
+ if (error)
+ goto out;
+ path_put(&path);
/*
- * Automount the snapshot given the objset id by constructing the
- * full mount point and performing a traversal.
+ * Get the snapdir inode. Note, we don't want to use the above
+ * path because it contains the root of the snapshot rather
+ * than the snapdir.
*/
- if (error == ENOENT) {
- struct path path;
- char *mnt;
-
- mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP);
- error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid,
- MAXPATHLEN, mnt);
- if (error) {
- kmem_free(mnt, MAXPATHLEN);
- return (SET_ERROR(error));
- }
-
- error = kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path);
- if (error == 0) {
- *zsbp = ITOZSB(path.dentry->d_inode);
- path_put(&path);
- }
-
- kmem_free(mnt, MAXPATHLEN);
+ *ipp = ilookup(sb, ZFSCTL_INO_SNAPDIRS - objsetid);
+ if (*ipp == NULL) {
+ error = SET_ERROR(ENOENT);
+ goto out;
}
+ /* check gen, see zfsctl_snapdir_fid */
+ dentry = d_obtain_alias(igrab(*ipp));
+ if (gen != (!IS_ERR(dentry) && d_mountpoint(dentry))) {
+ iput(*ipp);
+ *ipp = NULL;
+ error = SET_ERROR(ENOENT);
+ }
+ if (!IS_ERR(dentry))
+ dput(dentry);
+out:
+ kmem_free(mnt, MAXPATHLEN);
return (error);
}
@@ -1227,33 +1215,28 @@ int
zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
{
- zfs_sb_t *zsb = ITOZSB(dip);
+ zfsvfs_t *zfsvfs = ITOZSB(dip);
struct inode *ip;
znode_t *dzp;
int error;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
- if (zsb->z_shares_dir == 0) {
- ZFS_EXIT(zsb);
+ if (zfsvfs->z_shares_dir == 0) {
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(ENOTSUP));
}
- error = zfs_zget(zsb, zsb->z_shares_dir, &dzp);
- if (error) {
- ZFS_EXIT(zsb);
- return (error);
+ if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
+ error = zfs_lookup(ZTOI(dzp), name, &ip, 0, cr, NULL, NULL);
+ iput(ZTOI(dzp));
}
- error = zfs_lookup(ZTOI(dzp), name, &ip, 0, cr, NULL, NULL);
-
- iput(ZTOI(dzp));
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-
/*
* Initialize the various pieces we'll need to create and manipulate .zfs
* directories. Currently this is unused but available.
@@ -1268,9 +1251,6 @@ zfsctl_init(void)
sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t,
se_node_objsetid));
rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL);
-
- zfs_expire_taskq = taskq_create("z_unmount", 1, defclsyspri,
- 1, 8, TASKQ_PREPOPULATE);
}
/*
@@ -1280,8 +1260,6 @@ zfsctl_init(void)
void
zfsctl_fini(void)
{
- taskq_destroy(zfs_expire_taskq);
-
avl_destroy(&zfs_snapshots_by_name);
avl_destroy(&zfs_snapshots_by_objsetid);
rw_destroy(&zfs_snapshot_lock);
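For orientation on the fid scheme in this file: zfsctl_snapdir_fid() packs
the snapdir inode number into zf_object, the objset id into zf_setid, and a
one-bit "is it mounted" flag into zf_gen, which zfsctl_snapdir_vget() checks
again on the way back in. The fid layouts (reproduced from zfs_vfsops.h for
reference) are:

    typedef struct zfid_short {
        uint16_t  zf_len;
        uint8_t   zf_object[6];  /* object[i] = object >> (8 * i) */
        uint8_t   zf_gen[4];     /* gen[i] = gen >> (8 * i) */
    } zfid_short_t;

    typedef struct zfid_long {
        zfid_short_t  z_fid;
        uint8_t       zf_setid[6];   /* objset[i] = objsetid >> (8 * i) */
        uint8_t       zf_setgen[2];  /* objset generation, zero here */
    } zfid_long_t;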
diff --git a/zfs/module/zfs/zfs_debug.c b/zfs/module/zfs/zfs_debug.c
index 2770359c8b48..d1dba3f8f574 100644
--- a/zfs/module/zfs/zfs_debug.c
+++ b/zfs/module/zfs/zfs_debug.c
@@ -42,7 +42,7 @@ kstat_t *zfs_dbgmsg_kstat;
* # Clear the kernel debug message log.
* echo 0 >/proc/spl/kstat/zfs/dbgmsg
*/
-#if defined(_KERNEL)
+#if defined(_KERNEL) && !defined(ZFS_DEBUG)
int zfs_dbgmsg_enable = 0;
#else
int zfs_dbgmsg_enable = 1;
@@ -62,7 +62,7 @@ zfs_dbgmsg_data(char *buf, size_t size, void *data)
zfs_dbgmsg_t *zdm = (zfs_dbgmsg_t *)data;
(void) snprintf(buf, size, "%-12llu %-s\n",
- (u_longlong_t) zdm->zdm_timestamp, zdm->zdm_msg);
+ (u_longlong_t)zdm->zdm_timestamp, zdm->zdm_msg);
return (0);
}
@@ -161,6 +161,13 @@ __zfs_dbgmsg(char *buf)
mutex_exit(&zfs_dbgmsgs_lock);
}
+void
+__set_error(const char *file, const char *func, int line, int err)
+{
+ if (zfs_flags & ZFS_DEBUG_SET_ERROR)
+ __dprintf(file, func, line, "error %d", err);
+}
+
#ifdef _KERNEL
void
__dprintf(const char *file, const char *func, int line, const char *fmt, ...)
@@ -170,8 +177,10 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
size_t size;
char *buf;
char *nl;
+ int i;
- if (!zfs_dbgmsg_enable && !(zfs_flags & ZFS_DEBUG_DPRINTF))
+ if (!zfs_dbgmsg_enable &&
+ !(zfs_flags & (ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SET_ERROR)))
return;
size = 1024;
@@ -187,9 +196,13 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
newfile = file;
}
- va_start(adx, fmt);
- (void) vsnprintf(buf, size, fmt, adx);
- va_end(adx);
+ i = snprintf(buf, size, "%s:%d:%s(): ", newfile, line, func);
+
+ if (i < size) {
+ va_start(adx, fmt);
+ (void) vsnprintf(buf + i, size - i, fmt, adx);
+ va_end(adx);
+ }
/*
* Get rid of trailing newline.
@@ -209,9 +222,8 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
* # Dump the ring buffer.
* $ cat /sys/kernel/debug/tracing/trace
*/
- if (zfs_flags & ZFS_DEBUG_DPRINTF)
- DTRACE_PROBE4(zfs__dprintf,
- char *, newfile, char *, func, int, line, char *, buf);
+ if (zfs_flags & (ZFS_DEBUG_DPRINTF | ZFS_DEBUG_SET_ERROR))
+ DTRACE_PROBE1(zfs__dprintf, char *, buf);
/*
* To get this data enable the zfs debug log as shown:
@@ -228,6 +240,21 @@ __dprintf(const char *file, const char *func, int line, const char *fmt, ...)
kmem_free(buf, size);
}
+
+#else
+
+void
+zfs_dbgmsg_print(const char *tag)
+{
+ zfs_dbgmsg_t *zdm;
+
+ (void) printf("ZFS_DBGMSG(%s):\n", tag);
+ mutex_enter(&zfs_dbgmsgs_lock);
+ for (zdm = list_head(&zfs_dbgmsgs); zdm;
+ zdm = list_next(&zfs_dbgmsgs, zdm))
+ (void) printf("%s\n", zdm->zdm_msg);
+ mutex_exit(&zfs_dbgmsgs_lock);
+}
#endif /* _KERNEL */
#ifdef _KERNEL
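The new __set_error() hook above is the kernel-side sink for the SET_ERROR()
macro used throughout these files: when the ZFS_DEBUG_SET_ERROR flag is set,
every tagged error return is logged with its origin before being propagated.
The macro side (in zfs_debug.h, sketched here on that assumption) is just:

    #define SET_ERROR(err) \
        (__set_error(__FILE__, __func__, __LINE__, err), err)

so a call such as return (SET_ERROR(ENOENT)); records file:line:function in
the dbgmsg buffer and still evaluates to ENOENT.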
diff --git a/zfs/module/zfs/zfs_dir.c b/zfs/module/zfs/zfs_dir.c
index b3f98ef5b529..1fcc69fd12e6 100644
--- a/zfs/module/zfs/zfs_dir.c
+++ b/zfs/module/zfs/zfs_dir.c
@@ -18,12 +18,13 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
*/
-
#include <sys/types.h>
#include <sys/param.h>
#include <sys/time.h>
@@ -64,14 +65,13 @@
* of names after deciding which is the appropriate lookup interface.
*/
static int
-zfs_match_find(zfs_sb_t *zsb, znode_t *dzp, char *name, boolean_t exact,
+zfs_match_find(zfsvfs_t *zfsvfs, znode_t *dzp, char *name, matchtype_t mt,
boolean_t update, int *deflags, pathname_t *rpnp, uint64_t *zoid)
{
boolean_t conflict = B_FALSE;
int error;
- if (zsb->z_norm) {
- matchtype_t mt = MT_FIRST;
+ if (zfsvfs->z_norm) {
size_t bufsz = 0;
char *buf = NULL;
@@ -79,16 +79,15 @@ zfs_match_find(zfs_sb_t *zsb, znode_t *dzp, char *name, boolean_t exact,
buf = rpnp->pn_buf;
bufsz = rpnp->pn_bufsize;
}
- if (exact)
- mt = MT_EXACT;
+
/*
* In the non-mixed case we only expect there would ever
* be one match, but we need to use the normalizing lookup.
*/
- error = zap_lookup_norm(zsb->z_os, dzp->z_id, name, 8, 1,
+ error = zap_lookup_norm(zfsvfs->z_os, dzp->z_id, name, 8, 1,
zoid, mt, buf, bufsz, &conflict);
} else {
- error = zap_lookup(zsb->z_os, dzp->z_id, name, 8, 1, zoid);
+ error = zap_lookup(zfsvfs->z_os, dzp->z_id, name, 8, 1, zoid);
}
/*
@@ -101,7 +100,7 @@ zfs_match_find(zfs_sb_t *zsb, znode_t *dzp, char *name, boolean_t exact,
if (error == EOVERFLOW)
error = 0;
- if (zsb->z_norm && !error && deflags)
+ if (zfsvfs->z_norm && !error && deflags)
*deflags = conflict ? ED_CASE_CONFLICT : 0;
*zoid = ZFS_DIRENT_OBJ(*zoid);
@@ -153,10 +152,10 @@ int
zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
int flag, int *direntflags, pathname_t *realpnp)
{
- zfs_sb_t *zsb = ZTOZSB(dzp);
+ zfsvfs_t *zfsvfs = ZTOZSB(dzp);
zfs_dirlock_t *dl;
boolean_t update;
- boolean_t exact;
+ matchtype_t mt = 0;
uint64_t zoid;
#ifdef HAVE_DNLC
vnode_t *vp = NULL;
@@ -178,7 +177,7 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
/*
* Case sensitivity and normalization preferences are set when
* the file system is created. These are stored in the
- * zsb->z_case and zsb->z_norm fields. These choices
+ * zfsvfs->z_case and zfsvfs->z_norm fields. These choices
* affect what vnodes can be cached in the DNLC, how we
* perform zap lookups, and the "width" of our dirlocks.
*
@@ -193,13 +192,29 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
*/
/*
- * Decide if exact matches should be requested when performing
- * a zap lookup on file systems supporting case-insensitive
- * access.
+ * When matching we may need to normalize & change case according to
+ * FS settings.
+ *
+ * Note that a normalized match is necessary for a case-insensitive
+ * filesystem when the lookup request is not exact, because normalization
+ * can fold case independently of normalizing code point sequences.
+ *
+ * See the table above zfs_dropname().
*/
- exact =
- ((zsb->z_case == ZFS_CASE_INSENSITIVE) && (flag & ZCIEXACT)) ||
- ((zsb->z_case == ZFS_CASE_MIXED) && !(flag & ZCILOOK));
+ if (zfsvfs->z_norm != 0) {
+ mt = MT_NORMALIZE;
+
+ /*
+ * Determine if the match needs to honor the case specified in
+ * lookup, and if so keep track of that so that during
+ * normalization we don't fold case.
+ */
+ if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE &&
+ (flag & ZCIEXACT)) ||
+ (zfsvfs->z_case == ZFS_CASE_MIXED && !(flag & ZCILOOK))) {
+ mt |= MT_MATCH_CASE;
+ }
+ }
/*
* Only look in or update the DNLC if we are looking for the
@@ -211,9 +226,9 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
* Maybe can add TO-UPPERed version of name to dnlc in ci-only
* case for performance improvement?
*/
- update = !zsb->z_norm ||
- ((zsb->z_case == ZFS_CASE_MIXED) &&
- !(zsb->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));
+ update = !zfsvfs->z_norm ||
+ (zfsvfs->z_case == ZFS_CASE_MIXED &&
+ !(zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER) && !(flag & ZCILOOK));
/*
* ZRENAMING indicates we are in a situation where we should
@@ -226,7 +241,7 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
if (flag & ZRENAMING)
cmpflags = 0;
else
- cmpflags = zsb->z_norm;
+ cmpflags = zfsvfs->z_norm;
/*
* Wait until there are no locks on this name.
@@ -240,7 +255,7 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
mutex_enter(&dzp->z_lock);
for (;;) {
- if (dzp->z_unlinked) {
+ if (dzp->z_unlinked && !(flag & ZXATTR)) {
mutex_exit(&dzp->z_lock);
if (!(flag & ZHAVELOCK))
rw_exit(&dzp->z_name_lock);
@@ -289,8 +304,8 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
* dl_name in case the first thread goes away before we do.
* Note that we initialize the new name before storing its
* pointer into dl_name, because the first thread may load
- * dl->dl_name at any time. He'll either see the old value,
- * which is his, or the new shared copy; either is OK.
+ * dl->dl_name at any time. It'll either see the old value,
+ * which belongs to it, or the new shared copy; either is OK.
*/
dl->dl_namesize = strlen(dl->dl_name) + 1;
name = kmem_alloc(dl->dl_namesize, KM_SLEEP);
@@ -306,7 +321,7 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
* See if there's an object by this name; if so, put a hold on it.
*/
if (flag & ZXATTR) {
- error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zsb), &zoid,
+ error = sa_lookup(dzp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &zoid,
sizeof (zoid));
if (error == 0)
error = (zoid == 0 ? SET_ERROR(ENOENT) : 0);
@@ -327,11 +342,11 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
*zpp = VTOZ(vp);
return (0);
} else {
- error = zfs_match_find(zsb, dzp, name, exact,
+ error = zfs_match_find(zfsvfs, dzp, name, mt,
update, direntflags, realpnp, &zoid);
}
#else
- error = zfs_match_find(zsb, dzp, name, exact,
+ error = zfs_match_find(zfsvfs, dzp, name, mt,
update, direntflags, realpnp, &zoid);
#endif /* HAVE_DNLC */
}
@@ -345,7 +360,7 @@ zfs_dirent_lock(zfs_dirlock_t **dlpp, znode_t *dzp, char *name, znode_t **zpp,
zfs_dirent_unlock(dl);
return (SET_ERROR(EEXIST));
}
- error = zfs_zget(zsb, zoid, zpp);
+ error = zfs_zget(zfsvfs, zoid, zpp);
if (error) {
zfs_dirent_unlock(dl);
return (error);
@@ -414,23 +429,23 @@ zfs_dirlook(znode_t *dzp, char *name, struct inode **ipp, int flags,
*ipp = ZTOI(dzp);
igrab(*ipp);
} else if (name[0] == '.' && name[1] == '.' && name[2] == 0) {
- zfs_sb_t *zsb = ZTOZSB(dzp);
+ zfsvfs_t *zfsvfs = ZTOZSB(dzp);
/*
* If we are a snapshot mounted under .zfs, return
* the inode pointer for the snapshot directory.
*/
if ((error = sa_lookup(dzp->z_sa_hdl,
- SA_ZPL_PARENT(zsb), &parent, sizeof (parent))) != 0)
+ SA_ZPL_PARENT(zfsvfs), &parent, sizeof (parent))) != 0)
return (error);
- if (parent == dzp->z_id && zsb->z_parent != zsb) {
- error = zfsctl_root_lookup(zsb->z_parent->z_ctldir,
+ if (parent == dzp->z_id && zfsvfs->z_parent != zfsvfs) {
+ error = zfsctl_root_lookup(zfsvfs->z_parent->z_ctldir,
"snapshot", ipp, 0, kcred, NULL, NULL);
return (error);
}
rw_enter(&dzp->z_parent_lock, RW_READER);
- error = zfs_zget(zsb, parent, &zp);
+ error = zfs_zget(zfsvfs, parent, &zp);
if (error == 0)
*ipp = ZTOI(zp);
rw_exit(&dzp->z_parent_lock);
@@ -475,13 +490,13 @@ zfs_dirlook(znode_t *dzp, char *name, struct inode **ipp, int flags,
void
zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
ASSERT(zp->z_unlinked);
- ASSERT(zp->z_links == 0);
+ ASSERT(ZTOI(zp)->i_nlink == 0);
VERIFY3U(0, ==,
- zap_add_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx));
+ zap_add_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
}
/*
@@ -489,7 +504,7 @@ zfs_unlinked_add(znode_t *zp, dmu_tx_t *tx)
* (force) umounted the file system.
*/
void
-zfs_unlinked_drain(zfs_sb_t *zsb)
+zfs_unlinked_drain(zfsvfs_t *zfsvfs)
{
zap_cursor_t zc;
zap_attribute_t zap;
@@ -500,7 +515,7 @@ zfs_unlinked_drain(zfs_sb_t *zsb)
/*
* Iterate over the contents of the unlinked set.
*/
- for (zap_cursor_init(&zc, zsb->z_os, zsb->z_unlinkedobj);
+ for (zap_cursor_init(&zc, zfsvfs->z_os, zfsvfs->z_unlinkedobj);
zap_cursor_retrieve(&zc, &zap) == 0;
zap_cursor_advance(&zc)) {
@@ -508,7 +523,8 @@ zfs_unlinked_drain(zfs_sb_t *zsb)
* See what kind of object we have in list
*/
- error = dmu_object_info(zsb->z_os, zap.za_first_integer, &doi);
+ error = dmu_object_info(zfsvfs->z_os,
+ zap.za_first_integer, &doi);
if (error != 0)
continue;
@@ -518,7 +534,7 @@ zfs_unlinked_drain(zfs_sb_t *zsb)
* We need to re-mark these list entries for deletion,
* so we pull them back into core and set zp->z_unlinked.
*/
- error = zfs_zget(zsb, zap.za_first_integer, &zp);
+ error = zfs_zget(zfsvfs, zap.za_first_integer, &zp);
/*
* We may pick up znodes that are already marked for deletion.
@@ -553,15 +569,15 @@ zfs_purgedir(znode_t *dzp)
zap_attribute_t zap;
znode_t *xzp;
dmu_tx_t *tx;
- zfs_sb_t *zsb = ZTOZSB(dzp);
+ zfsvfs_t *zfsvfs = ZTOZSB(dzp);
zfs_dirlock_t dl;
int skipped = 0;
int error;
- for (zap_cursor_init(&zc, zsb->z_os, dzp->z_id);
+ for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
(error = zap_cursor_retrieve(&zc, &zap)) == 0;
zap_cursor_advance(&zc)) {
- error = zfs_zget(zsb,
+ error = zfs_zget(zfsvfs,
ZFS_DIRENT_OBJ(zap.za_first_integer), &xzp);
if (error) {
skipped += 1;
@@ -571,13 +587,14 @@ zfs_purgedir(znode_t *dzp)
ASSERT(S_ISREG(ZTOI(xzp)->i_mode) ||
S_ISLNK(ZTOI(xzp)->i_mode));
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
dmu_tx_hold_zap(tx, dzp->z_id, FALSE, zap.za_name);
dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
- dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
+ dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
/* Is this really needed? */
zfs_sa_upgrade_txholds(tx, xzp);
+ dmu_tx_mark_netfree(tx);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
@@ -593,7 +610,7 @@ zfs_purgedir(znode_t *dzp)
if (error)
skipped += 1;
dmu_tx_commit(tx);
- set_nlink(ZTOI(xzp), xzp->z_links);
+
zfs_iput_async(ZTOI(xzp));
}
zap_cursor_fini(&zc);
@@ -605,15 +622,16 @@ zfs_purgedir(znode_t *dzp)
void
zfs_rmnode(znode_t *zp)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
- objset_t *os = zsb->z_os;
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
+ objset_t *os = zfsvfs->z_os;
znode_t *xzp = NULL;
dmu_tx_t *tx;
uint64_t acl_obj;
uint64_t xattr_obj;
+ uint64_t links;
int error;
- ASSERT(zp->z_links == 0);
+ ASSERT(ZTOI(zp)->i_nlink == 0);
ASSERT(atomic_read(&ZTOI(zp)->i_count) == 0);
/*
@@ -642,8 +660,8 @@ zfs_rmnode(znode_t *zp)
error = dmu_free_long_range(os, zp->z_id, 0, DMU_OBJECT_END);
if (error) {
/*
- * Not enough space. Leave the file in the unlinked
- * set.
+ * Not enough space or we were interrupted by unmount.
+ * Leave the file in the unlinked set.
*/
zfs_znode_dmu_fini(zp);
return;
@@ -654,10 +672,10 @@ zfs_rmnode(znode_t *zp)
* If the file has extended attributes, we're going to unlink
* the xattr dir.
*/
- error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
+ error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
&xattr_obj, sizeof (xattr_obj));
if (error == 0 && xattr_obj) {
- error = zfs_zget(zsb, xattr_obj, &xzp);
+ error = zfs_zget(zfsvfs, xattr_obj, &xzp);
ASSERT(error == 0);
}
@@ -668,9 +686,9 @@ zfs_rmnode(znode_t *zp)
*/
tx = dmu_tx_create(os);
dmu_tx_hold_free(tx, zp->z_id, 0, DMU_OBJECT_END);
- dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
+ dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
if (xzp) {
- dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, TRUE, NULL);
+ dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, TRUE, NULL);
dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
}
if (acl_obj)
@@ -693,17 +711,17 @@ zfs_rmnode(znode_t *zp)
ASSERT(error == 0);
mutex_enter(&xzp->z_lock);
xzp->z_unlinked = B_TRUE; /* mark xzp for deletion */
- xzp->z_links = 0; /* no more links to it */
- set_nlink(ZTOI(xzp), 0); /* this will let iput purge us */
- VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zsb),
- &xzp->z_links, sizeof (xzp->z_links), tx));
+ clear_nlink(ZTOI(xzp)); /* no more links to it */
+ links = 0;
+ VERIFY(0 == sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
+ &links, sizeof (links), tx));
mutex_exit(&xzp->z_lock);
zfs_unlinked_add(xzp, tx);
}
/* Remove this znode from the unlinked set */
VERIFY3U(0, ==,
- zap_remove_int(zsb->z_os, zsb->z_unlinkedobj, zp->z_id, tx));
+ zap_remove_int(zfsvfs->z_os, zfsvfs->z_unlinkedobj, zp->z_id, tx));
zfs_znode_delete(zp, tx);
@@ -730,11 +748,12 @@ int
zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
{
znode_t *dzp = dl->dl_dzp;
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
uint64_t value;
int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
sa_bulk_attr_t bulk[5];
uint64_t mtime[2], ctime[2];
+ uint64_t links;
int count = 0;
int error;
@@ -746,18 +765,24 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
mutex_exit(&zp->z_lock);
return (SET_ERROR(ENOENT));
}
- zp->z_links++;
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
- &zp->z_links, sizeof (zp->z_links));
-
+ if (!(flag & ZNEW)) {
+ /*
+ * ZNEW nodes come from zfs_mknode() where the link
+ * count has already been initialised
+ */
+ inc_nlink(ZTOI(zp));
+ links = ZTOI(zp)->i_nlink;
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
+ NULL, &links, sizeof (links));
+ }
}
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
&dzp->z_id, sizeof (dzp->z_id));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
&zp->z_pflags, sizeof (zp->z_pflags));
if (!(flag & ZNEW)) {
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
ctime, sizeof (ctime));
zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
ctime);
@@ -769,17 +794,19 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
mutex_enter(&dzp->z_lock);
dzp->z_size++;
- dzp->z_links += zp_is_dir;
+ if (zp_is_dir)
+ inc_nlink(ZTOI(dzp));
+ links = ZTOI(dzp)->i_nlink;
count = 0;
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
&dzp->z_size, sizeof (dzp->z_size));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
- &dzp->z_links, sizeof (dzp->z_links));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
+ &links, sizeof (links));
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
mtime, sizeof (mtime));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
ctime, sizeof (ctime));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
&dzp->z_pflags, sizeof (dzp->z_pflags));
zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
@@ -794,6 +821,28 @@ zfs_link_create(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag)
return (0);
}
+/*
+ * The match type in the code for this function should conform to:
+ *
+ * ------------------------------------------------------------------------
+ * fs type | z_norm | lookup type | match type
+ * ---------|-------------|-------------|----------------------------------
+ * CS !norm | 0 | 0 | 0 (exact)
+ * CS norm | formX | 0 | MT_NORMALIZE
+ * CI !norm | upper | !ZCIEXACT | MT_NORMALIZE
+ * CI !norm | upper | ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE
+ * CI norm | upper|formX | !ZCIEXACT | MT_NORMALIZE
+ * CI norm | upper|formX | ZCIEXACT | MT_NORMALIZE | MT_MATCH_CASE
+ * CM !norm | upper | !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE
+ * CM !norm | upper | ZCILOOK | MT_NORMALIZE
+ * CM norm | upper|formX | !ZCILOOK | MT_NORMALIZE | MT_MATCH_CASE
+ * CM norm | upper|formX | ZCILOOK | MT_NORMALIZE
+ *
+ * Abbreviations:
+ * CS = Case Sensitive, CI = Case Insensitive, CM = Case Mixed
+ * upper = case folding set by fs type on creation (U8_TEXTPREP_TOUPPER)
+ * formX = unicode normalization form set on fs creation
+ */
static int
zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx,
int flag)
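The match-type selection described by the table above now appears in two places, zfs_dirent_lock() and zfs_dropname(). Condensed into a single helper it reads as follows; this is a sketch for exposition only, the helper itself is not part of the patch:

	static matchtype_t
	zfs_match_type(zfsvfs_t *zfsvfs, int flag)
	{
		matchtype_t mt = 0;	/* CS !norm: plain exact match */

		if (zfsvfs->z_norm != 0) {
			mt = MT_NORMALIZE;
			/* honor the requested case where the fs would fold it */
			if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE &&
			    (flag & ZCIEXACT)) ||
			    (zfsvfs->z_case == ZFS_CASE_MIXED &&
			    !(flag & ZCILOOK)))
				mt |= MT_MATCH_CASE;
		}
		return (mt);
	}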
@@ -801,18 +850,20 @@ zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx,
int error;
if (ZTOZSB(zp)->z_norm) {
- if (((ZTOZSB(zp)->z_case == ZFS_CASE_INSENSITIVE) &&
+ matchtype_t mt = MT_NORMALIZE;
+
+ if ((ZTOZSB(zp)->z_case == ZFS_CASE_INSENSITIVE &&
(flag & ZCIEXACT)) ||
- ((ZTOZSB(zp)->z_case == ZFS_CASE_MIXED) &&
- !(flag & ZCILOOK)))
- error = zap_remove_norm(ZTOZSB(zp)->z_os,
- dzp->z_id, dl->dl_name, MT_EXACT, tx);
- else
- error = zap_remove_norm(ZTOZSB(zp)->z_os,
- dzp->z_id, dl->dl_name, MT_FIRST, tx);
+ (ZTOZSB(zp)->z_case == ZFS_CASE_MIXED &&
+ !(flag & ZCILOOK))) {
+ mt |= MT_MATCH_CASE;
+ }
+
+ error = zap_remove_norm(ZTOZSB(zp)->z_os, dzp->z_id,
+ dl->dl_name, mt, tx);
} else {
- error = zap_remove(ZTOZSB(zp)->z_os,
- dzp->z_id, dl->dl_name, tx);
+ error = zap_remove(ZTOZSB(zp)->z_os, dzp->z_id, dl->dl_name,
+ tx);
}
return (error);
@@ -827,14 +878,15 @@ zfs_dropname(zfs_dirlock_t *dl, znode_t *zp, znode_t *dzp, dmu_tx_t *tx,
*/
int
zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
- boolean_t *unlinkedp)
+ boolean_t *unlinkedp)
{
znode_t *dzp = dl->dl_dzp;
- zfs_sb_t *zsb = ZTOZSB(dzp);
+ zfsvfs_t *zfsvfs = ZTOZSB(dzp);
int zp_is_dir = S_ISDIR(ZTOI(zp)->i_mode);
boolean_t unlinked = B_FALSE;
sa_bulk_attr_t bulk[5];
uint64_t mtime[2], ctime[2];
+ uint64_t links;
int count = 0;
int error;
@@ -861,26 +913,28 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
return (error);
}
- if (zp->z_links <= zp_is_dir) {
+ if (ZTOI(zp)->i_nlink <= zp_is_dir) {
zfs_panic_recover("zfs: link count on %lu is %u, "
"should be at least %u", zp->z_id,
- (int)zp->z_links, zp_is_dir + 1);
- zp->z_links = zp_is_dir + 1;
+ (int)ZTOI(zp)->i_nlink, zp_is_dir + 1);
+ set_nlink(ZTOI(zp), zp_is_dir + 1);
}
- if (--zp->z_links == zp_is_dir) {
+ drop_nlink(ZTOI(zp));
+ if (ZTOI(zp)->i_nlink == zp_is_dir) {
zp->z_unlinked = B_TRUE;
- zp->z_links = 0;
+ clear_nlink(ZTOI(zp));
unlinked = B_TRUE;
} else {
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb),
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
NULL, &ctime, sizeof (ctime));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
NULL, &zp->z_pflags, sizeof (zp->z_pflags));
zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime,
ctime);
}
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb),
- NULL, &zp->z_links, sizeof (zp->z_links));
+ links = ZTOI(zp)->i_nlink;
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
+ NULL, &links, sizeof (links));
error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
count = 0;
ASSERT(error == 0);
@@ -893,16 +947,18 @@ zfs_link_destroy(zfs_dirlock_t *dl, znode_t *zp, dmu_tx_t *tx, int flag,
mutex_enter(&dzp->z_lock);
dzp->z_size--; /* one dirent removed */
- dzp->z_links -= zp_is_dir; /* ".." link from zp */
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb),
- NULL, &dzp->z_links, sizeof (dzp->z_links));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb),
+ if (zp_is_dir)
+ drop_nlink(ZTOI(dzp)); /* ".." link from zp */
+ links = ZTOI(dzp)->i_nlink;
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs),
+ NULL, &links, sizeof (links));
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
NULL, &dzp->z_size, sizeof (dzp->z_size));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb),
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
NULL, ctime, sizeof (ctime));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb),
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs),
NULL, mtime, sizeof (mtime));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
NULL, &dzp->z_pflags, sizeof (dzp->z_pflags));
zfs_tstamp_update_setup(dzp, CONTENT_MODIFIED, mtime, ctime);
error = sa_bulk_update(dzp->z_sa_hdl, bulk, count, tx);
@@ -931,7 +987,7 @@ zfs_dirempty(znode_t *dzp)
int
zfs_make_xattrdir(znode_t *zp, vattr_t *vap, struct inode **xipp, cred_t *cr)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
znode_t *xzp;
dmu_tx_t *tx;
int error;
@@ -949,19 +1005,19 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, struct inode **xipp, cred_t *cr)
if ((error = zfs_acl_ids_create(zp, IS_XATTR, vap, cr, NULL,
&acl_ids)) != 0)
return (error);
- if (zfs_acl_ids_overquota(zsb, &acl_ids)) {
+ if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
zfs_acl_ids_free(&acl_ids);
return (SET_ERROR(EDQUOT));
}
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
ZFS_SA_BASE_ATTR_SIZE);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
- fuid_dirtied = zsb->z_fuid_dirty;
+ fuid_dirtied = zfsvfs->z_fuid_dirty;
if (fuid_dirtied)
- zfs_fuid_txhold(zsb, tx);
+ zfs_fuid_txhold(zfsvfs, tx);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
zfs_acl_ids_free(&acl_ids);
@@ -971,19 +1027,20 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, struct inode **xipp, cred_t *cr)
zfs_mknode(zp, vap, tx, cr, IS_XATTR, &xzp, &acl_ids);
if (fuid_dirtied)
- zfs_fuid_sync(zsb, tx);
+ zfs_fuid_sync(zfsvfs, tx);
#ifdef DEBUG
- error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zsb),
+ error = sa_lookup(xzp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
&parent, sizeof (parent));
ASSERT(error == 0 && parent == zp->z_id);
#endif
- VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zsb), &xzp->z_id,
+ VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs), &xzp->z_id,
sizeof (xzp->z_id), tx));
- (void) zfs_log_create(zsb->z_log, tx, TX_MKXATTR, zp,
- xzp, "", NULL, acl_ids.z_fuidp, vap);
+ if (!zp->z_unlinked)
+ (void) zfs_log_create(zfsvfs->z_log, tx, TX_MKXATTR, zp,
+ xzp, "", NULL, acl_ids.z_fuidp, vap);
zfs_acl_ids_free(&acl_ids);
dmu_tx_commit(tx);
@@ -1009,7 +1066,7 @@ zfs_make_xattrdir(znode_t *zp, vattr_t *vap, struct inode **xipp, cred_t *cr)
int
zfs_get_xattrdir(znode_t *zp, struct inode **xipp, cred_t *cr, int flags)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
znode_t *xzp;
zfs_dirlock_t *dl;
vattr_t va;
@@ -1030,7 +1087,7 @@ zfs_get_xattrdir(znode_t *zp, struct inode **xipp, cred_t *cr, int flags)
return (SET_ERROR(ENOENT));
}
- if (zfs_is_readonly(zsb)) {
+ if (zfs_is_readonly(zfsvfs)) {
zfs_dirent_unlock(dl);
return (SET_ERROR(EROFS));
}
@@ -1080,16 +1137,18 @@ zfs_sticky_remove_access(znode_t *zdp, znode_t *zp, cred_t *cr)
uid_t uid;
uid_t downer;
uid_t fowner;
- zfs_sb_t *zsb = ZTOZSB(zdp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zdp);
- if (zsb->z_replay)
+ if (zfsvfs->z_replay)
return (0);
if ((zdp->z_mode & S_ISVTX) == 0)
return (0);
- downer = zfs_fuid_map_id(zsb, zdp->z_uid, cr, ZFS_OWNER);
- fowner = zfs_fuid_map_id(zsb, zp->z_uid, cr, ZFS_OWNER);
+ downer = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(ZTOI(zdp)->i_uid),
+ cr, ZFS_OWNER);
+ fowner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(ZTOI(zp)->i_uid),
+ cr, ZFS_OWNER);
if ((uid = crgetuid(cr)) == downer || uid == fowner ||
(S_ISDIR(ZTOI(zp)->i_mode) &&
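Throughout zfs_dir.c the private zp->z_links counter is retired in favor of the Linux inode's i_nlink, manipulated with the kernel's set_nlink()/inc_nlink()/drop_nlink()/clear_nlink() helpers. Because SA_ADD_BULK_ATTR() needs a uint64_t it can take the address of, each call site stages the value in a local first. The recurring pattern, extracted from the hunks above:

	uint64_t links;

	drop_nlink(ZTOI(zp));		/* the VFS now owns the link count */
	links = ZTOI(zp)->i_nlink;	/* stage a copy for the SA update */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
	    &links, sizeof (links));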
diff --git a/zfs/module/zfs/zfs_fm.c b/zfs/module/zfs/zfs_fm.c
index c7b7180009c8..3986b3959dea 100644
--- a/zfs/module/zfs/zfs_fm.c
+++ b/zfs/module/zfs/zfs_fm.c
@@ -102,7 +102,7 @@
* ereport with information about the differences.
*/
#ifdef _KERNEL
-static void
+void
zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector)
{
if (nvl)
@@ -112,9 +112,32 @@ zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector)
fm_nvlist_destroy(detector, FM_NVA_FREE);
}
-static void
-zfs_zevent_post_cb_noop(nvlist_t *nvl, nvlist_t *detector)
+/*
+ * We want to rate limit ZIO delay and checksum events so as to not
+ * flood ZED when a disk is acting up.
+ *
+ * Returns 1 if we're ratelimiting, 0 if not.
+ */
+static int
+zfs_is_ratelimiting_event(const char *subclass, vdev_t *vd)
{
+ int rc = 0;
+ /*
+ * __ratelimit() returns 1 if we're *not* ratelimiting and 0 if we
+ * are. Invert it to get our return value.
+ */
+ if (strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) {
+ rc = !zfs_ratelimit(&vd->vdev_delay_rl);
+ } else if (strcmp(subclass, FM_EREPORT_ZFS_CHECKSUM) == 0) {
+ rc = !zfs_ratelimit(&vd->vdev_checksum_rl);
+ }
+
+ if (rc) {
+ /* We're rate limiting */
+ fm_erpt_dropped_increment();
+ }
+
+ return (rc);
}
static void
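zfs_ratelimit() follows the kernel __ratelimit() convention of returning nonzero when the event may proceed, hence the inversion above. Call sites simply drop the event when the helper reports throttling, as in this sketch of the pattern used by the zfs_ereport_post() hunk further down:

	/* bail out early; the drop is already counted for the ereport kstat */
	if (zfs_is_ratelimiting_event(FM_EREPORT_ZFS_DELAY, vd))
		return;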
@@ -187,6 +210,12 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
(vd->vdev_remove_wanted || vd->vdev_state == VDEV_STATE_REMOVED))
return;
+ if ((strcmp(subclass, FM_EREPORT_ZFS_DELAY) == 0) &&
+ (zio != NULL) && (!zio->io_timestamp)) {
+ /* Ignore bogus delay events */
+ return;
+ }
+
if ((ereport = fm_nvlist_create(NULL)) == NULL)
return;
@@ -237,21 +266,21 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
/*
* Generic payload members common to all ereports.
*/
- fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL,
- DATA_TYPE_STRING, spa_name(spa), FM_EREPORT_PAYLOAD_ZFS_POOL_GUID,
- DATA_TYPE_UINT64, spa_guid(spa),
+ fm_payload_set(ereport,
+ FM_EREPORT_PAYLOAD_ZFS_POOL, DATA_TYPE_STRING, spa_name(spa),
+ FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, DATA_TYPE_UINT64, spa_guid(spa),
+ FM_EREPORT_PAYLOAD_ZFS_POOL_STATE, DATA_TYPE_UINT64,
+ (uint64_t)spa_state(spa),
FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, DATA_TYPE_INT32,
- spa_load_state(spa), NULL);
-
- if (spa != NULL) {
- fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE,
- DATA_TYPE_STRING,
- spa_get_failmode(spa) == ZIO_FAILURE_MODE_WAIT ?
- FM_EREPORT_FAILMODE_WAIT :
- spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE ?
- FM_EREPORT_FAILMODE_CONTINUE : FM_EREPORT_FAILMODE_PANIC,
- NULL);
- }
+ (int32_t)spa_load_state(spa), NULL);
+
+ fm_payload_set(ereport, FM_EREPORT_PAYLOAD_ZFS_POOL_FAILMODE,
+ DATA_TYPE_STRING,
+ spa_get_failmode(spa) == ZIO_FAILURE_MODE_WAIT ?
+ FM_EREPORT_FAILMODE_WAIT :
+ spa_get_failmode(spa) == ZIO_FAILURE_MODE_CONTINUE ?
+ FM_EREPORT_FAILMODE_CONTINUE : FM_EREPORT_FAILMODE_PANIC,
+ NULL);
if (vd != NULL) {
vdev_t *pvd = vd->vdev_parent;
@@ -278,6 +307,10 @@ zfs_ereport_start(nvlist_t **ereport_out, nvlist_t **detector_out,
fm_payload_set(ereport,
FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU,
DATA_TYPE_STRING, vd->vdev_fru, NULL);
+ if (vd->vdev_enc_sysfs_path != NULL)
+ fm_payload_set(ereport,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH,
+ DATA_TYPE_STRING, vd->vdev_enc_sysfs_path, NULL);
if (vd->vdev_ashift)
fm_payload_set(ereport,
FM_EREPORT_PAYLOAD_ZFS_VDEV_ASHIFT,
@@ -565,11 +598,11 @@ zei_range_total_size(zfs_ecksum_info_t *eip)
static zfs_ecksum_info_t *
annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
- const uint8_t *goodbuf, const uint8_t *badbuf, size_t size,
+ const abd_t *goodabd, const abd_t *badabd, size_t size,
boolean_t drop_if_identical)
{
- const uint64_t *good = (const uint64_t *)goodbuf;
- const uint64_t *bad = (const uint64_t *)badbuf;
+ const uint64_t *good;
+ const uint64_t *bad;
uint64_t allset = 0;
uint64_t allcleared = 0;
@@ -613,13 +646,16 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
}
}
- if (badbuf == NULL || goodbuf == NULL)
+ if (badabd == NULL || goodabd == NULL)
return (eip);
ASSERT3U(size, ==, nui64s * sizeof (uint64_t));
ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
ASSERT3U(size, <=, UINT32_MAX);
+ good = (const uint64_t *) abd_borrow_buf_copy((abd_t *)goodabd, size);
+ bad = (const uint64_t *) abd_borrow_buf_copy((abd_t *)badabd, size);
+
/* build up the range list by comparing the two buffers. */
for (idx = 0; idx < nui64s; idx++) {
if (good[idx] == bad[idx]) {
@@ -649,6 +685,8 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
*/
if (inline_size == 0 && drop_if_identical) {
kmem_free(eip, sizeof (*eip));
+ abd_return_buf((abd_t *)goodabd, (void *)good, size);
+ abd_return_buf((abd_t *)badabd, (void *)bad, size);
return (NULL);
}
@@ -689,6 +727,10 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info,
eip->zei_ranges[range].zr_start *= sizeof (uint64_t);
eip->zei_ranges[range].zr_end *= sizeof (uint64_t);
}
+
+ abd_return_buf((abd_t *)goodabd, (void *)good, size);
+ abd_return_buf((abd_t *)badabd, (void *)bad, size);
+
eip->zei_allowed_mingap *= sizeof (uint64_t);
inline_size *= sizeof (uint64_t);
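annotate_ecksum() now receives scatter/gather ABD buffers rather than linear ones, so a contiguous copy must be borrowed before the word-wise comparison and returned on every exit path, including the drop_if_identical early return. The borrow/return pairing in outline:

	const uint64_t *good, *bad;

	good = (const uint64_t *)abd_borrow_buf_copy((abd_t *)goodabd, size);
	bad = (const uint64_t *)abd_borrow_buf_copy((abd_t *)badabd, size);
	/* ... build the range list by comparing good[idx] and bad[idx] ... */
	abd_return_buf((abd_t *)goodabd, (void *)good, size);
	abd_return_buf((abd_t *)badabd, (void *)bad, size);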
@@ -736,6 +778,9 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
nvlist_t *ereport = NULL;
nvlist_t *detector = NULL;
+ if (zfs_is_ratelimiting_event(subclass, vd))
+ return;
+
zfs_ereport_start(&ereport, &detector,
subclass, spa, vd, zio, stateoroffset, size);
@@ -752,7 +797,15 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
struct zio *zio, uint64_t offset, uint64_t length, void *arg,
zio_bad_cksum_t *info)
{
- zio_cksum_report_t *report = kmem_zalloc(sizeof (*report), KM_SLEEP);
+ zio_cksum_report_t *report;
+
+#ifdef _KERNEL
+ if (zfs_is_ratelimiting_event(FM_EREPORT_ZFS_CHECKSUM, vd))
+ return;
+#endif
+
+ report = kmem_zalloc(sizeof (*report), KM_SLEEP);
if (zio->io_vsd != NULL)
zio->io_vsd_ops->vsd_cksum_report(zio, report, arg);
@@ -785,8 +838,8 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
}
void
-zfs_ereport_finish_checksum(zio_cksum_report_t *report,
- const void *good_data, const void *bad_data, boolean_t drop_if_identical)
+zfs_ereport_finish_checksum(zio_cksum_report_t *report, const abd_t *good_data,
+ const abd_t *bad_data, boolean_t drop_if_identical)
{
#ifdef _KERNEL
zfs_ecksum_info_t *info;
@@ -824,19 +877,11 @@ zfs_ereport_free_checksum(zio_cksum_report_t *rpt)
kmem_free(rpt, sizeof (*rpt));
}
-void
-zfs_ereport_send_interim_checksum(zio_cksum_report_t *report)
-{
-#ifdef _KERNEL
- zfs_zevent_post(report->zcr_ereport, report->zcr_detector,
- zfs_zevent_post_cb_noop);
-#endif
-}
void
zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
struct zio *zio, uint64_t offset, uint64_t length,
- const void *good_data, const void *bad_data, zio_bad_cksum_t *zbc)
+ const abd_t *good_data, const abd_t *bad_data, zio_bad_cksum_t *zbc)
{
#ifdef _KERNEL
nvlist_t *ereport = NULL;
@@ -859,25 +904,36 @@ zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
#endif
}
-static void
-zfs_post_common(spa_t *spa, vdev_t *vd, const char *name)
+/*
+ * The 'sysevent.fs.zfs.*' events are signals posted to notify user space of
+ * a change in the pool. All sysevents are listed in sys/sysevent/eventdefs.h
+ * and are designed to be consumed by the ZFS Event Daemon (ZED). For
+ * additional details refer to the zed(8) man page.
+ */
+nvlist_t *
+zfs_event_create(spa_t *spa, vdev_t *vd, const char *type, const char *name,
+ nvlist_t *aux)
{
+ nvlist_t *resource = NULL;
#ifdef _KERNEL
- nvlist_t *resource;
char class[64];
if (spa_load_state(spa) == SPA_LOAD_TRYIMPORT)
- return;
+ return (NULL);
if ((resource = fm_nvlist_create(NULL)) == NULL)
- return;
+ return (NULL);
- (void) snprintf(class, sizeof (class), "%s.%s.%s", FM_RSRC_RESOURCE,
+ (void) snprintf(class, sizeof (class), "%s.%s.%s", type,
ZFS_ERROR_CLASS, name);
VERIFY0(nvlist_add_uint8(resource, FM_VERSION, FM_RSRC_VERSION));
VERIFY0(nvlist_add_string(resource, FM_CLASS, class));
+ VERIFY0(nvlist_add_string(resource,
+ FM_EREPORT_PAYLOAD_ZFS_POOL, spa_name(spa)));
VERIFY0(nvlist_add_uint64(resource,
FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, spa_guid(spa)));
+ VERIFY0(nvlist_add_uint64(resource,
+ FM_EREPORT_PAYLOAD_ZFS_POOL_STATE, spa_state(spa)));
VERIFY0(nvlist_add_int32(resource,
FM_EREPORT_PAYLOAD_ZFS_POOL_CONTEXT, spa_load_state(spa)));
@@ -886,9 +942,43 @@ zfs_post_common(spa_t *spa, vdev_t *vd, const char *name)
FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vd->vdev_guid));
VERIFY0(nvlist_add_uint64(resource,
FM_EREPORT_PAYLOAD_ZFS_VDEV_STATE, vd->vdev_state));
+ if (vd->vdev_path != NULL)
+ VERIFY0(nvlist_add_string(resource,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_PATH, vd->vdev_path));
+ if (vd->vdev_devid != NULL)
+ VERIFY0(nvlist_add_string(resource,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_DEVID, vd->vdev_devid));
+ if (vd->vdev_fru != NULL)
+ VERIFY0(nvlist_add_string(resource,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_FRU, vd->vdev_fru));
+ if (vd->vdev_enc_sysfs_path != NULL)
+ VERIFY0(nvlist_add_string(resource,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH,
+ vd->vdev_enc_sysfs_path));
}
- zfs_zevent_post(resource, NULL, zfs_zevent_post_cb);
+ /* also copy any optional payload data */
+ if (aux) {
+ nvpair_t *elem = NULL;
+
+ while ((elem = nvlist_next_nvpair(aux, elem)) != NULL)
+ (void) nvlist_add_nvpair(resource, elem);
+ }
+
+#endif
+ return (resource);
+}
+
+static void
+zfs_post_common(spa_t *spa, vdev_t *vd, const char *type, const char *name,
+ nvlist_t *aux)
+{
+#ifdef _KERNEL
+ nvlist_t *resource;
+
+ resource = zfs_event_create(spa, vd, type, name, aux);
+ if (resource)
+ zfs_zevent_post(resource, NULL, zfs_zevent_post_cb);
#endif
}
@@ -901,7 +991,7 @@ zfs_post_common(spa_t *spa, vdev_t *vd, const char *name)
void
zfs_post_remove(spa_t *spa, vdev_t *vd)
{
- zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_REMOVED);
+ zfs_post_common(spa, vd, FM_RSRC_CLASS, FM_RESOURCE_REMOVED, NULL);
}
/*
@@ -912,7 +1002,7 @@ zfs_post_remove(spa_t *spa, vdev_t *vd)
void
zfs_post_autoreplace(spa_t *spa, vdev_t *vd)
{
- zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_AUTOREPLACE);
+ zfs_post_common(spa, vd, FM_RSRC_CLASS, FM_RESOURCE_AUTOREPLACE, NULL);
}
/*
@@ -922,9 +1012,37 @@ zfs_post_autoreplace(spa_t *spa, vdev_t *vd)
* open because the device was not found (fault.fs.zfs.device).
*/
void
-zfs_post_state_change(spa_t *spa, vdev_t *vd)
+zfs_post_state_change(spa_t *spa, vdev_t *vd, uint64_t laststate)
{
- zfs_post_common(spa, vd, FM_EREPORT_RESOURCE_STATECHANGE);
+#ifdef _KERNEL
+ nvlist_t *aux;
+
+ /*
+ * Add optional supplemental keys to payload
+ */
+ aux = fm_nvlist_create(NULL);
+ if (vd && aux) {
+ if (vd->vdev_physpath) {
+ (void) nvlist_add_string(aux,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_PHYSPATH,
+ vd->vdev_physpath);
+ }
+ if (vd->vdev_enc_sysfs_path) {
+ (void) nvlist_add_string(aux,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_ENC_SYSFS_PATH,
+ vd->vdev_enc_sysfs_path);
+ }
+
+ (void) nvlist_add_uint64(aux,
+ FM_EREPORT_PAYLOAD_ZFS_VDEV_LASTSTATE, laststate);
+ }
+
+ zfs_post_common(spa, vd, FM_RSRC_CLASS, FM_RESOURCE_STATECHANGE,
+ aux);
+
+ if (aux)
+ fm_nvlist_destroy(aux, FM_NVA_FREE);
+#endif
}
#if defined(_KERNEL) && defined(HAVE_SPL)
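The resource-event path is split so that zfs_event_create() builds the nvlist (class string, pool and vdev identity, plus caller-supplied aux pairs) and zfs_post_common() posts it. The state-change hunk above shows the intended use of the aux list; reduced to its skeleton:

	nvlist_t *aux = fm_nvlist_create(NULL);

	if (aux)
		(void) nvlist_add_uint64(aux,
		    FM_EREPORT_PAYLOAD_ZFS_VDEV_LASTSTATE, laststate);
	zfs_post_common(spa, vd, FM_RSRC_CLASS, FM_RESOURCE_STATECHANGE, aux);
	if (aux)
		fm_nvlist_destroy(aux, FM_NVA_FREE);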
diff --git a/zfs/module/zfs/zfs_fuid.c b/zfs/module/zfs/zfs_fuid.c
index 6ca61b87242f..5cfb0c975c6e 100644
--- a/zfs/module/zfs/zfs_fuid.c
+++ b/zfs/module/zfs/zfs_fuid.c
@@ -46,7 +46,7 @@
* two AVL trees are created. One tree is keyed by the index number
* and the other by the domain string. Nodes are never removed from
* trees, but new entries may be added. If a new entry is added then
- * the zsb->z_fuid_dirty flag is set to true and the caller will then
+ * the zfsvfs->z_fuid_dirty flag is set to true and the caller will then
* be responsible for calling zfs_fuid_sync() to sync the changes to disk.
*
*/
@@ -71,14 +71,10 @@ static char *nulldomain = "";
static int
idx_compare(const void *arg1, const void *arg2)
{
- const fuid_domain_t *node1 = arg1;
- const fuid_domain_t *node2 = arg2;
+ const fuid_domain_t *node1 = (const fuid_domain_t *)arg1;
+ const fuid_domain_t *node2 = (const fuid_domain_t *)arg2;
- if (node1->f_idx < node2->f_idx)
- return (-1);
- else if (node1->f_idx > node2->f_idx)
- return (1);
- return (0);
+ return (AVL_CMP(node1->f_idx, node2->f_idx));
}
/*
@@ -87,14 +83,13 @@ idx_compare(const void *arg1, const void *arg2)
static int
domain_compare(const void *arg1, const void *arg2)
{
- const fuid_domain_t *node1 = arg1;
- const fuid_domain_t *node2 = arg2;
+ const fuid_domain_t *node1 = (const fuid_domain_t *)arg1;
+ const fuid_domain_t *node2 = (const fuid_domain_t *)arg2;
int val;
val = strcmp(node1->f_ksid->kd_name, node2->f_ksid->kd_name);
- if (val == 0)
- return (0);
- return (val > 0 ? 1 : -1);
+
+ return (AVL_ISIGN(val));
}
void
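AVL_CMP() and AVL_ISIGN() replace the open-coded three-way comparisons in both comparators. Their assumed definitions (per sys/avl.h) are branch-free sign computations yielding -1, 0 or 1:

	#define	AVL_CMP(a, b)	(((a) > (b)) - ((a) < (b)))
	#define	AVL_ISIGN(a)	(((a) > 0) - ((a) < 0))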
@@ -196,34 +191,34 @@ zfs_fuid_idx_domain(avl_tree_t *idx_tree, uint32_t idx)
* Load the fuid table(s) into memory.
*/
static void
-zfs_fuid_init(zfs_sb_t *zsb)
+zfs_fuid_init(zfsvfs_t *zfsvfs)
{
- rw_enter(&zsb->z_fuid_lock, RW_WRITER);
+ rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
- if (zsb->z_fuid_loaded) {
- rw_exit(&zsb->z_fuid_lock);
+ if (zfsvfs->z_fuid_loaded) {
+ rw_exit(&zfsvfs->z_fuid_lock);
return;
}
- zfs_fuid_avl_tree_create(&zsb->z_fuid_idx, &zsb->z_fuid_domain);
+ zfs_fuid_avl_tree_create(&zfsvfs->z_fuid_idx, &zfsvfs->z_fuid_domain);
- (void) zap_lookup(zsb->z_os, MASTER_NODE_OBJ,
- ZFS_FUID_TABLES, 8, 1, &zsb->z_fuid_obj);
- if (zsb->z_fuid_obj != 0) {
- zsb->z_fuid_size = zfs_fuid_table_load(zsb->z_os,
- zsb->z_fuid_obj, &zsb->z_fuid_idx,
- &zsb->z_fuid_domain);
+ (void) zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
+ ZFS_FUID_TABLES, 8, 1, &zfsvfs->z_fuid_obj);
+ if (zfsvfs->z_fuid_obj != 0) {
+ zfsvfs->z_fuid_size = zfs_fuid_table_load(zfsvfs->z_os,
+ zfsvfs->z_fuid_obj, &zfsvfs->z_fuid_idx,
+ &zfsvfs->z_fuid_domain);
}
- zsb->z_fuid_loaded = B_TRUE;
- rw_exit(&zsb->z_fuid_lock);
+ zfsvfs->z_fuid_loaded = B_TRUE;
+ rw_exit(&zfsvfs->z_fuid_lock);
}
/*
* sync out AVL trees to persistent storage.
*/
void
-zfs_fuid_sync(zfs_sb_t *zsb, dmu_tx_t *tx)
+zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
nvlist_t *nvp;
nvlist_t **fuids;
@@ -234,30 +229,30 @@ zfs_fuid_sync(zfs_sb_t *zsb, dmu_tx_t *tx)
int numnodes;
int i;
- if (!zsb->z_fuid_dirty) {
+ if (!zfsvfs->z_fuid_dirty) {
return;
}
- rw_enter(&zsb->z_fuid_lock, RW_WRITER);
+ rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
/*
* First see if table needs to be created?
*/
- if (zsb->z_fuid_obj == 0) {
- zsb->z_fuid_obj = dmu_object_alloc(zsb->z_os,
+ if (zfsvfs->z_fuid_obj == 0) {
+ zfsvfs->z_fuid_obj = dmu_object_alloc(zfsvfs->z_os,
DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE,
sizeof (uint64_t), tx);
- VERIFY(zap_add(zsb->z_os, MASTER_NODE_OBJ,
+ VERIFY(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
ZFS_FUID_TABLES, sizeof (uint64_t), 1,
- &zsb->z_fuid_obj, tx) == 0);
+ &zfsvfs->z_fuid_obj, tx) == 0);
}
VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
- numnodes = avl_numnodes(&zsb->z_fuid_idx);
+ numnodes = avl_numnodes(&zfsvfs->z_fuid_idx);
fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP);
- for (i = 0, domnode = avl_first(&zsb->z_fuid_domain); domnode; i++,
- domnode = AVL_NEXT(&zsb->z_fuid_domain, domnode)) {
+ for (i = 0, domnode = avl_first(&zfsvfs->z_fuid_domain); domnode; i++,
+ domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode)) {
VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0);
VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
domnode->f_idx) == 0);
@@ -275,29 +270,30 @@ zfs_fuid_sync(zfs_sb_t *zsb, dmu_tx_t *tx)
VERIFY(nvlist_pack(nvp, &packed, &nvsize,
NV_ENCODE_XDR, KM_SLEEP) == 0);
nvlist_free(nvp);
- zsb->z_fuid_size = nvsize;
- dmu_write(zsb->z_os, zsb->z_fuid_obj, 0, zsb->z_fuid_size, packed, tx);
- kmem_free(packed, zsb->z_fuid_size);
- VERIFY(0 == dmu_bonus_hold(zsb->z_os, zsb->z_fuid_obj,
+ zfsvfs->z_fuid_size = nvsize;
+ dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
+ zfsvfs->z_fuid_size, packed, tx);
+ kmem_free(packed, zfsvfs->z_fuid_size);
+ VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
FTAG, &db));
dmu_buf_will_dirty(db, tx);
- *(uint64_t *)db->db_data = zsb->z_fuid_size;
+ *(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
dmu_buf_rele(db, FTAG);
- zsb->z_fuid_dirty = B_FALSE;
- rw_exit(&zsb->z_fuid_lock);
+ zfsvfs->z_fuid_dirty = B_FALSE;
+ rw_exit(&zfsvfs->z_fuid_lock);
}
/*
* Query domain table for a given domain.
*
* If domain isn't found and addok is set, it is added to AVL trees and
- * the zsb->z_fuid_dirty flag will be set to TRUE. It will then be
+ * the zfsvfs->z_fuid_dirty flag will be set to TRUE. It will then be
* necessary for the caller or another thread to detect the dirty table
* and sync out the changes.
*/
int
-zfs_fuid_find_by_domain(zfs_sb_t *zsb, const char *domain,
+zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain,
char **retdomain, boolean_t addok)
{
fuid_domain_t searchnode, *findnode;
@@ -318,23 +314,23 @@ zfs_fuid_find_by_domain(zfs_sb_t *zsb, const char *domain,
searchnode.f_ksid = ksid_lookupdomain(domain);
if (retdomain)
*retdomain = searchnode.f_ksid->kd_name;
- if (!zsb->z_fuid_loaded)
- zfs_fuid_init(zsb);
+ if (!zfsvfs->z_fuid_loaded)
+ zfs_fuid_init(zfsvfs);
retry:
- rw_enter(&zsb->z_fuid_lock, rw);
- findnode = avl_find(&zsb->z_fuid_domain, &searchnode, &loc);
+ rw_enter(&zfsvfs->z_fuid_lock, rw);
+ findnode = avl_find(&zfsvfs->z_fuid_domain, &searchnode, &loc);
if (findnode) {
- rw_exit(&zsb->z_fuid_lock);
+ rw_exit(&zfsvfs->z_fuid_lock);
ksiddomain_rele(searchnode.f_ksid);
return (findnode->f_idx);
} else if (addok) {
fuid_domain_t *domnode;
uint64_t retidx;
- if (rw == RW_READER && !rw_tryupgrade(&zsb->z_fuid_lock)) {
- rw_exit(&zsb->z_fuid_lock);
+ if (rw == RW_READER && !rw_tryupgrade(&zfsvfs->z_fuid_lock)) {
+ rw_exit(&zfsvfs->z_fuid_lock);
rw = RW_WRITER;
goto retry;
}
@@ -342,15 +338,15 @@ zfs_fuid_find_by_domain(zfs_sb_t *zsb, const char *domain,
domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
domnode->f_ksid = searchnode.f_ksid;
- retidx = domnode->f_idx = avl_numnodes(&zsb->z_fuid_idx) + 1;
+ retidx = domnode->f_idx = avl_numnodes(&zfsvfs->z_fuid_idx) + 1;
- avl_add(&zsb->z_fuid_domain, domnode);
- avl_add(&zsb->z_fuid_idx, domnode);
- zsb->z_fuid_dirty = B_TRUE;
- rw_exit(&zsb->z_fuid_lock);
+ avl_add(&zfsvfs->z_fuid_domain, domnode);
+ avl_add(&zfsvfs->z_fuid_idx, domnode);
+ zfsvfs->z_fuid_dirty = B_TRUE;
+ rw_exit(&zfsvfs->z_fuid_lock);
return (retidx);
} else {
- rw_exit(&zsb->z_fuid_lock);
+ rw_exit(&zfsvfs->z_fuid_lock);
return (-1);
}
}
@@ -362,23 +358,23 @@ zfs_fuid_find_by_domain(zfs_sb_t *zsb, const char *domain,
*
*/
const char *
-zfs_fuid_find_by_idx(zfs_sb_t *zsb, uint32_t idx)
+zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx)
{
char *domain;
- if (idx == 0 || !zsb->z_use_fuids)
+ if (idx == 0 || !zfsvfs->z_use_fuids)
return (NULL);
- if (!zsb->z_fuid_loaded)
- zfs_fuid_init(zsb);
+ if (!zfsvfs->z_fuid_loaded)
+ zfs_fuid_init(zfsvfs);
- rw_enter(&zsb->z_fuid_lock, RW_READER);
+ rw_enter(&zfsvfs->z_fuid_lock, RW_READER);
- if (zsb->z_fuid_obj || zsb->z_fuid_dirty)
- domain = zfs_fuid_idx_domain(&zsb->z_fuid_idx, idx);
+ if (zfsvfs->z_fuid_obj || zfsvfs->z_fuid_dirty)
+ domain = zfs_fuid_idx_domain(&zfsvfs->z_fuid_idx, idx);
else
domain = nulldomain;
- rw_exit(&zsb->z_fuid_lock);
+ rw_exit(&zfsvfs->z_fuid_lock);
ASSERT(domain);
return (domain);
@@ -387,12 +383,14 @@ zfs_fuid_find_by_idx(zfs_sb_t *zsb, uint32_t idx)
void
zfs_fuid_map_ids(znode_t *zp, cred_t *cr, uid_t *uidp, uid_t *gidp)
{
- *uidp = zfs_fuid_map_id(ZTOZSB(zp), zp->z_uid, cr, ZFS_OWNER);
- *gidp = zfs_fuid_map_id(ZTOZSB(zp), zp->z_gid, cr, ZFS_GROUP);
+ *uidp = zfs_fuid_map_id(ZTOZSB(zp), KUID_TO_SUID(ZTOI(zp)->i_uid),
+ cr, ZFS_OWNER);
+ *gidp = zfs_fuid_map_id(ZTOZSB(zp), KGID_TO_SGID(ZTOI(zp)->i_gid),
+ cr, ZFS_GROUP);
}
uid_t
-zfs_fuid_map_id(zfs_sb_t *zsb, uint64_t fuid,
+zfs_fuid_map_id(zfsvfs_t *zfsvfs, uint64_t fuid,
cred_t *cr, zfs_fuid_type_t type)
{
#ifdef HAVE_KSID
@@ -403,7 +401,7 @@ zfs_fuid_map_id(zfs_sb_t *zsb, uint64_t fuid,
if (index == 0)
return (fuid);
- domain = zfs_fuid_find_by_idx(zsb, index);
+ domain = zfs_fuid_find_by_idx(zfsvfs, index);
ASSERT(domain != NULL);
if (type == ZFS_OWNER || type == ZFS_ACE_USER) {
@@ -498,7 +496,7 @@ zfs_fuid_node_add(zfs_fuid_info_t **fuidpp, const char *domain, uint32_t rid,
* be used if it exists.
*/
uint64_t
-zfs_fuid_create_cred(zfs_sb_t *zsb, zfs_fuid_type_t type,
+zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type,
cred_t *cr, zfs_fuid_info_t **fuidp)
{
uint64_t idx;
@@ -512,7 +510,7 @@ zfs_fuid_create_cred(zfs_sb_t *zsb, zfs_fuid_type_t type,
ksid = crgetsid(cr, (type == ZFS_OWNER) ? KSID_OWNER : KSID_GROUP);
- if (!zsb->z_use_fuids || (ksid == NULL)) {
+ if (!zfsvfs->z_use_fuids || (ksid == NULL)) {
id = (type == ZFS_OWNER) ? crgetuid(cr) : crgetgid(cr);
if (IS_EPHEMERAL(id))
@@ -535,7 +533,7 @@ zfs_fuid_create_cred(zfs_sb_t *zsb, zfs_fuid_type_t type,
rid = ksid_getrid(ksid);
domain = ksid_getdomain(ksid);
- idx = zfs_fuid_find_by_domain(zsb, domain, &kdomain, B_TRUE);
+ idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, B_TRUE);
zfs_fuid_node_add(fuidp, kdomain, rid, idx, id, type);
@@ -553,10 +551,10 @@ zfs_fuid_create_cred(zfs_sb_t *zsb, zfs_fuid_type_t type,
*
* During replay operations the domain+rid information is
* found in the zfs_fuid_info_t that the replay code has
- * attached to the zsb of the file system.
+ * attached to the zfsvfs of the file system.
*/
uint64_t
-zfs_fuid_create(zfs_sb_t *zsb, uint64_t id, cred_t *cr,
+zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr,
zfs_fuid_type_t type, zfs_fuid_info_t **fuidpp)
{
#ifdef HAVE_KSID
@@ -577,11 +575,11 @@ zfs_fuid_create(zfs_sb_t *zsb, uint64_t id, cred_t *cr,
* chmod.
*/
- if (!zsb->z_use_fuids || !IS_EPHEMERAL(id) || fuid_idx != 0)
+ if (!zfsvfs->z_use_fuids || !IS_EPHEMERAL(id) || fuid_idx != 0)
return (id);
- if (zsb->z_replay) {
- fuidp = zsb->z_fuid_replay;
+ if (zfsvfs->z_replay) {
+ fuidp = zfsvfs->z_fuid_replay;
/*
* If we are passed an ephemeral id, but no
@@ -631,9 +629,9 @@ zfs_fuid_create(zfs_sb_t *zsb, uint64_t id, cred_t *cr,
}
}
- idx = zfs_fuid_find_by_domain(zsb, domain, &kdomain, B_TRUE);
+ idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, B_TRUE);
- if (!zsb->z_replay)
+ if (!zfsvfs->z_replay)
zfs_fuid_node_add(fuidpp, kdomain,
rid, idx, id, type);
else if (zfuid != NULL) {
@@ -650,15 +648,15 @@ zfs_fuid_create(zfs_sb_t *zsb, uint64_t id, cred_t *cr,
}
void
-zfs_fuid_destroy(zfs_sb_t *zsb)
+zfs_fuid_destroy(zfsvfs_t *zfsvfs)
{
- rw_enter(&zsb->z_fuid_lock, RW_WRITER);
- if (!zsb->z_fuid_loaded) {
- rw_exit(&zsb->z_fuid_lock);
+ rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
+ if (!zfsvfs->z_fuid_loaded) {
+ rw_exit(&zfsvfs->z_fuid_lock);
return;
}
- zfs_fuid_table_destroy(&zsb->z_fuid_idx, &zsb->z_fuid_domain);
- rw_exit(&zsb->z_fuid_lock);
+ zfs_fuid_table_destroy(&zfsvfs->z_fuid_idx, &zfsvfs->z_fuid_domain);
+ rw_exit(&zfsvfs->z_fuid_lock);
}
/*
@@ -694,7 +692,7 @@ zfs_fuid_info_free(zfs_fuid_info_t *fuidp)
if (fuidp->z_domain_table != NULL)
kmem_free(fuidp->z_domain_table,
- (sizeof (char **)) * fuidp->z_domain_cnt);
+ (sizeof (char *)) * fuidp->z_domain_cnt);
while ((zdomain = list_head(&fuidp->z_domains)) != NULL) {
list_remove(&fuidp->z_domains, zdomain);
@@ -712,7 +710,7 @@ zfs_fuid_info_free(zfs_fuid_info_t *fuidp)
* Will use a straight FUID compare when possible.
*/
boolean_t
-zfs_groupmember(zfs_sb_t *zsb, uint64_t id, cred_t *cr)
+zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
{
#ifdef HAVE_KSID
ksid_t *ksid = crgetsid(cr, KSID_GROUP);
@@ -736,7 +734,7 @@ zfs_groupmember(zfs_sb_t *zsb, uint64_t id, cred_t *cr)
} else {
const char *domain;
- domain = zfs_fuid_find_by_idx(zsb, idx);
+ domain = zfs_fuid_find_by_idx(zfsvfs, idx);
ASSERT(domain != NULL);
if (strcmp(domain,
@@ -754,7 +752,7 @@ zfs_groupmember(zfs_sb_t *zsb, uint64_t id, cred_t *cr)
/*
* Not found in ksidlist, check posix groups
*/
- gid = zfs_fuid_map_id(zsb, id, cr, ZFS_GROUP);
+ gid = zfs_fuid_map_id(zfsvfs, id, cr, ZFS_GROUP);
return (groupmember(gid, cr));
#else
return (B_TRUE);
@@ -762,17 +760,17 @@ zfs_groupmember(zfs_sb_t *zsb, uint64_t id, cred_t *cr)
}
void
-zfs_fuid_txhold(zfs_sb_t *zsb, dmu_tx_t *tx)
+zfs_fuid_txhold(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
- if (zsb->z_fuid_obj == 0) {
+ if (zfsvfs->z_fuid_obj == 0) {
dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
- FUID_SIZE_ESTIMATE(zsb));
+ FUID_SIZE_ESTIMATE(zfsvfs));
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
} else {
- dmu_tx_hold_bonus(tx, zsb->z_fuid_obj);
- dmu_tx_hold_write(tx, zsb->z_fuid_obj, 0,
- FUID_SIZE_ESTIMATE(zsb));
+ dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
+ dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
+ FUID_SIZE_ESTIMATE(zfsvfs));
}
}
#endif
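With z_uid and z_gid gone from the znode, ownership is read back from the VFS inode and converted out of the kernel's namespaced kuid_t/kgid_t types before FUID mapping, as in the zfs_fuid_map_ids() hunk above. The conversion step in isolation:

	uid_t uid;

	uid = zfs_fuid_map_id(ZTOZSB(zp),
	    KUID_TO_SUID(ZTOI(zp)->i_uid), cr, ZFS_OWNER);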
diff --git a/zfs/module/zfs/zfs_ioctl.c b/zfs/module/zfs/zfs_ioctl.c
index 3ebe28d7fcfc..d195eded76dc 100644
--- a/zfs/module/zfs/zfs_ioctl.c
+++ b/zfs/module/zfs/zfs_ioctl.c
@@ -22,15 +22,20 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright 2011 Martin Matuska
+ * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
* Portions Copyright 2012 Pawel Jakub Dawidek <pawel at dawidek.net>
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
- * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
+ * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014, Joyent, Inc. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
- * Copyright (c) 2014, Nexenta Systems, Inc. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
+ * Copyright 2016 Toomas Soome <tsoome at me.com>
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
+ * Copyright (c) 2017, loli10K <ezomori.nozomu at gmail.com>. All rights reserved.
+ * Copyright (c) 2017 Datto Inc.
+ * Copyright 2017 RackTop Systems.
*/
/*
@@ -153,6 +158,7 @@
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/vdev.h>
+#include <sys/vdev_impl.h>
#include <sys/priv_impl.h>
#include <sys/dmu.h>
#include <sys/dsl_dir.h>
@@ -185,14 +191,22 @@
#include <sys/dsl_bookmark.h>
#include <sys/dsl_userhold.h>
#include <sys/zfeature.h>
+#include <sys/zio_checksum.h>
#include <linux/miscdevice.h>
+#include <linux/slab.h>
#include "zfs_namecheck.h"
#include "zfs_prop.h"
#include "zfs_deleg.h"
#include "zfs_comutil.h"
+/*
+ * Limit maximum nvlist size. We don't want users passing in insane values
+ * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
+ */
+#define MAX_NVLIST_SRC_SIZE KMALLOC_MAX_SIZE
+
kmutex_t zfsdev_state_lock;
zfsdev_state_t *zfsdev_state_list;
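MAX_NVLIST_SRC_SIZE caps zc_nvlist_src_size before the kernel attempts an allocation of that size. The guard itself sits in the ioctl entry path, outside this excerpt; a minimal sketch of the intended check:

	if (zc->zc_nvlist_src_size > MAX_NVLIST_SRC_SIZE)
		return (SET_ERROR(EINVAL));	/* refuse oversized requests */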
@@ -236,9 +250,14 @@ static const char *userquota_perms[] = {
ZFS_DELEG_PERM_USERQUOTA,
ZFS_DELEG_PERM_GROUPUSED,
ZFS_DELEG_PERM_GROUPQUOTA,
+ ZFS_DELEG_PERM_USEROBJUSED,
+ ZFS_DELEG_PERM_USEROBJQUOTA,
+ ZFS_DELEG_PERM_GROUPOBJUSED,
+ ZFS_DELEG_PERM_GROUPOBJQUOTA,
};
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
+static int zfs_ioc_userobjspace_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
cred_t *cr);
static int zfs_check_clearable(char *dataset, nvlist_t *props,
@@ -454,6 +473,14 @@ zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
dsl_dataset_t *ds;
dsl_pool_t *dp;
+ /*
+ * First do a quick check for root in the global zone, which
+ * is allowed to do all write_perms. This ensures that zfs_ioc_*
+ * will get to handle nonexistent datasets.
+ */
+ if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
+ return (0);
+
error = dsl_pool_hold(name, FTAG, &dp);
if (error != 0)
return (error);
@@ -527,7 +554,7 @@ zfs_set_slabel_policy(const char *name, char *strval, cred_t *cr)
/*
* If the existing dataset label is nondefault, check if the
* dataset is mounted (label cannot be changed while mounted).
- * Get the zfs_sb_t; if there isn't one, then the dataset isn't
+ * Get the zfsvfs_t; if there isn't one, then the dataset isn't
* mounted (or isn't a dataset, doesn't exist, ...).
*/
if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
@@ -597,7 +624,7 @@ zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
case ZFS_PROP_SNAPSHOT_LIMIT:
if (!INGLOBALZONE(curproc)) {
uint64_t zoned;
- char setpoint[MAXNAMELEN];
+ char setpoint[ZFS_MAX_DATASET_NAME_LEN];
/*
* Unprivileged users are allowed to modify the
* limit on things *under* (ie. contained by)
@@ -839,7 +866,7 @@ zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
int
zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
{
- char parentname[MAXNAMELEN];
+ char parentname[ZFS_MAX_DATASET_NAME_LEN];
int error;
if ((error = zfs_secpolicy_write_perms(from,
@@ -892,7 +919,7 @@ zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
if (error == 0) {
- char parentname[MAXNAMELEN];
+ char parentname[ZFS_MAX_DATASET_NAME_LEN];
dsl_dataset_t *origin = NULL;
dsl_dir_t *dd;
dd = clone->ds_dir;
@@ -938,6 +965,13 @@ zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
ZFS_DELEG_PERM_CREATE, cr));
}
+/* ARGSUSED */
+static int
+zfs_secpolicy_recv_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
+{
+ return (zfs_secpolicy_recv(zc, innvl, cr));
+}
+
int
zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
{
@@ -1062,7 +1096,7 @@ zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
static int
zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
{
- char parentname[MAXNAMELEN];
+ char parentname[ZFS_MAX_DATASET_NAME_LEN];
int error;
char *origin;
@@ -1156,7 +1190,9 @@ zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
* themself, allow it.
*/
if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
- zc->zc_objset_type == ZFS_PROP_USERQUOTA) {
+ zc->zc_objset_type == ZFS_PROP_USERQUOTA ||
+ zc->zc_objset_type == ZFS_PROP_USEROBJUSED ||
+ zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA) {
if (zc->zc_guid == crgetuid(cr))
return (0);
} else {
@@ -1205,7 +1241,7 @@ zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
pair = nvlist_next_nvpair(holds, pair)) {
- char fsname[MAXNAMELEN];
+ char fsname[ZFS_MAX_DATASET_NAME_LEN];
error = dmu_fsname(nvpair_name(pair), fsname);
if (error != 0)
return (error);
@@ -1226,7 +1262,7 @@ zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
pair = nvlist_next_nvpair(innvl, pair)) {
- char fsname[MAXNAMELEN];
+ char fsname[ZFS_MAX_DATASET_NAME_LEN];
error = dmu_fsname(nvpair_name(pair), fsname);
if (error != 0)
return (error);
@@ -1364,7 +1400,7 @@ put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
}
static int
-get_zfs_sb(const char *dsname, zfs_sb_t **zsbp)
+getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
{
objset_t *os;
int error;
@@ -1378,10 +1414,10 @@ get_zfs_sb(const char *dsname, zfs_sb_t **zsbp)
}
mutex_enter(&os->os_user_ptr_lock);
- *zsbp = dmu_objset_get_user(os);
+ *zfvp = dmu_objset_get_user(os);
/* bump s_active only when non-zero to prevent umount race */
- if (*zsbp == NULL || (*zsbp)->z_sb == NULL ||
- !atomic_inc_not_zero(&((*zsbp)->z_sb->s_active))) {
+ if (*zfvp == NULL || (*zfvp)->z_sb == NULL ||
+ !atomic_inc_not_zero(&((*zfvp)->z_sb->s_active))) {
error = SET_ERROR(ESRCH);
}
mutex_exit(&os->os_user_ptr_lock);
@@ -1390,28 +1426,28 @@ get_zfs_sb(const char *dsname, zfs_sb_t **zsbp)
}
/*
- * Find a zfs_sb_t for a mounted filesystem, or create our own, in which
+ * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
* case its z_sb will be NULL, and it will be opened as the owner.
* If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
* which prevents all inode ops from running.
*/
static int
-zfs_sb_hold(const char *name, void *tag, zfs_sb_t **zsbp, boolean_t writer)
+zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
{
int error = 0;
- if (get_zfs_sb(name, zsbp) != 0)
- error = zfs_sb_create(name, NULL, zsbp);
+ if (getzfsvfs(name, zfvp) != 0)
+ error = zfsvfs_create(name, zfvp);
if (error == 0) {
- rrm_enter(&(*zsbp)->z_teardown_lock, (writer) ? RW_WRITER :
+ rrm_enter(&(*zfvp)->z_teardown_lock, (writer) ? RW_WRITER :
RW_READER, tag);
- if ((*zsbp)->z_unmounted) {
+ if ((*zfvp)->z_unmounted) {
/*
* XXX we could probably try again, since the unmounting
* thread should be just about to disassociate the
- * objset from the zsb.
+ * objset from the zfsvfs.
*/
- rrm_exit(&(*zsbp)->z_teardown_lock, tag);
+ rrm_exit(&(*zfvp)->z_teardown_lock, tag);
return (SET_ERROR(EBUSY));
}
}
@@ -1419,15 +1455,15 @@ zfs_sb_hold(const char *name, void *tag, zfs_sb_t **zsbp, boolean_t writer)
}
static void
-zfs_sb_rele(zfs_sb_t *zsb, void *tag)
+zfsvfs_rele(zfsvfs_t *zfsvfs, void *tag)
{
- rrm_exit(&zsb->z_teardown_lock, tag);
+ rrm_exit(&zfsvfs->z_teardown_lock, tag);
- if (zsb->z_sb) {
- deactivate_super(zsb->z_sb);
+ if (zfsvfs->z_sb) {
+ deactivate_super(zfsvfs->z_sb);
} else {
- dmu_objset_disown(zsb->z_os, zsb);
- zfs_sb_free(zsb);
+ dmu_objset_disown(zfsvfs->z_os, zfsvfs);
+ zfsvfs_free(zfsvfs);
}
}
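zfsvfs_hold()/zfsvfs_rele() bracket ioctl work against a filesystem that may or may not be mounted. The userquota path below is a representative caller; in outline:

	zfsvfs_t *zfsvfs;
	int err;

	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
	if (err == 0) {
		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
		zfsvfs_rele(zfsvfs, FTAG);
	}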
@@ -1537,9 +1573,7 @@ zfs_ioc_pool_import(zfs_cmd_t *zc)
}
nvlist_free(config);
-
- if (props)
- nvlist_free(props);
+ nvlist_free(props);
return (error);
}
@@ -1616,7 +1650,7 @@ zfs_ioc_pool_stats(zfs_cmd_t *zc)
static int
zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
{
- nvlist_t *tryconfig, *config;
+ nvlist_t *tryconfig, *config = NULL;
int error;
if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
@@ -1640,6 +1674,7 @@ zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
* inputs:
* zc_name name of the pool
* zc_cookie scan func (pool_scan_func_t)
+ * zc_flags scrub pause/resume flag (pool_scrub_cmd_t)
*/
static int
zfs_ioc_pool_scan(zfs_cmd_t *zc)
@@ -1650,7 +1685,12 @@ zfs_ioc_pool_scan(zfs_cmd_t *zc)
if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
return (error);
- if (zc->zc_cookie == POOL_SCAN_NONE)
+ if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
+ return (SET_ERROR(EINVAL));
+
+ if (zc->zc_flags == POOL_SCRUB_PAUSE)
+ error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
+ else if (zc->zc_cookie == POOL_SCAN_NONE)
error = spa_scan_stop(spa);
else
error = spa_scan(spa, zc->zc_cookie);
@@ -1865,7 +1905,8 @@ zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
case VDEV_STATE_FAULTED:
if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
- zc->zc_obj != VDEV_AUX_EXTERNAL)
+ zc->zc_obj != VDEV_AUX_EXTERNAL &&
+ zc->zc_obj != VDEV_AUX_EXTERNAL_PERSIST)
zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
@@ -2013,8 +2054,10 @@ zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
if (!zc->zc_objset_stats.dds_inconsistent &&
dmu_objset_type(os) == DMU_OST_ZVOL) {
error = zvol_get_stats(os, nv);
- if (error == EIO)
+ if (error == EIO) {
+ nvlist_free(nv);
return (error);
+ }
VERIFY0(error);
}
if (error == 0)
@@ -2248,7 +2291,8 @@ zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
* A dataset name of maximum length cannot have any snapshots,
* so exit immediately.
*/
- if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >= MAXNAMELEN) {
+ if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
+ ZFS_MAX_DATASET_NAME_LEN) {
dmu_objset_rele(os, FTAG);
return (SET_ERROR(ESRCH));
}
@@ -2293,7 +2337,7 @@ zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
zfs_userquota_prop_t type;
uint64_t rid;
uint64_t quota;
- zfs_sb_t *zsb;
+ zfsvfs_t *zfsvfs;
int err;
if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
@@ -2318,10 +2362,10 @@ zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
rid = valary[1];
quota = valary[2];
- err = zfs_sb_hold(dsname, FTAG, &zsb, B_FALSE);
+ err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
if (err == 0) {
- err = zfs_set_userquota(zsb, type, domain, rid, quota);
- zfs_sb_rele(zsb, FTAG);
+ err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
+ zfsvfs_rele(zfsvfs, FTAG);
}
return (err);
@@ -2396,15 +2440,18 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
case ZFS_PROP_SNAPDEV:
err = zvol_set_snapdev(dsname, source, intval);
break;
+ case ZFS_PROP_VOLMODE:
+ err = zvol_set_volmode(dsname, source, intval);
+ break;
case ZFS_PROP_VERSION:
{
- zfs_sb_t *zsb;
+ zfsvfs_t *zfsvfs;
- if ((err = zfs_sb_hold(dsname, FTAG, &zsb, B_TRUE)) != 0)
+ if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
break;
- err = zfs_set_version(zsb, intval);
- zfs_sb_rele(zsb, FTAG);
+ err = zfs_set_version(zfsvfs, intval);
+ zfsvfs_rele(zfsvfs, FTAG);
if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
zfs_cmd_t *zc;
@@ -2412,6 +2459,7 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
(void) strcpy(zc->zc_name, dsname);
(void) zfs_ioc_userspace_upgrade(zc);
+ (void) zfs_ioc_userobjspace_upgrade(zc);
kmem_free(zc, sizeof (zfs_cmd_t));
}
break;
@@ -2463,7 +2511,11 @@ zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
}
/* Validate value type */
- if (err == 0 && prop == ZPROP_INVAL) {
+ if (err == 0 && source == ZPROP_SRC_INHERITED) {
+ /* inherited properties are expected to be booleans */
+ if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
+ err = SET_ERROR(EINVAL);
+ } else if (err == 0 && prop == ZPROP_INVAL) {
if (zfs_prop_user(propname)) {
if (nvpair_type(propval) != DATA_TYPE_STRING)
err = SET_ERROR(EINVAL);
@@ -2508,7 +2560,11 @@ zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
err = zfs_check_settable(dsname, pair, CRED());
if (err == 0) {
- err = zfs_prop_set_special(dsname, source, pair);
+ if (source == ZPROP_SRC_INHERITED)
+ err = -1; /* does not need special handling */
+ else
+ err = zfs_prop_set_special(dsname, source,
+ pair);
if (err == -1) {
/*
* For better performance we build up a list of
@@ -2560,6 +2616,9 @@ zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
strval = fnvpair_value_string(propval);
err = dsl_prop_set_string(dsname, propname,
source, strval);
+ } else if (nvpair_type(propval) == DATA_TYPE_BOOLEAN) {
+ err = dsl_prop_inherit(dsname, propname,
+ source);
} else {
intval = fnvpair_value_uint64(propval);
err = dsl_prop_set_int(dsname, propname, source,
@@ -2712,50 +2771,12 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc)
zprop_source_t source = (received
? ZPROP_SRC_NONE /* revert to received value, if any */
: ZPROP_SRC_INHERITED); /* explicitly inherit */
+ nvlist_t *dummy;
+ nvpair_t *pair;
+ zprop_type_t type;
+ int err;
- if (received) {
- nvlist_t *dummy;
- nvpair_t *pair;
- zprop_type_t type;
- int err;
-
- /*
- * zfs_prop_set_special() expects properties in the form of an
- * nvpair with type info.
- */
- if (prop == ZPROP_INVAL) {
- if (!zfs_prop_user(propname))
- return (SET_ERROR(EINVAL));
-
- type = PROP_TYPE_STRING;
- } else if (prop == ZFS_PROP_VOLSIZE ||
- prop == ZFS_PROP_VERSION) {
- return (SET_ERROR(EINVAL));
- } else {
- type = zfs_prop_get_type(prop);
- }
-
- VERIFY(nvlist_alloc(&dummy, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
- switch (type) {
- case PROP_TYPE_STRING:
- VERIFY(0 == nvlist_add_string(dummy, propname, ""));
- break;
- case PROP_TYPE_NUMBER:
- case PROP_TYPE_INDEX:
- VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
- break;
- default:
- nvlist_free(dummy);
- return (SET_ERROR(EINVAL));
- }
-
- pair = nvlist_next_nvpair(dummy, NULL);
- err = zfs_prop_set_special(zc->zc_name, source, pair);
- nvlist_free(dummy);
- if (err != -1)
- return (err); /* special property already handled */
- } else {
+ if (!received) {
/*
* Only check this in the non-received case. We want to allow
* 'inherit -S' to revert non-inheritable properties like quota
@@ -2766,8 +2787,49 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc)
return (SET_ERROR(EINVAL));
}
- /* property name has been validated by zfs_secpolicy_inherit_prop() */
- return (dsl_prop_inherit(zc->zc_name, zc->zc_value, source));
+ if (prop == ZPROP_INVAL) {
+ if (!zfs_prop_user(propname))
+ return (SET_ERROR(EINVAL));
+
+ type = PROP_TYPE_STRING;
+ } else if (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION) {
+ return (SET_ERROR(EINVAL));
+ } else {
+ type = zfs_prop_get_type(prop);
+ }
+
+ /*
+ * zfs_prop_set_special() expects properties in the form of an
+ * nvpair with type info.
+ */
+ dummy = fnvlist_alloc();
+
+ switch (type) {
+ case PROP_TYPE_STRING:
+ VERIFY(0 == nvlist_add_string(dummy, propname, ""));
+ break;
+ case PROP_TYPE_NUMBER:
+ case PROP_TYPE_INDEX:
+ VERIFY(0 == nvlist_add_uint64(dummy, propname, 0));
+ break;
+ default:
+ err = SET_ERROR(EINVAL);
+ goto errout;
+ }
+
+ pair = nvlist_next_nvpair(dummy, NULL);
+ if (pair == NULL) {
+ err = SET_ERROR(EINVAL);
+ } else {
+ err = zfs_prop_set_special(zc->zc_name, source, pair);
+ if (err == -1) /* property is not "special", needs handling */
+ err = dsl_prop_inherit(zc->zc_name, zc->zc_value,
+ source);
+ }
+
+errout:
+ nvlist_free(dummy);
+ return (err);
}
static int
@@ -2960,6 +3022,9 @@ zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
ASSERT(zplprops != NULL);
+ if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
+ return (SET_ERROR(EINVAL));
+
/*
* Pull out creator prop choices, if any.
*/
@@ -3036,7 +3101,7 @@ zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
boolean_t fuids_ok, sa_ok;
uint64_t zplver = ZPL_VERSION;
objset_t *os = NULL;
- char parentname[MAXNAMELEN];
+ char parentname[ZFS_MAX_DATASET_NAME_LEN];
char *cp;
spa_t *spa;
uint64_t spa_vers;
@@ -3185,8 +3250,25 @@ zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
if (error == 0) {
error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
nvprops, outnvl);
- if (error != 0)
- (void) dsl_destroy_head(fsname);
+ if (error != 0) {
+ spa_t *spa;
+ int error2;
+
+ /*
+ * Volumes will return EBUSY and cannot be destroyed
+ * until all asynchronous minor handling has completed.
+ * Wait for the spa_zvol_taskq to drain, then retry.
+ */
+ error2 = dsl_destroy_head(fsname);
+ while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) {
+ error2 = spa_open(fsname, &spa, FTAG);
+ if (error2 == 0) {
+ taskq_wait(spa->spa_zvol_taskq);
+ spa_close(spa, FTAG);
+ }
+ error2 = dsl_destroy_head(fsname);
+ }
+ }
}
return (error);
}
@@ -3315,6 +3397,8 @@ zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
* we clear the TSD here.
*/
poolname = tsd_get(zfs_allow_log_key);
+ if (poolname == NULL)
+ return (SET_ERROR(EINVAL));
(void) tsd_set(zfs_allow_log_key, NULL);
error = spa_open(poolname, &spa, FTAG);
strfree(poolname);
@@ -3385,7 +3469,7 @@ zfs_destroy_unmount_origin(const char *fsname)
return;
ds = dmu_objset_ds(os);
if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
- char originname[MAXNAMELEN];
+ char originname[ZFS_MAX_DATASET_NAME_LEN];
dsl_dataset_name(ds->ds_prev, originname);
dmu_objset_rele(os, FTAG);
(void) zfs_unmount_snap(originname);
@@ -3538,10 +3622,39 @@ zfs_ioc_destroy(zfs_cmd_t *zc)
return (err);
}
- if (strchr(zc->zc_name, '@'))
+ if (strchr(zc->zc_name, '@')) {
err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
- else
+ } else {
err = dsl_destroy_head(zc->zc_name);
+ if (err == EEXIST) {
+ /*
+ * The given DS may have hidden child (%recv)
+ * datasets, i.e. "leftovers" from a previously
+ * interrupted 'zfs receive'.
+ *
+ * 6 extra bytes for /%recv
+ */
+ char namebuf[ZFS_MAX_DATASET_NAME_LEN + 6];
+
+ if (snprintf(namebuf, sizeof (namebuf), "%s/%s",
+ zc->zc_name, recv_clone_name) >=
+ sizeof (namebuf))
+ return (SET_ERROR(EINVAL));
+
+ /*
+ * Try to remove the hidden child (%recv) first,
+ * then try to remove the target dataset.
+ * If the hidden child (%recv) does not exist,
+ * the original error (EEXIST) is returned.
+ */
+ err = dsl_destroy_head(namebuf);
+ if (err == 0)
+ err = dsl_destroy_head(zc->zc_name);
+ else if (err == ENOENT)
+ err = EEXIST;
+ }
+ }
return (err);
}
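For reference, the "+ 6" in the buffer size above is exactly the length of the "/%recv" suffix (a '/' plus the five characters of "%recv"); assuming ZFS_MAX_DATASET_NAME_LEN already accounts for the terminating NUL, a maximum-length dataset name plus the suffix fits the buffer exactly, and the snprintf() return-value check guards the boundary in any case.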
@@ -3549,30 +3662,50 @@ zfs_ioc_destroy(zfs_cmd_t *zc)
/*
* fsname is name of dataset to rollback (to most recent snapshot)
*
- * innvl is not used.
+ * innvl may contain the name of the expected target snapshot
*
* outnvl: "target" -> name of most recent snapshot
* }
*/
/* ARGSUSED */
static int
-zfs_ioc_rollback(const char *fsname, nvlist_t *args, nvlist_t *outnvl)
+zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
{
- zfs_sb_t *zsb;
+ zfsvfs_t *zfsvfs;
+ zvol_state_t *zv;
+ char *target = NULL;
int error;
- if (get_zfs_sb(fsname, &zsb) == 0) {
- error = zfs_suspend_fs(zsb);
+ (void) nvlist_lookup_string(innvl, "target", &target);
+ if (target != NULL) {
+ int fslen = strlen(fsname);
+
+ if (strncmp(fsname, target, fslen) != 0)
+ return (SET_ERROR(EINVAL));
+ if (target[fslen] != '@')
+ return (SET_ERROR(EINVAL));
+ }
+
+ if (getzfsvfs(fsname, &zfsvfs) == 0) {
+ dsl_dataset_t *ds;
+
+ ds = dmu_objset_ds(zfsvfs->z_os);
+ error = zfs_suspend_fs(zfsvfs);
if (error == 0) {
int resume_err;
- error = dsl_dataset_rollback(fsname, zsb, outnvl);
- resume_err = zfs_resume_fs(zsb, fsname);
+ error = dsl_dataset_rollback(fsname, target, zfsvfs,
+ outnvl);
+ resume_err = zfs_resume_fs(zfsvfs, ds);
error = error ? error : resume_err;
}
- deactivate_super(zsb->z_sb);
+ deactivate_super(zfsvfs->z_sb);
+ } else if ((zv = zvol_suspend(fsname)) != NULL) {
+ error = dsl_dataset_rollback(fsname, target, zvol_tag(zv),
+ outnvl);
+ zvol_resume(zv);
} else {
- error = dsl_dataset_rollback(fsname, NULL, outnvl);
+ error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
}
return (error);
}
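The "target" check above accepts only snapshots belonging to the dataset being rolled back: the snapshot name must begin with fsname and the very next character must be '@'. A minimal user-space sketch of the same rule (illustration only, not kernel code; target_is_valid() is a hypothetical stand-in):

#include <stdio.h>
#include <string.h>

/* Stand-in for the kernel check: target must be "<fsname>@<snap>". */
static int
target_is_valid(const char *fsname, const char *target)
{
	size_t fslen = strlen(fsname);

	return (strncmp(fsname, target, fslen) == 0 && target[fslen] == '@');
}

int
main(void)
{
	/* accepted: a snapshot of the rolled-back dataset */
	printf("%d\n", target_is_valid("pool/fs", "pool/fs@snap"));  /* 1 */
	/* rejected: a sibling dataset that merely shares the prefix */
	printf("%d\n", target_is_valid("pool/fs", "pool/fs2@snap")); /* 0 */
	return (0);
}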
@@ -3660,13 +3793,23 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
const char *gq_prefix =
zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
+ const char *uiq_prefix =
+ zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA];
+ const char *giq_prefix =
+ zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA];
if (strncmp(propname, uq_prefix,
strlen(uq_prefix)) == 0) {
perm = ZFS_DELEG_PERM_USERQUOTA;
+ } else if (strncmp(propname, uiq_prefix,
+ strlen(uiq_prefix)) == 0) {
+ perm = ZFS_DELEG_PERM_USEROBJQUOTA;
} else if (strncmp(propname, gq_prefix,
strlen(gq_prefix)) == 0) {
perm = ZFS_DELEG_PERM_GROUPQUOTA;
+ } else if (strncmp(propname, giq_prefix,
+ strlen(giq_prefix)) == 0) {
+ perm = ZFS_DELEG_PERM_GROUPOBJQUOTA;
} else {
/* USERUSED and GROUPUSED are read-only */
return (SET_ERROR(EINVAL));
@@ -3750,11 +3893,6 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
return (SET_ERROR(ENOTSUP));
break;
- case ZFS_PROP_DEDUP:
- if (zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
- return (SET_ERROR(ENOTSUP));
- break;
-
case ZFS_PROP_VOLBLOCKSIZE:
case ZFS_PROP_RECORDSIZE:
/* Record sizes above 128k need the feature to be enabled */
@@ -3762,23 +3900,13 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
intval > SPA_OLD_MAXBLOCKSIZE) {
spa_t *spa;
- /*
- * If this is a bootable dataset then
- * the we don't allow large (>128K) blocks,
- * because GRUB doesn't support them.
- */
- if (zfs_is_bootfs(dsname) &&
- intval > SPA_OLD_MAXBLOCKSIZE) {
- return (SET_ERROR(EDOM));
- }
-
/*
* We don't allow setting the property above 1MB,
* unless the tunable has been changed.
*/
if (intval > zfs_max_recordsize ||
intval > SPA_MAXBLOCKSIZE)
- return (SET_ERROR(EDOM));
+ return (SET_ERROR(ERANGE));
if ((err = spa_open(dsname, &spa, FTAG)) != 0)
return (err);
@@ -3792,6 +3920,34 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
}
break;
+ case ZFS_PROP_DNODESIZE:
+ /* Dnode sizes above 512 bytes need the feature to be enabled */
+ if (nvpair_value_uint64(pair, &intval) == 0 &&
+ intval != ZFS_DNSIZE_LEGACY) {
+ spa_t *spa;
+
+ /*
+ * If this is a bootable dataset then
+ * we don't allow large (>512B) dnodes,
+ * because GRUB doesn't support them.
+ */
+ if (zfs_is_bootfs(dsname) &&
+ intval != ZFS_DNSIZE_LEGACY) {
+ return (SET_ERROR(EDOM));
+ }
+
+ if ((err = spa_open(dsname, &spa, FTAG)) != 0)
+ return (err);
+
+ if (!spa_feature_is_enabled(spa,
+ SPA_FEATURE_LARGE_DNODE)) {
+ spa_close(spa, FTAG);
+ return (SET_ERROR(ENOTSUP));
+ }
+ spa_close(spa, FTAG);
+ }
+ break;
+
case ZFS_PROP_SHARESMB:
if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
return (SET_ERROR(ENOTSUP));
@@ -3806,6 +3962,47 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
return (SET_ERROR(ENOTSUP));
}
break;
+ case ZFS_PROP_CHECKSUM:
+ case ZFS_PROP_DEDUP:
+ {
+ spa_feature_t feature;
+ spa_t *spa;
+ uint64_t intval;
+ int err;
+
+ /* dedup feature version checks */
+ if (prop == ZFS_PROP_DEDUP &&
+ zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
+ return (SET_ERROR(ENOTSUP));
+
+ if (nvpair_value_uint64(pair, &intval) != 0)
+ return (SET_ERROR(EINVAL));
+
+ /* check prop value is enabled in features */
+ feature = zio_checksum_to_feature(intval & ZIO_CHECKSUM_MASK);
+ if (feature == SPA_FEATURE_NONE)
+ break;
+
+ if ((err = spa_open(dsname, &spa, FTAG)) != 0)
+ return (err);
+ /*
+ * Salted checksums are not supported on root pools.
+ */
+ if (spa_bootfs(spa) != 0 &&
+ intval < ZIO_CHECKSUM_FUNCTIONS &&
+ (zio_checksum_table[intval].ci_flags &
+ ZCHECKSUM_FLAG_SALTED)) {
+ spa_close(spa, FTAG);
+ return (SET_ERROR(ERANGE));
+ }
+ if (!spa_feature_is_enabled(spa, feature)) {
+ spa_close(spa, FTAG);
+ return (SET_ERROR(ENOTSUP));
+ }
+ spa_close(spa, FTAG);
+ break;
+ }
+
default:
break;
}
@@ -3841,12 +4038,13 @@ zfs_check_clearable(char *dataset, nvlist_t *props, nvlist_t **errlist)
VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
- (void) strcpy(zc->zc_name, dataset);
+ (void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name));
pair = nvlist_next_nvpair(props, NULL);
while (pair != NULL) {
next_pair = nvlist_next_nvpair(props, pair);
- (void) strcpy(zc->zc_value, nvpair_name(pair));
+ (void) strlcpy(zc->zc_value, nvpair_name(pair),
+ sizeof (zc->zc_value));
if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
(err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
VERIFY(nvlist_remove_nvpair(props, pair) == 0);
@@ -3941,74 +4139,90 @@ props_reduce(nvlist_t *props, nvlist_t *origprops)
}
}
+/*
+ * Extract properties that cannot be set PRIOR to the receipt of a dataset.
+ * For example, refquota cannot be set until after the receipt of a dataset,
+ * because in replication streams, an older/earlier snapshot may exceed the
+ * refquota. We want to receive the older/earlier snapshot, but setting
+ * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
+ * the older/earlier snapshot from being received (with EDQUOT).
+ *
+ * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
+ *
+ * libzfs will need to be judicious in handling errors encountered by
+ * props extracted by this function.
+ */
+static nvlist_t *
+extract_delay_props(nvlist_t *props)
+{
+ nvlist_t *delayprops;
+ nvpair_t *nvp, *tmp;
+ static const zfs_prop_t delayable[] = { ZFS_PROP_REFQUOTA, 0 };
+ int i;
+
+ VERIFY(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP) == 0);
+
+ for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
+ nvp = nvlist_next_nvpair(props, nvp)) {
+ /*
+ * strcmp() is safe because zfs_prop_to_name() always returns
+ * a bounded string.
+ */
+ for (i = 0; delayable[i] != 0; i++) {
+ if (strcmp(zfs_prop_to_name(delayable[i]),
+ nvpair_name(nvp)) == 0) {
+ break;
+ }
+ }
+ if (delayable[i] != 0) {
+ tmp = nvlist_prev_nvpair(props, nvp);
+ VERIFY(nvlist_add_nvpair(delayprops, nvp) == 0);
+ VERIFY(nvlist_remove_nvpair(props, nvp) == 0);
+ nvp = tmp;
+ }
+ }
+
+ if (nvlist_empty(delayprops)) {
+ nvlist_free(delayprops);
+ delayprops = NULL;
+ }
+ return (delayprops);
+}
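To make the delayed-property flow concrete, here is a minimal user-space sketch, assuming libnvpair's fnvlist API; extract_delay_props() itself is kernel-private, so the refquota extraction is inlined as a trivial stand-in:

#include <libnvpair.h>

int
main(void)
{
	nvlist_t *props = fnvlist_alloc();
	nvlist_t *delayed = fnvlist_alloc();

	fnvlist_add_string(props, "compression", "lz4");
	fnvlist_add_uint64(props, "refquota", 1024ULL * 1024 * 1024);

	/* stand-in for extract_delay_props(): move refquota aside */
	fnvlist_add_uint64(delayed, "refquota",
	    fnvlist_lookup_uint64(props, "refquota"));
	fnvlist_remove(props, "refquota");

	/*
	 * 1. apply "props" before receiving the stream
	 * 2. receive the stream (older snapshots may exceed refquota)
	 * 3. apply "delayed" once the receive has completed
	 */

	fnvlist_free(delayed);
	fnvlist_free(props);
	return (0);
}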
+
#ifdef DEBUG
static boolean_t zfs_ioc_recv_inject_err;
#endif
/*
- * inputs:
- * zc_name name of containing filesystem
- * zc_nvlist_src{_size} nvlist of properties to apply
- * zc_value name of snapshot to create
- * zc_string name of clone origin (if DRR_FLAG_CLONE)
- * zc_cookie file descriptor to recv from
- * zc_begin_record the BEGIN record of the stream (not byteswapped)
- * zc_guid force flag
- * zc_cleanup_fd cleanup-on-exit file descriptor
- * zc_action_handle handle for this guid/ds mapping (or zero on first call)
- *
- * outputs:
- * zc_cookie number of bytes read
- * zc_nvlist_dst{_size} error for each unapplied received property
- * zc_obj zprop_errflags_t
- * zc_action_handle handle for this guid/ds mapping
+ * nvlist 'errors' is always allocated. It will contain descriptions of
+ * encountered errors, if any. It is the caller's responsibility to free.
*/
static int
-zfs_ioc_recv(zfs_cmd_t *zc)
+zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
+ nvlist_t *localprops, boolean_t force, boolean_t resumable, int input_fd,
+ dmu_replay_record_t *begin_record, int cleanup_fd, uint64_t *read_bytes,
+ uint64_t *errflags, uint64_t *action_handle, nvlist_t **errors)
{
- file_t *fp;
dmu_recv_cookie_t drc;
- boolean_t force = (boolean_t)zc->zc_guid;
- int fd;
int error = 0;
int props_error = 0;
- nvlist_t *errors;
offset_t off;
- nvlist_t *props = NULL; /* sent properties */
+ nvlist_t *delayprops = NULL; /* sent properties applied post-receive */
nvlist_t *origprops = NULL; /* existing properties */
- char *origin = NULL;
- char *tosnap;
- char tofs[ZFS_MAXNAMELEN];
+ nvlist_t *origrecvd = NULL; /* existing received properties */
boolean_t first_recvd_props = B_FALSE;
+ file_t *input_fp;
- if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
- strchr(zc->zc_value, '@') == NULL ||
- strchr(zc->zc_value, '%'))
- return (SET_ERROR(EINVAL));
+ *read_bytes = 0;
+ *errflags = 0;
+ *errors = fnvlist_alloc();
- (void) strcpy(tofs, zc->zc_value);
- tosnap = strchr(tofs, '@');
- *tosnap++ = '\0';
-
- if (zc->zc_nvlist_src != 0 &&
- (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
- zc->zc_iflags, &props)) != 0)
- return (error);
-
- fd = zc->zc_cookie;
- fp = getf(fd);
- if (fp == NULL) {
- nvlist_free(props);
+ input_fp = getf(input_fd);
+ if (input_fp == NULL)
return (SET_ERROR(EBADF));
- }
-
- VERIFY(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP) == 0);
-
- if (zc->zc_string[0])
- origin = zc->zc_string;
error = dmu_recv_begin(tofs, tosnap,
- &zc->zc_begin_record, force, origin, &drc);
+ begin_record, force, resumable, origin, &drc);
if (error != 0)
goto out;
@@ -4017,7 +4231,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
* to the new data. Note that we must call dmu_recv_stream() if
* dmu_recv_begin() succeeds.
*/
- if (props != NULL && !drc.drc_newfs) {
+ if (recvprops != NULL && !drc.drc_newfs) {
if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
SPA_VERSION_RECVD_PROPS &&
!dsl_prop_get_hasrecvd(tofs))
@@ -4028,7 +4242,7 @@ zfs_ioc_recv(zfs_cmd_t *zc)
* completely replace the existing received properties, so stash
* away the existing ones.
*/
- if (dsl_prop_get_received(tofs, &origprops) == 0) {
+ if (dsl_prop_get_received(tofs, &origrecvd) == 0) {
nvlist_t *errlist = NULL;
/*
* Don't bother writing a property if its value won't
@@ -4039,67 +4253,132 @@ zfs_ioc_recv(zfs_cmd_t *zc)
* regardless.
*/
if (!first_recvd_props)
- props_reduce(props, origprops);
- if (zfs_check_clearable(tofs, origprops, &errlist) != 0)
- (void) nvlist_merge(errors, errlist, 0);
+ props_reduce(recvprops, origrecvd);
+ if (zfs_check_clearable(tofs, origrecvd, &errlist) != 0)
+ (void) nvlist_merge(*errors, errlist, 0);
nvlist_free(errlist);
- if (clear_received_props(tofs, origprops,
- first_recvd_props ? NULL : props) != 0)
- zc->zc_obj |= ZPROP_ERR_NOCLEAR;
+ if (clear_received_props(tofs, origrecvd,
+ first_recvd_props ? NULL : recvprops) != 0)
+ *errflags |= ZPROP_ERR_NOCLEAR;
} else {
- zc->zc_obj |= ZPROP_ERR_NOCLEAR;
+ *errflags |= ZPROP_ERR_NOCLEAR;
}
}
- if (props != NULL) {
+ /*
+ * Stash away existing properties so we can restore them on error unless
+ * we're doing the first receive after SPA_VERSION_RECVD_PROPS, in which
+ * case "origrecvd" will take care of that.
+ */
+ if (localprops != NULL && !drc.drc_newfs && !first_recvd_props) {
+ objset_t *os;
+ if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
+ if (dsl_prop_get_all(os, &origprops) != 0) {
+ *errflags |= ZPROP_ERR_NOCLEAR;
+ }
+ dmu_objset_rele(os, FTAG);
+ } else {
+ *errflags |= ZPROP_ERR_NOCLEAR;
+ }
+ }
+
+ if (recvprops != NULL) {
props_error = dsl_prop_set_hasrecvd(tofs);
if (props_error == 0) {
+ delayprops = extract_delay_props(recvprops);
(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
- props, errors);
+ recvprops, *errors);
}
}
- if (zc->zc_nvlist_dst_size != 0 &&
- (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
- put_nvlist(zc, errors) != 0)) {
- /*
- * Caller made zc->zc_nvlist_dst less than the minimum expected
- * size or supplied an invalid address.
- */
- props_error = SET_ERROR(EINVAL);
+ if (localprops != NULL) {
+ nvlist_t *oprops = fnvlist_alloc();
+ nvlist_t *xprops = fnvlist_alloc();
+ nvpair_t *nvp = NULL;
+
+ while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
+ if (nvpair_type(nvp) == DATA_TYPE_BOOLEAN) {
+ /* -x property */
+ const char *name = nvpair_name(nvp);
+ zfs_prop_t prop = zfs_name_to_prop(name);
+ if (prop != ZPROP_INVAL) {
+ if (!zfs_prop_inheritable(prop))
+ continue;
+ } else if (!zfs_prop_user(name))
+ continue;
+ fnvlist_add_boolean(xprops, name);
+ } else {
+ /* -o property=value */
+ fnvlist_add_nvpair(oprops, nvp);
+ }
+ }
+ (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
+ oprops, *errors);
+ (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
+ xprops, *errors);
+
+ nvlist_free(oprops);
+ nvlist_free(xprops);
}
- off = fp->f_offset;
- error = dmu_recv_stream(&drc, fp->f_vnode, &off, zc->zc_cleanup_fd,
- &zc->zc_action_handle);
+ off = input_fp->f_offset;
+ error = dmu_recv_stream(&drc, input_fp->f_vnode, &off, cleanup_fd,
+ action_handle);
if (error == 0) {
- zfs_sb_t *zsb = NULL;
+ zfsvfs_t *zfsvfs = NULL;
+ zvol_state_t *zv = NULL;
- if (get_zfs_sb(tofs, &zsb) == 0) {
+ if (getzfsvfs(tofs, &zfsvfs) == 0) {
/* online recv */
+ dsl_dataset_t *ds;
int end_err;
- error = zfs_suspend_fs(zsb);
+ ds = dmu_objset_ds(zfsvfs->z_os);
+ error = zfs_suspend_fs(zfsvfs);
/*
* If the suspend fails, then the recv_end will
* likely also fail, and clean up after itself.
*/
- end_err = dmu_recv_end(&drc, zsb);
+ end_err = dmu_recv_end(&drc, zfsvfs);
if (error == 0)
- error = zfs_resume_fs(zsb, tofs);
+ error = zfs_resume_fs(zfsvfs, ds);
error = error ? error : end_err;
- deactivate_super(zsb->z_sb);
+ deactivate_super(zfsvfs->z_sb);
+ } else if ((zv = zvol_suspend(tofs)) != NULL) {
+ error = dmu_recv_end(&drc, zvol_tag(zv));
+ zvol_resume(zv);
} else {
error = dmu_recv_end(&drc, NULL);
}
+
+ /* Set delayed properties now, after we're done receiving. */
+ if (delayprops != NULL && error == 0) {
+ (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
+ delayprops, *errors);
+ }
}
- zc->zc_cookie = off - fp->f_offset;
- if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
- fp->f_offset = off;
+ if (delayprops != NULL) {
+ /*
+ * Merge delayed props back in with initial props, in case
+ * this is a DEBUG kernel and zfs_ioc_recv_inject_err is set (which means
+ * we have to make sure clear_received_props() includes
+ * the delayed properties).
+ *
+ * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
+ * using ASSERT() will be just like a VERIFY.
+ */
+ ASSERT(nvlist_merge(recvprops, delayprops, 0) == 0);
+ nvlist_free(delayprops);
+ }
+
+ *read_bytes = off - input_fp->f_offset;
+ if (VOP_SEEK(input_fp->f_vnode, input_fp->f_offset, &off, NULL) == 0)
+ input_fp->f_offset = off;
#ifdef DEBUG
if (zfs_ioc_recv_inject_err) {
@@ -4111,45 +4390,99 @@ zfs_ioc_recv(zfs_cmd_t *zc)
/*
* On error, restore the original props.
*/
- if (error != 0 && props != NULL && !drc.drc_newfs) {
- if (clear_received_props(tofs, props, NULL) != 0) {
+ if (error != 0 && recvprops != NULL && !drc.drc_newfs) {
+ if (clear_received_props(tofs, recvprops, NULL) != 0) {
/*
* We failed to clear the received properties.
* Since we may have left a $recvd value on the
* system, we can't clear the $hasrecvd flag.
*/
- zc->zc_obj |= ZPROP_ERR_NORESTORE;
+ *errflags |= ZPROP_ERR_NORESTORE;
} else if (first_recvd_props) {
dsl_prop_unset_hasrecvd(tofs);
}
- if (origprops == NULL && !drc.drc_newfs) {
+ if (origrecvd == NULL && !drc.drc_newfs) {
/* We failed to stash the original properties. */
- zc->zc_obj |= ZPROP_ERR_NORESTORE;
+ *errflags |= ZPROP_ERR_NORESTORE;
}
/*
* dsl_props_set() will not convert RECEIVED to LOCAL on or
* after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
- * explictly if we're restoring local properties cleared in the
+ * explicitly if we're restoring local properties cleared in the
* first new-style receive.
*/
- if (origprops != NULL &&
+ if (origrecvd != NULL &&
zfs_set_prop_nvlist(tofs, (first_recvd_props ?
ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
- origprops, NULL) != 0) {
+ origrecvd, NULL) != 0) {
/*
* We stashed the original properties but failed to
* restore them.
*/
- zc->zc_obj |= ZPROP_ERR_NORESTORE;
+ *errflags |= ZPROP_ERR_NORESTORE;
+ }
+ }
+ if (error != 0 && localprops != NULL && !drc.drc_newfs &&
+ !first_recvd_props) {
+ nvlist_t *setprops;
+ nvlist_t *inheritprops;
+ nvpair_t *nvp;
+
+ if (origprops == NULL) {
+ /* We failed to stash the original properties. */
+ *errflags |= ZPROP_ERR_NORESTORE;
+ goto out;
+ }
+
+ /* Restore original props */
+ setprops = fnvlist_alloc();
+ inheritprops = fnvlist_alloc();
+ nvp = NULL;
+ while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
+ const char *name = nvpair_name(nvp);
+ const char *source;
+ nvlist_t *attrs;
+
+ if (!nvlist_exists(origprops, name)) {
+ /*
+ * Property was not present or was explicitly
+ * inherited before the receive, restore this.
+ */
+ fnvlist_add_boolean(inheritprops, name);
+ continue;
+ }
+ attrs = fnvlist_lookup_nvlist(origprops, name);
+ source = fnvlist_lookup_string(attrs, ZPROP_SOURCE);
+
+ /* Skip received properties */
+ if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0)
+ continue;
+
+ if (strcmp(source, tofs) == 0) {
+ /* Property was locally set */
+ fnvlist_add_nvlist(setprops, name, attrs);
+ } else {
+ /* Property was implicitly inherited */
+ fnvlist_add_boolean(inheritprops, name);
+ }
}
+
+ if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, setprops,
+ NULL) != 0)
+ *errflags |= ZPROP_ERR_NORESTORE;
+ if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, inheritprops,
+ NULL) != 0)
+ *errflags |= ZPROP_ERR_NORESTORE;
+
+ nvlist_free(setprops);
+ nvlist_free(inheritprops);
}
out:
- nvlist_free(props);
+ releasef(input_fd);
+ nvlist_free(origrecvd);
nvlist_free(origprops);
- nvlist_free(errors);
- releasef(fd);
if (error == 0)
error = props_error;
@@ -4157,6 +4490,194 @@ zfs_ioc_recv(zfs_cmd_t *zc)
return (error);
}
+/*
+ * inputs:
+ * zc_name name of containing filesystem (unused)
+ * zc_nvlist_src{_size} nvlist of properties to apply
+ * zc_nvlist_conf{_size} nvlist of properties to exclude
+ * (DATA_TYPE_BOOLEAN) and override (everything else)
+ * zc_value name of snapshot to create
+ * zc_string name of clone origin (if DRR_FLAG_CLONE)
+ * zc_cookie file descriptor to recv from
+ * zc_begin_record the BEGIN record of the stream (not byteswapped)
+ * zc_guid force flag
+ * zc_cleanup_fd cleanup-on-exit file descriptor
+ * zc_action_handle handle for this guid/ds mapping (or zero on first call)
+ *
+ * outputs:
+ * zc_cookie number of bytes read
+ * zc_obj zprop_errflags_t
+ * zc_action_handle handle for this guid/ds mapping
+ * zc_nvlist_dst{_size} error for each unapplied received property
+ */
+static int
+zfs_ioc_recv(zfs_cmd_t *zc)
+{
+ dmu_replay_record_t begin_record;
+ nvlist_t *errors = NULL;
+ nvlist_t *recvdprops = NULL;
+ nvlist_t *localprops = NULL;
+ char *origin = NULL;
+ char *tosnap;
+ char tofs[ZFS_MAX_DATASET_NAME_LEN];
+ int error = 0;
+
+ if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
+ strchr(zc->zc_value, '@') == NULL ||
+ strchr(zc->zc_value, '%'))
+ return (SET_ERROR(EINVAL));
+
+ (void) strlcpy(tofs, zc->zc_value, sizeof (tofs));
+ tosnap = strchr(tofs, '@');
+ *tosnap++ = '\0';
+
+ if (zc->zc_nvlist_src != 0 &&
+ (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
+ zc->zc_iflags, &recvdprops)) != 0)
+ return (error);
+
+ if (zc->zc_nvlist_conf != 0 &&
+ (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
+ zc->zc_iflags, &localprops)) != 0)
+ return (error);
+
+ if (zc->zc_string[0])
+ origin = zc->zc_string;
+
+ begin_record.drr_type = DRR_BEGIN;
+ begin_record.drr_payloadlen = 0;
+ begin_record.drr_u.drr_begin = zc->zc_begin_record;
+
+ error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
+ zc->zc_guid, B_FALSE, zc->zc_cookie, &begin_record,
+ zc->zc_cleanup_fd, &zc->zc_cookie, &zc->zc_obj,
+ &zc->zc_action_handle, &errors);
+ nvlist_free(recvdprops);
+ nvlist_free(localprops);
+
+ /*
+ * Now that all props, initial and delayed, are set, report the prop
+ * errors to the caller.
+ */
+ if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
+ (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
+ put_nvlist(zc, errors) != 0)) {
+ /*
+ * Caller made zc->zc_nvlist_dst less than the minimum expected
+ * size or supplied an invalid address.
+ */
+ error = SET_ERROR(EINVAL);
+ }
+
+ nvlist_free(errors);
+
+ return (error);
+}
+
+/*
+ * innvl: {
+ * "snapname" -> full name of the snapshot to create
+ * (optional) "props" -> received properties to set (nvlist)
+ * (optional) "localprops" -> override and exclude properties (nvlist)
+ * (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE)
+ * "begin_record" -> non-byteswapped dmu_replay_record_t
+ * "input_fd" -> file descriptor to read stream from (int32)
+ * (optional) "force" -> force flag (value ignored)
+ * (optional) "resumable" -> resumable flag (value ignored)
+ * (optional) "cleanup_fd" -> cleanup-on-exit file descriptor
+ * (optional) "action_handle" -> handle for this guid/ds mapping
+ * }
+ *
+ * outnvl: {
+ * "read_bytes" -> number of bytes read
+ * "error_flags" -> zprop_errflags_t
+ * "action_handle" -> handle for this guid/ds mapping
+ * "errors" -> error for each unapplied received property (nvlist)
+ * }
+ */
+static int
+zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
+{
+ dmu_replay_record_t *begin_record;
+ uint_t begin_record_size;
+ nvlist_t *errors = NULL;
+ nvlist_t *recvprops = NULL;
+ nvlist_t *localprops = NULL;
+ char *snapname = NULL;
+ char *origin = NULL;
+ char *tosnap;
+ char tofs[ZFS_MAX_DATASET_NAME_LEN];
+ boolean_t force;
+ boolean_t resumable;
+ uint64_t action_handle = 0;
+ uint64_t read_bytes = 0;
+ uint64_t errflags = 0;
+ int input_fd = -1;
+ int cleanup_fd = -1;
+ int error;
+
+ error = nvlist_lookup_string(innvl, "snapname", &snapname);
+ if (error != 0)
+ return (SET_ERROR(EINVAL));
+
+ if (dataset_namecheck(snapname, NULL, NULL) != 0 ||
+ strchr(snapname, '@') == NULL ||
+ strchr(snapname, '%'))
+ return (SET_ERROR(EINVAL));
+
+ (void) strcpy(tofs, snapname);
+ tosnap = strchr(tofs, '@');
+ *tosnap++ = '\0';
+
+ error = nvlist_lookup_string(innvl, "origin", &origin);
+ if (error && error != ENOENT)
+ return (error);
+
+ error = nvlist_lookup_byte_array(innvl, "begin_record",
+ (uchar_t **)&begin_record, &begin_record_size);
+ if (error != 0 || begin_record_size != sizeof (*begin_record))
+ return (SET_ERROR(EINVAL));
+
+ error = nvlist_lookup_int32(innvl, "input_fd", &input_fd);
+ if (error != 0)
+ return (SET_ERROR(EINVAL));
+
+ force = nvlist_exists(innvl, "force");
+ resumable = nvlist_exists(innvl, "resumable");
+
+ error = nvlist_lookup_int32(innvl, "cleanup_fd", &cleanup_fd);
+ if (error && error != ENOENT)
+ return (error);
+
+ error = nvlist_lookup_uint64(innvl, "action_handle", &action_handle);
+ if (error && error != ENOENT)
+ return (error);
+
+ /* we still use "props" here for backwards compatibility */
+ error = nvlist_lookup_nvlist(innvl, "props", &recvprops);
+ if (error && error != ENOENT)
+ return (error);
+
+ error = nvlist_lookup_nvlist(innvl, "localprops", &localprops);
+ if (error && error != ENOENT)
+ return (error);
+
+ error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvprops, localprops,
+ force, resumable, input_fd, begin_record, cleanup_fd, &read_bytes,
+ &errflags, &action_handle, &errors);
+
+ fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
+ fnvlist_add_uint64(outnvl, "error_flags", errflags);
+ fnvlist_add_uint64(outnvl, "action_handle", action_handle);
+ fnvlist_add_nvlist(outnvl, "errors", errors);
+
+ nvlist_free(errors);
+ nvlist_free(recvprops);
+ nvlist_free(localprops);
+
+ return (error);
+}
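As an illustration of the innvl contract documented above, a caller could assemble the required keys as follows (user-space sketch, libnvpair assumed; build_recv_innvl() is a hypothetical helper, and the handler rejects a begin record whose size differs from sizeof (dmu_replay_record_t)):

#include <libnvpair.h>

static nvlist_t *
build_recv_innvl(const char *snapname, int input_fd,
    uchar_t *begin_record, uint_t begin_size)
{
	nvlist_t *innvl = fnvlist_alloc();

	fnvlist_add_string(innvl, "snapname", snapname);
	fnvlist_add_int32(innvl, "input_fd", input_fd);
	/* must be exactly sizeof (dmu_replay_record_t) bytes */
	fnvlist_add_byte_array(innvl, "begin_record", begin_record,
	    begin_size);
	/* optional flags: mere presence is what the handler tests */
	fnvlist_add_boolean(innvl, "force");
	fnvlist_add_boolean(innvl, "resumable");
	return (innvl);
}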
+
/*
* inputs:
* zc_name name of snapshot to send
@@ -4179,6 +4700,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
boolean_t estimate = (zc->zc_guid != 0);
boolean_t embedok = (zc->zc_flags & 0x1);
boolean_t large_block_ok = (zc->zc_flags & 0x2);
+ boolean_t compressok = (zc->zc_flags & 0x4);
if (zc->zc_obj != 0) {
dsl_pool_t *dp;
@@ -4226,7 +4748,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
}
}
- error = dmu_send_estimate(tosnap, fromsnap,
+ error = dmu_send_estimate(tosnap, fromsnap, compressok,
&zc->zc_objset_type);
if (fromsnap != NULL)
@@ -4240,7 +4762,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
off = fp->f_offset;
error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
- zc->zc_fromobj, embedok, large_block_ok,
+ zc->zc_fromobj, embedok, large_block_ok, compressok,
zc->zc_cookie, fp->f_vnode, &off);
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
@@ -4424,7 +4946,8 @@ zfs_ioc_clear(zfs_cmd_t *zc)
vdev_clear(spa, vd);
- (void) spa_vdev_state_exit(spa, NULL, 0);
+ (void) spa_vdev_state_exit(spa, spa_suspended(spa) ?
+ NULL : spa->spa_root_vdev, 0);
/*
* Resume any suspended I/Os.
@@ -4466,7 +4989,6 @@ zfs_ioc_pool_reopen(zfs_cmd_t *zc)
/*
* inputs:
* zc_name name of filesystem
- * zc_value name of origin snapshot
*
* outputs:
* zc_string name of conflicting snapshot, if there is one
@@ -4474,16 +4996,49 @@ zfs_ioc_pool_reopen(zfs_cmd_t *zc)
static int
zfs_ioc_promote(zfs_cmd_t *zc)
{
+ dsl_pool_t *dp;
+ dsl_dataset_t *ds, *ods;
+ char origin[ZFS_MAX_DATASET_NAME_LEN];
char *cp;
+ int error;
+
+ error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
+ if (error != 0)
+ return (error);
+
+ error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
+ if (error != 0) {
+ dsl_pool_rele(dp, FTAG);
+ return (error);
+ }
+
+ if (!dsl_dir_is_clone(ds->ds_dir)) {
+ dsl_dataset_rele(ds, FTAG);
+ dsl_pool_rele(dp, FTAG);
+ return (SET_ERROR(EINVAL));
+ }
+
+ error = dsl_dataset_hold_obj(dp,
+ dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
+ if (error != 0) {
+ dsl_dataset_rele(ds, FTAG);
+ dsl_pool_rele(dp, FTAG);
+ return (error);
+ }
+
+ dsl_dataset_name(ods, origin);
+ dsl_dataset_rele(ods, FTAG);
+ dsl_dataset_rele(ds, FTAG);
+ dsl_pool_rele(dp, FTAG);
/*
* We don't need to unmount *all* the origin fs's snapshots, but
* it's easier.
*/
- cp = strchr(zc->zc_value, '@');
+ cp = strchr(origin, '@');
if (cp)
*cp = '\0';
- (void) dmu_objset_find(zc->zc_value,
+ (void) dmu_objset_find(origin,
zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
}
@@ -4503,19 +5058,19 @@ zfs_ioc_promote(zfs_cmd_t *zc)
static int
zfs_ioc_userspace_one(zfs_cmd_t *zc)
{
- zfs_sb_t *zsb;
+ zfsvfs_t *zfsvfs;
int error;
if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
return (SET_ERROR(EINVAL));
- error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE);
+ error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
if (error != 0)
return (error);
- error = zfs_userspace_one(zsb,
+ error = zfs_userspace_one(zfsvfs,
zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
- zfs_sb_rele(zsb, FTAG);
+ zfsvfs_rele(zfsvfs, FTAG);
return (error);
}
@@ -4534,7 +5089,7 @@ zfs_ioc_userspace_one(zfs_cmd_t *zc)
static int
zfs_ioc_userspace_many(zfs_cmd_t *zc)
{
- zfs_sb_t *zsb;
+ zfsvfs_t *zfsvfs;
int bufsize = zc->zc_nvlist_dst_size;
int error;
void *buf;
@@ -4542,13 +5097,13 @@ zfs_ioc_userspace_many(zfs_cmd_t *zc)
if (bufsize <= 0)
return (SET_ERROR(ENOMEM));
- error = zfs_sb_hold(zc->zc_name, FTAG, &zsb, B_FALSE);
+ error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
if (error != 0)
return (error);
buf = vmem_alloc(bufsize, KM_SLEEP);
- error = zfs_userspace_many(zsb, zc->zc_objset_type, &zc->zc_cookie,
+ error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
buf, &zc->zc_nvlist_dst_size);
if (error == 0) {
@@ -4557,7 +5112,7 @@ zfs_ioc_userspace_many(zfs_cmd_t *zc)
zc->zc_nvlist_dst_size);
}
vmem_free(buf, bufsize);
- zfs_sb_rele(zsb, FTAG);
+ zfsvfs_rele(zfsvfs, FTAG);
return (error);
}
@@ -4574,25 +5129,28 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
{
objset_t *os;
int error = 0;
- zfs_sb_t *zsb;
+ zfsvfs_t *zfsvfs;
- if (get_zfs_sb(zc->zc_name, &zsb) == 0) {
- if (!dmu_objset_userused_enabled(zsb->z_os)) {
+ if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
+ if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
/*
* If userused is not enabled, it may be because the
* objset needs to be closed & reopened (to grow the
* objset_phys_t). Suspend/resume the fs will do that.
*/
- error = zfs_suspend_fs(zsb);
+ dsl_dataset_t *ds;
+
+ ds = dmu_objset_ds(zfsvfs->z_os);
+ error = zfs_suspend_fs(zfsvfs);
if (error == 0) {
- dmu_objset_refresh_ownership(zsb->z_os,
- zsb);
- error = zfs_resume_fs(zsb, zc->zc_name);
+ dmu_objset_refresh_ownership(zfsvfs->z_os,
+ zfsvfs);
+ error = zfs_resume_fs(zfsvfs, ds);
}
}
if (error == 0)
- error = dmu_objset_userspace_upgrade(zsb->z_os);
- deactivate_super(zsb->z_sb);
+ error = dmu_objset_userspace_upgrade(zfsvfs->z_os);
+ deactivate_super(zfsvfs->z_sb);
} else {
/* XXX kind of reading contents without owning */
error = dmu_objset_hold(zc->zc_name, FTAG, &os);
@@ -4606,6 +5164,48 @@ zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
return (error);
}
+/*
+ * inputs:
+ * zc_name name of filesystem
+ *
+ * outputs:
+ * none
+ */
+static int
+zfs_ioc_userobjspace_upgrade(zfs_cmd_t *zc)
+{
+ objset_t *os;
+ int error;
+
+ error = dmu_objset_hold(zc->zc_name, FTAG, &os);
+ if (error != 0)
+ return (error);
+
+ dsl_dataset_long_hold(dmu_objset_ds(os), FTAG);
+ dsl_pool_rele(dmu_objset_pool(os), FTAG);
+
+ if (dmu_objset_userobjspace_upgradable(os)) {
+ mutex_enter(&os->os_upgrade_lock);
+ if (os->os_upgrade_id == 0) {
+ /* clear potential error code and retry */
+ os->os_upgrade_status = 0;
+ mutex_exit(&os->os_upgrade_lock);
+
+ dmu_objset_userobjspace_upgrade(os);
+ } else {
+ mutex_exit(&os->os_upgrade_lock);
+ }
+
+ taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
+ error = os->os_upgrade_status;
+ }
+
+ dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
+ dsl_dataset_rele(dmu_objset_ds(os), FTAG);
+
+ return (error);
+}
+
static int
zfs_ioc_share(zfs_cmd_t *zc)
{
@@ -4668,7 +5268,8 @@ zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
hold_name);
if (error == 0)
- (void) strcpy(zc->zc_value, snap_name);
+ (void) strlcpy(zc->zc_value, snap_name,
+ sizeof (zc->zc_value));
strfree(snap_name);
strfree(hold_name);
zfs_onexit_fd_rele(zc->zc_cleanup_fd);
@@ -4715,10 +5316,10 @@ zfs_smb_acl_purge(znode_t *dzp)
{
zap_cursor_t zc;
zap_attribute_t zap;
- zfs_sb_t *zsb = ZTOZSB(dzp);
+ zfsvfs_t *zfsvfs = ZTOZSB(dzp);
int error;
- for (zap_cursor_init(&zc, zsb->z_os, dzp->z_id);
+ for (zap_cursor_init(&zc, zfsvfs->z_os, dzp->z_id);
(error = zap_cursor_retrieve(&zc, &zap)) == 0;
zap_cursor_advance(&zc)) {
if ((error = VOP_REMOVE(ZTOV(dzp), zap.za_name, kcred,
@@ -4738,7 +5339,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
znode_t *dzp;
vnode_t *resourcevp = NULL;
znode_t *sharedir;
- zfs_sb_t *zsb;
+ zfsvfs_t *zfsvfs;
nvlist_t *nvlist;
char *src, *target;
vattr_t vattr;
@@ -4759,17 +5360,17 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
}
dzp = VTOZ(vp);
- zsb = ZTOZSB(dzp);
- ZFS_ENTER(zsb);
+ zfsvfs = ZTOZSB(dzp);
+ ZFS_ENTER(zfsvfs);
/*
* Create share dir if its missing.
*/
- mutex_enter(&zsb->z_lock);
- if (zsb->z_shares_dir == 0) {
+ mutex_enter(&zfsvfs->z_lock);
+ if (zfsvfs->z_shares_dir == 0) {
dmu_tx_t *tx;
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, TRUE,
ZFS_SHARES_DIR);
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
@@ -4777,22 +5378,22 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
if (error != 0) {
dmu_tx_abort(tx);
} else {
- error = zfs_create_share_dir(zsb, tx);
+ error = zfs_create_share_dir(zfsvfs, tx);
dmu_tx_commit(tx);
}
if (error != 0) {
- mutex_exit(&zsb->z_lock);
+ mutex_exit(&zfsvfs->z_lock);
VN_RELE(vp);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
}
- mutex_exit(&zsb->z_lock);
+ mutex_exit(&zfsvfs->z_lock);
- ASSERT(zsb->z_shares_dir);
- if ((error = zfs_zget(zsb, zsb->z_shares_dir, &sharedir)) != 0) {
+ ASSERT(zfsvfs->z_shares_dir);
+ if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &sharedir)) != 0) {
VN_RELE(vp);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
@@ -4823,7 +5424,8 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
if ((error = get_nvlist(zc->zc_nvlist_src,
zc->zc_nvlist_src_size, zc->zc_iflags, &nvlist)) != 0) {
VN_RELE(vp);
- ZFS_EXIT(zsb);
+ VN_RELE(ZTOV(sharedir));
+ ZFS_EXIT(zfsvfs);
return (error);
}
if (nvlist_lookup_string(nvlist, ZFS_SMB_ACL_SRC, &src) ||
@@ -4831,7 +5433,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
&target)) {
VN_RELE(vp);
VN_RELE(ZTOV(sharedir));
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
nvlist_free(nvlist);
return (error);
}
@@ -4852,7 +5454,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
VN_RELE(vp);
VN_RELE(ZTOV(sharedir));
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
#else
@@ -4875,6 +5477,7 @@ zfs_ioc_smb_acl(zfs_cmd_t *zc)
static int
zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
{
+ nvpair_t *pair;
nvlist_t *holds;
int cleanup_fd = -1;
int error;
@@ -4884,6 +5487,19 @@ zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
if (error != 0)
return (SET_ERROR(EINVAL));
+ /* make sure the user didn't pass us any invalid (empty) tags */
+ for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
+ pair = nvlist_next_nvpair(holds, pair)) {
+ char *htag;
+
+ error = nvpair_value_string(pair, &htag);
+ if (error != 0)
+ return (SET_ERROR(error));
+
+ if (strlen(htag) == 0)
+ return (SET_ERROR(EINVAL));
+ }
+
if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
error = zfs_onexit_fd_hold(cleanup_fd, &minor);
if (error != 0)
@@ -4908,6 +5524,7 @@ zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
static int
zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
{
+ ASSERT3P(args, ==, NULL);
return (dsl_dataset_get_holds(snapname, outnvl));
}
@@ -4953,7 +5570,7 @@ zfs_ioc_events_next(zfs_cmd_t *zc)
do {
error = zfs_zevent_next(ze, &event,
- &zc->zc_nvlist_dst_size, &dropped);
+ &zc->zc_nvlist_dst_size, &dropped);
if (event != NULL) {
zc->zc_cookie = dropped;
error = put_nvlist(zc, event);
@@ -5118,6 +5735,10 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
* indicates that blocks > 128KB are permitted
* (optional) "embedok" -> (value ignored)
* presence indicates DRR_WRITE_EMBEDDED records are permitted
+ * (optional) "compressok" -> (value ignored)
+ * presence indicates compressed DRR_WRITE records are permitted
+ * (optional) "resume_object" and "resume_offset" -> (uint64)
+ * if present, resume send stream from specified object and offset.
* }
*
* outnvl is unused
@@ -5133,6 +5754,9 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
file_t *fp;
boolean_t largeblockok;
boolean_t embedok;
+ boolean_t compressok;
+ uint64_t resumeobj = 0;
+ uint64_t resumeoff = 0;
error = nvlist_lookup_int32(innvl, "fd", &fd);
if (error != 0)
@@ -5142,13 +5766,17 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
largeblockok = nvlist_exists(innvl, "largeblockok");
embedok = nvlist_exists(innvl, "embedok");
+ compressok = nvlist_exists(innvl, "compressok");
+
+ (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
+ (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
if ((fp = getf(fd)) == NULL)
return (SET_ERROR(EBADF));
off = fp->f_offset;
- error = dmu_send(snapname, fromname, embedok, largeblockok,
- fd, fp->f_vnode, &off);
+ error = dmu_send(snapname, fromname, embedok, largeblockok, compressok,
+ fd, resumeobj, resumeoff, fp->f_vnode, &off);
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
fp->f_offset = off;
@@ -5164,6 +5792,12 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
* innvl: {
* (optional) "from" -> full snap or bookmark name to send an incremental
* from
+ * (optional) "largeblockok" -> (value ignored)
+ * indicates that blocks > 128KB are permitted
+ * (optional) "embedok" -> (value ignored)
+ * presence indicates DRR_WRITE_EMBEDDED records are permitted
+ * (optional) "compressok" -> (value ignored)
+ * presence indicates compressed DRR_WRITE records are permitted
* }
*
* outnvl: {
@@ -5177,6 +5811,11 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
dsl_dataset_t *tosnap;
int error;
char *fromname;
+ /* LINTED E_FUNC_SET_NOT_USED */
+ boolean_t largeblockok;
+ /* LINTED E_FUNC_SET_NOT_USED */
+ boolean_t embedok;
+ boolean_t compressok;
uint64_t space;
error = dsl_pool_hold(snapname, FTAG, &dp);
@@ -5189,6 +5828,10 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
return (error);
}
+ largeblockok = nvlist_exists(innvl, "largeblockok");
+ embedok = nvlist_exists(innvl, "embedok");
+ compressok = nvlist_exists(innvl, "compressok");
+
error = nvlist_lookup_string(innvl, "from", &fromname);
if (error == 0) {
if (strchr(fromname, '@') != NULL) {
@@ -5201,7 +5844,8 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
if (error != 0)
goto out;
- error = dmu_send_estimate(tosnap, fromsnap, &space);
+ error = dmu_send_estimate(tosnap, fromsnap, compressok,
+ &space);
dsl_dataset_rele(fromsnap, FTAG);
} else if (strchr(fromname, '#') != NULL) {
/*
@@ -5216,7 +5860,7 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
if (error != 0)
goto out;
error = dmu_send_estimate_from_txg(tosnap,
- frombm.zbm_creation_txg, &space);
+ frombm.zbm_creation_txg, compressok, &space);
} else {
/*
* from is not properly formatted as a snapshot or
@@ -5227,7 +5871,7 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
}
} else {
// If estimating the size of a full send, use dmu_send_estimate
- error = dmu_send_estimate(tosnap, NULL, &space);
+ error = dmu_send_estimate(tosnap, NULL, compressok, &space);
}
fnvlist_add_uint64(outnvl, "space", space);
@@ -5238,6 +5882,44 @@ zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
return (error);
}
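The shape of the optional "from" argument decides which estimate path runs above: '@' selects the snapshot-based estimate, '#' the bookmark-based one, and anything else fails with EINVAL. A standalone sketch of that classification (illustration only, not kernel code):

#include <string.h>

enum from_kind { FROM_SNAPSHOT, FROM_BOOKMARK, FROM_INVALID };

static enum from_kind
classify_from(const char *fromname)
{
	if (strchr(fromname, '@') != NULL)
		return (FROM_SNAPSHOT);	/* incremental from a snapshot */
	if (strchr(fromname, '#') != NULL)
		return (FROM_BOOKMARK);	/* incremental from a bookmark */
	return (FROM_INVALID);		/* rejected with EINVAL */
}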
+/*
+ * Sync the currently open TXG to disk for the specified pool.
+ * This is somewhat similar to 'zfs_sync()'.
+ * For cases that do not result in error this ioctl will wait for
+ * the currently open TXG to commit before returning back to the caller.
+ *
+ * innvl: {
+ * "force" -> when true, force uberblock update even if there is no dirty data.
+ * In addition this will cause the vdev configuration to be written
+ * out including updating the zpool cache file. (boolean_t)
+ * }
+ *
+ * onvl is unused
+ */
+/* ARGSUSED */
+static int
+zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
+{
+ int err;
+ boolean_t force;
+ spa_t *spa;
+
+ if ((err = spa_open(pool, &spa, FTAG)) != 0)
+ return (err);
+
+ force = fnvlist_lookup_boolean_value(innvl, "force");
+ if (force) {
+ spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
+ vdev_config_dirty(spa->spa_root_vdev);
+ spa_config_exit(spa, SCL_CONFIG, FTAG);
+ }
+ txg_wait_synced(spa_get_dsl(spa), 0);
+
+ spa_close(spa, FTAG);
+
+ return (err);
+}
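Note that fnvlist_lookup_boolean_value() asserts that the key exists, so callers of this ioctl should always supply "force". An illustrative user-space construction of the innvl (libnvpair assumed; build_sync_innvl() is a hypothetical helper):

#include <libnvpair.h>

static nvlist_t *
build_sync_innvl(boolean_t force)
{
	nvlist_t *innvl = fnvlist_alloc();

	/* "force" must always be present; its value selects the behavior */
	fnvlist_add_boolean_value(innvl, "force", force);
	return (innvl);
}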
+
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
static void
@@ -5337,7 +6019,7 @@ zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
static void
zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
- zfs_secpolicy_func_t *secpolicy)
+ zfs_secpolicy_func_t *secpolicy)
{
zfs_ioctl_register_legacy(ioc, func, secpolicy,
DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
@@ -5406,6 +6088,14 @@ zfs_ioctl_init(void)
POOL_NAME,
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+ zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW,
+ zfs_ioc_recv_new, zfs_secpolicy_recv_new, DATASET_NAME,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE);
+
+ zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
+ zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
+ POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE);
+
/* IOCTLS that use the legacy function signature */
zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
@@ -5477,7 +6167,7 @@ zfs_ioctl_init(void)
zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
- zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
+ zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);
zfs_ioctl_register_pool(ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
zfs_secpolicy_config, B_TRUE, POOL_CHECK_SUSPENDED);
@@ -5783,7 +6473,23 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
}
zc->zc_iflags = flag & FKIOCTL;
- if (zc->zc_nvlist_src_size != 0) {
+ if (zc->zc_nvlist_src_size > MAX_NVLIST_SRC_SIZE) {
+ /*
+ * Make sure the user doesn't pass in an insane value for
+ * zc_nvlist_src_size. We have to check, since we will end
+ * up allocating that much memory inside of get_nvlist(). This
+ * prevents a nefarious user from allocating tons of kernel
+ * memory.
+ *
+ * Also, we return EINVAL instead of ENOMEM here, because
+ * returning ENOMEM from an ioctl() has a special connotation:
+ * it means that the user's size value is too small and
+ * needs to be expanded to hold the nvlist. See
+ * zcmd_expand_dst_nvlist() for details.
+ */
+ error = SET_ERROR(EINVAL); /* User's size too big */
+
+ } else if (zc->zc_nvlist_src_size != 0) {
error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
zc->zc_iflags, &innvl);
if (error != 0)
@@ -5817,7 +6523,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
}
- if (error == 0 && !(flag & FKIOCTL)) {
+ if (error == 0) {
cookie = spl_fstrans_mark();
error = vec->zvec_secpolicy(zc, innvl, CRED());
spl_fstrans_unmark(cookie);
@@ -5975,7 +6681,9 @@ static void
zfs_allow_log_destroy(void *arg)
{
char *poolname = arg;
- strfree(poolname);
+
+ if (poolname != NULL)
+ strfree(poolname);
}
#ifdef DEBUG
diff --git a/zfs/module/zfs/zfs_log.c b/zfs/module/zfs/zfs_log.c
index 38d8de0ebf97..8887f037aa34 100644
--- a/zfs/module/zfs/zfs_log.c
+++ b/zfs/module/zfs/zfs_log.c
@@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015 by Delphix. All rights reserved.
*/
@@ -211,6 +212,34 @@ zfs_log_fuid_domains(zfs_fuid_info_t *fuidp, void *start)
return (start);
}
+/*
+ * If zp is an xattr node, check whether the xattr owner is unlinked.
+ * We don't want to log anything if the owner is unlinked.
+ */
+static int
+zfs_xattr_owner_unlinked(znode_t *zp)
+{
+ int unlinked = 0;
+ znode_t *dzp;
+ igrab(ZTOI(zp));
+ /*
+ * if zp is an XATTR node, keep walking up via z_xattr_parent until
+ * we reach the owner
+ */
+ while (zp->z_pflags & ZFS_XATTR) {
+ ASSERT3U(zp->z_xattr_parent, !=, 0);
+ if (zfs_zget(ZTOZSB(zp), zp->z_xattr_parent, &dzp) != 0) {
+ unlinked = 1;
+ break;
+ }
+ iput(ZTOI(zp));
+ zp = dzp;
+ unlinked = zp->z_unlinked;
+ }
+ iput(ZTOI(zp));
+ return (unlinked);
+}
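Note the reference handling in the walk above: igrab() takes an extra hold on the starting znode before the loop, and each iteration drops the current hold with iput() only after zfs_zget() has acquired the parent, so the caller's own reference is never consumed.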
+
/*
* Handles TX_CREATE, TX_CREATE_ATTR, TX_MKDIR, TX_MKDIR_ATTR and
* TK_MKXATTR transactions.
@@ -247,7 +276,7 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
size_t namesize = strlen(name) + 1;
size_t fuidsz = 0;
- if (zil_replaying(zilog, tx))
+ if (zil_replaying(zilog, tx) || zfs_xattr_owner_unlinked(dzp))
return;
/*
@@ -279,14 +308,16 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
lr = (lr_create_t *)&itx->itx_lr;
lr->lr_doid = dzp->z_id;
lr->lr_foid = zp->z_id;
+ /* Store dnode slot count in 8 bits above object id. */
+ LR_FOID_SET_SLOTS(lr->lr_foid, zp->z_dnodesize >> DNODE_SHIFT);
lr->lr_mode = zp->z_mode;
- if (!IS_EPHEMERAL(zp->z_uid)) {
- lr->lr_uid = (uint64_t)zp->z_uid;
+ if (!IS_EPHEMERAL(KUID_TO_SUID(ZTOI(zp)->i_uid))) {
+ lr->lr_uid = (uint64_t)KUID_TO_SUID(ZTOI(zp)->i_uid);
} else {
lr->lr_uid = fuidp->z_fuid_owner;
}
- if (!IS_EPHEMERAL(zp->z_gid)) {
- lr->lr_gid = (uint64_t)zp->z_gid;
+ if (!IS_EPHEMERAL(KGID_TO_SGID(ZTOI(zp)->i_gid))) {
+ lr->lr_gid = (uint64_t)KGID_TO_SGID(ZTOI(zp)->i_gid);
} else {
lr->lr_gid = fuidp->z_fuid_group;
}
@@ -344,13 +375,13 @@ zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
*/
void
zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
- znode_t *dzp, char *name, uint64_t foid)
+ znode_t *dzp, char *name, uint64_t foid)
{
itx_t *itx;
lr_remove_t *lr;
size_t namesize = strlen(name) + 1;
- if (zil_replaying(zilog, tx))
+ if (zil_replaying(zilog, tx) || zfs_xattr_owner_unlinked(dzp))
return;
itx = zil_itx_create(txtype, sizeof (*lr) + namesize);
@@ -368,7 +399,7 @@ zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
*/
void
zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
- znode_t *dzp, znode_t *zp, char *name)
+ znode_t *dzp, znode_t *zp, char *name)
{
itx_t *itx;
lr_link_t *lr;
@@ -405,8 +436,8 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
lr = (lr_create_t *)&itx->itx_lr;
lr->lr_doid = dzp->z_id;
lr->lr_foid = zp->z_id;
- lr->lr_uid = zp->z_uid;
- lr->lr_gid = zp->z_gid;
+ lr->lr_uid = KUID_TO_SUID(ZTOI(zp)->i_uid);
+ lr->lr_gid = KGID_TO_SGID(ZTOI(zp)->i_gid);
lr->lr_mode = zp->z_mode;
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &lr->lr_gen,
sizeof (uint64_t));
@@ -423,7 +454,7 @@ zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
*/
void
zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
- znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp)
+ znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp)
{
itx_t *itx;
lr_rename_t *lr;
@@ -453,26 +484,24 @@ long zfs_immediate_write_sz = 32768;
void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *zp, offset_t off, ssize_t resid, int ioflag,
- zil_callback_t callback, void *callback_data)
+ znode_t *zp, offset_t off, ssize_t resid, int ioflag,
+ zil_callback_t callback, void *callback_data)
{
+ uint32_t blocksize = zp->z_blksz;
itx_wr_state_t write_state;
- boolean_t slogging;
uintptr_t fsync_cnt;
- ssize_t immediate_write_sz;
- if (zil_replaying(zilog, tx) || zp->z_unlinked) {
+ if (zil_replaying(zilog, tx) || zp->z_unlinked ||
+ zfs_xattr_owner_unlinked(zp)) {
if (callback != NULL)
callback(callback_data);
return;
}
- immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
- ? 0 : (ssize_t)zfs_immediate_write_sz;
-
- slogging = spa_has_slogs(zilog->zl_spa) &&
- (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
- if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz)
+ if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+ write_state = WR_INDIRECT;
+ else if (!spa_has_slogs(zilog->zl_spa) &&
+ resid >= zfs_immediate_write_sz)
write_state = WR_INDIRECT;
else if (ioflag & (FSYNC | FDSYNC))
write_state = WR_COPIED;
@@ -486,30 +515,26 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
while (resid) {
itx_t *itx;
lr_write_t *lr;
- ssize_t len;
+ itx_wr_state_t wr_state = write_state;
+ ssize_t len = resid;
- /*
- * If the write would overflow the largest block then split it.
- */
- if (write_state != WR_INDIRECT && resid > ZIL_MAX_LOG_DATA)
- len = SPA_OLD_MAXBLOCKSIZE >> 1;
- else
- len = resid;
+ if (wr_state == WR_COPIED && resid > ZIL_MAX_COPIED_DATA)
+ wr_state = WR_NEED_COPY;
+ else if (wr_state == WR_INDIRECT)
+ len = MIN(blocksize - P2PHASE(off, blocksize), resid);
itx = zil_itx_create(txtype, sizeof (*lr) +
- (write_state == WR_COPIED ? len : 0));
+ (wr_state == WR_COPIED ? len : 0));
lr = (lr_write_t *)&itx->itx_lr;
- if (write_state == WR_COPIED && dmu_read(ZTOZSB(zp)->z_os,
+ if (wr_state == WR_COPIED && dmu_read(ZTOZSB(zp)->z_os,
zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
zil_itx_destroy(itx);
itx = zil_itx_create(txtype, sizeof (*lr));
lr = (lr_write_t *)&itx->itx_lr;
- write_state = WR_NEED_COPY;
+ wr_state = WR_NEED_COPY;
}
- itx->itx_wr_state = write_state;
- if (write_state == WR_NEED_COPY)
- itx->itx_sod += len;
+ itx->itx_wr_state = wr_state;
lr->lr_foid = zp->z_id;
lr->lr_offset = off;
lr->lr_length = len;
@@ -536,12 +561,13 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
*/
void
zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
- znode_t *zp, uint64_t off, uint64_t len)
+ znode_t *zp, uint64_t off, uint64_t len)
{
itx_t *itx;
lr_truncate_t *lr;
- if (zil_replaying(zilog, tx) || zp->z_unlinked)
+ if (zil_replaying(zilog, tx) || zp->z_unlinked ||
+ zfs_xattr_owner_unlinked(zp))
return;
itx = zil_itx_create(txtype, sizeof (*lr));
diff --git a/zfs/module/zfs/zfs_replay.c b/zfs/module/zfs/zfs_replay.c
index 0ca1e03b595b..30efb4b57bc4 100644
--- a/zfs/module/zfs/zfs_replay.c
+++ b/zfs/module/zfs/zfs_replay.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 Cyril Plisko. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2015 by Delphix. All rights reserved.
*/
#include <sys/types.h>
@@ -58,7 +58,7 @@
static void
zfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode,
- uint64_t uid, uint64_t gid, uint64_t rdev, uint64_t nodeid)
+ uint64_t uid, uint64_t gid, uint64_t rdev, uint64_t nodeid)
{
bzero(vap, sizeof (*vap));
vap->va_mask = (uint_t)mask;
@@ -72,7 +72,7 @@ zfs_init_vattr(vattr_t *vap, uint64_t mask, uint64_t mode,
/* ARGSUSED */
static int
-zfs_replay_error(zfs_sb_t *zsb, lr_t *lr, boolean_t byteswap)
+zfs_replay_error(zfsvfs_t *zfsvfs, lr_t *lr, boolean_t byteswap)
{
return (SET_ERROR(ENOTSUP));
}
@@ -204,7 +204,7 @@ zfs_replay_fuid_domain(void *buf, void **end, uint64_t uid, uint64_t gid)
return (fuid_infop);
fuid_infop->z_domain_table =
- kmem_zalloc(domcnt * sizeof (char **), KM_SLEEP);
+ kmem_zalloc(domcnt * sizeof (char *), KM_SLEEP);
zfs_replay_fuid_ugid(fuid_infop, uid, gid);
@@ -228,7 +228,7 @@ zfs_replay_fuids(void *start, void **end, int idcnt, int domcnt, uint64_t uid,
fuid_infop->z_domain_cnt = domcnt;
fuid_infop->z_domain_table =
- kmem_zalloc(domcnt * sizeof (char **), KM_SLEEP);
+ kmem_zalloc(domcnt * sizeof (char *), KM_SLEEP);
for (i = 0; i != idcnt; i++) {
zfs_fuid_t *zfuid;
@@ -265,7 +265,8 @@ zfs_replay_swap_attrs(lr_attr_t *lrattr)
* as option FUID information.
*/
static int
-zfs_replay_create_acl(zfs_sb_t *zsb, lr_acl_create_t *lracl, boolean_t byteswap)
+zfs_replay_create_acl(zfsvfs_t *zfsvfs,
+ lr_acl_create_t *lracl, boolean_t byteswap)
{
char *name = NULL; /* location determined later */
lr_create_t *lr = (lr_create_t *)lracl;
@@ -279,6 +280,8 @@ zfs_replay_create_acl(zfs_sb_t *zsb, lr_acl_create_t *lracl, boolean_t byteswap)
void *fuidstart;
size_t xvatlen = 0;
uint64_t txtype;
+ uint64_t objid;
+ uint64_t dnodesize;
int error;
txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
@@ -301,24 +304,29 @@ zfs_replay_create_acl(zfs_sb_t *zsb, lr_acl_create_t *lracl, boolean_t byteswap)
}
}
- if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0)
+ if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
return (error);
+ objid = LR_FOID_GET_OBJ(lr->lr_foid);
+ dnodesize = LR_FOID_GET_SLOTS(lr->lr_foid) << DNODE_SHIFT;
+
xva_init(&xva);
zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID,
- lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid);
+ lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, objid);
/*
* All forms of zfs create (create, mkdir, mkxattrdir, symlink)
* eventually end up in zfs_mknode(), which assigns the object's
- * creation time and generation number. The generic zfs_create()
- * doesn't have either concept, so we smuggle the values inside
- * the vattr's otherwise unused va_ctime and va_nblocks fields.
+ * creation time, generation number, and dnode size. The generic
+ * zfs_create() has no concept of these attributes, so we smuggle
+ * the values inside the vattr's otherwise unused va_ctime,
+ * va_nblocks, and va_fsid fields.
*/
ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
xva.xva_vattr.va_nblocks = lr->lr_gen;
+ xva.xva_vattr.va_fsid = dnodesize;
- error = dmu_object_info(zsb->z_os, lr->lr_foid, NULL);
+ error = dmu_object_info(zfsvfs->z_os, lr->lr_foid, NULL);
if (error != ENOENT)
goto bail;
@@ -329,7 +337,7 @@ zfs_replay_create_acl(zfs_sb_t *zsb, lr_acl_create_t *lracl, boolean_t byteswap)
aclstart = (caddr_t)(lracl + 1);
fuidstart = (caddr_t)aclstart +
ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
- zsb->z_fuid_replay = zfs_replay_fuids(fuidstart,
+ zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
(void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
lr->lr_uid, lr->lr_gid);
/*FALLTHROUGH*/
@@ -345,10 +353,10 @@ zfs_replay_create_acl(zfs_sb_t *zsb, lr_acl_create_t *lracl, boolean_t byteswap)
vsec.vsa_aclcnt = lracl->lr_aclcnt;
vsec.vsa_aclentsz = lracl->lr_acl_bytes;
vsec.vsa_aclflags = lracl->lr_acl_flags;
- if (zsb->z_fuid_replay == NULL) {
+ if (zfsvfs->z_fuid_replay == NULL) {
fuidstart = (caddr_t)(lracl + 1) + xvatlen +
ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
- zsb->z_fuid_replay =
+ zfsvfs->z_fuid_replay =
zfs_replay_fuids(fuidstart,
(void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
lr->lr_uid, lr->lr_gid);
@@ -361,7 +369,7 @@ zfs_replay_create_acl(zfs_sb_t *zsb, lr_acl_create_t *lracl, boolean_t byteswap)
aclstart = (caddr_t)(lracl + 1);
fuidstart = (caddr_t)aclstart +
ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
- zsb->z_fuid_replay = zfs_replay_fuids(fuidstart,
+ zfsvfs->z_fuid_replay = zfs_replay_fuids(fuidstart,
(void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
lr->lr_uid, lr->lr_gid);
/*FALLTHROUGH*/
@@ -376,10 +384,10 @@ zfs_replay_create_acl(zfs_sb_t *zsb, lr_acl_create_t *lracl, boolean_t byteswap)
vsec.vsa_aclcnt = lracl->lr_aclcnt;
vsec.vsa_aclentsz = lracl->lr_acl_bytes;
vsec.vsa_aclflags = lracl->lr_acl_flags;
- if (zsb->z_fuid_replay == NULL) {
+ if (zfsvfs->z_fuid_replay == NULL) {
fuidstart = (caddr_t)(lracl + 1) + xvatlen +
ZIL_ACE_LENGTH(lracl->lr_acl_bytes);
- zsb->z_fuid_replay =
+ zfsvfs->z_fuid_replay =
zfs_replay_fuids(fuidstart,
(void *)&name, lracl->lr_fuidcnt, lracl->lr_domcnt,
lr->lr_uid, lr->lr_gid);
@@ -397,15 +405,15 @@ zfs_replay_create_acl(zfs_sb_t *zsb, lr_acl_create_t *lracl, boolean_t byteswap)
iput(ZTOI(dzp));
- if (zsb->z_fuid_replay)
- zfs_fuid_info_free(zsb->z_fuid_replay);
- zsb->z_fuid_replay = NULL;
+ if (zfsvfs->z_fuid_replay)
+ zfs_fuid_info_free(zfsvfs->z_fuid_replay);
+ zfsvfs->z_fuid_replay = NULL;
return (error);
}
static int
-zfs_replay_create(zfs_sb_t *zsb, lr_create_t *lr, boolean_t byteswap)
+zfs_replay_create(zfsvfs_t *zfsvfs, lr_create_t *lr, boolean_t byteswap)
{
char *name = NULL; /* location determined later */
char *link; /* symlink content follows name */
@@ -418,6 +426,8 @@ zfs_replay_create(zfs_sb_t *zsb, lr_create_t *lr, boolean_t byteswap)
void *start;
size_t xvatlen;
uint64_t txtype;
+ uint64_t objid;
+ uint64_t dnodesize;
int error;
txtype = (lr->lr_common.lrc_txtype & ~TX_CI);
@@ -428,24 +438,29 @@ zfs_replay_create(zfs_sb_t *zsb, lr_create_t *lr, boolean_t byteswap)
}
- if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0)
+ if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
return (error);
+ objid = LR_FOID_GET_OBJ(lr->lr_foid);
+ dnodesize = LR_FOID_GET_SLOTS(lr->lr_foid) << DNODE_SHIFT;
+
xva_init(&xva);
zfs_init_vattr(&xva.xva_vattr, ATTR_MODE | ATTR_UID | ATTR_GID,
- lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, lr->lr_foid);
+ lr->lr_mode, lr->lr_uid, lr->lr_gid, lr->lr_rdev, objid);
/*
* All forms of zfs create (create, mkdir, mkxattrdir, symlink)
* eventually end up in zfs_mknode(), which assigns the object's
- * creation time and generation number. The generic zfs_create()
- * doesn't have either concept, so we smuggle the values inside
- * the vattr's otherwise unused va_ctime and va_nblocks fields.
+ * creation time, generation number, and dnode slot count. The
+ * generic zfs_create() has no concept of these attributes, so
+ * we smuggle the values inside the vattr's otherwise unused
+ * va_ctime, va_nblocks, and va_fsid fields.
*/
ZFS_TIME_DECODE(&xva.xva_vattr.va_ctime, lr->lr_crtime);
xva.xva_vattr.va_nblocks = lr->lr_gen;
+ xva.xva_vattr.va_fsid = dnodesize;
- error = dmu_object_info(zsb->z_os, lr->lr_foid, NULL);
+ error = dmu_object_info(zfsvfs->z_os, objid, NULL);
if (error != ENOENT)
goto out;
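Both create replay paths above now decode two values from lr_foid. A sketch of the unpacking, under the assumption that the slot count occupies the top 8 bits and the object number the low 48 bits; the authoritative LR_FOID_GET_OBJ()/LR_FOID_GET_SLOTS() macros are defined in sys/zil.h:

#include <stdio.h>
#include <stdint.h>

#define DNODE_SHIFT	9	/* 512-byte dnode units */
#define FOID_OBJ_BITS	48	/* assumed layout, see sys/zil.h */
#define FOID_SLOT_SHIFT	56

static uint64_t
foid_get_obj(uint64_t foid)
{
	return (foid & ((1ULL << FOID_OBJ_BITS) - 1));
}

static uint64_t
foid_get_slots(uint64_t foid)
{
	return (foid >> FOID_SLOT_SHIFT);
}

int
main(void)
{
	uint64_t foid = (2ULL << FOID_SLOT_SHIFT) | 0x1234;

	/* Mirrors: dnodesize = LR_FOID_GET_SLOTS(foid) << DNODE_SHIFT */
	printf("objid=0x%llx dnodesize=%llu\n",
	    (unsigned long long)foid_get_obj(foid),
	    (unsigned long long)(foid_get_slots(foid) << DNODE_SHIFT));
	return (0);
}

Legacy records encode zero slots, so dnodesize comes out as 0 for them.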
@@ -462,7 +477,7 @@ zfs_replay_create(zfs_sb_t *zsb, lr_create_t *lr, boolean_t byteswap)
(int)lr->lr_common.lrc_txtype != TX_MKDIR_ATTR &&
(int)lr->lr_common.lrc_txtype != TX_CREATE_ATTR) {
start = (lr + 1);
- zsb->z_fuid_replay =
+ zfsvfs->z_fuid_replay =
zfs_replay_fuid_domain(start, &start,
lr->lr_uid, lr->lr_gid);
}
@@ -473,7 +488,7 @@ zfs_replay_create(zfs_sb_t *zsb, lr_create_t *lr, boolean_t byteswap)
xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
start = (caddr_t)(lr + 1) + xvatlen;
- zsb->z_fuid_replay =
+ zfsvfs->z_fuid_replay =
zfs_replay_fuid_domain(start, &start,
lr->lr_uid, lr->lr_gid);
name = (char *)start;
@@ -491,7 +506,7 @@ zfs_replay_create(zfs_sb_t *zsb, lr_create_t *lr, boolean_t byteswap)
xvatlen = ZIL_XVAT_SIZE(lrattr->lr_attr_masksize);
zfs_replay_xvattr((lr_attr_t *)((caddr_t)lr + lrsize), &xva);
start = (caddr_t)(lr + 1) + xvatlen;
- zsb->z_fuid_replay =
+ zfsvfs->z_fuid_replay =
zfs_replay_fuid_domain(start, &start,
lr->lr_uid, lr->lr_gid);
name = (char *)start;
@@ -523,14 +538,14 @@ zfs_replay_create(zfs_sb_t *zsb, lr_create_t *lr, boolean_t byteswap)
iput(ZTOI(dzp));
- if (zsb->z_fuid_replay)
- zfs_fuid_info_free(zsb->z_fuid_replay);
- zsb->z_fuid_replay = NULL;
+ if (zfsvfs->z_fuid_replay)
+ zfs_fuid_info_free(zfsvfs->z_fuid_replay);
+ zfsvfs->z_fuid_replay = NULL;
return (error);
}
static int
-zfs_replay_remove(zfs_sb_t *zsb, lr_remove_t *lr, boolean_t byteswap)
+zfs_replay_remove(zfsvfs_t *zfsvfs, lr_remove_t *lr, boolean_t byteswap)
{
char *name = (char *)(lr + 1); /* name follows lr_remove_t */
znode_t *dzp;
@@ -540,7 +555,7 @@ zfs_replay_remove(zfs_sb_t *zsb, lr_remove_t *lr, boolean_t byteswap)
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
- if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0)
+ if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
return (error);
if (lr->lr_common.lrc_txtype & TX_CI)
@@ -548,7 +563,7 @@ zfs_replay_remove(zfs_sb_t *zsb, lr_remove_t *lr, boolean_t byteswap)
switch ((int)lr->lr_common.lrc_txtype) {
case TX_REMOVE:
- error = zfs_remove(ZTOI(dzp), name, kcred);
+ error = zfs_remove(ZTOI(dzp), name, kcred, vflg);
break;
case TX_RMDIR:
error = zfs_rmdir(ZTOI(dzp), name, NULL, kcred, vflg);
@@ -563,7 +578,7 @@ zfs_replay_remove(zfs_sb_t *zsb, lr_remove_t *lr, boolean_t byteswap)
}
static int
-zfs_replay_link(zfs_sb_t *zsb, lr_link_t *lr, boolean_t byteswap)
+zfs_replay_link(zfsvfs_t *zfsvfs, lr_link_t *lr, boolean_t byteswap)
{
char *name = (char *)(lr + 1); /* name follows lr_link_t */
znode_t *dzp, *zp;
@@ -573,10 +588,10 @@ zfs_replay_link(zfs_sb_t *zsb, lr_link_t *lr, boolean_t byteswap)
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
- if ((error = zfs_zget(zsb, lr->lr_doid, &dzp)) != 0)
+ if ((error = zfs_zget(zfsvfs, lr->lr_doid, &dzp)) != 0)
return (error);
- if ((error = zfs_zget(zsb, lr->lr_link_obj, &zp)) != 0) {
+ if ((error = zfs_zget(zfsvfs, lr->lr_link_obj, &zp)) != 0) {
iput(ZTOI(dzp));
return (error);
}
@@ -584,7 +599,7 @@ zfs_replay_link(zfs_sb_t *zsb, lr_link_t *lr, boolean_t byteswap)
if (lr->lr_common.lrc_txtype & TX_CI)
vflg |= FIGNORECASE;
- error = zfs_link(ZTOI(dzp), ZTOI(zp), name, kcred);
+ error = zfs_link(ZTOI(dzp), ZTOI(zp), name, kcred, vflg);
iput(ZTOI(zp));
iput(ZTOI(dzp));
@@ -593,7 +608,7 @@ zfs_replay_link(zfs_sb_t *zsb, lr_link_t *lr, boolean_t byteswap)
}
static int
-zfs_replay_rename(zfs_sb_t *zsb, lr_rename_t *lr, boolean_t byteswap)
+zfs_replay_rename(zfsvfs_t *zfsvfs, lr_rename_t *lr, boolean_t byteswap)
{
char *sname = (char *)(lr + 1); /* sname and tname follow lr_rename_t */
char *tname = sname + strlen(sname) + 1;
@@ -604,10 +619,10 @@ zfs_replay_rename(zfs_sb_t *zsb, lr_rename_t *lr, boolean_t byteswap)
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
- if ((error = zfs_zget(zsb, lr->lr_sdoid, &sdzp)) != 0)
+ if ((error = zfs_zget(zfsvfs, lr->lr_sdoid, &sdzp)) != 0)
return (error);
- if ((error = zfs_zget(zsb, lr->lr_tdoid, &tdzp)) != 0) {
+ if ((error = zfs_zget(zfsvfs, lr->lr_tdoid, &tdzp)) != 0) {
iput(ZTOI(sdzp));
return (error);
}
@@ -624,7 +639,7 @@ zfs_replay_rename(zfs_sb_t *zsb, lr_rename_t *lr, boolean_t byteswap)
}
static int
-zfs_replay_write(zfs_sb_t *zsb, lr_write_t *lr, boolean_t byteswap)
+zfs_replay_write(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
{
char *data = (char *)(lr + 1); /* data follows lr_write_t */
znode_t *zp;
@@ -634,7 +649,7 @@ zfs_replay_write(zfs_sb_t *zsb, lr_write_t *lr, boolean_t byteswap)
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
- if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0) {
+ if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0) {
/*
* As we can log writes out of order, it's possible the
* file has been removed. In this case just drop the write
@@ -657,10 +672,10 @@ zfs_replay_write(zfs_sb_t *zsb, lr_write_t *lr, boolean_t byteswap)
* write needs to be there. So we write the whole block and
* reduce the eof. This needs to be done within the single dmu
* transaction created within vn_rdwr -> zfs_write. So a possible
- * new end of file is passed through in zsb->z_replay_eof
+ * new end of file is passed through in zfsvfs->z_replay_eof
*/
- zsb->z_replay_eof = 0; /* 0 means don't change end of file */
+ zfsvfs->z_replay_eof = 0; /* 0 means don't change end of file */
/* If it's a dmu_sync() block, write the whole block */
if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
@@ -670,7 +685,7 @@ zfs_replay_write(zfs_sb_t *zsb, lr_write_t *lr, boolean_t byteswap)
length = blocksize;
}
if (zp->z_size < eod)
- zsb->z_replay_eof = eod;
+ zfsvfs->z_replay_eof = eod;
}
written = zpl_write_common(ZTOI(zp), data, length, &offset,
@@ -681,7 +696,7 @@ zfs_replay_write(zfs_sb_t *zsb, lr_write_t *lr, boolean_t byteswap)
error = SET_ERROR(EIO); /* short write */
iput(ZTOI(zp));
- zsb->z_replay_eof = 0; /* safety */
+ zfsvfs->z_replay_eof = 0; /* safety */
return (error);
}
@@ -693,7 +708,7 @@ zfs_replay_write(zfs_sb_t *zsb, lr_write_t *lr, boolean_t byteswap)
* the file is grown.
*/
static int
-zfs_replay_write2(zfs_sb_t *zsb, lr_write_t *lr, boolean_t byteswap)
+zfs_replay_write2(zfsvfs_t *zfsvfs, lr_write_t *lr, boolean_t byteswap)
{
znode_t *zp;
int error;
@@ -702,13 +717,13 @@ zfs_replay_write2(zfs_sb_t *zsb, lr_write_t *lr, boolean_t byteswap)
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
- if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0)
+ if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
return (error);
top:
end = lr->lr_offset + lr->lr_length;
if (end > zp->z_size) {
- dmu_tx_t *tx = dmu_tx_create(zsb->z_os);
+ dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
zp->z_size = end;
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
@@ -723,11 +738,11 @@ zfs_replay_write2(zfs_sb_t *zsb, lr_write_t *lr, boolean_t byteswap)
dmu_tx_abort(tx);
return (error);
}
- (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zsb),
+ (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
(void *)&zp->z_size, sizeof (uint64_t), tx);
/* Ensure the replayed seq is updated */
- (void) zil_replaying(zsb->z_log, tx);
+ (void) zil_replaying(zfsvfs->z_log, tx);
dmu_tx_commit(tx);
}
@@ -738,7 +753,7 @@ zfs_replay_write2(zfs_sb_t *zsb, lr_write_t *lr, boolean_t byteswap)
}
static int
-zfs_replay_truncate(zfs_sb_t *zsb, lr_truncate_t *lr, boolean_t byteswap)
+zfs_replay_truncate(zfsvfs_t *zfsvfs, lr_truncate_t *lr, boolean_t byteswap)
{
znode_t *zp;
flock64_t fl;
@@ -747,7 +762,7 @@ zfs_replay_truncate(zfs_sb_t *zsb, lr_truncate_t *lr, boolean_t byteswap)
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
- if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0)
+ if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
return (error);
bzero(&fl, sizeof (fl));
@@ -765,7 +780,7 @@ zfs_replay_truncate(zfs_sb_t *zsb, lr_truncate_t *lr, boolean_t byteswap)
}
static int
-zfs_replay_setattr(zfs_sb_t *zsb, lr_setattr_t *lr, boolean_t byteswap)
+zfs_replay_setattr(zfsvfs_t *zfsvfs, lr_setattr_t *lr, boolean_t byteswap)
{
znode_t *zp;
xvattr_t xva;
@@ -778,11 +793,11 @@ zfs_replay_setattr(zfs_sb_t *zsb, lr_setattr_t *lr, boolean_t byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
if ((lr->lr_mask & ATTR_XVATTR) &&
- zsb->z_version >= ZPL_VERSION_INITIAL)
+ zfsvfs->z_version >= ZPL_VERSION_INITIAL)
zfs_replay_swap_attrs((lr_attr_t *)(lr + 1));
}
- if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0)
+ if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
return (error);
zfs_init_vattr(vap, lr->lr_mask, lr->lr_mode,
@@ -791,6 +806,8 @@ zfs_replay_setattr(zfs_sb_t *zsb, lr_setattr_t *lr, boolean_t byteswap)
vap->va_size = lr->lr_size;
ZFS_TIME_DECODE(&vap->va_atime, lr->lr_atime);
ZFS_TIME_DECODE(&vap->va_mtime, lr->lr_mtime);
+ gethrestime(&vap->va_ctime);
+ vap->va_mask |= ATTR_CTIME;
/*
* Fill in xvattr_t portions if necessary.
@@ -804,20 +821,20 @@ zfs_replay_setattr(zfs_sb_t *zsb, lr_setattr_t *lr, boolean_t byteswap)
} else
xva.xva_vattr.va_mask &= ~ATTR_XVATTR;
- zsb->z_fuid_replay = zfs_replay_fuid_domain(start, &start,
+ zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start,
lr->lr_uid, lr->lr_gid);
error = zfs_setattr(ZTOI(zp), vap, 0, kcred);
- zfs_fuid_info_free(zsb->z_fuid_replay);
- zsb->z_fuid_replay = NULL;
+ zfs_fuid_info_free(zfsvfs->z_fuid_replay);
+ zfsvfs->z_fuid_replay = NULL;
iput(ZTOI(zp));
return (error);
}
static int
-zfs_replay_acl_v0(zfs_sb_t *zsb, lr_acl_v0_t *lr, boolean_t byteswap)
+zfs_replay_acl_v0(zfsvfs_t *zfsvfs, lr_acl_v0_t *lr, boolean_t byteswap)
{
ace_t *ace = (ace_t *)(lr + 1); /* ace array follows lr_acl_t */
vsecattr_t vsa;
@@ -829,7 +846,7 @@ zfs_replay_acl_v0(zfs_sb_t *zsb, lr_acl_v0_t *lr, boolean_t byteswap)
zfs_oldace_byteswap(ace, lr->lr_aclcnt);
}
- if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0)
+ if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
return (error);
bzero(&vsa, sizeof (vsa));
@@ -854,14 +871,14 @@ zfs_replay_acl_v0(zfs_sb_t *zsb, lr_acl_v0_t *lr, boolean_t byteswap)
* The FUID table index may no longer be valid and
* during zfs_create() a new index may be assigned.
* Because of this the log will contain the original
- * doman+rid in order to create a new FUID.
+ * domain+rid in order to create a new FUID.
*
* The individual ACEs may contain an ephemeral uid/gid which is no
* longer valid and will need to be replaced with an actual FUID.
*
*/
static int
-zfs_replay_acl(zfs_sb_t *zsb, lr_acl_t *lr, boolean_t byteswap)
+zfs_replay_acl(zfsvfs_t *zfsvfs, lr_acl_t *lr, boolean_t byteswap)
{
ace_t *ace = (ace_t *)(lr + 1);
vsecattr_t vsa;
@@ -878,7 +895,7 @@ zfs_replay_acl(zfs_sb_t *zsb, lr_acl_t *lr, boolean_t byteswap)
}
}
- if ((error = zfs_zget(zsb, lr->lr_foid, &zp)) != 0)
+ if ((error = zfs_zget(zfsvfs, lr->lr_foid, &zp)) != 0)
return (error);
bzero(&vsa, sizeof (vsa));
@@ -892,17 +909,17 @@ zfs_replay_acl(zfs_sb_t *zsb, lr_acl_t *lr, boolean_t byteswap)
void *fuidstart = (caddr_t)ace +
ZIL_ACE_LENGTH(lr->lr_acl_bytes);
- zsb->z_fuid_replay =
+ zfsvfs->z_fuid_replay =
zfs_replay_fuids(fuidstart, &fuidstart,
lr->lr_fuidcnt, lr->lr_domcnt, 0, 0);
}
error = zfs_setsecattr(ZTOI(zp), &vsa, 0, kcred);
- if (zsb->z_fuid_replay)
- zfs_fuid_info_free(zsb->z_fuid_replay);
+ if (zfsvfs->z_fuid_replay)
+ zfs_fuid_info_free(zfsvfs->z_fuid_replay);
- zsb->z_fuid_replay = NULL;
+ zfsvfs->z_fuid_replay = NULL;
iput(ZTOI(zp));
return (error);
diff --git a/zfs/module/zfs/zfs_rlock.c b/zfs/module/zfs/zfs_rlock.c
index fd3e8a68d2a7..d7fc014968bf 100644
--- a/zfs/module/zfs/zfs_rlock.c
+++ b/zfs/module/zfs/zfs_rlock.c
@@ -65,7 +65,7 @@
* Otherwise, the proxy lock is split into smaller lock ranges and
* new proxy locks created for non overlapping ranges.
* The reference counts are adjusted accordingly.
- * Meanwhile, the orginal lock is kept around (this is the callers handle)
+ * Meanwhile, the original lock is kept around (this is the caller's handle)
* and its offset and length are used when releasing the lock.
*
* Thread coordination
@@ -87,7 +87,7 @@
*
* Grow block handling
* -------------------
- * ZFS supports multiple block sizes currently upto 128K. The smallest
+ * ZFS supports multiple block sizes, currently up to 128K. The smallest
* block size is used for the file which is grown as needed. During this
* growth all other writers and readers must be excluded.
* So if the block size needs to be grown then the whole file is
@@ -617,14 +617,10 @@ zfs_range_reduce(rl_t *rl, uint64_t off, uint64_t len)
int
zfs_range_compare(const void *arg1, const void *arg2)
{
- const rl_t *rl1 = arg1;
- const rl_t *rl2 = arg2;
-
- if (rl1->r_off > rl2->r_off)
- return (1);
- if (rl1->r_off < rl2->r_off)
- return (-1);
- return (0);
+ const rl_t *rl1 = (const rl_t *)arg1;
+ const rl_t *rl2 = (const rl_t *)arg2;
+
+ return (AVL_CMP(rl1->r_off, rl2->r_off));
}
#ifdef _KERNEL
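The comparator is now routed through AVL_CMP(), which must return strictly -1, 0, or +1. Its common definition is the branch-free double comparison below (reproduced only for illustration):

#include <assert.h>
#include <stdint.h>

/* Three-way compare: -1, 0, or +1, with no conditional branches. */
#define AVL_CMP(a, b)	(((a) > (b)) - ((a) < (b)))

int
main(void)
{
	uint64_t off1 = 100, off2 = 4096;

	assert(AVL_CMP(off1, off2) == -1);
	assert(AVL_CMP(off2, off1) == 1);
	assert(AVL_CMP(off1, off1) == 0);
	return (0);
}

This also avoids the subtraction idiom, which can wrap for unsigned 64-bit offsets.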
diff --git a/zfs/module/zfs/zfs_sa.c b/zfs/module/zfs/zfs_sa.c
index 98e6185cfd1a..6eb7e09990b2 100644
--- a/zfs/module/zfs/zfs_sa.c
+++ b/zfs/module/zfs/zfs_sa.c
@@ -97,8 +97,7 @@ zfs_sa_symlink(znode_t *zp, char *link, int len, dmu_tx_t *tx)
dmu_buf_t *db = sa_get_db(zp->z_sa_hdl);
if (ZFS_OLD_ZNODE_PHYS_SIZE + len <= dmu_bonus_max()) {
- VERIFY(dmu_set_bonus(db,
- len + ZFS_OLD_ZNODE_PHYS_SIZE, tx) == 0);
+ VERIFY0(dmu_set_bonus(db, len + ZFS_OLD_ZNODE_PHYS_SIZE, tx));
if (len) {
bcopy(link, (caddr_t)db->db_data +
ZFS_OLD_ZNODE_PHYS_SIZE, len);
@@ -107,8 +106,8 @@ zfs_sa_symlink(znode_t *zp, char *link, int len, dmu_tx_t *tx)
dmu_buf_t *dbp;
zfs_grow_blocksize(zp, len, tx);
- VERIFY(0 == dmu_buf_hold(ZTOZSB(zp)->z_os,
- zp->z_id, 0, FTAG, &dbp, DMU_READ_NO_PREFETCH));
+ VERIFY0(dmu_buf_hold(ZTOZSB(zp)->z_os, zp->z_id, 0, FTAG, &dbp,
+ DMU_READ_NO_PREFETCH));
dmu_buf_will_dirty(dbp, tx);
@@ -121,13 +120,13 @@ zfs_sa_symlink(znode_t *zp, char *link, int len, dmu_tx_t *tx)
void
zfs_sa_get_scanstamp(znode_t *zp, xvattr_t *xvap)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
xoptattr_t *xoap;
ASSERT(MUTEX_HELD(&zp->z_lock));
VERIFY((xoap = xva_getxoptattr(xvap)) != NULL);
if (zp->z_is_sa) {
- if (sa_lookup(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zsb),
+ if (sa_lookup(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs),
&xoap->xoa_av_scanstamp,
sizeof (xoap->xoa_av_scanstamp)) != 0)
return;
@@ -155,13 +154,13 @@ zfs_sa_get_scanstamp(znode_t *zp, xvattr_t *xvap)
void
zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
xoptattr_t *xoap;
ASSERT(MUTEX_HELD(&zp->z_lock));
VERIFY((xoap = xva_getxoptattr(xvap)) != NULL);
if (zp->z_is_sa)
- VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zsb),
+ VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SCANSTAMP(zfsvfs),
&xoap->xoa_av_scanstamp,
sizeof (xoap->xoa_av_scanstamp), tx));
else {
@@ -178,7 +177,7 @@ zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
xoap->xoa_av_scanstamp, sizeof (xoap->xoa_av_scanstamp));
zp->z_pflags |= ZFS_BONUS_SCANSTAMP;
- VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zsb),
+ VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
&zp->z_pflags, sizeof (uint64_t), tx));
}
}
@@ -186,7 +185,7 @@ zfs_sa_set_scanstamp(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
int
zfs_sa_get_xattr(znode_t *zp)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
char *obj;
int size;
int error;
@@ -195,7 +194,7 @@ zfs_sa_get_xattr(znode_t *zp)
ASSERT(!zp->z_xattr_cached);
ASSERT(zp->z_is_sa);
- error = sa_size(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), &size);
+ error = sa_size(zp->z_sa_hdl, SA_ZPL_DXATTR(zfsvfs), &size);
if (error) {
if (error == ENOENT)
return nvlist_alloc(&zp->z_xattr_cached,
@@ -204,13 +203,13 @@ zfs_sa_get_xattr(znode_t *zp)
return (error);
}
- obj = zio_buf_alloc(size);
+ obj = vmem_alloc(size, KM_SLEEP);
- error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb), obj, size);
+ error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DXATTR(zfsvfs), obj, size);
if (error == 0)
error = nvlist_unpack(obj, size, &zp->z_xattr_cached, KM_SLEEP);
- zio_buf_free(obj, size);
+ vmem_free(obj, size);
return (error);
}
@@ -218,7 +217,7 @@ zfs_sa_get_xattr(znode_t *zp)
int
zfs_sa_set_xattr(znode_t *zp)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
dmu_tx_t *tx;
char *obj;
size_t size;
@@ -234,14 +233,14 @@ zfs_sa_set_xattr(znode_t *zp)
if (error)
goto out;
- obj = zio_buf_alloc(size);
+ obj = vmem_alloc(size, KM_SLEEP);
error = nvlist_pack(zp->z_xattr_cached, &obj, &size,
NV_ENCODE_XDR, KM_SLEEP);
if (error)
goto out_free;
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa_create(tx, size);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
@@ -249,12 +248,21 @@ zfs_sa_set_xattr(znode_t *zp)
if (error) {
dmu_tx_abort(tx);
} else {
- VERIFY0(sa_update(zp->z_sa_hdl, SA_ZPL_DXATTR(zsb),
- obj, size, tx));
+ int count = 0;
+ sa_bulk_attr_t bulk[2];
+ uint64_t ctime[2];
+
+ zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DXATTR(zfsvfs),
+ NULL, obj, size);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs),
+ NULL, &ctime, 16);
+ VERIFY0(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
+
dmu_tx_commit(tx);
}
out_free:
- zio_buf_free(obj, size);
+ vmem_free(obj, size);
out:
return (error);
}
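Both xattr paths above stage the cached nvlist as a packed XDR blob (now in a vmem_alloc()'d buffer) and store it in the DXATTR system attribute. The pack/unpack round trip can be exercised from userspace with libnvpair; a minimal sketch (link with -lnvpair, names are illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <libnvpair.h>

int
main(void)
{
	nvlist_t *xattrs, *copy;
	char *buf = NULL;
	size_t size = 0;
	uchar_t *val;
	uint_t len;

	/* Build a small xattr list: name -> byte-array value. */
	if (nvlist_alloc(&xattrs, NV_UNIQUE_NAME, 0) != 0)
		return (1);
	(void) nvlist_add_byte_array(xattrs, "user.test",
	    (uchar_t *)"hello", 5);

	/* Pack to XDR, the encoding zfs_sa_set_xattr() stores. */
	if (nvlist_pack(xattrs, &buf, &size, NV_ENCODE_XDR, 0) != 0)
		return (1);

	/* Unpack, as zfs_sa_get_xattr() does after sa_lookup(). */
	if (nvlist_unpack(buf, size, &copy, 0) != 0)
		return (1);
	if (nvlist_lookup_byte_array(copy, "user.test", &val, &len) == 0)
		printf("recovered %u-byte xattr value\n", len);

	free(buf);
	nvlist_free(xattrs);
	nvlist_free(copy);
	return (0);
}

Bundling the SA_ZPL_CTIME update into the same sa_bulk_update() call means the ctime change and the xattr data land in one transaction, so a crash cannot leave the timestamp ahead of the data.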
@@ -272,12 +280,13 @@ zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
{
dmu_buf_t *db = sa_get_db(hdl);
znode_t *zp = sa_get_userdata(hdl);
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
int count = 0;
sa_bulk_attr_t *bulk, *sa_attrs;
zfs_acl_locator_cb_t locate = { 0 };
- uint64_t uid, gid, mode, rdev, xattr, parent;
+ uint64_t uid, gid, mode, rdev, xattr, parent, tmp_gen;
uint64_t crtime[2], mtime[2], ctime[2], atime[2];
+ uint64_t links;
zfs_acl_phys_t znode_acl;
char scanstamp[AV_SCANSTAMP_SZ];
boolean_t drop_lock = B_FALSE;
@@ -309,17 +318,18 @@ zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
/* First do a bulk query of the attributes that aren't cached */
bulk = kmem_alloc(sizeof (sa_bulk_attr_t) * 20, KM_SLEEP);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL, &atime, 16);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zsb), NULL, &crtime, 16);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &mode, 8);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL, &parent, 8);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_XATTR(zsb), NULL, &xattr, 8);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zsb), NULL, &rdev, 8);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &uid, 8);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &gid, 8);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CRTIME(zfsvfs), NULL, &crtime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_XATTR(zfsvfs), NULL, &xattr, 8);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_RDEV(zfsvfs), NULL, &rdev, 8);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &tmp_gen, 8);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
&znode_acl, 88);
if (sa_bulk_lookup_locked(hdl, bulk, count) != 0) {
@@ -333,42 +343,43 @@ zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
*/
count = 0;
sa_attrs = kmem_zalloc(sizeof (sa_bulk_attr_t) * 20, KM_SLEEP);
- SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MODE(zsb), NULL, &mode, 8);
- SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SIZE(zsb), NULL,
+ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
+ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SIZE(zfsvfs), NULL,
&zp->z_size, 8);
- SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GEN(zsb),
- NULL, &zp->z_gen, 8);
- SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_UID(zsb), NULL, &uid, 8);
- SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GID(zsb), NULL, &gid, 8);
- SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_PARENT(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GEN(zfsvfs),
+ NULL, &tmp_gen, 8);
+ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_UID(zfsvfs), NULL, &uid, 8);
+ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_GID(zfsvfs), NULL, &gid, 8);
+ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_PARENT(zfsvfs),
NULL, &parent, 8);
- SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_FLAGS(zsb), NULL,
+ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_FLAGS(zfsvfs), NULL,
&zp->z_pflags, 8);
- SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_ATIME(zsb), NULL,
+ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_ATIME(zfsvfs), NULL,
&atime, 16);
- SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MTIME(zsb), NULL,
+ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_MTIME(zfsvfs), NULL,
&mtime, 16);
- SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CTIME(zsb), NULL,
+ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CTIME(zfsvfs), NULL,
&ctime, 16);
- SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zsb), NULL,
+ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_CRTIME(zfsvfs), NULL,
&crtime, 16);
- SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zsb), NULL,
- &zp->z_links, 8);
+ links = ZTOI(zp)->i_nlink;
+ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_LINKS(zfsvfs), NULL,
+ &links, 8);
if (S_ISBLK(ZTOI(zp)->i_mode) || S_ISCHR(ZTOI(zp)->i_mode))
- SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zsb), NULL,
+ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_RDEV(zfsvfs), NULL,
&rdev, 8);
- SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_COUNT(zsb), NULL,
+ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
&zp->z_acl_cached->z_acl_count, 8);
if (zp->z_acl_cached->z_version < ZFS_ACL_VERSION_FUID)
zfs_acl_xform(zp, zp->z_acl_cached, CRED());
locate.cb_aclp = zp->z_acl_cached;
- SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_ACES(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_DACL_ACES(zfsvfs),
zfs_acl_data_locator, &locate, zp->z_acl_cached->z_acl_bytes);
if (xattr)
- SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_XATTR(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_XATTR(zfsvfs),
NULL, &xattr, 8);
/* if scanstamp then add scanstamp */
@@ -376,7 +387,7 @@ zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
if (zp->z_pflags & ZFS_BONUS_SCANSTAMP) {
bcopy((caddr_t)db->db_data + ZFS_OLD_ZNODE_PHYS_SIZE,
scanstamp, AV_SCANSTAMP_SZ);
- SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SCANSTAMP(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, count, SA_ZPL_SCANSTAMP(zfsvfs),
NULL, scanstamp, AV_SCANSTAMP_SZ);
zp->z_pflags &= ~ZFS_BONUS_SCANSTAMP;
}
@@ -385,7 +396,7 @@ zfs_sa_upgrade(sa_handle_t *hdl, dmu_tx_t *tx)
VERIFY(sa_replace_all_by_template_locked(hdl, sa_attrs,
count, tx) == 0);
if (znode_acl.z_acl_extern_obj)
- VERIFY(0 == dmu_object_free(zsb->z_os,
+ VERIFY(0 == dmu_object_free(zfsvfs->z_os,
znode_acl.z_acl_extern_obj, tx));
zp->z_is_sa = B_TRUE;
diff --git a/zfs/module/zfs/zfs_vfsops.c b/zfs/module/zfs/zfs_vfsops.c
index 589b48d0f668..f97660f37a69 100644
--- a/zfs/module/zfs/zfs_vfsops.c
+++ b/zfs/module/zfs/zfs_vfsops.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -68,11 +68,205 @@
#include <sys/zpl.h>
#include "zfs_comutil.h"
+enum {
+ TOKEN_RO,
+ TOKEN_RW,
+ TOKEN_SETUID,
+ TOKEN_NOSETUID,
+ TOKEN_EXEC,
+ TOKEN_NOEXEC,
+ TOKEN_DEVICES,
+ TOKEN_NODEVICES,
+ TOKEN_DIRXATTR,
+ TOKEN_SAXATTR,
+ TOKEN_XATTR,
+ TOKEN_NOXATTR,
+ TOKEN_ATIME,
+ TOKEN_NOATIME,
+ TOKEN_RELATIME,
+ TOKEN_NORELATIME,
+ TOKEN_NBMAND,
+ TOKEN_NONBMAND,
+ TOKEN_MNTPOINT,
+ TOKEN_LAST,
+};
+
+static const match_table_t zpl_tokens = {
+ { TOKEN_RO, MNTOPT_RO },
+ { TOKEN_RW, MNTOPT_RW },
+ { TOKEN_SETUID, MNTOPT_SETUID },
+ { TOKEN_NOSETUID, MNTOPT_NOSETUID },
+ { TOKEN_EXEC, MNTOPT_EXEC },
+ { TOKEN_NOEXEC, MNTOPT_NOEXEC },
+ { TOKEN_DEVICES, MNTOPT_DEVICES },
+ { TOKEN_NODEVICES, MNTOPT_NODEVICES },
+ { TOKEN_DIRXATTR, MNTOPT_DIRXATTR },
+ { TOKEN_SAXATTR, MNTOPT_SAXATTR },
+ { TOKEN_XATTR, MNTOPT_XATTR },
+ { TOKEN_NOXATTR, MNTOPT_NOXATTR },
+ { TOKEN_ATIME, MNTOPT_ATIME },
+ { TOKEN_NOATIME, MNTOPT_NOATIME },
+ { TOKEN_RELATIME, MNTOPT_RELATIME },
+ { TOKEN_NORELATIME, MNTOPT_NORELATIME },
+ { TOKEN_NBMAND, MNTOPT_NBMAND },
+ { TOKEN_NONBMAND, MNTOPT_NONBMAND },
+ { TOKEN_MNTPOINT, MNTOPT_MNTPOINT "=%s" },
+ { TOKEN_LAST, NULL },
+};
+
+static void
+zfsvfs_vfs_free(vfs_t *vfsp)
+{
+ if (vfsp != NULL) {
+ if (vfsp->vfs_mntpoint != NULL)
+ strfree(vfsp->vfs_mntpoint);
+
+ kmem_free(vfsp, sizeof (vfs_t));
+ }
+}
+
+static int
+zfsvfs_parse_option(char *option, int token, substring_t *args, vfs_t *vfsp)
+{
+ switch (token) {
+ case TOKEN_RO:
+ vfsp->vfs_readonly = B_TRUE;
+ vfsp->vfs_do_readonly = B_TRUE;
+ break;
+ case TOKEN_RW:
+ vfsp->vfs_readonly = B_FALSE;
+ vfsp->vfs_do_readonly = B_TRUE;
+ break;
+ case TOKEN_SETUID:
+ vfsp->vfs_setuid = B_TRUE;
+ vfsp->vfs_do_setuid = B_TRUE;
+ break;
+ case TOKEN_NOSETUID:
+ vfsp->vfs_setuid = B_FALSE;
+ vfsp->vfs_do_setuid = B_TRUE;
+ break;
+ case TOKEN_EXEC:
+ vfsp->vfs_exec = B_TRUE;
+ vfsp->vfs_do_exec = B_TRUE;
+ break;
+ case TOKEN_NOEXEC:
+ vfsp->vfs_exec = B_FALSE;
+ vfsp->vfs_do_exec = B_TRUE;
+ break;
+ case TOKEN_DEVICES:
+ vfsp->vfs_devices = B_TRUE;
+ vfsp->vfs_do_devices = B_TRUE;
+ break;
+ case TOKEN_NODEVICES:
+ vfsp->vfs_devices = B_FALSE;
+ vfsp->vfs_do_devices = B_TRUE;
+ break;
+ case TOKEN_DIRXATTR:
+ vfsp->vfs_xattr = ZFS_XATTR_DIR;
+ vfsp->vfs_do_xattr = B_TRUE;
+ break;
+ case TOKEN_SAXATTR:
+ vfsp->vfs_xattr = ZFS_XATTR_SA;
+ vfsp->vfs_do_xattr = B_TRUE;
+ break;
+ case TOKEN_XATTR:
+ vfsp->vfs_xattr = ZFS_XATTR_DIR;
+ vfsp->vfs_do_xattr = B_TRUE;
+ break;
+ case TOKEN_NOXATTR:
+ vfsp->vfs_xattr = ZFS_XATTR_OFF;
+ vfsp->vfs_do_xattr = B_TRUE;
+ break;
+ case TOKEN_ATIME:
+ vfsp->vfs_atime = B_TRUE;
+ vfsp->vfs_do_atime = B_TRUE;
+ break;
+ case TOKEN_NOATIME:
+ vfsp->vfs_atime = B_FALSE;
+ vfsp->vfs_do_atime = B_TRUE;
+ break;
+ case TOKEN_RELATIME:
+ vfsp->vfs_relatime = B_TRUE;
+ vfsp->vfs_do_relatime = B_TRUE;
+ break;
+ case TOKEN_NORELATIME:
+ vfsp->vfs_relatime = B_FALSE;
+ vfsp->vfs_do_relatime = B_TRUE;
+ break;
+ case TOKEN_NBMAND:
+ vfsp->vfs_nbmand = B_TRUE;
+ vfsp->vfs_do_nbmand = B_TRUE;
+ break;
+ case TOKEN_NONBMAND:
+ vfsp->vfs_nbmand = B_FALSE;
+ vfsp->vfs_do_nbmand = B_TRUE;
+ break;
+ case TOKEN_MNTPOINT:
+ vfsp->vfs_mntpoint = match_strdup(&args[0]);
+ if (vfsp->vfs_mntpoint == NULL)
+ return (SET_ERROR(ENOMEM));
+
+ break;
+ default:
+ break;
+ }
+
+ return (0);
+}
+
+/*
+ * Parse the raw mntopts and return a vfs_t describing the options.
+ */
+static int
+zfsvfs_parse_options(char *mntopts, vfs_t **vfsp)
+{
+ vfs_t *tmp_vfsp;
+ int error;
+
+ tmp_vfsp = kmem_zalloc(sizeof (vfs_t), KM_SLEEP);
+
+ if (mntopts != NULL) {
+ substring_t args[MAX_OPT_ARGS];
+ char *tmp_mntopts, *p, *t;
+ int token;
+
+ tmp_mntopts = t = strdup(mntopts);
+ if (tmp_mntopts == NULL) {
+ zfsvfs_vfs_free(tmp_vfsp);
+ return (SET_ERROR(ENOMEM));
+ }
+
+ while ((p = strsep(&t, ",")) != NULL) {
+ if (!*p)
+ continue;
+
+ args[0].to = args[0].from = NULL;
+ token = match_token(p, zpl_tokens, args);
+ error = zfsvfs_parse_option(p, token, args, tmp_vfsp);
+ if (error) {
+ strfree(tmp_mntopts);
+ zfsvfs_vfs_free(tmp_vfsp);
+ return (error);
+ }
+ }
+
+ strfree(tmp_mntopts);
+ }
+
+ *vfsp = tmp_vfsp;
+
+ return (0);
+}
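A userspace sketch of the option-splitting loop above: strsep() walks a writable copy of the comma-separated string and empty fields are tolerated. The kernel version hands each token to match_token() against zpl_tokens; here they are just printed:

#define _DEFAULT_SOURCE		/* for strsep() on glibc */
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

int
main(void)
{
	char *opts = strdup("rw,,noatime,xattr,mntpoint=/tank/fs");
	char *t = opts, *p;

	if (opts == NULL)
		return (1);
	while ((p = strsep(&t, ",")) != NULL) {
		if (!*p)
			continue;	/* skip empty fields, as above */
		printf("token: %s\n", p);
	}
	free(opts);
	return (0);
}

Note that strsep() consumes the copy, which is why zfsvfs_parse_options() duplicates mntopts before parsing and frees the duplicate afterwards.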
+
+boolean_t
+zfs_is_readonly(zfsvfs_t *zfsvfs)
+{
+ return (!!(zfsvfs->z_sb->s_flags & MS_RDONLY));
+}
+
/*ARGSUSED*/
int
zfs_sync(struct super_block *sb, int wait, cred_t *cr)
{
- zfs_sb_t *zsb = sb->s_fs_info;
+ zfsvfs_t *zfsvfs = sb->s_fs_info;
/*
* Data integrity is job one. We don't want a compromised kernel
@@ -88,28 +282,28 @@ zfs_sync(struct super_block *sb, int wait, cred_t *cr)
if (!wait)
return (0);
- if (zsb != NULL) {
+ if (zfsvfs != NULL) {
/*
* Sync a specific filesystem.
*/
dsl_pool_t *dp;
- ZFS_ENTER(zsb);
- dp = dmu_objset_pool(zsb->z_os);
+ ZFS_ENTER(zfsvfs);
+ dp = dmu_objset_pool(zfsvfs->z_os);
/*
* If the system is shutting down, then skip any
* filesystems which may exist on a suspended pool.
*/
if (spa_suspended(dp->dp_spa)) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
- if (zsb->z_log != NULL)
- zil_commit(zsb->z_log, 0);
+ if (zfsvfs->z_log != NULL)
+ zil_commit(zfsvfs->z_log, 0);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
} else {
/*
* Sync all ZFS filesystems. This is what happens when you
@@ -121,61 +315,53 @@ zfs_sync(struct super_block *sb, int wait, cred_t *cr)
return (0);
}
-EXPORT_SYMBOL(zfs_sync);
-
-boolean_t
-zfs_is_readonly(zfs_sb_t *zsb)
-{
- return (!!(zsb->z_sb->s_flags & MS_RDONLY));
-}
-EXPORT_SYMBOL(zfs_is_readonly);
static void
atime_changed_cb(void *arg, uint64_t newval)
{
- ((zfs_sb_t *)arg)->z_atime = newval;
+ ((zfsvfs_t *)arg)->z_atime = newval;
}
static void
relatime_changed_cb(void *arg, uint64_t newval)
{
- ((zfs_sb_t *)arg)->z_relatime = newval;
+ ((zfsvfs_t *)arg)->z_relatime = newval;
}
static void
xattr_changed_cb(void *arg, uint64_t newval)
{
- zfs_sb_t *zsb = arg;
+ zfsvfs_t *zfsvfs = arg;
if (newval == ZFS_XATTR_OFF) {
- zsb->z_flags &= ~ZSB_XATTR;
+ zfsvfs->z_flags &= ~ZSB_XATTR;
} else {
- zsb->z_flags |= ZSB_XATTR;
+ zfsvfs->z_flags |= ZSB_XATTR;
if (newval == ZFS_XATTR_SA)
- zsb->z_xattr_sa = B_TRUE;
+ zfsvfs->z_xattr_sa = B_TRUE;
else
- zsb->z_xattr_sa = B_FALSE;
+ zfsvfs->z_xattr_sa = B_FALSE;
}
}
static void
acltype_changed_cb(void *arg, uint64_t newval)
{
- zfs_sb_t *zsb = arg;
+ zfsvfs_t *zfsvfs = arg;
switch (newval) {
case ZFS_ACLTYPE_OFF:
- zsb->z_acl_type = ZFS_ACLTYPE_OFF;
- zsb->z_sb->s_flags &= ~MS_POSIXACL;
+ zfsvfs->z_acl_type = ZFS_ACLTYPE_OFF;
+ zfsvfs->z_sb->s_flags &= ~MS_POSIXACL;
break;
case ZFS_ACLTYPE_POSIXACL:
#ifdef CONFIG_FS_POSIX_ACL
- zsb->z_acl_type = ZFS_ACLTYPE_POSIXACL;
- zsb->z_sb->s_flags |= MS_POSIXACL;
+ zfsvfs->z_acl_type = ZFS_ACLTYPE_POSIXACL;
+ zfsvfs->z_sb->s_flags |= MS_POSIXACL;
#else
- zsb->z_acl_type = ZFS_ACLTYPE_OFF;
- zsb->z_sb->s_flags &= ~MS_POSIXACL;
+ zfsvfs->z_acl_type = ZFS_ACLTYPE_OFF;
+ zfsvfs->z_sb->s_flags &= ~MS_POSIXACL;
#endif /* CONFIG_FS_POSIX_ACL */
break;
default:
@@ -186,19 +372,19 @@ acltype_changed_cb(void *arg, uint64_t newval)
static void
blksz_changed_cb(void *arg, uint64_t newval)
{
- zfs_sb_t *zsb = arg;
- ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zsb->z_os)));
+ zfsvfs_t *zfsvfs = arg;
+ ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));
ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
ASSERT(ISP2(newval));
- zsb->z_max_blksz = newval;
+ zfsvfs->z_max_blksz = newval;
}
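blksz_changed_cb() asserts the new record size with ISP2(). The check relies on the usual power-of-two identity — a value x > 0 is a power of two iff x & (x - 1) is zero; a two-line sketch:

#include <assert.h>

#define ISP2(x)	(((x) & ((x) - 1)) == 0)

int
main(void)
{
	assert(ISP2(131072));		/* 128K recordsize: valid */
	assert(!ISP2(131072 + 512));	/* not a power of two */
	return (0);
}

The SPA_MINBLOCKSIZE lower bound asserted alongside it rules out the degenerate x == 0 case, which this macro would otherwise accept.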
static void
readonly_changed_cb(void *arg, uint64_t newval)
{
- zfs_sb_t *zsb = arg;
- struct super_block *sb = zsb->z_sb;
+ zfsvfs_t *zfsvfs = arg;
+ struct super_block *sb = zfsvfs->z_sb;
if (sb == NULL)
return;
@@ -227,8 +413,8 @@ exec_changed_cb(void *arg, uint64_t newval)
static void
nbmand_changed_cb(void *arg, uint64_t newval)
{
- zfs_sb_t *zsb = arg;
- struct super_block *sb = zsb->z_sb;
+ zfsvfs_t *zfsvfs = arg;
+ struct super_block *sb = zfsvfs->z_sb;
if (sb == NULL)
return;
@@ -242,31 +428,33 @@ nbmand_changed_cb(void *arg, uint64_t newval)
static void
snapdir_changed_cb(void *arg, uint64_t newval)
{
- ((zfs_sb_t *)arg)->z_show_ctldir = newval;
+ ((zfsvfs_t *)arg)->z_show_ctldir = newval;
}
static void
vscan_changed_cb(void *arg, uint64_t newval)
{
- ((zfs_sb_t *)arg)->z_vscan = newval;
+ ((zfsvfs_t *)arg)->z_vscan = newval;
}
static void
acl_inherit_changed_cb(void *arg, uint64_t newval)
{
- ((zfs_sb_t *)arg)->z_acl_inherit = newval;
+ ((zfsvfs_t *)arg)->z_acl_inherit = newval;
}
-int
-zfs_register_callbacks(zfs_sb_t *zsb)
+static int
+zfs_register_callbacks(vfs_t *vfsp)
{
struct dsl_dataset *ds = NULL;
- objset_t *os = zsb->z_os;
- zfs_mntopts_t *zmo = zsb->z_mntopts;
+ objset_t *os = NULL;
+ zfsvfs_t *zfsvfs = NULL;
int error = 0;
- ASSERT(zsb);
- ASSERT(zmo);
+ ASSERT(vfsp);
+ zfsvfs = vfsp->vfs_data;
+ ASSERT(zfsvfs);
+ os = zfsvfs->z_os;
/*
* The act of registering our callbacks will destroy any mount
@@ -274,9 +462,9 @@ zfs_register_callbacks(zfs_sb_t *zsb)
* of mount options, we stash away the current values and
* restore them after we register the callbacks.
*/
- if (zfs_is_readonly(zsb) || !spa_writeable(dmu_objset_spa(os))) {
- zmo->z_do_readonly = B_TRUE;
- zmo->z_readonly = B_TRUE;
+ if (zfs_is_readonly(zfsvfs) || !spa_writeable(dmu_objset_spa(os))) {
+ vfsp->vfs_do_readonly = B_TRUE;
+ vfsp->vfs_readonly = B_TRUE;
}
/*
@@ -289,31 +477,32 @@ zfs_register_callbacks(zfs_sb_t *zsb)
ds = dmu_objset_ds(os);
dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
error = dsl_prop_register(ds,
- zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zsb);
+ zfs_prop_to_name(ZFS_PROP_ATIME), atime_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- zfs_prop_to_name(ZFS_PROP_RELATIME), relatime_changed_cb, zsb);
+ zfs_prop_to_name(ZFS_PROP_RELATIME), relatime_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zsb);
+ zfs_prop_to_name(ZFS_PROP_XATTR), xattr_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zsb);
+ zfs_prop_to_name(ZFS_PROP_RECORDSIZE), blksz_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zsb);
+ zfs_prop_to_name(ZFS_PROP_READONLY), readonly_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zsb);
+ zfs_prop_to_name(ZFS_PROP_DEVICES), devices_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zsb);
+ zfs_prop_to_name(ZFS_PROP_SETUID), setuid_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zsb);
+ zfs_prop_to_name(ZFS_PROP_EXEC), exec_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zsb);
+ zfs_prop_to_name(ZFS_PROP_SNAPDIR), snapdir_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- zfs_prop_to_name(ZFS_PROP_ACLTYPE), acltype_changed_cb, zsb);
+ zfs_prop_to_name(ZFS_PROP_ACLTYPE), acltype_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb, zsb);
+ zfs_prop_to_name(ZFS_PROP_ACLINHERIT), acl_inherit_changed_cb,
+ zfsvfs);
error = error ? error : dsl_prop_register(ds,
- zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zsb);
+ zfs_prop_to_name(ZFS_PROP_VSCAN), vscan_changed_cb, zfsvfs);
error = error ? error : dsl_prop_register(ds,
- zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zsb);
+ zfs_prop_to_name(ZFS_PROP_NBMAND), nbmand_changed_cb, zfsvfs);
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
if (error)
goto unregister;
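The registration chain above relies on the ternary short-circuit: once error is nonzero, every later dsl_prop_register() expression yields the saved error without issuing the call, and the single dsl_prop_unregister_all() in the unregister path tears down whatever did get registered. A stand-in sketch of the first-error-wins idiom:

#include <stdio.h>

/* Stand-in for dsl_prop_register(): logs and returns a canned code. */
static int
reg(const char *prop, int rc)
{
	printf("register %s -> %d\n", prop, rc);
	return (rc);
}

int
main(void)
{
	int error = 0;

	error = reg("atime", 0);
	error = error ? error : reg("xattr", 5);	/* fails */
	error = error ? error : reg("readonly", 0);	/* never called */

	printf("first error wins: %d\n", error);
	return (0);
}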
@@ -321,61 +510,29 @@ zfs_register_callbacks(zfs_sb_t *zsb)
/*
* Invoke our callbacks to restore temporary mount options.
*/
- if (zmo->z_do_readonly)
- readonly_changed_cb(zsb, zmo->z_readonly);
- if (zmo->z_do_setuid)
- setuid_changed_cb(zsb, zmo->z_setuid);
- if (zmo->z_do_exec)
- exec_changed_cb(zsb, zmo->z_exec);
- if (zmo->z_do_devices)
- devices_changed_cb(zsb, zmo->z_devices);
- if (zmo->z_do_xattr)
- xattr_changed_cb(zsb, zmo->z_xattr);
- if (zmo->z_do_atime)
- atime_changed_cb(zsb, zmo->z_atime);
- if (zmo->z_do_relatime)
- relatime_changed_cb(zsb, zmo->z_relatime);
- if (zmo->z_do_nbmand)
- nbmand_changed_cb(zsb, zmo->z_nbmand);
+ if (vfsp->vfs_do_readonly)
+ readonly_changed_cb(zfsvfs, vfsp->vfs_readonly);
+ if (vfsp->vfs_do_setuid)
+ setuid_changed_cb(zfsvfs, vfsp->vfs_setuid);
+ if (vfsp->vfs_do_exec)
+ exec_changed_cb(zfsvfs, vfsp->vfs_exec);
+ if (vfsp->vfs_do_devices)
+ devices_changed_cb(zfsvfs, vfsp->vfs_devices);
+ if (vfsp->vfs_do_xattr)
+ xattr_changed_cb(zfsvfs, vfsp->vfs_xattr);
+ if (vfsp->vfs_do_atime)
+ atime_changed_cb(zfsvfs, vfsp->vfs_atime);
+ if (vfsp->vfs_do_relatime)
+ relatime_changed_cb(zfsvfs, vfsp->vfs_relatime);
+ if (vfsp->vfs_do_nbmand)
+ nbmand_changed_cb(zfsvfs, vfsp->vfs_nbmand);
return (0);
unregister:
- /*
- * We may attempt to unregister some callbacks that are not
- * registered, but this is OK; it will simply return ENOMSG,
- * which we will ignore.
- */
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ATIME),
- atime_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RELATIME),
- relatime_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_XATTR),
- xattr_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
- blksz_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_READONLY),
- readonly_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_DEVICES),
- devices_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SETUID),
- setuid_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_EXEC),
- exec_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_SNAPDIR),
- snapdir_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLTYPE),
- acltype_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_ACLINHERIT),
- acl_inherit_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_VSCAN),
- vscan_changed_cb, zsb);
- (void) dsl_prop_unregister(ds, zfs_prop_to_name(ZFS_PROP_NBMAND),
- nbmand_changed_cb, zsb);
-
+ dsl_prop_unregister_all(ds, zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_register_callbacks);
static int
zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
@@ -441,15 +598,15 @@ zfs_space_delta_cb(dmu_object_type_t bonustype, void *data,
}
static void
-fuidstr_to_sid(zfs_sb_t *zsb, const char *fuidstr,
+fuidstr_to_sid(zfsvfs_t *zfsvfs, const char *fuidstr,
char *domainbuf, int buflen, uid_t *ridp)
{
uint64_t fuid;
const char *domain;
- fuid = strtonum(fuidstr, NULL);
+ fuid = zfs_strtonum(fuidstr, NULL);
- domain = zfs_fuid_find_by_idx(zsb, FUID_INDEX(fuid));
+ domain = zfs_fuid_find_by_idx(zfsvfs, FUID_INDEX(fuid));
if (domain)
(void) strlcpy(domainbuf, domain, buflen);
else
@@ -458,25 +615,30 @@ fuidstr_to_sid(zfs_sb_t *zsb, const char *fuidstr,
}
static uint64_t
-zfs_userquota_prop_to_obj(zfs_sb_t *zsb, zfs_userquota_prop_t type)
+zfs_userquota_prop_to_obj(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type)
{
switch (type) {
case ZFS_PROP_USERUSED:
+ case ZFS_PROP_USEROBJUSED:
return (DMU_USERUSED_OBJECT);
case ZFS_PROP_GROUPUSED:
+ case ZFS_PROP_GROUPOBJUSED:
return (DMU_GROUPUSED_OBJECT);
case ZFS_PROP_USERQUOTA:
- return (zsb->z_userquota_obj);
+ return (zfsvfs->z_userquota_obj);
case ZFS_PROP_GROUPQUOTA:
- return (zsb->z_groupquota_obj);
+ return (zfsvfs->z_groupquota_obj);
+ case ZFS_PROP_USEROBJQUOTA:
+ return (zfsvfs->z_userobjquota_obj);
+ case ZFS_PROP_GROUPOBJQUOTA:
+ return (zfsvfs->z_groupobjquota_obj);
default:
- return (SET_ERROR(ENOTSUP));
+ return (ZFS_NO_OBJECT);
}
- return (0);
}
int
-zfs_userspace_many(zfs_sb_t *zsb, zfs_userquota_prop_t type,
+zfs_userspace_many(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
uint64_t *cookiep, void *vbuf, uint64_t *bufsizep)
{
int error;
@@ -484,24 +646,41 @@ zfs_userspace_many(zfs_sb_t *zsb, zfs_userquota_prop_t type,
zap_attribute_t za;
zfs_useracct_t *buf = vbuf;
uint64_t obj;
+ int offset = 0;
+
+ if (!dmu_objset_userspace_present(zfsvfs->z_os))
+ return (SET_ERROR(ENOTSUP));
- if (!dmu_objset_userspace_present(zsb->z_os))
+ if ((type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED ||
+ type == ZFS_PROP_USEROBJQUOTA || type == ZFS_PROP_GROUPOBJQUOTA) &&
+ !dmu_objset_userobjspace_present(zfsvfs->z_os))
return (SET_ERROR(ENOTSUP));
- obj = zfs_userquota_prop_to_obj(zsb, type);
- if (obj == 0) {
+ obj = zfs_userquota_prop_to_obj(zfsvfs, type);
+ if (obj == ZFS_NO_OBJECT) {
*bufsizep = 0;
return (0);
}
- for (zap_cursor_init_serialized(&zc, zsb->z_os, obj, *cookiep);
+ if (type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED)
+ offset = DMU_OBJACCT_PREFIX_LEN;
+
+ for (zap_cursor_init_serialized(&zc, zfsvfs->z_os, obj, *cookiep);
(error = zap_cursor_retrieve(&zc, &za)) == 0;
zap_cursor_advance(&zc)) {
if ((uintptr_t)buf - (uintptr_t)vbuf + sizeof (zfs_useracct_t) >
*bufsizep)
break;
- fuidstr_to_sid(zsb, za.za_name,
+ /*
+ * skip object quota (with zap name prefix DMU_OBJACCT_PREFIX)
+ * when dealing with block quota and vice versa.
+ */
+ if ((offset > 0) != (strncmp(za.za_name, DMU_OBJACCT_PREFIX,
+ DMU_OBJACCT_PREFIX_LEN) == 0))
+ continue;
+
+ fuidstr_to_sid(zfsvfs, za.za_name + offset,
buf->zu_domain, sizeof (buf->zu_domain), &buf->zu_rid);
buf->zu_space = za.za_first_integer;
@@ -516,20 +695,19 @@ zfs_userspace_many(zfs_sb_t *zsb, zfs_userquota_prop_t type,
zap_cursor_fini(&zc);
return (error);
}
-EXPORT_SYMBOL(zfs_userspace_many);
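Byte and object accounting now share one ZAP per user/group, with object entries distinguished by a name prefix, so the cursor loop above must skip entries of the other kind. A sketch of that filter (the "obj-" prefix is illustrative; the real DMU_OBJACCT_PREFIX is defined in sys/dmu.h):

#include <stdio.h>
#include <string.h>

#define DMU_OBJACCT_PREFIX	"obj-"	/* illustrative value */
#define DMU_OBJACCT_PREFIX_LEN	(sizeof (DMU_OBJACCT_PREFIX) - 1)

static void
scan(const char **names, int n, int want_objects)
{
	int offset = want_objects ? DMU_OBJACCT_PREFIX_LEN : 0;
	int i;

	for (i = 0; i < n; i++) {
		int is_obj = strncmp(names[i], DMU_OBJACCT_PREFIX,
		    DMU_OBJACCT_PREFIX_LEN) == 0;

		/* Keep an entry only when its kind matches the query. */
		if ((offset > 0) != is_obj)
			continue;
		printf("fuid string: %s\n", names[i] + offset);
	}
}

int
main(void)
{
	const char *za_names[] = { "1a2b", "obj-1a2b", "3c4d" };

	scan(za_names, 3, 0);	/* byte usage: 1a2b, 3c4d */
	scan(za_names, 3, 1);	/* object usage: 1a2b */
	return (0);
}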
/*
* buf must be big enough (eg, 32 bytes)
*/
static int
-id_to_fuidstr(zfs_sb_t *zsb, const char *domain, uid_t rid,
+id_to_fuidstr(zfsvfs_t *zfsvfs, const char *domain, uid_t rid,
char *buf, boolean_t addok)
{
uint64_t fuid;
int domainid = 0;
if (domain && domain[0]) {
- domainid = zfs_fuid_find_by_domain(zsb, domain, NULL, addok);
+ domainid = zfs_fuid_find_by_domain(zfsvfs, domain, NULL, addok);
if (domainid == -1)
return (SET_ERROR(ENOENT));
}
@@ -539,35 +717,45 @@ id_to_fuidstr(zfs_sb_t *zsb, const char *domain, uid_t rid,
}
int
-zfs_userspace_one(zfs_sb_t *zsb, zfs_userquota_prop_t type,
+zfs_userspace_one(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
const char *domain, uint64_t rid, uint64_t *valp)
{
- char buf[32];
+ char buf[20 + DMU_OBJACCT_PREFIX_LEN];
+ int offset = 0;
int err;
uint64_t obj;
*valp = 0;
- if (!dmu_objset_userspace_present(zsb->z_os))
+ if (!dmu_objset_userspace_present(zfsvfs->z_os))
return (SET_ERROR(ENOTSUP));
- obj = zfs_userquota_prop_to_obj(zsb, type);
- if (obj == 0)
+ if ((type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED ||
+ type == ZFS_PROP_USEROBJQUOTA || type == ZFS_PROP_GROUPOBJQUOTA) &&
+ !dmu_objset_userobjspace_present(zfsvfs->z_os))
+ return (SET_ERROR(ENOTSUP));
+
+ obj = zfs_userquota_prop_to_obj(zfsvfs, type);
+ if (obj == ZFS_NO_OBJECT)
return (0);
- err = id_to_fuidstr(zsb, domain, rid, buf, B_FALSE);
+ if (type == ZFS_PROP_USEROBJUSED || type == ZFS_PROP_GROUPOBJUSED) {
+ strlcpy(buf, DMU_OBJACCT_PREFIX, sizeof (buf));
+ offset = DMU_OBJACCT_PREFIX_LEN;
+ }
+
+ err = id_to_fuidstr(zfsvfs, domain, rid, buf + offset, B_FALSE);
if (err)
return (err);
- err = zap_lookup(zsb->z_os, obj, buf, 8, 1, valp);
+ err = zap_lookup(zfsvfs->z_os, obj, buf, 8, 1, valp);
if (err == ENOENT)
err = 0;
return (err);
}
-EXPORT_SYMBOL(zfs_userspace_one);
int
-zfs_set_userquota(zfs_sb_t *zsb, zfs_userquota_prop_t type,
+zfs_set_userquota(zfsvfs_t *zfsvfs, zfs_userquota_prop_t type,
const char *domain, uint64_t rid, uint64_t quota)
{
char buf[32];
@@ -576,293 +764,342 @@ zfs_set_userquota(zfs_sb_t *zsb, zfs_userquota_prop_t type,
uint64_t *objp;
boolean_t fuid_dirtied;
- if (type != ZFS_PROP_USERQUOTA && type != ZFS_PROP_GROUPQUOTA)
- return (SET_ERROR(EINVAL));
-
- if (zsb->z_version < ZPL_VERSION_USERSPACE)
+ if (zfsvfs->z_version < ZPL_VERSION_USERSPACE)
return (SET_ERROR(ENOTSUP));
- objp = (type == ZFS_PROP_USERQUOTA) ? &zsb->z_userquota_obj :
- &zsb->z_groupquota_obj;
+ switch (type) {
+ case ZFS_PROP_USERQUOTA:
+ objp = &zfsvfs->z_userquota_obj;
+ break;
+ case ZFS_PROP_GROUPQUOTA:
+ objp = &zfsvfs->z_groupquota_obj;
+ break;
+ case ZFS_PROP_USEROBJQUOTA:
+ objp = &zfsvfs->z_userobjquota_obj;
+ break;
+ case ZFS_PROP_GROUPOBJQUOTA:
+ objp = &zfsvfs->z_groupobjquota_obj;
+ break;
+ default:
+ return (SET_ERROR(EINVAL));
+ }
- err = id_to_fuidstr(zsb, domain, rid, buf, B_TRUE);
+ err = id_to_fuidstr(zfsvfs, domain, rid, buf, B_TRUE);
if (err)
return (err);
- fuid_dirtied = zsb->z_fuid_dirty;
+ fuid_dirtied = zfsvfs->z_fuid_dirty;
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_zap(tx, *objp ? *objp : DMU_NEW_OBJECT, B_TRUE, NULL);
if (*objp == 0) {
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
zfs_userquota_prop_prefixes[type]);
}
if (fuid_dirtied)
- zfs_fuid_txhold(zsb, tx);
+ zfs_fuid_txhold(zfsvfs, tx);
err = dmu_tx_assign(tx, TXG_WAIT);
if (err) {
dmu_tx_abort(tx);
return (err);
}
- mutex_enter(&zsb->z_lock);
+ mutex_enter(&zfsvfs->z_lock);
if (*objp == 0) {
- *objp = zap_create(zsb->z_os, DMU_OT_USERGROUP_QUOTA,
+ *objp = zap_create(zfsvfs->z_os, DMU_OT_USERGROUP_QUOTA,
DMU_OT_NONE, 0, tx);
- VERIFY(0 == zap_add(zsb->z_os, MASTER_NODE_OBJ,
+ VERIFY(0 == zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
zfs_userquota_prop_prefixes[type], 8, 1, objp, tx));
}
- mutex_exit(&zsb->z_lock);
+ mutex_exit(&zfsvfs->z_lock);
if (quota == 0) {
- err = zap_remove(zsb->z_os, *objp, buf, tx);
+ err = zap_remove(zfsvfs->z_os, *objp, buf, tx);
if (err == ENOENT)
err = 0;
} else {
- err = zap_update(zsb->z_os, *objp, buf, 8, 1, &quota, tx);
+ err = zap_update(zfsvfs->z_os, *objp, buf, 8, 1, &quota, tx);
}
ASSERT(err == 0);
if (fuid_dirtied)
- zfs_fuid_sync(zsb, tx);
+ zfs_fuid_sync(zfsvfs, tx);
dmu_tx_commit(tx);
return (err);
}
-EXPORT_SYMBOL(zfs_set_userquota);
boolean_t
-zfs_fuid_overquota(zfs_sb_t *zsb, boolean_t isgroup, uint64_t fuid)
+zfs_fuid_overobjquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
{
- char buf[32];
+ char buf[20 + DMU_OBJACCT_PREFIX_LEN];
uint64_t used, quota, usedobj, quotaobj;
int err;
- usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
- quotaobj = isgroup ? zsb->z_groupquota_obj : zsb->z_userquota_obj;
+ if (!dmu_objset_userobjspace_present(zfsvfs->z_os)) {
+ if (dmu_objset_userobjspace_upgradable(zfsvfs->z_os))
+ dmu_objset_userobjspace_upgrade(zfsvfs->z_os);
+ return (B_FALSE);
+ }
- if (quotaobj == 0 || zsb->z_replay)
+ usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
+ quotaobj = isgroup ? zfsvfs->z_groupobjquota_obj :
+ zfsvfs->z_userobjquota_obj;
+ if (quotaobj == 0 || zfsvfs->z_replay)
return (B_FALSE);
(void) sprintf(buf, "%llx", (longlong_t)fuid);
- err = zap_lookup(zsb->z_os, quotaobj, buf, 8, 1, &quota);
+ err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota);
if (err != 0)
return (B_FALSE);
- err = zap_lookup(zsb->z_os, usedobj, buf, 8, 1, &used);
+ (void) sprintf(buf, DMU_OBJACCT_PREFIX "%llx", (longlong_t)fuid);
+ err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used);
if (err != 0)
return (B_FALSE);
return (used >= quota);
}
-EXPORT_SYMBOL(zfs_fuid_overquota);
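The two zap_lookup() keys above differ only by the accounting prefix: quota entries are keyed by the bare fuid in hex, while usage entries in DMU_{USER,GROUP}USED_OBJECT carry the object-accounting prefix. A sketch of the key construction (prefix value illustrative):

#include <stdio.h>
#include <stdint.h>

#define DMU_OBJACCT_PREFIX	"obj-"	/* illustrative value */

int
main(void)
{
	uint64_t fuid = 1000;
	char quota_key[20], used_key[20 + sizeof (DMU_OBJACCT_PREFIX)];

	(void) snprintf(quota_key, sizeof (quota_key), "%llx",
	    (unsigned long long)fuid);
	(void) snprintf(used_key, sizeof (used_key),
	    DMU_OBJACCT_PREFIX "%llx", (unsigned long long)fuid);

	printf("quota lookup key: %s\n", quota_key);	/* 3e8 */
	printf("usage lookup key: %s\n", used_key);	/* obj-3e8 */
	return (0);
}

This is also why the stack buffer grew from 20 bytes to 20 + DMU_OBJACCT_PREFIX_LEN in the functions above.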
boolean_t
-zfs_owner_overquota(zfs_sb_t *zsb, znode_t *zp, boolean_t isgroup)
+zfs_fuid_overquota(zfsvfs_t *zfsvfs, boolean_t isgroup, uint64_t fuid)
{
- uint64_t fuid;
- uint64_t quotaobj;
+ char buf[20];
+ uint64_t used, quota, usedobj, quotaobj;
+ int err;
- quotaobj = isgroup ? zsb->z_groupquota_obj : zsb->z_userquota_obj;
+ usedobj = isgroup ? DMU_GROUPUSED_OBJECT : DMU_USERUSED_OBJECT;
+ quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
- fuid = isgroup ? zp->z_gid : zp->z_uid;
+ if (quotaobj == 0 || zfsvfs->z_replay)
+ return (B_FALSE);
- if (quotaobj == 0 || zsb->z_replay)
+ (void) sprintf(buf, "%llx", (longlong_t)fuid);
+ err = zap_lookup(zfsvfs->z_os, quotaobj, buf, 8, 1, &quota);
+ if (err != 0)
return (B_FALSE);
- return (zfs_fuid_overquota(zsb, isgroup, fuid));
+ err = zap_lookup(zfsvfs->z_os, usedobj, buf, 8, 1, &used);
+ if (err != 0)
+ return (B_FALSE);
+ return (used >= quota);
}
-EXPORT_SYMBOL(zfs_owner_overquota);
-zfs_mntopts_t *
-zfs_mntopts_alloc(void)
+boolean_t
+zfs_owner_overquota(zfsvfs_t *zfsvfs, znode_t *zp, boolean_t isgroup)
{
- return (kmem_zalloc(sizeof (zfs_mntopts_t), KM_SLEEP));
-}
+ uint64_t fuid;
+ uint64_t quotaobj;
+ struct inode *ip = ZTOI(zp);
-void
-zfs_mntopts_free(zfs_mntopts_t *zmo)
-{
- if (zmo->z_osname)
- strfree(zmo->z_osname);
+ quotaobj = isgroup ? zfsvfs->z_groupquota_obj : zfsvfs->z_userquota_obj;
- if (zmo->z_mntpoint)
- strfree(zmo->z_mntpoint);
+ fuid = isgroup ? KGID_TO_SGID(ip->i_gid) : KUID_TO_SUID(ip->i_uid);
- kmem_free(zmo, sizeof (zfs_mntopts_t));
+ if (quotaobj == 0 || zfsvfs->z_replay)
+ return (B_FALSE);
+
+ return (zfs_fuid_overquota(zfsvfs, isgroup, fuid));
}
-int
-zfs_sb_create(const char *osname, zfs_mntopts_t *zmo, zfs_sb_t **zsbp)
+/*
+ * Associate this zfsvfs with the given objset, which must be owned.
+ * This will cache a bunch of on-disk state from the objset in the
+ * zfsvfs.
+ */
+static int
+zfsvfs_init(zfsvfs_t *zfsvfs, objset_t *os)
{
- objset_t *os;
- zfs_sb_t *zsb;
- uint64_t zval;
- int i, size, error;
- uint64_t sa_obj;
-
- zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP);
-
- /*
- * Optional temporary mount options, free'd in zfs_sb_free().
- */
- zsb->z_mntopts = (zmo ? zmo : zfs_mntopts_alloc());
-
- /*
- * We claim to always be readonly so we can open snapshots;
- * other ZPL code will prevent us from writing to snapshots.
- */
- error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zsb, &os);
- if (error)
- goto out_zmo;
+ int error;
+ uint64_t val;
- /*
- * Initialize the zfs-specific filesystem structure.
- * Should probably make this a kmem cache, shuffle fields.
- */
- zsb->z_sb = NULL;
- zsb->z_parent = zsb;
- zsb->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
- zsb->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
- zsb->z_os = os;
+ zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
+ zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
+ zfsvfs->z_os = os;
- error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zsb->z_version);
- if (error) {
- goto out;
- } else if (zsb->z_version > ZPL_VERSION) {
- error = SET_ERROR(ENOTSUP);
- goto out;
+ error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version);
+ if (error != 0)
+ return (error);
+ if (zfsvfs->z_version >
+ zfs_zpl_version_map(spa_version(dmu_objset_spa(os)))) {
+ (void) printk("Can't mount a version %lld file system "
+ "on a version %lld pool\n. Pool must be upgraded to mount "
+ "this file system.", (u_longlong_t)zfsvfs->z_version,
+ (u_longlong_t)spa_version(dmu_objset_spa(os)));
+ return (SET_ERROR(ENOTSUP));
}
- if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0)
- goto out;
- zsb->z_norm = (int)zval;
+ error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &val);
+ if (error != 0)
+ return (error);
+ zfsvfs->z_norm = (int)val;
- if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0)
- goto out;
- zsb->z_utf8 = (zval != 0);
+ error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &val);
+ if (error != 0)
+ return (error);
+ zfsvfs->z_utf8 = (val != 0);
- if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0)
- goto out;
- zsb->z_case = (uint_t)zval;
+ error = zfs_get_zplprop(os, ZFS_PROP_CASE, &val);
+ if (error != 0)
+ return (error);
+ zfsvfs->z_case = (uint_t)val;
- if ((error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &zval)) != 0)
- goto out;
- zsb->z_acl_type = (uint_t)zval;
+ if ((error = zfs_get_zplprop(os, ZFS_PROP_ACLTYPE, &val)) != 0)
+ return (error);
+ zfsvfs->z_acl_type = (uint_t)val;
/*
* Fold case on file systems that are always or sometimes case
* insensitive.
*/
- if (zsb->z_case == ZFS_CASE_INSENSITIVE ||
- zsb->z_case == ZFS_CASE_MIXED)
- zsb->z_norm |= U8_TEXTPREP_TOUPPER;
+ if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
+ zfsvfs->z_case == ZFS_CASE_MIXED)
+ zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
- zsb->z_use_fuids = USE_FUIDS(zsb->z_version, zsb->z_os);
- zsb->z_use_sa = USE_SA(zsb->z_version, zsb->z_os);
+ zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
+ zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
- if (zsb->z_use_sa) {
+ uint64_t sa_obj = 0;
+ if (zfsvfs->z_use_sa) {
/* should either have both of these objects or none */
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1,
&sa_obj);
- if (error)
- goto out;
+ if (error != 0)
+ return (error);
- error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &zval);
- if ((error == 0) && (zval == ZFS_XATTR_SA))
- zsb->z_xattr_sa = B_TRUE;
- } else {
- /*
- * Pre SA versions file systems should never touch
- * either the attribute registration or layout objects.
- */
- sa_obj = 0;
+ error = zfs_get_zplprop(os, ZFS_PROP_XATTR, &val);
+ if ((error == 0) && (val == ZFS_XATTR_SA))
+ zfsvfs->z_xattr_sa = B_TRUE;
}
error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
- &zsb->z_attr_table);
- if (error)
- goto out;
+ &zfsvfs->z_attr_table);
+ if (error != 0)
+ return (error);
- if (zsb->z_version >= ZPL_VERSION_SA)
+ if (zfsvfs->z_version >= ZPL_VERSION_SA)
sa_register_update_callback(os, zfs_sa_upgrade);
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1,
- &zsb->z_root);
- if (error)
- goto out;
- ASSERT(zsb->z_root != 0);
+ &zfsvfs->z_root);
+ if (error != 0)
+ return (error);
+ ASSERT(zfsvfs->z_root != 0);
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1,
- &zsb->z_unlinkedobj);
- if (error)
- goto out;
+ &zfsvfs->z_unlinkedobj);
+ if (error != 0)
+ return (error);
error = zap_lookup(os, MASTER_NODE_OBJ,
zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA],
- 8, 1, &zsb->z_userquota_obj);
- if (error && error != ENOENT)
- goto out;
+ 8, 1, &zfsvfs->z_userquota_obj);
+ if (error == ENOENT)
+ zfsvfs->z_userquota_obj = 0;
+ else if (error != 0)
+ return (error);
error = zap_lookup(os, MASTER_NODE_OBJ,
zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA],
- 8, 1, &zsb->z_groupquota_obj);
- if (error && error != ENOENT)
- goto out;
+ 8, 1, &zfsvfs->z_groupquota_obj);
+ if (error == ENOENT)
+ zfsvfs->z_groupquota_obj = 0;
+ else if (error != 0)
+ return (error);
+
+ error = zap_lookup(os, MASTER_NODE_OBJ,
+ zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA],
+ 8, 1, &zfsvfs->z_userobjquota_obj);
+ if (error == ENOENT)
+ zfsvfs->z_userobjquota_obj = 0;
+ else if (error != 0)
+ return (error);
+
+ error = zap_lookup(os, MASTER_NODE_OBJ,
+ zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA],
+ 8, 1, &zfsvfs->z_groupobjquota_obj);
+ if (error == ENOENT)
+ zfsvfs->z_groupobjquota_obj = 0;
+ else if (error != 0)
+ return (error);
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1,
- &zsb->z_fuid_obj);
- if (error && error != ENOENT)
- goto out;
+ &zfsvfs->z_fuid_obj);
+ if (error == ENOENT)
+ zfsvfs->z_fuid_obj = 0;
+ else if (error != 0)
+ return (error);
error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_SHARES_DIR, 8, 1,
- &zsb->z_shares_dir);
- if (error && error != ENOENT)
- goto out;
+ &zfsvfs->z_shares_dir);
+ if (error == ENOENT)
+ zfsvfs->z_shares_dir = 0;
+ else if (error != 0)
+ return (error);
- mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
- mutex_init(&zsb->z_lock, NULL, MUTEX_DEFAULT, NULL);
- list_create(&zsb->z_all_znodes, sizeof (znode_t),
+ return (0);
+}
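All the master-node lookups above share one convention worth calling out:
ENOENT is not a mount failure, it just means the corresponding object
(quota object, FUID table, shares directory) was never created, so a zero
object number is cached instead. A hypothetical helper, shown only to make
the pattern explicit (it is not part of the patch):

    static int
    zfsvfs_lookup_optional(objset_t *os, const char *name, uint64_t *objp)
    {
            int error = zap_lookup(os, MASTER_NODE_OBJ, name, 8, 1, objp);

            /* Absent entry: feature unused, not an error. */
            if (error == ENOENT) {
                    *objp = 0;
                    error = 0;
            }
            return (error);
    }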
+
+int
+zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
+{
+ objset_t *os;
+ zfsvfs_t *zfsvfs;
+ int error;
+
+ zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
+
+ /*
+ * We claim to always be readonly so we can open snapshots;
+ * other ZPL code will prevent us from writing to snapshots.
+ */
+ error = dmu_objset_own(osname, DMU_OST_ZFS, B_TRUE, zfsvfs, &os);
+ if (error) {
+ kmem_free(zfsvfs, sizeof (zfsvfs_t));
+ return (error);
+ }
+
+ zfsvfs->z_vfs = NULL;
+ zfsvfs->z_sb = NULL;
+ zfsvfs->z_parent = zfsvfs;
+
+ mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&zfsvfs->z_lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
offsetof(znode_t, z_link_node));
- rrm_init(&zsb->z_teardown_lock, B_FALSE);
- rw_init(&zsb->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
- rw_init(&zsb->z_fuid_lock, NULL, RW_DEFAULT, NULL);
-
- size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX);
- zsb->z_hold_size = size;
- zsb->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size, KM_SLEEP);
- zsb->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
- for (i = 0; i != size; i++) {
- avl_create(&zsb->z_hold_trees[i], zfs_znode_hold_compare,
+ rrm_init(&zfsvfs->z_teardown_lock, B_FALSE);
+ rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
+ rw_init(&zfsvfs->z_fuid_lock, NULL, RW_DEFAULT, NULL);
+
+ int size = MIN(1 << (highbit64(zfs_object_mutex_size) - 1),
+ ZFS_OBJ_MTX_MAX);
+ zfsvfs->z_hold_size = size;
+ zfsvfs->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size,
+ KM_SLEEP);
+ zfsvfs->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
+ for (int i = 0; i != size; i++) {
+ avl_create(&zfsvfs->z_hold_trees[i], zfs_znode_hold_compare,
sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
- mutex_init(&zsb->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&zfsvfs->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
}
- *zsbp = zsb;
- return (0);
+ error = zfsvfs_init(zfsvfs, os);
+ if (error != 0) {
+ dmu_objset_disown(os, zfsvfs);
+ *zfvp = NULL;
+ kmem_free(zfsvfs, sizeof (zfsvfs_t));
+ return (error);
+ }
-out:
- dmu_objset_disown(os, zsb);
-out_zmo:
- *zsbp = NULL;
- zfs_mntopts_free(zsb->z_mntopts);
- kmem_free(zsb, sizeof (zfs_sb_t));
- return (error);
+ *zfvp = zfsvfs;
+ return (0);
}
-EXPORT_SYMBOL(zfs_sb_create);
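The expected pairing, mirroring how zfs_domount() and zfs_umount() below use
it (a sketch with abbreviated error handling; the caller and dataset name are
placeholders):

    static int
    example_open_and_close(const char *osname)
    {
            zfsvfs_t *zfsvfs;
            int error;

            error = zfsvfs_create(osname, &zfsvfs);
            if (error != 0)
                    return (error);
            /* ... use zfsvfs->z_os, zfsvfs->z_root, ... */
            dmu_objset_disown(zfsvfs->z_os, zfsvfs);
            zfsvfs_free(zfsvfs);
            return (0);
    }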
-int
-zfs_sb_setup(zfs_sb_t *zsb, boolean_t mounting)
+static int
+zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
{
int error;
- error = zfs_register_callbacks(zsb);
+ error = zfs_register_callbacks(zfsvfs->z_vfs);
if (error)
return (error);
- /*
- * Set the objset user_ptr to track its zsb.
- */
- mutex_enter(&zsb->z_os->os_user_ptr_lock);
- dmu_objset_set_user(zsb->z_os, zsb);
- mutex_exit(&zsb->z_os->os_user_ptr_lock);
-
- zsb->z_log = zil_open(zsb->z_os, zfs_get_data);
+ zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
/*
* If we are not mounting (ie: online recv), then we don't
@@ -876,11 +1113,11 @@ zfs_sb_setup(zfs_sb_t *zsb, boolean_t mounting)
* During replay we remove the read only flag to
* allow replays to succeed.
*/
- readonly = zfs_is_readonly(zsb);
+ readonly = zfs_is_readonly(zfsvfs);
if (readonly != 0)
- readonly_changed_cb(zsb, B_FALSE);
+ readonly_changed_cb(zfsvfs, B_FALSE);
else
- zfs_unlinked_drain(zsb);
+ zfs_unlinked_drain(zfsvfs);
/*
* Parse and replay the intent log.
@@ -909,109 +1146,70 @@ zfs_sb_setup(zfs_sb_t *zsb, boolean_t mounting)
* allocated and in the unlinked set, and there is an
* intent log record saying to allocate it.
*/
- if (spa_writeable(dmu_objset_spa(zsb->z_os))) {
+ if (spa_writeable(dmu_objset_spa(zfsvfs->z_os))) {
if (zil_replay_disable) {
- zil_destroy(zsb->z_log, B_FALSE);
+ zil_destroy(zfsvfs->z_log, B_FALSE);
} else {
- zsb->z_replay = B_TRUE;
- zil_replay(zsb->z_os, zsb,
+ zfsvfs->z_replay = B_TRUE;
+ zil_replay(zfsvfs->z_os, zfsvfs,
zfs_replay_vector);
- zsb->z_replay = B_FALSE;
+ zfsvfs->z_replay = B_FALSE;
}
}
/* restore readonly bit */
if (readonly != 0)
- readonly_changed_cb(zsb, B_TRUE);
+ readonly_changed_cb(zfsvfs, B_TRUE);
}
+ /*
+ * Set the objset user_ptr to track its zfsvfs.
+ */
+ mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
+ dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
+ mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
+
return (0);
}
-EXPORT_SYMBOL(zfs_sb_setup);
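Note the ordering change: dmu_objset_set_user() now runs only after ZIL
replay has finished, so a thread that resolves the objset's user pointer can
never see a half-constructed zfsvfs. The reader side of that handshake (the
same shape zfs_get_vfs_flag_unmounted() uses later in this file; a sketch,
assuming 'os' is a held ZFS objset):

    static zfsvfs_t *
    example_resolve_zfsvfs(objset_t *os)
    {
            zfsvfs_t *zfvp;

            mutex_enter(&os->os_user_ptr_lock);
            zfvp = dmu_objset_get_user(os); /* NULL until setup is done */
            mutex_exit(&os->os_user_ptr_lock);
            return (zfvp);
    }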
void
-zfs_sb_free(zfs_sb_t *zsb)
+zfsvfs_free(zfsvfs_t *zfsvfs)
{
- int i, size = zsb->z_hold_size;
+ int i, size = zfsvfs->z_hold_size;
- zfs_fuid_destroy(zsb);
+ zfs_fuid_destroy(zfsvfs);
- mutex_destroy(&zsb->z_znodes_lock);
- mutex_destroy(&zsb->z_lock);
- list_destroy(&zsb->z_all_znodes);
- rrm_destroy(&zsb->z_teardown_lock);
- rw_destroy(&zsb->z_teardown_inactive_lock);
- rw_destroy(&zsb->z_fuid_lock);
+ mutex_destroy(&zfsvfs->z_znodes_lock);
+ mutex_destroy(&zfsvfs->z_lock);
+ list_destroy(&zfsvfs->z_all_znodes);
+ rrm_destroy(&zfsvfs->z_teardown_lock);
+ rw_destroy(&zfsvfs->z_teardown_inactive_lock);
+ rw_destroy(&zfsvfs->z_fuid_lock);
for (i = 0; i != size; i++) {
- avl_destroy(&zsb->z_hold_trees[i]);
- mutex_destroy(&zsb->z_hold_locks[i]);
+ avl_destroy(&zfsvfs->z_hold_trees[i]);
+ mutex_destroy(&zfsvfs->z_hold_locks[i]);
}
- vmem_free(zsb->z_hold_trees, sizeof (avl_tree_t) * size);
- vmem_free(zsb->z_hold_locks, sizeof (kmutex_t) * size);
- zfs_mntopts_free(zsb->z_mntopts);
- kmem_free(zsb, sizeof (zfs_sb_t));
+ vmem_free(zfsvfs->z_hold_trees, sizeof (avl_tree_t) * size);
+ vmem_free(zfsvfs->z_hold_locks, sizeof (kmutex_t) * size);
+ zfsvfs_vfs_free(zfsvfs->z_vfs);
+ kmem_free(zfsvfs, sizeof (zfsvfs_t));
}
-EXPORT_SYMBOL(zfs_sb_free);
static void
-zfs_set_fuid_feature(zfs_sb_t *zsb)
+zfs_set_fuid_feature(zfsvfs_t *zfsvfs)
{
- zsb->z_use_fuids = USE_FUIDS(zsb->z_version, zsb->z_os);
- zsb->z_use_sa = USE_SA(zsb->z_version, zsb->z_os);
+ zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
+ zfsvfs->z_use_sa = USE_SA(zfsvfs->z_version, zfsvfs->z_os);
}
void
-zfs_unregister_callbacks(zfs_sb_t *zsb)
+zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
{
- objset_t *os = zsb->z_os;
- struct dsl_dataset *ds;
-
- /*
- * Unregister properties.
- */
- if (!dmu_objset_is_snapshot(os)) {
- ds = dmu_objset_ds(os);
- VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "relatime", relatime_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb,
- zsb) == 0);
+ objset_t *os = zfsvfs->z_os;
- VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "acltype", acltype_changed_cb,
- zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "aclinherit",
- acl_inherit_changed_cb, zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "vscan",
- vscan_changed_cb, zsb) == 0);
-
- VERIFY(dsl_prop_unregister(ds, "nbmand",
- nbmand_changed_cb, zsb) == 0);
- }
+ if (!dmu_objset_is_snapshot(os))
+ dsl_prop_unregister_all(dmu_objset_ds(os), zfsvfs);
}
-EXPORT_SYMBOL(zfs_unregister_callbacks);
#ifdef HAVE_MLSLABEL
/*
@@ -1040,23 +1238,22 @@ zfs_check_global_label(const char *dsname, const char *hexsl)
}
return (SET_ERROR(EACCES));
}
-EXPORT_SYMBOL(zfs_check_global_label);
#endif /* HAVE_MLSLABEL */
int
zfs_statvfs(struct dentry *dentry, struct kstatfs *statp)
{
- zfs_sb_t *zsb = dentry->d_sb->s_fs_info;
+ zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
uint64_t refdbytes, availbytes, usedobjs, availobjs;
uint64_t fsid;
uint32_t bshift;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
- dmu_objset_space(zsb->z_os,
+ dmu_objset_space(zfsvfs->z_os,
&refdbytes, &availbytes, &usedobjs, &availobjs);
- fsid = dmu_objset_fsid_guid(zsb->z_os);
+ fsid = dmu_objset_fsid_guid(zfsvfs->z_os);
/*
* The underlying storage pool actually uses multiple block
* sizes. Under Solaris frsize (fragment size) is reported as
@@ -1066,8 +1263,8 @@ zfs_statvfs(struct dentry *dentry, struct kstatfs *statp)
* interchangeably. Thus we are forced to report both of them
* as the filesystem's maximum block size.
*/
- statp->f_frsize = zsb->z_max_blksz;
- statp->f_bsize = zsb->z_max_blksz;
+ statp->f_frsize = zfsvfs->z_max_blksz;
+ statp->f_bsize = zfsvfs->z_max_blksz;
bshift = fls(statp->f_bsize) - 1;
/*
@@ -1093,7 +1290,7 @@ zfs_statvfs(struct dentry *dentry, struct kstatfs *statp)
statp->f_fsid.val[0] = (uint32_t)fsid;
statp->f_fsid.val[1] = (uint32_t)(fsid >> 32);
statp->f_type = ZFS_SUPER_MAGIC;
- statp->f_namelen = ZFS_MAXNAMELEN;
+ statp->f_namelen = MAXNAMELEN - 1;
/*
* We have all of 40 characters to stuff a string here.
@@ -1101,27 +1298,25 @@ zfs_statvfs(struct dentry *dentry, struct kstatfs *statp)
*/
bzero(statp->f_spare, sizeof (statp->f_spare));
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
-EXPORT_SYMBOL(zfs_statvfs);
int
-zfs_root(zfs_sb_t *zsb, struct inode **ipp)
+zfs_root(zfsvfs_t *zfsvfs, struct inode **ipp)
{
znode_t *rootzp;
int error;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
- error = zfs_zget(zsb, zsb->z_root, &rootzp);
+ error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
if (error == 0)
*ipp = ZTOI(rootzp);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_root);
#ifdef HAVE_D_PRUNE_ALIASES
/*
@@ -1134,7 +1329,7 @@ EXPORT_SYMBOL(zfs_root);
* end of the list so we're always scanning the oldest znodes first.
*/
static int
-zfs_sb_prune_aliases(zfs_sb_t *zsb, unsigned long nr_to_scan)
+zfs_prune_aliases(zfsvfs_t *zfsvfs, unsigned long nr_to_scan)
{
znode_t **zp_array, *zp;
int max_array = MIN(nr_to_scan, PAGE_SIZE * 8 / sizeof (znode_t *));
@@ -1143,15 +1338,15 @@ zfs_sb_prune_aliases(zfs_sb_t *zsb, unsigned long nr_to_scan)
zp_array = kmem_zalloc(max_array * sizeof (znode_t *), KM_SLEEP);
- mutex_enter(&zsb->z_znodes_lock);
- while ((zp = list_head(&zsb->z_all_znodes)) != NULL) {
+ mutex_enter(&zfsvfs->z_znodes_lock);
+ while ((zp = list_head(&zfsvfs->z_all_znodes)) != NULL) {
if ((i++ > nr_to_scan) || (j >= max_array))
break;
ASSERT(list_link_active(&zp->z_link_node));
- list_remove(&zsb->z_all_znodes, zp);
- list_insert_tail(&zsb->z_all_znodes, zp);
+ list_remove(&zfsvfs->z_all_znodes, zp);
+ list_insert_tail(&zfsvfs->z_all_znodes, zp);
/* Skip active znodes and .zfs entries */
if (MUTEX_HELD(&zp->z_lock) || zp->z_is_ctldir)
@@ -1163,7 +1358,7 @@ zfs_sb_prune_aliases(zfs_sb_t *zsb, unsigned long nr_to_scan)
zp_array[j] = zp;
j++;
}
- mutex_exit(&zsb->z_znodes_lock);
+ mutex_exit(&zfsvfs->z_znodes_lock);
for (i = 0; i < j; i++) {
zp = zp_array[i];
@@ -1189,9 +1384,9 @@ zfs_sb_prune_aliases(zfs_sb_t *zsb, unsigned long nr_to_scan)
* blocks but can't because they are all pinned by entries in these caches.
*/
int
-zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
+zfs_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
{
- zfs_sb_t *zsb = sb->s_fs_info;
+ zfsvfs_t *zfsvfs = sb->s_fs_info;
int error = 0;
#if defined(HAVE_SHRINK) || defined(HAVE_SPLIT_SHRINKER_CALLBACK)
struct shrinker *shrinker = &sb->s_shrink;
@@ -1201,15 +1396,16 @@ zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
};
#endif
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
#if defined(HAVE_SPLIT_SHRINKER_CALLBACK) && \
defined(SHRINK_CONTROL_HAS_NID) && \
defined(SHRINKER_NUMA_AWARE)
if (sb->s_shrink.flags & SHRINKER_NUMA_AWARE) {
*objects = 0;
- for_each_online_node(sc.nid)
+ for_each_online_node(sc.nid) {
*objects += (*shrinker->scan_objects)(shrinker, &sc);
+ }
} else {
*objects = (*shrinker->scan_objects)(shrinker, &sc);
}
@@ -1220,7 +1416,7 @@ zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
*objects = (*shrinker->shrink)(shrinker, &sc);
#elif defined(HAVE_D_PRUNE_ALIASES)
#define D_PRUNE_ALIASES_IS_DEFAULT
- *objects = zfs_sb_prune_aliases(zsb, nr_to_scan);
+ *objects = zfs_prune_aliases(zfsvfs, nr_to_scan);
#else
#error "No available dentry and inode cache pruning mechanism."
#endif
@@ -1228,41 +1424,40 @@ zfs_sb_prune(struct super_block *sb, unsigned long nr_to_scan, int *objects)
#if defined(HAVE_D_PRUNE_ALIASES) && !defined(D_PRUNE_ALIASES_IS_DEFAULT)
#undef D_PRUNE_ALIASES_IS_DEFAULT
/*
- * Fall back to zfs_sb_prune_aliases if the kernel's per-superblock
+ * Fall back to zfs_prune_aliases if the kernel's per-superblock
* shrinker couldn't free anything, possibly due to the inodes being
* allocated in a different memcg.
*/
if (*objects == 0)
- *objects = zfs_sb_prune_aliases(zsb, nr_to_scan);
+ *objects = zfs_prune_aliases(zfsvfs, nr_to_scan);
#endif
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
- dprintf_ds(zsb->z_os->os_dsl_dataset,
+ dprintf_ds(zfsvfs->z_os->os_dsl_dataset,
"pruning, nr_to_scan=%lu objects=%d error=%d\n",
nr_to_scan, *objects, error);
return (error);
}
-EXPORT_SYMBOL(zfs_sb_prune);
/*
- * Teardown the zfs_sb_t.
+ * Teardown the zfsvfs_t.
*
- * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock'
+ * Note, if 'unmounting' is FALSE, we return with the 'z_teardown_lock'
* and 'z_teardown_inactive_lock' held.
*/
-int
-zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
+static int
+zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
{
znode_t *zp;
/*
* If someone has not already unmounted this file system,
* drain the iput_taskq to ensure all active references to the
- * zfs_sb_t have been handled only then can it be safely destroyed.
+ * zfsvfs_t have been handled; only then can it be safely destroyed.
*/
- if (zsb->z_os) {
+ if (zfsvfs->z_os) {
/*
* If we're unmounting we have to wait for the list to
* drain completely.
@@ -1277,15 +1472,15 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
* z_all_znodes list and thus increment z_nr_znodes.
*/
int round = 0;
- while (zsb->z_nr_znodes > 0) {
+ while (zfsvfs->z_nr_znodes > 0) {
taskq_wait_outstanding(dsl_pool_iput_taskq(
- dmu_objset_pool(zsb->z_os)), 0);
+ dmu_objset_pool(zfsvfs->z_os)), 0);
if (++round > 1 && !unmounting)
break;
}
}
- rrm_enter(&zsb->z_teardown_lock, RW_WRITER, FTAG);
+ rrm_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
if (!unmounting) {
/*
@@ -1295,28 +1490,28 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
* super block. Note, 'z_parent' is self referential
* for non-snapshots.
*/
- shrink_dcache_sb(zsb->z_parent->z_sb);
+ shrink_dcache_sb(zfsvfs->z_parent->z_sb);
}
/*
* Close the zil. NB: Can't close the zil while zfs_inactive
* threads are blocked as zil_close can call zfs_inactive.
*/
- if (zsb->z_log) {
- zil_close(zsb->z_log);
- zsb->z_log = NULL;
+ if (zfsvfs->z_log) {
+ zil_close(zfsvfs->z_log);
+ zfsvfs->z_log = NULL;
}
- rw_enter(&zsb->z_teardown_inactive_lock, RW_WRITER);
+ rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);
/*
* If we are not unmounting (ie: online recv) and someone already
* unmounted this file system while we were doing the switcheroo,
* or a reopen of z_os failed then just bail out now.
*/
- if (!unmounting && (zsb->z_unmounted || zsb->z_os == NULL)) {
- rw_exit(&zsb->z_teardown_inactive_lock);
- rrm_exit(&zsb->z_teardown_lock, FTAG);
+ if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
+ rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
return (SET_ERROR(EIO));
}
@@ -1328,13 +1523,13 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
* Release all holds on dbufs.
*/
if (!unmounting) {
- mutex_enter(&zsb->z_znodes_lock);
- for (zp = list_head(&zsb->z_all_znodes); zp != NULL;
- zp = list_next(&zsb->z_all_znodes, zp)) {
+ mutex_enter(&zfsvfs->z_znodes_lock);
+ for (zp = list_head(&zfsvfs->z_all_znodes); zp != NULL;
+ zp = list_next(&zfsvfs->z_all_znodes, zp)) {
if (zp->z_sa_hdl)
zfs_znode_dmu_fini(zp);
}
- mutex_exit(&zsb->z_znodes_lock);
+ mutex_exit(&zfsvfs->z_znodes_lock);
}
/*
@@ -1343,36 +1538,35 @@ zfs_sb_teardown(zfs_sb_t *zsb, boolean_t unmounting)
* other VFS ops will fail with EIO.
*/
if (unmounting) {
- zsb->z_unmounted = B_TRUE;
- rrm_exit(&zsb->z_teardown_lock, FTAG);
- rw_exit(&zsb->z_teardown_inactive_lock);
+ zfsvfs->z_unmounted = B_TRUE;
+ rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
}
/*
* z_os will be NULL if there was an error in attempting to reopen
- * zsb, so just return as the properties had already been
+ * zfsvfs, so just return as the properties had already been
* unregistered and cached data had been evicted before.
*/
- if (zsb->z_os == NULL)
+ if (zfsvfs->z_os == NULL)
return (0);
/*
* Unregister properties.
*/
- zfs_unregister_callbacks(zsb);
+ zfs_unregister_callbacks(zfsvfs);
/*
* Evict cached data
*/
- if (dsl_dataset_is_dirty(dmu_objset_ds(zsb->z_os)) &&
- !zfs_is_readonly(zsb))
- txg_wait_synced(dmu_objset_pool(zsb->z_os), 0);
- dmu_objset_evict_dbufs(zsb->z_os);
+ if (dsl_dataset_is_dirty(dmu_objset_ds(zfsvfs->z_os)) &&
+ !zfs_is_readonly(zfsvfs))
+ txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
+ dmu_objset_evict_dbufs(zfsvfs->z_os);
return (0);
}
-EXPORT_SYMBOL(zfs_sb_teardown);
#if !defined(HAVE_2ARGS_BDI_SETUP_AND_REGISTER) && \
!defined(HAVE_3ARGS_BDI_SETUP_AND_REGISTER)
@@ -1380,24 +1574,32 @@ atomic_long_t zfs_bdi_seq = ATOMIC_LONG_INIT(0);
#endif
int
-zfs_domount(struct super_block *sb, zfs_mntopts_t *zmo, int silent)
+zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
{
- const char *osname = zmo->z_osname;
- zfs_sb_t *zsb;
+ const char *osname = zm->mnt_osname;
struct inode *root_inode;
uint64_t recordsize;
- int error;
+ int error = 0;
+ zfsvfs_t *zfsvfs;
+
+ ASSERT(zm);
+ ASSERT(osname);
- error = zfs_sb_create(osname, zmo, &zsb);
+ error = zfsvfs_create(osname, &zfsvfs);
if (error)
return (error);
+ error = zfsvfs_parse_options(zm->mnt_data, &zfsvfs->z_vfs);
+ if (error)
+ goto out;
+
if ((error = dsl_prop_get_integer(osname, "recordsize",
&recordsize, NULL)))
goto out;
- zsb->z_sb = sb;
- sb->s_fs_info = zsb;
+ zfsvfs->z_vfs->vfs_data = zfsvfs;
+ zfsvfs->z_sb = sb;
+ sb->s_fs_info = zfsvfs;
sb->s_magic = ZFS_SUPER_MAGIC;
sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_time_gran = 1;
@@ -1419,34 +1621,35 @@ zfs_domount(struct super_block *sb, zfs_mntopts_t *zmo, int silent)
#endif /* HAVE_S_D_OP */
/* Set features for file system. */
- zfs_set_fuid_feature(zsb);
+ zfs_set_fuid_feature(zfsvfs);
- if (dmu_objset_is_snapshot(zsb->z_os)) {
+ if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
uint64_t pval;
- atime_changed_cb(zsb, B_FALSE);
- readonly_changed_cb(zsb, B_TRUE);
+ atime_changed_cb(zfsvfs, B_FALSE);
+ readonly_changed_cb(zfsvfs, B_TRUE);
if ((error = dsl_prop_get_integer(osname,
"xattr", &pval, NULL)))
goto out;
- xattr_changed_cb(zsb, pval);
+ xattr_changed_cb(zfsvfs, pval);
if ((error = dsl_prop_get_integer(osname,
"acltype", &pval, NULL)))
goto out;
- acltype_changed_cb(zsb, pval);
- zsb->z_issnap = B_TRUE;
- zsb->z_os->os_sync = ZFS_SYNC_DISABLED;
- zsb->z_snap_defer_time = jiffies;
-
- mutex_enter(&zsb->z_os->os_user_ptr_lock);
- dmu_objset_set_user(zsb->z_os, zsb);
- mutex_exit(&zsb->z_os->os_user_ptr_lock);
+ acltype_changed_cb(zfsvfs, pval);
+ zfsvfs->z_issnap = B_TRUE;
+ zfsvfs->z_os->os_sync = ZFS_SYNC_DISABLED;
+ zfsvfs->z_snap_defer_time = jiffies;
+
+ mutex_enter(&zfsvfs->z_os->os_user_ptr_lock);
+ dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
+ mutex_exit(&zfsvfs->z_os->os_user_ptr_lock);
} else {
- error = zfs_sb_setup(zsb, B_TRUE);
+ if ((error = zfsvfs_setup(zfsvfs, B_TRUE)))
+ goto out;
}
/* Allocate a root inode for the filesystem. */
- error = zfs_root(zsb, &root_inode);
+ error = zfs_root(zfsvfs, &root_inode);
if (error) {
(void) zfs_umount(sb);
goto out;
@@ -1460,19 +1663,23 @@ zfs_domount(struct super_block *sb, zfs_mntopts_t *zmo, int silent)
goto out;
}
- if (!zsb->z_issnap)
- zfsctl_create(zsb);
+ if (!zfsvfs->z_issnap)
+ zfsctl_create(zfsvfs);
- zsb->z_arc_prune = arc_add_prune_callback(zpl_prune_sb, sb);
+ zfsvfs->z_arc_prune = arc_add_prune_callback(zpl_prune_sb, sb);
out:
if (error) {
- dmu_objset_disown(zsb->z_os, zsb);
- zfs_sb_free(zsb);
+ dmu_objset_disown(zfsvfs->z_os, zfsvfs);
+ zfsvfs_free(zfsvfs);
+ /*
+ * Make sure we don't have a dangling sb->s_fs_info which
+ * zfs_preumount will use.
+ */
+ sb->s_fs_info = NULL;
}
return (error);
}
-EXPORT_SYMBOL(zfs_domount);
/*
* Called when an unmount is requested and certain sanity checks have
@@ -1484,12 +1691,32 @@ EXPORT_SYMBOL(zfs_domount);
void
zfs_preumount(struct super_block *sb)
{
- zfs_sb_t *zsb = sb->s_fs_info;
+ zfsvfs_t *zfsvfs = sb->s_fs_info;
- if (zsb)
+ /* zfsvfs is NULL when zfs_domount fails during mount */
+ if (zfsvfs) {
zfsctl_destroy(sb->s_fs_info);
+ /*
+ * Wait for iput_async before entering evict_inodes in
+ * generic_shutdown_super. The reason we must finish before
+ * evict_inodes is when lazytime is on, or when zfs_purgedir
+ * calls zfs_zget, iput would bump i_count from 0 to 1. This
+ * would race with the i_count check in evict_inodes. This means
+ * it could destroy the inode while we are still using it.
+ *
+ * We wait for two passes. xattr directories in the first pass
+ * may add xattr entries in zfs_purgedir, so in the second pass
+ * we wait for them. We don't use taskq_wait here because it is
+ * a pool wide taskq. Other mounted filesystems can constantly
+ * do iput_async and there's no guarantee when taskq will be
+ * empty.
+ */
+ taskq_wait_outstanding(dsl_pool_iput_taskq(
+ dmu_objset_pool(zfsvfs->z_os)), 0);
+ taskq_wait_outstanding(dsl_pool_iput_taskq(
+ dmu_objset_pool(zfsvfs->z_os)), 0);
+ }
}
-EXPORT_SYMBOL(zfs_preumount);
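The doubled wait is deliberate, per the comment: a sketch of why a bounded,
two-pass drain is used instead of taskq_wait() (illustrative helper only;
'tq' stands in for the pool-wide iput taskq):

    static void
    example_bounded_drain(taskq_t *tq)
    {
            /*
             * taskq_wait() waits for the queue to go idle, which a
             * pool-wide queue shared with other mounts may never do.
             * Waiting on id 0 covers only tasks dispatched before the
             * call, so each pass is bounded.
             */
            taskq_wait_outstanding(tq, 0);  /* pass 1: may requeue iputs */
            taskq_wait_outstanding(tq, 0);  /* pass 2: catch the requeues */
    }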
/*
* Called once all other unmount released tear down has occurred.
@@ -1499,17 +1726,17 @@ EXPORT_SYMBOL(zfs_preumount);
int
zfs_umount(struct super_block *sb)
{
- zfs_sb_t *zsb = sb->s_fs_info;
+ zfsvfs_t *zfsvfs = sb->s_fs_info;
objset_t *os;
- arc_remove_prune_callback(zsb->z_arc_prune);
- VERIFY(zfs_sb_teardown(zsb, B_TRUE) == 0);
- os = zsb->z_os;
+ arc_remove_prune_callback(zfsvfs->z_arc_prune);
+ VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
+ os = zfsvfs->z_os;
zpl_bdi_destroy(sb);
/*
* z_os will be NULL if there was an error in
- * attempting to reopen zsb.
+ * attempting to reopen zfsvfs.
*/
if (os != NULL) {
/*
@@ -1522,31 +1749,46 @@ zfs_umount(struct super_block *sb)
/*
* Finally release the objset
*/
- dmu_objset_disown(os, zsb);
+ dmu_objset_disown(os, zfsvfs);
}
- zfs_sb_free(zsb);
+ zfsvfs_free(zfsvfs);
return (0);
}
-EXPORT_SYMBOL(zfs_umount);
int
-zfs_remount(struct super_block *sb, int *flags, zfs_mntopts_t *zmo)
+zfs_remount(struct super_block *sb, int *flags, zfs_mnt_t *zm)
{
- zfs_sb_t *zsb = sb->s_fs_info;
+ zfsvfs_t *zfsvfs = sb->s_fs_info;
+ vfs_t *vfsp;
+ boolean_t issnap = dmu_objset_is_snapshot(zfsvfs->z_os);
int error;
- zfs_unregister_callbacks(zsb);
- error = zfs_register_callbacks(zsb);
+ if ((issnap || !spa_writeable(dmu_objset_spa(zfsvfs->z_os))) &&
+ !(*flags & MS_RDONLY)) {
+ *flags |= MS_RDONLY;
+ return (EROFS);
+ }
+
+ error = zfsvfs_parse_options(zm->mnt_data, &vfsp);
+ if (error)
+ return (error);
+
+ zfs_unregister_callbacks(zfsvfs);
+ zfsvfs_vfs_free(zfsvfs->z_vfs);
+
+ vfsp->vfs_data = zfsvfs;
+ zfsvfs->z_vfs = vfsp;
+ if (!issnap)
+ (void) zfs_register_callbacks(vfsp);
return (error);
}
-EXPORT_SYMBOL(zfs_remount);
int
zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
{
- zfs_sb_t *zsb = sb->s_fs_info;
+ zfsvfs_t *zfsvfs = sb->s_fs_info;
znode_t *zp;
uint64_t object = 0;
uint64_t fid_gen = 0;
@@ -1556,8 +1798,19 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
*ipp = NULL;
- ZFS_ENTER(zsb);
+ if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
+ zfid_short_t *zfid = (zfid_short_t *)fidp;
+
+ for (i = 0; i < sizeof (zfid->zf_object); i++)
+ object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
+
+ for (i = 0; i < sizeof (zfid->zf_gen); i++)
+ fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
+ } else {
+ return (SET_ERROR(EINVAL));
+ }
+ /* LONG_FID_LEN means snapdirs */
if (fidp->fid_len == LONG_FID_LEN) {
zfid_long_t *zlfid = (zfid_long_t *)fidp;
uint64_t objsetid = 0;
@@ -1569,32 +1822,28 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
- ZFS_EXIT(zsb);
+ if (objsetid != ZFSCTL_INO_SNAPDIRS - object) {
+ dprintf("snapdir fid: objsetid (%llu) != "
+ "ZFSCTL_INO_SNAPDIRS (%llu) - object (%llu)\n",
+ objsetid, ZFSCTL_INO_SNAPDIRS, object);
- err = zfsctl_lookup_objset(sb, objsetid, &zsb);
- if (err)
return (SET_ERROR(EINVAL));
+ }
- ZFS_ENTER(zsb);
- }
-
- if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
- zfid_short_t *zfid = (zfid_short_t *)fidp;
-
- for (i = 0; i < sizeof (zfid->zf_object); i++)
- object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
+ if (fid_gen > 1 || setgen != 0) {
+ dprintf("snapdir fid: fid_gen (%llu) and setgen "
+ "(%llu)\n", fid_gen, setgen);
+ return (SET_ERROR(EINVAL));
+ }
- for (i = 0; i < sizeof (zfid->zf_gen); i++)
- fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
- } else {
- ZFS_EXIT(zsb);
- return (SET_ERROR(EINVAL));
+ return (zfsctl_snapdir_vget(sb, objsetid, fid_gen, ipp));
}
+ ZFS_ENTER(zfsvfs);
/* A zero fid_gen means we are in the .zfs control directories */
if (fid_gen == 0 &&
(object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
- *ipp = zsb->z_ctldir;
+ *ipp = zfsvfs->z_ctldir;
ASSERT(*ipp != NULL);
if (object == ZFSCTL_INO_SNAPDIR) {
VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp,
@@ -1602,37 +1851,37 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
} else {
igrab(*ipp);
}
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
gen_mask = -1ULL >> (64 - 8 * i);
dprintf("getting %llu [%llu mask %llx]\n", object, fid_gen, gen_mask);
- if ((err = zfs_zget(zsb, object, &zp))) {
- ZFS_EXIT(zsb);
+ if ((err = zfs_zget(zfsvfs, object, &zp))) {
+ ZFS_EXIT(zfsvfs);
return (err);
}
/* Don't export xattr stuff */
if (zp->z_pflags & ZFS_XATTR) {
iput(ZTOI(zp));
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(ENOENT));
}
- (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zsb), &zp_gen,
+ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
sizeof (uint64_t));
zp_gen = zp_gen & gen_mask;
if (zp_gen == 0)
zp_gen = 1;
- if ((fid_gen == 0) && (zsb->z_root == object))
+ if ((fid_gen == 0) && (zfsvfs->z_root == object))
fid_gen = zp_gen;
if (zp->z_unlinked || zp_gen != fid_gen) {
dprintf("znode gen (%llu) != fid gen (%llu)\n", zp_gen,
fid_gen);
iput(ZTOI(zp));
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(ENOENT));
}
@@ -1640,13 +1889,12 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
if (*ipp)
zfs_inode_update(ITOZ(*ipp));
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
-EXPORT_SYMBOL(zfs_vget);
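The byte loops above reassemble the little-endian object and generation
fields of a FID. The same decode in standalone form (a sketch; the 6-byte
object / 4-byte generation split assumes the zfid_short_t layout):

    #include <stdint.h>

    static uint64_t
    decode_le(const uint8_t *b, int n)
    {
            uint64_t v = 0;
            int i;

            for (i = 0; i < n; i++)
                    v |= ((uint64_t)b[i]) << (8 * i);
            return (v);
    }

    /* object = decode_le(zfid->zf_object, 6); gen = decode_le(zfid->zf_gen, 4); */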
/*
- * Block out VFS ops and close zfs_sb_t
+ * Block out VFS ops and close zfsvfs_t
*
* Note, if successful, then we return with the 'z_teardown_lock' and
* 'z_teardown_inactive_lock' write held. We leave ownership of the underlying
@@ -1654,67 +1902,49 @@ EXPORT_SYMBOL(zfs_vget);
* a subsequent rollback or recv operation and the resume thereafter.
*/
int
-zfs_suspend_fs(zfs_sb_t *zsb)
+zfs_suspend_fs(zfsvfs_t *zfsvfs)
{
int error;
- if ((error = zfs_sb_teardown(zsb, B_FALSE)) != 0)
+ if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
return (error);
return (0);
}
-EXPORT_SYMBOL(zfs_suspend_fs);
/*
- * Reopen zfs_sb_t and release VFS ops.
+ * Rebuild SA and release VOPs. Note that ownership of the underlying dataset
+ * is an invariant across any of the operations that can be performed while the
+ * filesystem was suspended. Whether it succeeded or failed, the preconditions
+ * are the same: the relevant objset and associated dataset are owned by
+ * zfsvfs, held, and long held on entry.
*/
int
-zfs_resume_fs(zfs_sb_t *zsb, const char *osname)
+zfs_resume_fs(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
{
int err, err2;
znode_t *zp;
- uint64_t sa_obj = 0;
-
- ASSERT(RRM_WRITE_HELD(&zsb->z_teardown_lock));
- ASSERT(RW_WRITE_HELD(&zsb->z_teardown_inactive_lock));
- /*
- * We already own this, so just hold and rele it to update the
- * objset_t, as the one we had before may have been evicted.
- */
- VERIFY0(dmu_objset_hold(osname, zsb, &zsb->z_os));
- VERIFY3P(zsb->z_os->os_dsl_dataset->ds_owner, ==, zsb);
- VERIFY(dsl_dataset_long_held(zsb->z_os->os_dsl_dataset));
- dmu_objset_rele(zsb->z_os, zsb);
+ ASSERT(RRM_WRITE_HELD(&zfsvfs->z_teardown_lock));
+ ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
/*
- * Make sure version hasn't changed
+ * We already own this, so just update the objset_t, as the one we
+ * had before may have been evicted.
*/
+ objset_t *os;
+ VERIFY3P(ds->ds_owner, ==, zfsvfs);
+ VERIFY(dsl_dataset_long_held(ds));
+ VERIFY0(dmu_objset_from_ds(ds, &os));
- err = zfs_get_zplprop(zsb->z_os, ZFS_PROP_VERSION,
- &zsb->z_version);
-
- if (err)
- goto bail;
-
- err = zap_lookup(zsb->z_os, MASTER_NODE_OBJ,
- ZFS_SA_ATTRS, 8, 1, &sa_obj);
-
- if (err && zsb->z_version >= ZPL_VERSION_SA)
- goto bail;
-
- if ((err = sa_setup(zsb->z_os, sa_obj,
- zfs_attr_table, ZPL_END, &zsb->z_attr_table)) != 0)
+ err = zfsvfs_init(zfsvfs, os);
+ if (err != 0)
goto bail;
- if (zsb->z_version >= ZPL_VERSION_SA)
- sa_register_update_callback(zsb->z_os,
- zfs_sa_upgrade);
+ VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
- VERIFY(zfs_sb_setup(zsb, B_FALSE) == 0);
-
- zfs_set_fuid_feature(zsb);
- zsb->z_rollback_time = jiffies;
+ zfs_set_fuid_feature(zfsvfs);
+ zfsvfs->z_rollback_time = jiffies;
/*
* Attempt to re-establish all the active inodes with their
@@ -1725,54 +1955,53 @@ zfs_resume_fs(zfs_sb_t *zsb, const char *osname)
* VFS prunes the dentry holding the remaining references
* on the stale inode.
*/
- mutex_enter(&zsb->z_znodes_lock);
- for (zp = list_head(&zsb->z_all_znodes); zp;
- zp = list_next(&zsb->z_all_znodes, zp)) {
+ mutex_enter(&zfsvfs->z_znodes_lock);
+ for (zp = list_head(&zfsvfs->z_all_znodes); zp;
+ zp = list_next(&zfsvfs->z_all_znodes, zp)) {
err2 = zfs_rezget(zp);
if (err2) {
remove_inode_hash(ZTOI(zp));
zp->z_is_stale = B_TRUE;
}
}
- mutex_exit(&zsb->z_znodes_lock);
+ mutex_exit(&zfsvfs->z_znodes_lock);
bail:
/* release the VFS ops */
- rw_exit(&zsb->z_teardown_inactive_lock);
- rrm_exit(&zsb->z_teardown_lock, FTAG);
+ rw_exit(&zfsvfs->z_teardown_inactive_lock);
+ rrm_exit(&zfsvfs->z_teardown_lock, FTAG);
if (err) {
/*
* Since we couldn't setup the sa framework, try to force
* unmount this file system.
*/
- if (zsb->z_os)
- (void) zfs_umount(zsb->z_sb);
+ if (zfsvfs->z_os)
+ (void) zfs_umount(zfsvfs->z_sb);
}
return (err);
}
-EXPORT_SYMBOL(zfs_resume_fs);
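The suspend/resume pair is intended to bracket dataset-level operations such
as rollback or receive; a hypothetical caller (sketch; 'ds' must be owned and
long-held, per the invariant stated above):

    static int
    example_rollback(zfsvfs_t *zfsvfs, dsl_dataset_t *ds)
    {
            int error = zfs_suspend_fs(zfsvfs);

            if (error != 0)
                    return (error);
            /* ... dataset-level operation while VFS ops are blocked ... */
            return (zfs_resume_fs(zfsvfs, ds));
    }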
int
-zfs_set_version(zfs_sb_t *zsb, uint64_t newvers)
+zfs_set_version(zfsvfs_t *zfsvfs, uint64_t newvers)
{
int error;
- objset_t *os = zsb->z_os;
+ objset_t *os = zfsvfs->z_os;
dmu_tx_t *tx;
if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
return (SET_ERROR(EINVAL));
- if (newvers < zsb->z_version)
+ if (newvers < zfsvfs->z_version)
return (SET_ERROR(EINVAL));
if (zfs_spa_version_map(newvers) >
- spa_version(dmu_objset_spa(zsb->z_os)))
+ spa_version(dmu_objset_spa(zfsvfs->z_os)))
return (SET_ERROR(ENOTSUP));
tx = dmu_tx_create(os);
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_FALSE, ZPL_VERSION_STR);
- if (newvers >= ZPL_VERSION_SA && !zsb->z_use_sa) {
+ if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, B_TRUE,
ZFS_SA_ATTRS);
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
@@ -1791,10 +2020,10 @@ zfs_set_version(zfs_sb_t *zsb, uint64_t newvers)
return (error);
}
- if (newvers >= ZPL_VERSION_SA && !zsb->z_use_sa) {
+ if (newvers >= ZPL_VERSION_SA && !zfsvfs->z_use_sa) {
uint64_t sa_obj;
- ASSERT3U(spa_version(dmu_objset_spa(zsb->z_os)), >=,
+ ASSERT3U(spa_version(dmu_objset_spa(zfsvfs->z_os)), >=,
SPA_VERSION_SA);
sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE,
DMU_OT_NONE, 0, tx);
@@ -1808,17 +2037,16 @@ zfs_set_version(zfs_sb_t *zsb, uint64_t newvers)
}
spa_history_log_internal_ds(dmu_objset_ds(os), "upgrade", tx,
- "from %llu to %llu", zsb->z_version, newvers);
+ "from %llu to %llu", zfsvfs->z_version, newvers);
dmu_tx_commit(tx);
- zsb->z_version = newvers;
+ zfsvfs->z_version = newvers;
- zfs_set_fuid_feature(zsb);
+ zfs_set_fuid_feature(zfsvfs);
return (0);
}
-EXPORT_SYMBOL(zfs_set_version);
/*
* Read a property stored within the master node.
@@ -1838,8 +2066,10 @@ zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
else
pname = zfs_prop_to_name(prop);
- if (os != NULL)
+ if (os != NULL) {
+ ASSERT3U(os->os_phys->os_type, ==, DMU_OST_ZFS);
error = zap_lookup(os, MASTER_NODE_OBJ, pname, 8, 1, value);
+ }
if (error == ENOENT) {
/* No value set, use the default value */
@@ -1864,7 +2094,28 @@ zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value)
}
return (error);
}
-EXPORT_SYMBOL(zfs_get_zplprop);
+
+/*
+ * Return true if the corresponding vfs's unmounted flag is set.
+ * Otherwise return false.
+ * If this function returns true we know VFS unmount has been initiated.
+ */
+boolean_t
+zfs_get_vfs_flag_unmounted(objset_t *os)
+{
+ zfsvfs_t *zfvp;
+ boolean_t unmounted = B_FALSE;
+
+ ASSERT(dmu_objset_type(os) == DMU_OST_ZFS);
+
+ mutex_enter(&os->os_user_ptr_lock);
+ zfvp = dmu_objset_get_user(os);
+ if (zfvp != NULL && zfvp->z_unmounted)
+ unmounted = B_TRUE;
+ mutex_exit(&os->os_user_ptr_lock);
+
+ return (unmounted);
+}
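A sketch of the intended call-site shape (hypothetical caller running in DMU
context):

    static void
    example_objset_work(objset_t *os)
    {
            if (zfs_get_vfs_flag_unmounted(os))
                    return; /* unmount in progress; skip the update */
            /* ... safe to touch zfsvfs-backed state here ... */
    }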
void
zfs_init(void)
@@ -1881,8 +2132,31 @@ zfs_fini(void)
/*
* we don't use taskq_wait_outstanding() because zpl_posix_acl_free might add more.
*/
+ taskq_wait(system_delay_taskq);
taskq_wait(system_taskq);
unregister_filesystem(&zpl_fs_type);
zfs_znode_fini();
zfsctl_fini();
}
+
+#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(zfs_suspend_fs);
+EXPORT_SYMBOL(zfs_resume_fs);
+EXPORT_SYMBOL(zfs_userspace_one);
+EXPORT_SYMBOL(zfs_userspace_many);
+EXPORT_SYMBOL(zfs_set_userquota);
+EXPORT_SYMBOL(zfs_owner_overquota);
+EXPORT_SYMBOL(zfs_fuid_overquota);
+EXPORT_SYMBOL(zfs_fuid_overobjquota);
+EXPORT_SYMBOL(zfs_set_version);
+EXPORT_SYMBOL(zfsvfs_create);
+EXPORT_SYMBOL(zfsvfs_free);
+EXPORT_SYMBOL(zfs_is_readonly);
+EXPORT_SYMBOL(zfs_domount);
+EXPORT_SYMBOL(zfs_preumount);
+EXPORT_SYMBOL(zfs_umount);
+EXPORT_SYMBOL(zfs_remount);
+EXPORT_SYMBOL(zfs_statvfs);
+EXPORT_SYMBOL(zfs_vget);
+EXPORT_SYMBOL(zfs_prune);
+#endif
diff --git a/zfs/module/zfs/zfs_vnops.c b/zfs/module/zfs/zfs_vnops.c
index 437a63a638d1..6a1dab5c984e 100644
--- a/zfs/module/zfs/zfs_vnops.c
+++ b/zfs/module/zfs/zfs_vnops.c
@@ -18,10 +18,12 @@
*
* CDDL HEADER END
*/
+
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2015 by Chunwei Chen. All rights reserved.
+ * Copyright 2017 Nexenta Systems, Inc.
*/
/* Portions Copyright 2007 Jeremy Teo */
@@ -89,8 +91,8 @@
* to freed memory. The example below illustrates the following Big Rules:
*
* (1) A check must be made in each zfs thread for a mounted file system.
- * This is done avoiding races using ZFS_ENTER(zsb).
- * A ZFS_EXIT(zsb) is needed before all returns. Any znodes
+ * This is done avoiding races using ZFS_ENTER(zfsvfs).
+ * A ZFS_EXIT(zfsvfs) is needed before all returns. Any znodes
* must be checked with ZFS_VERIFY_ZP(zp). Both of these macros
* can return EIO from the calling function.
*
@@ -125,7 +127,7 @@
* Thread A calls dmu_tx_assign(TXG_WAIT) and blocks in txg_wait_open()
* forever, because the previous txg can't quiesce until B's tx commits.
*
- * If dmu_tx_assign() returns ERESTART and zsb->z_assign is TXG_NOWAIT,
+ * If dmu_tx_assign() returns ERESTART and zfsvfs->z_assign is TXG_NOWAIT,
* then drop all locks, call dmu_tx_wait(), and try again. On subsequent
* calls to dmu_tx_assign(), pass TXG_WAITED rather than TXG_NOWAIT,
* to indicate that this operation has already called dmu_tx_wait().
@@ -146,7 +148,7 @@
*
* In general, this is how things should be ordered in each vnode op:
*
- * ZFS_ENTER(zsb); // exit if unmounted
+ * ZFS_ENTER(zfsvfs); // exit if unmounted
* top:
* zfs_dirent_lock(&dl, ...) // lock directory entry (may igrab())
* rw_enter(...); // grab any other locks you need
@@ -164,7 +166,7 @@
* goto top;
* }
* dmu_tx_abort(tx); // abort DMU tx
- * ZFS_EXIT(zsb); // finished in zfs
+ * ZFS_EXIT(zfsvfs); // finished in zfs
* return (error); // really out of space
* }
* error = do_real_work(); // do whatever this VOP does
@@ -175,7 +177,7 @@
* zfs_dirent_unlock(dl); // unlock directory entry
* iput(...); // release held vnodes
* zil_commit(zilog, foid); // synchronous when necessary
- * ZFS_EXIT(zsb); // finished in zfs
+ * ZFS_EXIT(zfsvfs); // finished in zfs
* return (error); // done, report error
*/
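Rendered as a compilable skeleton, that ordering looks roughly like this
(do_real_work() and the lock steps are placeholders from the comment, not
real functions):

    int
    example_vnode_op(zfsvfs_t *zfsvfs)
    {
            dmu_tx_t *tx;
            int error;

            ZFS_ENTER(zfsvfs);              /* exit if unmounted */
    top:
            tx = dmu_tx_create(zfsvfs->z_os);
            error = dmu_tx_assign(tx, TXG_NOWAIT);
            if (error) {
                    if (error == ERESTART) {
                            dmu_tx_wait(tx);
                            dmu_tx_abort(tx);
                            goto top;       /* drop locks, then retry */
                    }
                    dmu_tx_abort(tx);
                    ZFS_EXIT(zfsvfs);
                    return (error);         /* really out of space */
            }
            error = 0;                      /* do_real_work() goes here */
            dmu_tx_commit(tx);
            ZFS_EXIT(zfsvfs);
            return (error);
    }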
@@ -196,23 +198,23 @@ int
zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
/* Honor ZFS_APPENDONLY file attribute */
if ((mode & FMODE_WRITE) && (zp->z_pflags & ZFS_APPENDONLY) &&
((flag & O_APPEND) == 0)) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EPERM));
}
/* Virus scan eligible files on open */
- if (!zfs_has_ctldir(zp) && zsb->z_vscan && S_ISREG(ip->i_mode) &&
+ if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) &&
!(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0) {
if (zfs_vscan(ip, cr, 0) != 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EACCES));
}
}
@@ -221,33 +223,31 @@ zfs_open(struct inode *ip, int mode, int flag, cred_t *cr)
if (flag & O_SYNC)
atomic_inc_32(&zp->z_sync_cnt);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
-EXPORT_SYMBOL(zfs_open);
/* ARGSUSED */
int
zfs_close(struct inode *ip, int flag, cred_t *cr)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
/* Decrement the synchronous opens in the znode */
if (flag & O_SYNC)
atomic_dec_32(&zp->z_sync_cnt);
- if (!zfs_has_ctldir(zp) && zsb->z_vscan && S_ISREG(ip->i_mode) &&
+ if (!zfs_has_ctldir(zp) && zfsvfs->z_vscan && S_ISREG(ip->i_mode) &&
!(zp->z_pflags & ZFS_AV_QUARANTINED) && zp->z_size > 0)
VERIFY(zfs_vscan(ip, cr, 1) == 0);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
-EXPORT_SYMBOL(zfs_close);
#if defined(SEEK_HOLE) && defined(SEEK_DATA)
/*
@@ -278,6 +278,14 @@ zfs_holey_common(struct inode *ip, int cmd, loff_t *off)
if (error == ESRCH)
return (SET_ERROR(ENXIO));
+ /* file was dirty, so fall back to using generic logic */
+ if (error == EBUSY) {
+ if (hole)
+ *off = file_sz;
+
+ return (0);
+ }
+
/*
* We could find a hole that begins after the logical end-of-file,
* because dmu_offset_next() only works on whole blocks. If the
@@ -300,18 +308,17 @@ int
zfs_holey(struct inode *ip, int cmd, loff_t *off)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
int error;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
error = zfs_holey_common(ip, cmd, off);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_holey);
#endif /* SEEK_HOLE && SEEK_DATA */
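From userspace the EBUSY fallback above is invisible: a dirty file simply
reports its next hole at end-of-file. A minimal illustration (assumes a
Linux system with SEEK_HOLE support):

    #define _GNU_SOURCE
    #include <unistd.h>

    /* Offset of the next hole at or after 'from', or -1 on error. */
    static off_t
    next_hole(int fd, off_t from)
    {
            return (lseek(fd, from, SEEK_HOLE));
    }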
#if defined(_KERNEL)
@@ -417,6 +424,7 @@ mappedread(struct inode *ip, int nbytes, uio_t *uio)
#endif /* _KERNEL */
unsigned long zfs_read_chunk_size = 1024 * 1024; /* Tunable */
+unsigned long zfs_delete_blocks = DMU_MAX_DELETEBLKCNT;
/*
* Read bytes from specified file into supplied buffer.
@@ -440,7 +448,7 @@ int
zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
ssize_t n, nbytes;
int error = 0;
rl_t *rl;
@@ -448,11 +456,11 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
xuio_t *xuio = NULL;
#endif /* HAVE_UIO_ZEROCOPY */
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
if (zp->z_pflags & ZFS_AV_QUARANTINED) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EACCES));
}
@@ -460,7 +468,7 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
* Validate file offset
*/
if (uio->uio_loffset < (offset_t)0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EINVAL));
}
@@ -468,15 +476,17 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
* Fasttrack empty reads
*/
if (uio->uio_resid == 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
/*
* If we're in FRSYNC mode, sync out this znode before reading it.
+ * Only do this for non-snapshots.
*/
- if (ioflag & FRSYNC || zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
- zil_commit(zsb->z_log, zp->z_id);
+ if (zfsvfs->z_log &&
+ (ioflag & FRSYNC || zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS))
+ zil_commit(zfsvfs->z_log, zp->z_id);
/*
* Lock the range against changes.
@@ -550,10 +560,9 @@ zfs_read(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
out:
zfs_range_unlock(rl);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_read);
/*
* Write the bytes to a file.
@@ -584,23 +593,26 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
ssize_t tx_bytes;
uint64_t end_size;
dmu_tx_t *tx;
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
zilog_t *zilog;
offset_t woff;
ssize_t n, nbytes;
rl_t *rl;
- int max_blksz = zsb->z_max_blksz;
+ int max_blksz = zfsvfs->z_max_blksz;
int error = 0;
arc_buf_t *abuf;
const iovec_t *aiov = NULL;
xuio_t *xuio = NULL;
- int i_iov = 0;
- const iovec_t *iovp = uio->uio_iov;
int write_eof;
int count = 0;
sa_bulk_attr_t bulk[4];
uint64_t mtime[2], ctime[2];
+ uint32_t uid;
+#ifdef HAVE_UIO_ZEROCOPY
+ int i_iov = 0;
+ const iovec_t *iovp = uio->uio_iov;
ASSERTV(int iovcnt = uio->uio_iovcnt);
+#endif
/*
* Fasttrack empty write
@@ -612,33 +624,43 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
if (limit == RLIM64_INFINITY || limit > MAXOFFSET_T)
limit = MAXOFFSET_T;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, 8);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
+ &zp->z_size, 8);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
&zp->z_pflags, 8);
+ /*
+ * Callers might not be able to detect properly that we are read-only,
+ * so check it explicitly here.
+ */
+ if (zfs_is_readonly(zfsvfs)) {
+ ZFS_EXIT(zfsvfs);
+ return (SET_ERROR(EROFS));
+ }
+
/*
* If immutable or not appending then return EPERM
*/
if ((zp->z_pflags & (ZFS_IMMUTABLE | ZFS_READONLY)) ||
((zp->z_pflags & ZFS_APPENDONLY) && !(ioflag & FAPPEND) &&
(uio->uio_loffset < zp->z_size))) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EPERM));
}
- zilog = zsb->z_log;
+ zilog = zfsvfs->z_log;
/*
* Validate file offset
*/
woff = ioflag & FAPPEND ? zp->z_size : uio->uio_loffset;
if (woff < 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EINVAL));
}
@@ -685,7 +707,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
if (woff >= limit) {
zfs_range_unlock(rl);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EFBIG));
}
@@ -705,8 +727,8 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
while (n > 0) {
abuf = NULL;
woff = uio->uio_loffset;
- if (zfs_owner_overquota(zsb, zp, B_FALSE) ||
- zfs_owner_overquota(zsb, zp, B_TRUE)) {
+ if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
+ zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
if (abuf != NULL)
dmu_return_arcbuf(abuf);
error = SET_ERROR(EDQUOT);
@@ -714,6 +736,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
}
if (xuio && abuf == NULL) {
+#ifdef HAVE_UIO_ZEROCOPY
ASSERT(i_iov < iovcnt);
ASSERT3U(uio->uio_segflg, !=, UIO_BVEC);
aiov = &iovp[i_iov];
@@ -723,6 +746,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
((char *)aiov->iov_base - (char *)abuf->b_data +
aiov->iov_len == arc_buf_size(abuf)));
i_iov++;
+#endif
} else if (abuf == NULL && n >= max_blksz &&
woff >= zp->z_size &&
P2PHASE(woff, max_blksz) == 0 &&
@@ -751,7 +775,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
/*
* Start a transaction.
*/
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
dmu_tx_hold_write(tx, zp->z_id, woff, MIN(n, max_blksz));
zfs_sa_upgrade_txholds(tx, zp);
@@ -811,7 +835,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
if (tx_bytes < max_blksz && (!write_eof ||
aiov->iov_base != abuf->b_data)) {
ASSERT(xuio);
- dmu_write(zsb->z_os, zp->z_id, woff,
+ dmu_write(zfsvfs->z_os, zp->z_id, woff,
aiov->iov_len, aiov->iov_base, tx);
dmu_return_arcbuf(abuf);
xuio_stat_wbuf_copied();
@@ -823,16 +847,17 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
ASSERT(tx_bytes <= uio->uio_resid);
uioskip(uio, tx_bytes);
}
-
- if (tx_bytes && zp->z_is_mapped && !(ioflag & O_DIRECT))
- update_pages(ip, woff, tx_bytes, zsb->z_os, zp->z_id);
+ if (tx_bytes && zp->z_is_mapped && !(ioflag & O_DIRECT)) {
+ update_pages(ip, woff,
+ tx_bytes, zfsvfs->z_os, zp->z_id);
+ }
/*
* If we made no progress, we're done. If we made even
* partial progress, update the znode and ZIL accordingly.
*/
if (tx_bytes == 0) {
- (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zsb),
+ (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
(void *)&zp->z_size, sizeof (uint64_t), tx);
dmu_tx_commit(tx);
ASSERT(error != 0);
@@ -841,7 +866,7 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
/*
* Clear Set-UID/Set-GID bits on successful write if not
- * privileged and at least one of the excute bits is set.
+ * privileged and at least one of the execute bits is set.
*
* It would be nice to do this after all writes have
* been done, but that would still expose the ISUID/ISGID
@@ -851,15 +876,16 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
* user 0 is not an ephemeral uid.
*/
mutex_enter(&zp->z_acl_lock);
+ uid = KUID_TO_SUID(ip->i_uid);
if ((zp->z_mode & (S_IXUSR | (S_IXUSR >> 3) |
(S_IXUSR >> 6))) != 0 &&
(zp->z_mode & (S_ISUID | S_ISGID)) != 0 &&
secpolicy_vnode_setid_retain(cr,
- (zp->z_mode & S_ISUID) != 0 && zp->z_uid == 0) != 0) {
+ ((zp->z_mode & S_ISUID) != 0 && uid == 0)) != 0) {
uint64_t newmode;
zp->z_mode &= ~(S_ISUID | S_ISGID);
- newmode = zp->z_mode;
- (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zsb),
+ ip->i_mode = newmode = zp->z_mode;
+ (void) sa_update(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs),
(void *)&newmode, sizeof (uint64_t), tx);
}
mutex_exit(&zp->z_acl_lock);
@@ -880,8 +906,8 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
* the file size to the specified eof. Note, there's no
* concurrency during replay.
*/
- if (zsb->z_replay && zsb->z_replay_eof != 0)
- zp->z_size = zsb->z_replay_eof;
+ if (zfsvfs->z_replay && zfsvfs->z_replay_eof != 0)
+ zp->z_size = zfsvfs->z_replay_eof;
error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
@@ -905,20 +931,25 @@ zfs_write(struct inode *ip, uio_t *uio, int ioflag, cred_t *cr)
* If we're in replay mode, or we made no progress, return error.
* Otherwise, it's at least a partial write, so it's successful.
*/
- if (zsb->z_replay || uio->uio_resid == start_resid) {
- ZFS_EXIT(zsb);
+ if (zfsvfs->z_replay || uio->uio_resid == start_resid) {
+ ZFS_EXIT(zfsvfs);
return (error);
}
if (ioflag & (FSYNC | FDSYNC) ||
- zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
+ zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, zp->z_id);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
-EXPORT_SYMBOL(zfs_write);
+/*
+ * Drop a reference on the passed inode asynchronously. This ensures
+ * that the caller will never drop the last reference on an inode in
+ * the current context. Doing so while holding open a tx could result
+ * in a deadlock if iput_final() re-enters the filesystem code.
+ */
void
zfs_iput_async(struct inode *ip)
{
@@ -928,8 +959,8 @@ zfs_iput_async(struct inode *ip)
ASSERT(os != NULL);
if (atomic_read(&ip->i_count) == 1)
- taskq_dispatch(dsl_pool_iput_taskq(dmu_objset_pool(os)),
- (task_func_t *)iput, ip, TQ_SLEEP);
+ VERIFY(taskq_dispatch(dsl_pool_iput_taskq(dmu_objset_pool(os)),
+ (task_func_t *)iput, ip, TQ_SLEEP) != TASKQID_INVALID);
else
iput(ip);
}
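The hazard the comment describes, in sketch form (hypothetical callsite; the
point is only that the final iput must never run synchronously inside an
open tx):

    static void
    example_release_in_tx(zfsvfs_t *zfsvfs, znode_t *zp)
    {
            dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);

            VERIFY0(dmu_tx_assign(tx, TXG_WAIT));
            /* ... dirty something while the tx is open ... */
            zfs_iput_async(ZTOI(zp));       /* not iput(): avoids re-entry */
            dmu_tx_commit(tx);
    }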
@@ -966,13 +997,12 @@ static int zil_fault_io = 0;
int
zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
{
- zfs_sb_t *zsb = arg;
- objset_t *os = zsb->z_os;
+ zfsvfs_t *zfsvfs = arg;
+ objset_t *os = zfsvfs->z_os;
znode_t *zp;
uint64_t object = lr->lr_foid;
uint64_t offset = lr->lr_offset;
uint64_t size = lr->lr_length;
- blkptr_t *bp = &lr->lr_blkptr;
dmu_buf_t *db;
zgd_t *zgd;
int error = 0;
@@ -983,7 +1013,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
/*
* Nothing to do if the file has been removed
*/
- if (zfs_zget(zsb, object, &zp) != 0)
+ if (zfs_zget(zfsvfs, object, &zp) != 0)
return (SET_ERROR(ENOENT));
if (zp->z_unlinked) {
/*
@@ -995,7 +1025,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
}
zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
- zgd->zgd_zilog = zsb->z_log;
+ zgd->zgd_zilog = zfsvfs->z_log;
zgd->zgd_private = zp;
/*
@@ -1019,7 +1049,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
} else { /* indirect write */
/*
* Have to lock the whole block to ensure when it's
- * written out and it's checksum is being calculated
+ * written out and its checksum is being calculated
* that no one can change the data. We need to re-check
* blocksize after we get the lock in case it's changed!
*/
@@ -1049,11 +1079,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
DMU_READ_NO_PREFETCH);
if (error == 0) {
- blkptr_t *obp = dmu_buf_get_blkptr(db);
- if (obp) {
- ASSERT(BP_IS_HOLE(bp));
- *bp = *obp;
- }
+ blkptr_t *bp = &lr->lr_blkptr;
zgd->zgd_db = db;
zgd->zgd_bp = bp;
@@ -1063,7 +1089,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
error = dmu_sync(zio, lr->lr_common.lrc_txg,
zfs_get_done, zgd);
- ASSERT(error || lr->lr_length <= zp->z_blksz);
+ ASSERT(error || lr->lr_length <= size);
/*
* On success, we need to wait for the write I/O
@@ -1091,10 +1117,10 @@ int
zfs_access(struct inode *ip, int mode, int flag, cred_t *cr)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
int error;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
if (flag & V_ACE_MASK)
@@ -1102,10 +1128,9 @@ zfs_access(struct inode *ip, int mode, int flag, cred_t *cr)
else
error = zfs_zaccess_rwx(zp, mode, flag, cr);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_access);
/*
* Lookup an entry in a directory, or an extended attribute directory.
@@ -1131,10 +1156,18 @@ zfs_lookup(struct inode *dip, char *nm, struct inode **ipp, int flags,
cred_t *cr, int *direntflags, pathname_t *realpnp)
{
znode_t *zdp = ITOZ(dip);
- zfs_sb_t *zsb = ITOZSB(dip);
+ zfsvfs_t *zfsvfs = ITOZSB(dip);
int error = 0;
- /* fast path */
+ /*
+ * Fast path lookup; however, we must skip DNLC lookup
+ * for case folding or normalizing lookups because the
+ * DNLC code only stores the passed in name. This means
+ * creating 'a' and removing 'A' on a case insensitive
+ * file system would work, but DNLC still thinks 'a'
+ * exists and won't let you create it again on the next
+ * pass through fast path.
+ */
if (!(flags & (LOOKUP_XATTR | FIGNORECASE))) {
if (!S_ISDIR(dip->i_mode)) {
@@ -1152,7 +1185,9 @@ zfs_lookup(struct inode *dip, char *nm, struct inode **ipp, int flags,
}
return (error);
#ifdef HAVE_DNLC
- } else {
+ } else if (!zdp->z_zfsvfs->z_norm &&
+ (zdp->z_zfsvfs->z_case == ZFS_CASE_SENSITIVE)) {
+
vnode_t *tvp = dnlc_lookup(dvp, nm);
if (tvp) {
@@ -1173,7 +1208,7 @@ zfs_lookup(struct inode *dip, char *nm, struct inode **ipp, int flags,
}
}
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zdp);
*ipp = NULL;
@@ -1184,12 +1219,12 @@ zfs_lookup(struct inode *dip, char *nm, struct inode **ipp, int flags,
* Maybe someday we will.
*/
if (zdp->z_pflags & ZFS_XATTR) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EINVAL));
}
if ((error = zfs_get_xattrdir(zdp, ipp, cr, flags))) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
@@ -1203,12 +1238,12 @@ zfs_lookup(struct inode *dip, char *nm, struct inode **ipp, int flags,
*ipp = NULL;
}
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
if (!S_ISDIR(dip->i_mode)) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(ENOTDIR));
}
@@ -1217,13 +1252,13 @@ zfs_lookup(struct inode *dip, char *nm, struct inode **ipp, int flags,
*/
if ((error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr))) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
- if (zsb->z_utf8 && u8_validate(nm, strlen(nm),
+ if (zfsvfs->z_utf8 && u8_validate(nm, strlen(nm),
NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EILSEQ));
}
@@ -1231,10 +1266,9 @@ zfs_lookup(struct inode *dip, char *nm, struct inode **ipp, int flags,
if ((error == 0) && (*ipp))
zfs_inode_update(ITOZ(*ipp));
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_lookup);
/*
* Attempt to create a new entry in a directory. If the entry
@@ -1265,7 +1299,7 @@ zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl,
int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp)
{
znode_t *zp, *dzp = ITOZ(dip);
- zfs_sb_t *zsb = ITOZSB(dip);
+ zfsvfs_t *zfsvfs = ITOZSB(dip);
zilog_t *zilog;
objset_t *os;
zfs_dirlock_t *dl;
@@ -1286,25 +1320,28 @@ zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl,
gid = crgetgid(cr);
uid = crgetuid(cr);
- if (zsb->z_use_fuids == B_FALSE &&
+ if (zfsvfs->z_use_fuids == B_FALSE &&
(vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
return (SET_ERROR(EINVAL));
- ZFS_ENTER(zsb);
+ if (name == NULL)
+ return (SET_ERROR(EINVAL));
+
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(dzp);
- os = zsb->z_os;
- zilog = zsb->z_log;
+ os = zfsvfs->z_os;
+ zilog = zfsvfs->z_log;
- if (zsb->z_utf8 && u8_validate(name, strlen(name),
+ if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EILSEQ));
}
if (vap->va_mask & ATTR_XVATTR) {
if ((error = secpolicy_xvattr((xvattr_t *)vap,
crgetuid(cr), cr, vap->va_mode)) != 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
}
@@ -1333,7 +1370,7 @@ zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl,
zfs_acl_ids_free(&acl_ids);
if (strcmp(name, "..") == 0)
error = SET_ERROR(EISDIR);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
}
@@ -1368,7 +1405,7 @@ zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl,
goto out;
have_acl = B_TRUE;
- if (zfs_acl_ids_overquota(zsb, &acl_ids)) {
+ if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
zfs_acl_ids_free(&acl_ids);
error = SET_ERROR(EDQUOT);
goto out;
@@ -1379,12 +1416,12 @@ zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl,
dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
ZFS_SA_BASE_ATTR_SIZE);
- fuid_dirtied = zsb->z_fuid_dirty;
+ fuid_dirtied = zfsvfs->z_fuid_dirty;
if (fuid_dirtied)
- zfs_fuid_txhold(zsb, tx);
+ zfs_fuid_txhold(zfsvfs, tx);
dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
- if (!zsb->z_use_sa &&
+ if (!zfsvfs->z_use_sa &&
acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
0, acl_ids.z_aclp->z_acl_bytes);
@@ -1400,13 +1437,13 @@ zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl,
}
zfs_acl_ids_free(&acl_ids);
dmu_tx_abort(tx);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
if (fuid_dirtied)
- zfs_fuid_sync(zsb, tx);
+ zfs_fuid_sync(zfsvfs, tx);
(void) zfs_link_create(dl, zp, tx, ZNEW);
txtype = zfs_log_create_txtype(Z_FILE, vsecp, vap);
@@ -1457,8 +1494,10 @@ zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl,
if (S_ISREG(ZTOI(zp)->i_mode) &&
(vap->va_mask & ATTR_SIZE) && (vap->va_size == 0)) {
/* we can't hold any locks when calling zfs_freesp() */
- zfs_dirent_unlock(dl);
- dl = NULL;
+ if (dl) {
+ zfs_dirent_unlock(dl);
+ dl = NULL;
+ }
error = zfs_freesp(zp, 0, 0, mode, TRUE);
}
}
@@ -1476,13 +1515,129 @@ zfs_create(struct inode *dip, char *name, vattr_t *vap, int excl,
*ipp = ZTOI(zp);
}
- if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
+ if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
+ return (error);
+}
+
+/* ARGSUSED */
+int
+zfs_tmpfile(struct inode *dip, vattr_t *vap, int excl,
+ int mode, struct inode **ipp, cred_t *cr, int flag, vsecattr_t *vsecp)
+{
+ znode_t *zp = NULL, *dzp = ITOZ(dip);
+ zfsvfs_t *zfsvfs = ITOZSB(dip);
+ objset_t *os;
+ dmu_tx_t *tx;
+ int error;
+ uid_t uid;
+ gid_t gid;
+ zfs_acl_ids_t acl_ids;
+ boolean_t fuid_dirtied;
+ boolean_t have_acl = B_FALSE;
+ boolean_t waited = B_FALSE;
+
+ /*
+ * If we have an ephemeral id, ACL, or XVATTR then
+ * make sure the file system is at the proper version.
+ */
+
+ gid = crgetgid(cr);
+ uid = crgetuid(cr);
+
+ if (zfsvfs->z_use_fuids == B_FALSE &&
+ (vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
+ return (SET_ERROR(EINVAL));
+
+ ZFS_ENTER(zfsvfs);
+ ZFS_VERIFY_ZP(dzp);
+ os = zfsvfs->z_os;
+
+ if (vap->va_mask & ATTR_XVATTR) {
+ if ((error = secpolicy_xvattr((xvattr_t *)vap,
+ crgetuid(cr), cr, vap->va_mode)) != 0) {
+ ZFS_EXIT(zfsvfs);
+ return (error);
+ }
+ }
+
+top:
+ *ipp = NULL;
+
+ /*
+ * Create a new file object and update the directory
+ * to reference it.
+ */
+ if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
+ if (have_acl)
+ zfs_acl_ids_free(&acl_ids);
+ goto out;
+ }
+
+ if (!have_acl && (error = zfs_acl_ids_create(dzp, 0, vap,
+ cr, vsecp, &acl_ids)) != 0)
+ goto out;
+ have_acl = B_TRUE;
+
+ if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
+ zfs_acl_ids_free(&acl_ids);
+ error = SET_ERROR(EDQUOT);
+ goto out;
+ }
+
+ tx = dmu_tx_create(os);
+
+ dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
+ ZFS_SA_BASE_ATTR_SIZE);
+ dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+
+ fuid_dirtied = zfsvfs->z_fuid_dirty;
+ if (fuid_dirtied)
+ zfs_fuid_txhold(zfsvfs, tx);
+ if (!zfsvfs->z_use_sa &&
+ acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+ dmu_tx_hold_write(tx, DMU_NEW_OBJECT,
+ 0, acl_ids.z_aclp->z_acl_bytes);
+ }
+ error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
+ if (error) {
+ if (error == ERESTART) {
+ waited = B_TRUE;
+ dmu_tx_wait(tx);
+ dmu_tx_abort(tx);
+ goto top;
+ }
+ zfs_acl_ids_free(&acl_ids);
+ dmu_tx_abort(tx);
+ ZFS_EXIT(zfsvfs);
+ return (error);
+ }
+ zfs_mknode(dzp, vap, tx, cr, IS_TMPFILE, &zp, &acl_ids);
+
+ if (fuid_dirtied)
+ zfs_fuid_sync(zfsvfs, tx);
+
+ /* Add to unlinked set */
+ zp->z_unlinked = 1;
+ zfs_unlinked_add(zp, tx);
+ zfs_acl_ids_free(&acl_ids);
+ dmu_tx_commit(tx);
+out:
+
+ if (error) {
+ if (zp)
+ iput(ZTOI(zp));
+ } else {
+ zfs_inode_update(dzp);
+ zfs_inode_update(zp);
+ *ipp = ZTOI(zp);
+ }
+
+ ZFS_EXIT(zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_create);
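
The zfs_tmpfile() added above supplies the kernel half of O_TMPFILE: the
znode is born directly on the unlinked set (z_unlinkedobj), so it vanishes
on the final iput() unless it is later materialized through linkat(). A
minimal userspace sketch of that flow, assuming a pool mounted at /tank
(the paths and names here are illustrative only):

#define _GNU_SOURCE		/* O_TMPFILE */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	char fdpath[64];
	/* anonymous file: zfs_tmpfile() places it on z_unlinkedobj */
	int fd = open("/tank", O_TMPFILE | O_RDWR, 0600);

	if (fd < 0) {
		perror("open(O_TMPFILE)");
		return (1);
	}
	if (write(fd, "scratch", 7) != 7)
		perror("write");

	/*
	 * Optionally make the file visible; this reaches zfs_link(),
	 * which takes the znode back off the unlinked set and waits
	 * for the txg to sync before returning.
	 */
	(void) snprintf(fdpath, sizeof (fdpath), "/proc/self/fd/%d", fd);
	if (linkat(AT_FDCWD, fdpath, AT_FDCWD, "/tank/now-visible",
	    AT_SYMLINK_FOLLOW) < 0)
		perror("linkat");

	(void) close(fd);
	return (0);
}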
/*
* Remove an entry from a directory.
@@ -1503,39 +1658,40 @@ uint64_t null_xattr = 0;
/*ARGSUSED*/
int
-zfs_remove(struct inode *dip, char *name, cred_t *cr)
+zfs_remove(struct inode *dip, char *name, cred_t *cr, int flags)
{
znode_t *zp, *dzp = ITOZ(dip);
znode_t *xzp;
struct inode *ip;
- zfs_sb_t *zsb = ITOZSB(dip);
+ zfsvfs_t *zfsvfs = ITOZSB(dip);
zilog_t *zilog;
- uint64_t xattr_obj;
+ uint64_t acl_obj, xattr_obj;
uint64_t xattr_obj_unlinked = 0;
uint64_t obj = 0;
+ uint64_t links;
zfs_dirlock_t *dl;
dmu_tx_t *tx;
- boolean_t unlinked;
+ boolean_t may_delete_now, delete_now = FALSE;
+ boolean_t unlinked, toobig = FALSE;
uint64_t txtype;
pathname_t *realnmp = NULL;
-#ifdef HAVE_PN_UTILS
pathname_t realnm;
-#endif /* HAVE_PN_UTILS */
int error;
int zflg = ZEXISTS;
boolean_t waited = B_FALSE;
- ZFS_ENTER(zsb);
+ if (name == NULL)
+ return (SET_ERROR(EINVAL));
+
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(dzp);
- zilog = zsb->z_log;
+ zilog = zfsvfs->z_log;
-#ifdef HAVE_PN_UTILS
if (flags & FIGNORECASE) {
zflg |= ZCILOOK;
pn_alloc(&realnm);
realnmp = &realnm;
}
-#endif /* HAVE_PN_UTILS */
top:
xattr_obj = 0;
@@ -1545,11 +1701,9 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr)
*/
if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
NULL, realnmp))) {
-#ifdef HAVE_PN_UTILS
if (realnmp)
pn_free(realnmp);
-#endif /* HAVE_PN_UTILS */
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
@@ -1574,30 +1728,51 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr)
dnlc_remove(dvp, name);
#endif /* HAVE_DNLC */
+ mutex_enter(&zp->z_lock);
+ may_delete_now = atomic_read(&ip->i_count) == 1 && !(zp->z_is_mapped);
+ mutex_exit(&zp->z_lock);
+
/*
- * We never delete the znode and always place it in the unlinked
- * set. The dentry cache will always hold the last reference and
- * is responsible for safely freeing the znode.
+ * We may delete the znode now, or we may put it in the unlinked set;
+ * it depends on whether we're the last link, and on whether there are
+ * other holds on the inode. So we dmu_tx_hold() the right things to
+ * allow for either case.
*/
obj = zp->z_id;
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, zp);
zfs_sa_upgrade_txholds(tx, dzp);
+ if (may_delete_now) {
+ toobig = zp->z_size > zp->z_blksz * zfs_delete_blocks;
+ /* if the file is too big, only hold_free a token amount */
+ dmu_tx_hold_free(tx, zp->z_id, 0,
+ (toobig ? DMU_MAX_ACCESS : DMU_OBJECT_END));
+ }
/* are there any extended attributes? */
- error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
+ error = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
&xattr_obj, sizeof (xattr_obj));
if (error == 0 && xattr_obj) {
- error = zfs_zget(zsb, xattr_obj, &xzp);
+ error = zfs_zget(zfsvfs, xattr_obj, &xzp);
ASSERT0(error);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE);
dmu_tx_hold_sa(tx, xzp->z_sa_hdl, B_FALSE);
}
+ mutex_enter(&zp->z_lock);
+ if ((acl_obj = zfs_external_acl(zp)) != 0 && may_delete_now)
+ dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END);
+ mutex_exit(&zp->z_lock);
+
/* charge as an update -- would be nice not to charge at all */
- dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
+ dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+
+ /*
+ * Mark this transaction as typically resulting in a net free of space
+ */
+ dmu_tx_mark_netfree(tx);
error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
@@ -1611,15 +1786,13 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr)
iput(ZTOI(xzp));
goto top;
}
-#ifdef HAVE_PN_UTILS
if (realnmp)
pn_free(realnmp);
-#endif /* HAVE_PN_UTILS */
dmu_tx_abort(tx);
iput(ip);
if (xzp)
iput(ZTOI(xzp));
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
@@ -1640,43 +1813,77 @@ zfs_remove(struct inode *dip, char *name, cred_t *cr)
* zfs_sa_upgrade().
*/
mutex_enter(&zp->z_lock);
- (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
+ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
&xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
+ delete_now = may_delete_now && !toobig &&
+ atomic_read(&ip->i_count) == 1 && !(zp->z_is_mapped) &&
+ xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) ==
+ acl_obj;
+ }
+
+ if (delete_now) {
+ if (xattr_obj_unlinked) {
+ ASSERT3U(ZTOI(xzp)->i_nlink, ==, 2);
+ mutex_enter(&xzp->z_lock);
+ xzp->z_unlinked = 1;
+ clear_nlink(ZTOI(xzp));
+ links = 0;
+ error = sa_update(xzp->z_sa_hdl, SA_ZPL_LINKS(zfsvfs),
+ &links, sizeof (links), tx);
+ ASSERT3U(error, ==, 0);
+ mutex_exit(&xzp->z_lock);
+ zfs_unlinked_add(xzp, tx);
+
+ if (zp->z_is_sa)
+ error = sa_remove(zp->z_sa_hdl,
+ SA_ZPL_XATTR(zfsvfs), tx);
+ else
+ error = sa_update(zp->z_sa_hdl,
+ SA_ZPL_XATTR(zfsvfs), &null_xattr,
+ sizeof (uint64_t), tx);
+ ASSERT0(error);
+ }
+ /*
+ * Add to the unlinked set because a new reference could be
+ * taken concurrently, resulting in deferred destruction.
+ */
+ zfs_unlinked_add(zp, tx);
+ mutex_exit(&zp->z_lock);
+ } else if (unlinked) {
mutex_exit(&zp->z_lock);
zfs_unlinked_add(zp, tx);
}
txtype = TX_REMOVE;
-#ifdef HAVE_PN_UTILS
if (flags & FIGNORECASE)
txtype |= TX_CI;
-#endif /* HAVE_PN_UTILS */
zfs_log_remove(zilog, tx, txtype, dzp, name, obj);
dmu_tx_commit(tx);
out:
-#ifdef HAVE_PN_UTILS
if (realnmp)
pn_free(realnmp);
-#endif /* HAVE_PN_UTILS */
zfs_dirent_unlock(dl);
zfs_inode_update(dzp);
zfs_inode_update(zp);
- if (xzp)
- zfs_inode_update(xzp);
- iput(ip);
- if (xzp)
- iput(ZTOI(xzp));
+ if (delete_now)
+ iput(ip);
+ else
+ zfs_iput_async(ip);
+
+ if (xzp) {
+ zfs_inode_update(xzp);
+ zfs_iput_async(ZTOI(xzp));
+ }
- if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
+ if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_remove);
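
A note on the delete_now logic introduced in zfs_remove() above: rather
than always parking the znode on the unlinked set and leaving destruction
to the dentry cache, the object can now be freed inside the same
transaction when this is the last inode reference, the file is not memory
mapped, and it is small enough to free inline. A simplified restatement of
that predicate, with plain scalars standing in for the znode/inode fields:

#include <stdbool.h>
#include <stdint.h>

static bool
can_delete_now(int i_count, bool is_mapped, uint64_t z_size,
    uint64_t z_blksz, uint64_t zfs_delete_blocks)
{
	/* files past this size only hold_free a token amount instead */
	bool toobig = z_size > z_blksz * zfs_delete_blocks;

	return (i_count == 1 && !is_mapped && !toobig);
}

The predicate is evaluated twice, before and after dmu_tx_assign(), since
a new reference or mapping can appear while the transaction is assigned.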
/*
* Create a new directory and insert it into dip using the name
@@ -1703,7 +1910,7 @@ zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, struct inode **ipp,
cred_t *cr, int flags, vsecattr_t *vsecp)
{
znode_t *zp, *dzp = ITOZ(dip);
- zfs_sb_t *zsb = ITOZSB(dip);
+ zfsvfs_t *zfsvfs = ITOZSB(dip);
zilog_t *zilog;
zfs_dirlock_t *dl;
uint64_t txtype;
@@ -1724,22 +1931,25 @@ zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, struct inode **ipp,
*/
uid = crgetuid(cr);
- if (zsb->z_use_fuids == B_FALSE &&
+ if (zfsvfs->z_use_fuids == B_FALSE &&
(vsecp || IS_EPHEMERAL(uid) || IS_EPHEMERAL(gid)))
return (SET_ERROR(EINVAL));
- ZFS_ENTER(zsb);
+ if (dirname == NULL)
+ return (SET_ERROR(EINVAL));
+
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(dzp);
- zilog = zsb->z_log;
+ zilog = zfsvfs->z_log;
if (dzp->z_pflags & ZFS_XATTR) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EINVAL));
}
- if (zsb->z_utf8 && u8_validate(dirname,
+ if (zfsvfs->z_utf8 && u8_validate(dirname,
strlen(dirname), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EILSEQ));
}
if (flags & FIGNORECASE)
@@ -1748,14 +1958,14 @@ zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, struct inode **ipp,
if (vap->va_mask & ATTR_XVATTR) {
if ((error = secpolicy_xvattr((xvattr_t *)vap,
crgetuid(cr), cr, vap->va_mode)) != 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
}
if ((error = zfs_acl_ids_create(dzp, 0, vap, cr,
vsecp, &acl_ids)) != 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
/*
@@ -1771,34 +1981,34 @@ zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, struct inode **ipp,
if ((error = zfs_dirent_lock(&dl, dzp, dirname, &zp, zf,
NULL, NULL))) {
zfs_acl_ids_free(&acl_ids);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
if ((error = zfs_zaccess(dzp, ACE_ADD_SUBDIRECTORY, 0, B_FALSE, cr))) {
zfs_acl_ids_free(&acl_ids);
zfs_dirent_unlock(dl);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
- if (zfs_acl_ids_overquota(zsb, &acl_ids)) {
+ if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
zfs_acl_ids_free(&acl_ids);
zfs_dirent_unlock(dl);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EDQUOT));
}
/*
* Add a new entry to the directory.
*/
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_zap(tx, dzp->z_id, TRUE, dirname);
dmu_tx_hold_zap(tx, DMU_NEW_OBJECT, FALSE, NULL);
- fuid_dirtied = zsb->z_fuid_dirty;
+ fuid_dirtied = zfsvfs->z_fuid_dirty;
if (fuid_dirtied)
- zfs_fuid_txhold(zsb, tx);
- if (!zsb->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+ zfs_fuid_txhold(zfsvfs, tx);
+ if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
acl_ids.z_aclp->z_acl_bytes);
}
@@ -1817,7 +2027,7 @@ zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, struct inode **ipp,
}
zfs_acl_ids_free(&acl_ids);
dmu_tx_abort(tx);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
@@ -1827,7 +2037,7 @@ zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, struct inode **ipp,
zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
if (fuid_dirtied)
- zfs_fuid_sync(zsb, tx);
+ zfs_fuid_sync(zfsvfs, tx);
/*
* Now put new name in parent dir.
@@ -1848,15 +2058,14 @@ zfs_mkdir(struct inode *dip, char *dirname, vattr_t *vap, struct inode **ipp,
zfs_dirent_unlock(dl);
- if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
+ if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
zfs_inode_update(dzp);
zfs_inode_update(zp);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
-EXPORT_SYMBOL(zfs_mkdir);
/*
* Remove a directory subdir entry. If the current working
@@ -1882,7 +2091,7 @@ zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr,
znode_t *dzp = ITOZ(dip);
znode_t *zp;
struct inode *ip;
- zfs_sb_t *zsb = ITOZSB(dip);
+ zfsvfs_t *zfsvfs = ITOZSB(dip);
zilog_t *zilog;
zfs_dirlock_t *dl;
dmu_tx_t *tx;
@@ -1890,9 +2099,12 @@ zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr,
int zflg = ZEXISTS;
boolean_t waited = B_FALSE;
- ZFS_ENTER(zsb);
+ if (name == NULL)
+ return (SET_ERROR(EINVAL));
+
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(dzp);
- zilog = zsb->z_log;
+ zilog = zfsvfs->z_log;
if (flags & FIGNORECASE)
zflg |= ZCILOOK;
@@ -1904,7 +2116,7 @@ zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr,
*/
if ((error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg,
NULL, NULL))) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
@@ -1925,7 +2137,7 @@ zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr,
}
/*
- * Grab a lock on the directory to make sure that noone is
+ * Grab a lock on the directory to make sure that no one is
* trying to add (or lookup) entries while we are removing it.
*/
rw_enter(&zp->z_name_lock, RW_WRITER);
@@ -1936,12 +2148,13 @@ zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr,
*/
rw_enter(&zp->z_parent_lock, RW_WRITER);
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_zap(tx, dzp->z_id, FALSE, name);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
- dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
+ dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
zfs_sa_upgrade_txholds(tx, zp);
zfs_sa_upgrade_txholds(tx, dzp);
+ dmu_tx_mark_netfree(tx);
error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
rw_exit(&zp->z_parent_lock);
@@ -1956,7 +2169,7 @@ zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr,
}
dmu_tx_abort(tx);
iput(ip);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
@@ -1980,13 +2193,12 @@ zfs_rmdir(struct inode *dip, char *name, struct inode *cwd, cred_t *cr,
zfs_inode_update(zp);
iput(ip);
- if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
+ if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_rmdir);
/*
* Read as many directory entries as will fit into the provided
@@ -2013,7 +2225,7 @@ int
zfs_readdir(struct inode *ip, struct dir_context *ctx, cred_t *cr)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
objset_t *os;
zap_cursor_t zc;
zap_attribute_t zap;
@@ -2024,10 +2236,10 @@ zfs_readdir(struct inode *ip, struct dir_context *ctx, cred_t *cr)
uint64_t parent;
uint64_t offset; /* must be unsigned; checks for < 1 */
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
- if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zsb),
+ if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
&parent, sizeof (parent))) != 0)
goto out;
@@ -2038,7 +2250,7 @@ zfs_readdir(struct inode *ip, struct dir_context *ctx, cred_t *cr)
goto out;
error = 0;
- os = zsb->z_os;
+ os = zfsvfs->z_os;
offset = ctx->pos;
prefetch = zp->z_zn_prefetch;
@@ -2122,7 +2334,8 @@ zfs_readdir(struct inode *ip, struct dir_context *ctx, cred_t *cr)
/* Prefetch znode */
if (prefetch) {
- dmu_prefetch(os, objnum, 0, 0);
+ dmu_prefetch(os, objnum, 0, 0, 0,
+ ZIO_PRIORITY_SYNC_READ);
}
/*
@@ -2143,11 +2356,10 @@ zfs_readdir(struct inode *ip, struct dir_context *ctx, cred_t *cr)
if (error == ENOENT)
error = 0;
out:
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_readdir);
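
The prefetch change inside zfs_readdir() above follows the widened DMU
interface; assuming the OpenZFS 0.7 prototype, the arguments line up as
sketched here:

	/*
	 * dmu_prefetch(os, object, level, offset, len, priority):
	 * level 0 targets the data blocks, and a zero length asks
	 * for the dnode alone, which is all a readdir prefetch of
	 * the child object needs.
	 */
	dmu_prefetch(os, objnum, 0, 0, 0, ZIO_PRIORITY_SYNC_READ);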
ulong_t zfs_fsync_sync_cnt = 4;
@@ -2155,21 +2367,20 @@ int
zfs_fsync(struct inode *ip, int syncflag, cred_t *cr)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
(void) tsd_set(zfs_fsyncer_key, (void *)zfs_fsync_sync_cnt);
- if (zsb->z_os->os_sync != ZFS_SYNC_DISABLED) {
- ZFS_ENTER(zsb);
+ if (zfsvfs->z_os->os_sync != ZFS_SYNC_DISABLED) {
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
- zil_commit(zsb->z_log, zp->z_id);
- ZFS_EXIT(zsb);
+ zil_commit(zfsvfs->z_log, zp->z_id);
+ ZFS_EXIT(zfsvfs);
}
tsd_set(zfs_fsyncer_key, NULL);
return (0);
}
-EXPORT_SYMBOL(zfs_fsync);
/*
@@ -2191,7 +2402,7 @@ int
zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
int error = 0;
uint64_t links;
uint64_t atime[2], mtime[2], ctime[2];
@@ -2201,17 +2412,17 @@ zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
sa_bulk_attr_t bulk[3];
int count = 0;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
zfs_fuid_map_ids(zp, cr, &vap->va_uid, &vap->va_gid);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL, &atime, 16);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, &mtime, 16);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, &ctime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
if ((error = sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) != 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
@@ -2224,7 +2435,7 @@ zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
(vap->va_uid != crgetuid(cr))) {
if ((error = zfs_zaccess(zp, ACE_READ_ATTRIBUTES, 0,
skipaclchk, cr))) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
}
@@ -2239,10 +2450,10 @@ zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
vap->va_mode = zp->z_mode;
vap->va_fsid = ZTOI(zp)->i_sb->s_dev;
vap->va_nodeid = zp->z_id;
- if ((zp->z_id == zsb->z_root) && zfs_show_ctldir(zp))
- links = zp->z_links + 1;
+ if ((zp->z_id == zfsvfs->z_root) && zfs_show_ctldir(zp))
+ links = ZTOI(zp)->i_nlink + 1;
else
- links = zp->z_links;
+ links = ZTOI(zp)->i_nlink;
vap->va_nlink = MIN(links, ZFS_LINK_MAX);
vap->va_size = i_size_read(ip);
vap->va_rdev = ip->i_rdev;
@@ -2252,7 +2463,7 @@ zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
* Add in any requested optional attributes and the create time.
* Also set the corresponding bits in the returned attribute bitmap.
*/
- if ((xoap = xva_getxoptattr(xvap)) != NULL && zsb->z_use_fuids) {
+ if ((xoap = xva_getxoptattr(xvap)) != NULL && zfsvfs->z_use_fuids) {
if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) {
xoap->xoa_archive =
((zp->z_pflags & ZFS_ARCHIVE) != 0);
@@ -2327,7 +2538,7 @@ zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) {
uint64_t times[2];
- (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zsb),
+ (void) sa_lookup(zp->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
times, sizeof (times));
ZFS_TIME_DECODE(&xoap->xoa_createtime, times);
XVA_SET_RTN(xvap, XAT_CREATETIME);
@@ -2338,7 +2549,7 @@ zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
XVA_SET_RTN(xvap, XAT_REPARSE);
}
if (XVA_ISSET_REQ(xvap, XAT_GEN)) {
- xoap->xoa_generation = zp->z_gen;
+ xoap->xoa_generation = ip->i_generation;
XVA_SET_RTN(xvap, XAT_GEN);
}
@@ -2367,13 +2578,12 @@ zfs_getattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
/*
* Block size hasn't been set; suggest maximal I/O transfers.
*/
- vap->va_blksize = zsb->z_max_blksz;
+ vap->va_blksize = zfsvfs->z_max_blksz;
}
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
-EXPORT_SYMBOL(zfs_getattr);
/*
* Get the basic file attributes and place them in the provided kstat
@@ -2392,11 +2602,11 @@ int
zfs_getattr_fast(struct inode *ip, struct kstat *sp)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
uint32_t blksize;
u_longlong_t nblocks;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
mutex_enter(&zp->z_lock);
@@ -2411,7 +2621,7 @@ zfs_getattr_fast(struct inode *ip, struct kstat *sp)
/*
* Block size hasn't been set; suggest maximal I/O transfers.
*/
- sp->blksize = zsb->z_max_blksz;
+ sp->blksize = zfsvfs->z_max_blksz;
}
mutex_exit(&zp->z_lock);
@@ -2420,17 +2630,16 @@ zfs_getattr_fast(struct inode *ip, struct kstat *sp)
* Required to prevent NFS client from detecting different inode
* numbers of snapshot root dentry before and after snapshot mount.
*/
- if (zsb->z_issnap) {
+ if (zfsvfs->z_issnap) {
if (ip->i_sb->s_root->d_inode == ip)
sp->ino = ZFSCTL_INO_SNAPDIRS -
- dmu_objset_id(zsb->z_os);
+ dmu_objset_id(zfsvfs->z_os);
}
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
-EXPORT_SYMBOL(zfs_getattr_fast);
/*
* Set the file attributes to the values contained in the
@@ -2454,7 +2663,7 @@ int
zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
zilog_t *zilog;
dmu_tx_t *tx;
vattr_t oldva;
@@ -2463,7 +2672,7 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
uint_t saved_mask = 0;
int trim_mask = 0;
uint64_t new_mode;
- uint64_t new_uid, new_gid;
+ uint64_t new_kuid = 0, new_kgid = 0, new_uid, new_gid;
uint64_t xattr_obj;
uint64_t mtime[2], ctime[2], atime[2];
znode_t *attrzp;
@@ -2481,31 +2690,31 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
if (mask == 0)
return (0);
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
- zilog = zsb->z_log;
+ zilog = zfsvfs->z_log;
/*
* Make sure that if we have ephemeral uid/gid or xvattr specified
* that file system is at proper version level
*/
- if (zsb->z_use_fuids == B_FALSE &&
+ if (zfsvfs->z_use_fuids == B_FALSE &&
(((mask & ATTR_UID) && IS_EPHEMERAL(vap->va_uid)) ||
((mask & ATTR_GID) && IS_EPHEMERAL(vap->va_gid)) ||
(mask & ATTR_XVATTR))) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EINVAL));
}
if (mask & ATTR_SIZE && S_ISDIR(ip->i_mode)) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EISDIR));
}
if (mask & ATTR_SIZE && !S_ISREG(ip->i_mode) && !S_ISFIFO(ip->i_mode)) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EINVAL));
}
@@ -2557,7 +2766,7 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
aclp = NULL;
/* Can this be moved to before the top label? */
- if (zfs_is_readonly(zsb)) {
+ if (zfs_is_readonly(zfsvfs)) {
err = EROFS;
goto out3;
}
@@ -2614,7 +2823,7 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
take_owner = (mask & ATTR_UID) && (vap->va_uid == crgetuid(cr));
take_group = (mask & ATTR_GID) &&
- zfs_groupmember(zsb, vap->va_gid, cr);
+ zfs_groupmember(zfsvfs, vap->va_gid, cr);
/*
* If both ATTR_UID and ATTR_GID are set then take_owner and
@@ -2774,7 +2983,7 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
mask = vap->va_mask;
if ((mask & (ATTR_UID | ATTR_GID))) {
- err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zsb),
+ err = sa_lookup(zp->z_sa_hdl, SA_ZPL_XATTR(zfsvfs),
&xattr_obj, sizeof (xattr_obj));
if (err == 0 && xattr_obj) {
@@ -2783,10 +2992,10 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
goto out2;
}
if (mask & ATTR_UID) {
- new_uid = zfs_fuid_create(zsb,
+ new_kuid = zfs_fuid_create(zfsvfs,
(uint64_t)vap->va_uid, cr, ZFS_OWNER, &fuidp);
- if (new_uid != zp->z_uid &&
- zfs_fuid_overquota(zsb, B_FALSE, new_uid)) {
+ if (new_kuid != KUID_TO_SUID(ZTOI(zp)->i_uid) &&
+ zfs_fuid_overquota(zfsvfs, B_FALSE, new_kuid)) {
if (attrzp)
iput(ZTOI(attrzp));
err = EDQUOT;
@@ -2795,10 +3004,10 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
}
if (mask & ATTR_GID) {
- new_gid = zfs_fuid_create(zsb, (uint64_t)vap->va_gid,
- cr, ZFS_GROUP, &fuidp);
- if (new_gid != zp->z_gid &&
- zfs_fuid_overquota(zsb, B_TRUE, new_gid)) {
+ new_kgid = zfs_fuid_create(zfsvfs,
+ (uint64_t)vap->va_gid, cr, ZFS_GROUP, &fuidp);
+ if (new_kgid != KGID_TO_SGID(ZTOI(zp)->i_gid) &&
+ zfs_fuid_overquota(zfsvfs, B_TRUE, new_kgid)) {
if (attrzp)
iput(ZTOI(attrzp));
err = EDQUOT;
@@ -2806,7 +3015,7 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
}
}
}
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
if (mask & ATTR_MODE) {
uint64_t pmode = zp->z_mode;
@@ -2821,7 +3030,7 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
* Are we upgrading ACL from old V0 format
* to V1 format?
*/
- if (zsb->z_version >= ZPL_VERSION_FUID &&
+ if (zfsvfs->z_version >= ZPL_VERSION_FUID &&
zfs_znode_acl_version(zp) ==
ZFS_ACL_VERSION_INITIAL) {
dmu_tx_hold_free(tx, acl_obj, 0,
@@ -2850,9 +3059,9 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
dmu_tx_hold_sa(tx, attrzp->z_sa_hdl, B_FALSE);
}
- fuid_dirtied = zsb->z_fuid_dirty;
+ fuid_dirtied = zfsvfs->z_fuid_dirty;
if (fuid_dirtied)
- zfs_fuid_txhold(zsb, tx);
+ zfs_fuid_txhold(zfsvfs, tx);
zfs_sa_upgrade_txholds(tx, zp);
@@ -2874,7 +3083,7 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
mutex_enter(&zp->z_acl_lock);
mutex_enter(&zp->z_lock);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
&zp->z_pflags, sizeof (zp->z_pflags));
if (attrzp) {
@@ -2882,37 +3091,39 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
mutex_enter(&attrzp->z_acl_lock);
mutex_enter(&attrzp->z_lock);
SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
- SA_ZPL_FLAGS(zsb), NULL, &attrzp->z_pflags,
+ SA_ZPL_FLAGS(zfsvfs), NULL, &attrzp->z_pflags,
sizeof (attrzp->z_pflags));
}
if (mask & (ATTR_UID|ATTR_GID)) {
if (mask & ATTR_UID) {
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL,
+ ZTOI(zp)->i_uid = SUID_TO_KUID(new_kuid);
+ new_uid = zfs_uid_read(ZTOI(zp));
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
&new_uid, sizeof (new_uid));
- zp->z_uid = new_uid;
if (attrzp) {
SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
- SA_ZPL_UID(zsb), NULL, &new_uid,
+ SA_ZPL_UID(zfsvfs), NULL, &new_uid,
sizeof (new_uid));
- attrzp->z_uid = new_uid;
+ ZTOI(attrzp)->i_uid = SUID_TO_KUID(new_uid);
}
}
if (mask & ATTR_GID) {
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb),
+ ZTOI(zp)->i_gid = SGID_TO_KGID(new_kgid);
+ new_gid = zfs_gid_read(ZTOI(zp));
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs),
NULL, &new_gid, sizeof (new_gid));
- zp->z_gid = new_gid;
if (attrzp) {
SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
- SA_ZPL_GID(zsb), NULL, &new_gid,
+ SA_ZPL_GID(zfsvfs), NULL, &new_gid,
sizeof (new_gid));
- attrzp->z_gid = new_gid;
+ ZTOI(attrzp)->i_gid = SGID_TO_KGID(new_kgid);
}
}
if (!(mask & ATTR_MODE)) {
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb),
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs),
NULL, &new_mode, sizeof (new_mode));
new_mode = zp->z_mode;
}
@@ -2925,9 +3136,9 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
}
if (mask & ATTR_MODE) {
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
&new_mode, sizeof (new_mode));
- zp->z_mode = new_mode;
+ zp->z_mode = ZTOI(zp)->i_mode = new_mode;
ASSERT3P(aclp, !=, NULL);
err = zfs_aclset_common(zp, aclp, cr, tx);
ASSERT0(err);
@@ -2937,39 +3148,36 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
aclp = NULL;
}
-
if ((mask & ATTR_ATIME) || zp->z_atime_dirty) {
zp->z_atime_dirty = 0;
ZFS_TIME_ENCODE(&ip->i_atime, atime);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
&atime, sizeof (atime));
}
- if (mask & ATTR_MTIME) {
+ if (mask & (ATTR_MTIME | ATTR_SIZE)) {
ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL,
+ ZTOI(zp)->i_mtime = timespec_trunc(vap->va_mtime,
+ ZTOI(zp)->i_sb->s_time_gran);
+
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
mtime, sizeof (mtime));
}
- /* XXX - shouldn't this be done *before* the ATIME/MTIME checks? */
- if (mask & ATTR_SIZE && !(mask & ATTR_MTIME)) {
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb),
- NULL, mtime, sizeof (mtime));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
- &ctime, sizeof (ctime));
- zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
- } else if (mask != 0) {
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL,
- &ctime, sizeof (ctime));
- zfs_tstamp_update_setup(zp, STATE_CHANGED, mtime, ctime);
- if (attrzp) {
- SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
- SA_ZPL_CTIME(zsb), NULL,
- &ctime, sizeof (ctime));
- zfs_tstamp_update_setup(attrzp, STATE_CHANGED,
- mtime, ctime);
- }
+ if (mask & (ATTR_CTIME | ATTR_SIZE)) {
+ ZFS_TIME_ENCODE(&vap->va_ctime, ctime);
+ ZTOI(zp)->i_ctime = timespec_trunc(vap->va_ctime,
+ ZTOI(zp)->i_sb->s_time_gran);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+ ctime, sizeof (ctime));
+ }
+
+ if (attrzp && mask) {
+ SA_ADD_BULK_ATTR(xattr_bulk, xattr_count,
+ SA_ZPL_CTIME(zfsvfs), NULL, &ctime,
+ sizeof (ctime));
}
+
/*
* Do this after setting timestamps to prevent timestamp
* update from toggling bit
@@ -3008,7 +3216,7 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
}
if (fuid_dirtied)
- zfs_fuid_sync(zsb, tx);
+ zfs_fuid_sync(zfsvfs, tx);
if (mask != 0)
zfs_log_setattr(zilog, tx, TX_SETATTR, zp, vap, mask, fuidp);
@@ -3052,17 +3260,16 @@ zfs_setattr(struct inode *ip, vattr_t *vap, int flags, cred_t *cr)
}
out2:
- if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
+ if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
out3:
kmem_free(xattr_bulk, sizeof (sa_bulk_attr_t) * 7);
kmem_free(bulk, sizeof (sa_bulk_attr_t) * 7);
kmem_free(tmpxvattr, sizeof (xvattr_t));
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (err);
}
-EXPORT_SYMBOL(zfs_setattr);
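
One behavioral note on the timestamp rework in zfs_setattr() above:
ATTR_MTIME and ATTR_CTIME (and any size change) are now taken from the
caller-supplied vattr and written both to the SA, via ZFS_TIME_ENCODE,
and to the in-core inode, truncated to the superblock's granularity. A
hedged sketch of why both copies matter, assuming the pre-4.18
timespec_trunc() kernel helper of this era; the wrapper name is
illustrative:

#include <linux/fs.h>

/*
 * The SA keeps the on-disk 64-bit encoding; the in-core inode must
 * respect s_time_gran so that stat() results round-trip exactly.
 */
static void
zfs_set_inode_mtime(struct inode *ip, struct timespec t)
{
	ip->i_mtime = timespec_trunc(t, ip->i_sb->s_time_gran);
}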
typedef struct zfs_zlock {
krwlock_t *zl_rwlock; /* lock we acquired */
@@ -3184,7 +3391,7 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
{
znode_t *tdzp, *szp, *tzp;
znode_t *sdzp = ITOZ(sdip);
- zfs_sb_t *zsb = ITOZSB(sdip);
+ zfsvfs_t *zfsvfs = ITOZSB(sdip);
zilog_t *zilog;
zfs_dirlock_t *sdl, *tdl;
dmu_tx_t *tx;
@@ -3194,20 +3401,28 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
int zflg = 0;
boolean_t waited = B_FALSE;
- ZFS_ENTER(zsb);
+ if (snm == NULL || tnm == NULL)
+ return (SET_ERROR(EINVAL));
+
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(sdzp);
- zilog = zsb->z_log;
+ zilog = zfsvfs->z_log;
+
+ tdzp = ITOZ(tdip);
+ ZFS_VERIFY_ZP(tdzp);
+ /*
+ * We check i_sb because snapshots and the ctldir must have different
+ * super blocks.
+ */
if (tdip->i_sb != sdip->i_sb || zfsctl_is_node(tdip)) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EXDEV));
}
- tdzp = ITOZ(tdip);
- ZFS_VERIFY_ZP(tdzp);
- if (zsb->z_utf8 && u8_validate(tnm,
+ if (zfsvfs->z_utf8 && u8_validate(tnm,
strlen(tnm), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EILSEQ));
}
@@ -3225,7 +3440,7 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
* See the comment in zfs_link() for why this is considered bad.
*/
if ((tdzp->z_pflags & ZFS_XATTR) != (sdzp->z_pflags & ZFS_XATTR)) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EINVAL));
}
@@ -3244,10 +3459,10 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
* First compare the two name arguments without
* considering any case folding.
*/
- int nofold = (zsb->z_norm & ~U8_TEXTPREP_TOUPPER);
+ int nofold = (zfsvfs->z_norm & ~U8_TEXTPREP_TOUPPER);
cmp = u8_strcmp(snm, tnm, 0, nofold, U8_UNICODE_LATEST, &error);
- ASSERT(error == 0 || !zsb->z_utf8);
+ ASSERT(error == 0 || !zfsvfs->z_utf8);
if (cmp == 0) {
/*
* POSIX: "If the old argument and the new argument
@@ -3255,7 +3470,7 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
* the rename() function shall return successfully
* and perform no other action."
*/
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
/*
@@ -3276,10 +3491,10 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
* is an exact match, we will allow this to proceed as
* a name-change request.
*/
- if ((zsb->z_case == ZFS_CASE_INSENSITIVE ||
- (zsb->z_case == ZFS_CASE_MIXED &&
+ if ((zfsvfs->z_case == ZFS_CASE_INSENSITIVE ||
+ (zfsvfs->z_case == ZFS_CASE_MIXED &&
flags & FIGNORECASE)) &&
- u8_strcmp(snm, tnm, 0, zsb->z_norm, U8_UNICODE_LATEST,
+ u8_strcmp(snm, tnm, 0, zfsvfs->z_norm, U8_UNICODE_LATEST,
&error) == 0) {
/*
* case preserving rename request, require exact
@@ -3327,7 +3542,7 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
if (strcmp(snm, "..") == 0)
serr = EINVAL;
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (serr);
}
if (terr) {
@@ -3339,7 +3554,7 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
if (strcmp(tnm, "..") == 0)
terr = EINVAL;
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (terr);
}
@@ -3391,7 +3606,7 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
}
}
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
dmu_tx_hold_sa(tx, sdzp->z_sa_hdl, B_FALSE);
dmu_tx_hold_zap(tx, sdzp->z_id, FALSE, snm);
@@ -3406,7 +3621,7 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
}
zfs_sa_upgrade_txholds(tx, szp);
- dmu_tx_hold_zap(tx, zsb->z_unlinkedobj, FALSE, NULL);
+ dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
if (zl != NULL)
@@ -3430,7 +3645,7 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
iput(ZTOI(szp));
if (tzp)
iput(ZTOI(tzp));
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
@@ -3442,7 +3657,7 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
if (error == 0) {
szp->z_pflags |= ZFS_AV_MODIFIED;
- error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zsb),
+ error = sa_update(szp->z_sa_hdl, SA_ZPL_FLAGS(zfsvfs),
(void *)&szp->z_pflags, sizeof (uint64_t), tx);
ASSERT0(error);
@@ -3492,13 +3707,12 @@ zfs_rename(struct inode *sdip, char *snm, struct inode *tdip, char *tnm,
iput(ZTOI(tzp));
}
- if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
+ if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_rename);
/*
* Insert the indicated symbolic reference entry into the directory.
@@ -3524,7 +3738,7 @@ zfs_symlink(struct inode *dip, char *name, vattr_t *vap, char *link,
znode_t *zp, *dzp = ITOZ(dip);
zfs_dirlock_t *dl;
dmu_tx_t *tx;
- zfs_sb_t *zsb = ITOZSB(dip);
+ zfsvfs_t *zfsvfs = ITOZSB(dip);
zilog_t *zilog;
uint64_t len = strlen(link);
int error;
@@ -3536,26 +3750,29 @@ zfs_symlink(struct inode *dip, char *name, vattr_t *vap, char *link,
ASSERT(S_ISLNK(vap->va_mode));
- ZFS_ENTER(zsb);
+ if (name == NULL)
+ return (SET_ERROR(EINVAL));
+
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(dzp);
- zilog = zsb->z_log;
+ zilog = zfsvfs->z_log;
- if (zsb->z_utf8 && u8_validate(name, strlen(name),
+ if (zfsvfs->z_utf8 && u8_validate(name, strlen(name),
NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EILSEQ));
}
if (flags & FIGNORECASE)
zflg |= ZCILOOK;
if (len > MAXPATHLEN) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(ENAMETOOLONG));
}
if ((error = zfs_acl_ids_create(dzp, 0,
vap, cr, NULL, &acl_ids)) != 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
top:
@@ -3567,36 +3784,36 @@ zfs_symlink(struct inode *dip, char *name, vattr_t *vap, char *link,
error = zfs_dirent_lock(&dl, dzp, name, &zp, zflg, NULL, NULL);
if (error) {
zfs_acl_ids_free(&acl_ids);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
zfs_acl_ids_free(&acl_ids);
zfs_dirent_unlock(dl);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
- if (zfs_acl_ids_overquota(zsb, &acl_ids)) {
+ if (zfs_acl_ids_overquota(zfsvfs, &acl_ids)) {
zfs_acl_ids_free(&acl_ids);
zfs_dirent_unlock(dl);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EDQUOT));
}
- tx = dmu_tx_create(zsb->z_os);
- fuid_dirtied = zsb->z_fuid_dirty;
+ tx = dmu_tx_create(zfsvfs->z_os);
+ fuid_dirtied = zfsvfs->z_fuid_dirty;
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, MAX(1, len));
dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
dmu_tx_hold_sa_create(tx, acl_ids.z_aclp->z_acl_bytes +
ZFS_SA_BASE_ATTR_SIZE + len);
dmu_tx_hold_sa(tx, dzp->z_sa_hdl, B_FALSE);
- if (!zsb->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
+ if (!zfsvfs->z_use_sa && acl_ids.z_aclp->z_acl_bytes > ZFS_ACE_SPACE) {
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
acl_ids.z_aclp->z_acl_bytes);
}
if (fuid_dirtied)
- zfs_fuid_txhold(zsb, tx);
+ zfs_fuid_txhold(zfsvfs, tx);
error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
if (error) {
zfs_dirent_unlock(dl);
@@ -3608,7 +3825,7 @@ zfs_symlink(struct inode *dip, char *name, vattr_t *vap, char *link,
}
zfs_acl_ids_free(&acl_ids);
dmu_tx_abort(tx);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
@@ -3619,18 +3836,18 @@ zfs_symlink(struct inode *dip, char *name, vattr_t *vap, char *link,
zfs_mknode(dzp, vap, tx, cr, 0, &zp, &acl_ids);
if (fuid_dirtied)
- zfs_fuid_sync(zsb, tx);
+ zfs_fuid_sync(zfsvfs, tx);
mutex_enter(&zp->z_lock);
if (zp->z_is_sa)
- error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zsb),
+ error = sa_update(zp->z_sa_hdl, SA_ZPL_SYMLINK(zfsvfs),
link, len, tx);
else
zfs_sa_symlink(zp, link, len, tx);
mutex_exit(&zp->z_lock);
zp->z_size = len;
- (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zsb),
+ (void) sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zfsvfs),
&zp->z_size, sizeof (zp->z_size), tx);
/*
* Insert the new object into the directory.
@@ -3652,13 +3869,12 @@ zfs_symlink(struct inode *dip, char *name, vattr_t *vap, char *link,
*ipp = ZTOI(zp);
- if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
+ if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_symlink);
/*
* Return, in the buffer contained in the provided uio structure,
@@ -3679,24 +3895,23 @@ int
zfs_readlink(struct inode *ip, uio_t *uio, cred_t *cr)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
int error;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
mutex_enter(&zp->z_lock);
if (zp->z_is_sa)
error = sa_lookup_uio(zp->z_sa_hdl,
- SA_ZPL_SYMLINK(zsb), uio);
+ SA_ZPL_SYMLINK(zfsvfs), uio);
else
error = zfs_sa_readlink(zp, uio);
mutex_exit(&zp->z_lock);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_readlink);
/*
* Insert a new entry into directory tdip referencing sip.
@@ -3715,11 +3930,12 @@ EXPORT_SYMBOL(zfs_readlink);
*/
/* ARGSUSED */
int
-zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr)
+zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr,
+ int flags)
{
znode_t *dzp = ITOZ(tdip);
znode_t *tzp, *szp;
- zfs_sb_t *zsb = ITOZSB(tdip);
+ zfsvfs_t *zfsvfs = ITOZSB(tdip);
zilog_t *zilog;
zfs_dirlock_t *dl;
dmu_tx_t *tx;
@@ -3728,51 +3944,60 @@ zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr)
uint64_t parent;
uid_t owner;
boolean_t waited = B_FALSE;
-
+ boolean_t is_tmpfile = 0;
+ uint64_t txg;
+#ifdef HAVE_TMPFILE
+ is_tmpfile = (sip->i_nlink == 0 && (sip->i_state & I_LINKABLE));
+#endif
ASSERT(S_ISDIR(tdip->i_mode));
- ZFS_ENTER(zsb);
+ if (name == NULL)
+ return (SET_ERROR(EINVAL));
+
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(dzp);
- zilog = zsb->z_log;
+ zilog = zfsvfs->z_log;
/*
* POSIX dictates that we return EPERM here.
* Better choices include ENOTSUP or EISDIR.
*/
if (S_ISDIR(sip->i_mode)) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EPERM));
}
+ szp = ITOZ(sip);
+ ZFS_VERIFY_ZP(szp);
+
+ /*
+ * We check i_sb because snapshots and the ctldir must have different
+ * super blocks.
+ */
if (sip->i_sb != tdip->i_sb || zfsctl_is_node(sip)) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EXDEV));
}
- szp = ITOZ(sip);
- ZFS_VERIFY_ZP(szp);
-
/* Prevent links to .zfs/shares files */
- if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zsb),
+ if ((error = sa_lookup(szp->z_sa_hdl, SA_ZPL_PARENT(zfsvfs),
&parent, sizeof (uint64_t))) != 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
- if (parent == zsb->z_shares_dir) {
- ZFS_EXIT(zsb);
+ if (parent == zfsvfs->z_shares_dir) {
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EPERM));
}
- if (zsb->z_utf8 && u8_validate(name,
+ if (zfsvfs->z_utf8 && u8_validate(name,
strlen(name), NULL, U8_VALIDATE_ENTIRE, &error) < 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EILSEQ));
}
-#ifdef HAVE_PN_UTILS
if (flags & FIGNORECASE)
zf |= ZCILOOK;
-#endif /* HAVE_PN_UTILS */
/*
* We do not support links between attributes and non-attributes
@@ -3781,18 +4006,19 @@ zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr)
* imposed in attribute space.
*/
if ((szp->z_pflags & ZFS_XATTR) != (dzp->z_pflags & ZFS_XATTR)) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EINVAL));
}
- owner = zfs_fuid_map_id(zsb, szp->z_uid, cr, ZFS_OWNER);
+ owner = zfs_fuid_map_id(zfsvfs, KUID_TO_SUID(sip->i_uid),
+ cr, ZFS_OWNER);
if (owner != crgetuid(cr) && secpolicy_basic_link(cr) != 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EPERM));
}
if ((error = zfs_zaccess(dzp, ACE_ADD_FILE, 0, B_FALSE, cr))) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
@@ -3802,13 +4028,16 @@ zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr)
*/
error = zfs_dirent_lock(&dl, dzp, name, &tzp, zf, NULL, NULL);
if (error) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, szp->z_sa_hdl, B_FALSE);
dmu_tx_hold_zap(tx, dzp->z_id, TRUE, name);
+ if (is_tmpfile)
+ dmu_tx_hold_zap(tx, zfsvfs->z_unlinkedobj, FALSE, NULL);
+
zfs_sa_upgrade_txholds(tx, szp);
zfs_sa_upgrade_txholds(tx, dzp);
error = dmu_tx_assign(tx, waited ? TXG_WAITED : TXG_NOWAIT);
@@ -3821,34 +4050,51 @@ zfs_link(struct inode *tdip, struct inode *sip, char *name, cred_t *cr)
goto top;
}
dmu_tx_abort(tx);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-
+ /* unmark z_unlinked so zfs_link_create will not reject */
+ if (is_tmpfile)
+ szp->z_unlinked = 0;
error = zfs_link_create(dl, szp, tx, 0);
if (error == 0) {
uint64_t txtype = TX_LINK;
-#ifdef HAVE_PN_UTILS
- if (flags & FIGNORECASE)
- txtype |= TX_CI;
-#endif /* HAVE_PN_UTILS */
- zfs_log_link(zilog, tx, txtype, dzp, szp, name);
+ /*
+ * tmpfile is created to be in z_unlinkedobj, so remove it.
+ * Also, we don't log in ZIL, because all previous file
+ * operations on the tmpfile are ignored by ZIL. Instead we
+ * always wait for the txg to sync to make sure all previous
+ * operations are sync safe.
+ */
+ if (is_tmpfile) {
+ VERIFY(zap_remove_int(zfsvfs->z_os,
+ zfsvfs->z_unlinkedobj, szp->z_id, tx) == 0);
+ } else {
+ if (flags & FIGNORECASE)
+ txtype |= TX_CI;
+ zfs_log_link(zilog, tx, txtype, dzp, szp, name);
+ }
+ } else if (is_tmpfile) {
+ /* restore z_unlinked since linking failed */
+ szp->z_unlinked = 1;
}
-
+ txg = dmu_tx_get_txg(tx);
dmu_tx_commit(tx);
zfs_dirent_unlock(dl);
- if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
+ if (!is_tmpfile && zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
+ if (is_tmpfile)
+ txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), txg);
+
zfs_inode_update(dzp);
zfs_inode_update(szp);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_link);
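
The durability split at the end of zfs_link() deserves a gloss: a regular
link can be made durable by a ZIL commit, but a tmpfile has no ZIL records
at all (every prior operation on it was skipped), so the only crash-safe
point to return from linkat() is after the transaction group carrying the
whole file has synced. In outline:

	if (is_tmpfile)
		/* no ZIL history to replay; force the full txg out */
		txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), txg);
	else if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
		zil_commit(zilog, 0);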
static void
zfs_putpage_commit_cb(void *arg)
@@ -3878,7 +4124,7 @@ int
zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
loff_t offset;
loff_t pgoff;
unsigned int pglen;
@@ -3891,7 +4137,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
int cnt = 0;
struct address_space *mapping;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
ASSERT(PageLocked(pp));
@@ -3904,7 +4150,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
/* Page is beyond end of file */
if (pgoff >= offset) {
unlock_page(pp);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
@@ -3918,8 +4164,8 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
* is to register a page_mkwrite() handler to count the page
* against its quota when it is about to be dirtied.
*/
- if (zfs_owner_overquota(zsb, zp, B_FALSE) ||
- zfs_owner_overquota(zsb, zp, B_TRUE)) {
+ if (zfs_owner_overquota(zfsvfs, zp, B_FALSE) ||
+ zfs_owner_overquota(zfsvfs, zp, B_TRUE)) {
err = EDQUOT;
}
#endif
@@ -3958,7 +4204,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
if (unlikely((mapping != pp->mapping) || !PageDirty(pp))) {
unlock_page(pp);
zfs_range_unlock(rl);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
@@ -3970,7 +4216,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
if (wbc->sync_mode != WB_SYNC_NONE)
wait_on_page_writeback(pp);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
@@ -3978,7 +4224,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
if (!clear_page_dirty_for_io(pp)) {
unlock_page(pp);
zfs_range_unlock(rl);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
@@ -3990,7 +4236,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
set_page_writeback(pp);
unlock_page(pp);
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_write(tx, zp->z_id, pgoff, pglen);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, zp);
@@ -4005,18 +4251,19 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
ClearPageError(pp);
end_page_writeback(pp);
zfs_range_unlock(rl);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (err);
}
va = kmap(pp);
ASSERT3U(pglen, <=, PAGE_SIZE);
- dmu_write(zsb->z_os, zp->z_id, pgoff, pglen, va, tx);
+ dmu_write(zfsvfs->z_os, zp->z_id, pgoff, pglen, va, tx);
kunmap(pp);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zsb), NULL, &mtime, 16);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zsb), NULL, &ctime, 16);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(zsb), NULL, &zp->z_pflags, 8);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_FLAGS(zfsvfs), NULL,
+ &zp->z_pflags, 8);
/* Preserve the mtime and ctime provided by the inode */
ZFS_TIME_ENCODE(&ip->i_mtime, mtime);
@@ -4026,7 +4273,7 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
err = sa_bulk_update(zp->z_sa_hdl, bulk, cnt, tx);
- zfs_log_write(zsb->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
+ zfs_log_write(zfsvfs->z_log, tx, TX_WRITE, zp, pgoff, pglen, 0,
zfs_putpage_commit_cb, pp);
dmu_tx_commit(tx);
@@ -4038,11 +4285,10 @@ zfs_putpage(struct inode *ip, struct page *pp, struct writeback_control *wbc)
* writepages() normally handles the entire commit for
* performance reasons.
*/
- if (zsb->z_log != NULL)
- zil_commit(zsb->z_log, zp->z_id);
+ zil_commit(zfsvfs->z_log, zp->z_id);
}
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (err);
}
@@ -4054,17 +4300,17 @@ int
zfs_dirty_inode(struct inode *ip, int flags)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
dmu_tx_t *tx;
uint64_t mode, atime[2], mtime[2], ctime[2];
sa_bulk_attr_t bulk[4];
int error = 0;
int cnt = 0;
- if (zfs_is_readonly(zsb) || dmu_objset_is_snapshot(zsb->z_os))
+ if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os))
return (0);
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
#ifdef I_DIRTY_TIME
@@ -4081,7 +4327,7 @@ zfs_dirty_inode(struct inode *ip, int flags)
}
#endif
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, zp);
@@ -4095,10 +4341,10 @@ zfs_dirty_inode(struct inode *ip, int flags)
mutex_enter(&zp->z_lock);
zp->z_atime_dirty = 0;
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(zsb), NULL, &mode, 8);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(zsb), NULL, &atime, 16);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zsb), NULL, &mtime, 16);
- SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zsb), NULL, &ctime, 16);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
+ SA_ADD_BULK_ATTR(bulk, cnt, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
/* Preserve the mode, mtime and ctime provided by the inode */
ZFS_TIME_ENCODE(&ip->i_atime, atime);
@@ -4113,34 +4359,33 @@ zfs_dirty_inode(struct inode *ip, int flags)
dmu_tx_commit(tx);
out:
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_dirty_inode);
/*ARGSUSED*/
void
zfs_inactive(struct inode *ip)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
uint64_t atime[2];
int error;
int need_unlock = 0;
/* Only read lock if we haven't already write locked, e.g. rollback */
- if (!RW_WRITE_HELD(&zsb->z_teardown_inactive_lock)) {
+ if (!RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock)) {
need_unlock = 1;
- rw_enter(&zsb->z_teardown_inactive_lock, RW_READER);
+ rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_READER);
}
if (zp->z_sa_hdl == NULL) {
if (need_unlock)
- rw_exit(&zsb->z_teardown_inactive_lock);
+ rw_exit(&zfsvfs->z_teardown_inactive_lock);
return;
}
if (zp->z_atime_dirty && zp->z_unlinked == 0) {
- dmu_tx_t *tx = dmu_tx_create(zsb->z_os);
+ dmu_tx_t *tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, zp);
@@ -4150,7 +4395,7 @@ zfs_inactive(struct inode *ip)
} else {
ZFS_TIME_ENCODE(&ip->i_atime, atime);
mutex_enter(&zp->z_lock);
- (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zsb),
+ (void) sa_update(zp->z_sa_hdl, SA_ZPL_ATIME(zfsvfs),
(void *)&atime, sizeof (atime), tx);
zp->z_atime_dirty = 0;
mutex_exit(&zp->z_lock);
@@ -4160,9 +4405,8 @@ zfs_inactive(struct inode *ip)
zfs_zinactive(zp);
if (need_unlock)
- rw_exit(&zsb->z_teardown_inactive_lock);
+ rw_exit(&zfsvfs->z_teardown_inactive_lock);
}
-EXPORT_SYMBOL(zfs_inactive);
/*
* Bounds-check the seek operation.
@@ -4183,7 +4427,6 @@ zfs_seek(struct inode *ip, offset_t ooff, offset_t *noffp)
return (0);
return ((*noffp < 0 || *noffp > MAXOFFSET_T) ? EINVAL : 0);
}
-EXPORT_SYMBOL(zfs_seek);
/*
* Fill pages with data from the disk.
@@ -4192,7 +4435,7 @@ static int
zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
objset_t *os;
struct page *cur_pp;
u_offset_t io_off, total;
@@ -4201,7 +4444,7 @@ zfs_fillpage(struct inode *ip, struct page *pl[], int nr_pages)
unsigned page_idx;
int err;
- os = zsb->z_os;
+ os = zfsvfs->z_os;
io_len = nr_pages << PAGE_SHIFT;
i_size = i_size_read(ip);
io_off = page_offset(pl[0]);
@@ -4249,21 +4492,20 @@ int
zfs_getpage(struct inode *ip, struct page *pl[], int nr_pages)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
int err;
if (pl == NULL)
return (0);
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
err = zfs_fillpage(ip, pl, nr_pages);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (err);
}
-EXPORT_SYMBOL(zfs_getpage);
/*
* Check ZFS specific permissions to memory map a section of a file.
@@ -4283,32 +4525,31 @@ zfs_map(struct inode *ip, offset_t off, caddr_t *addrp, size_t len,
unsigned long vm_flags)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
if ((vm_flags & VM_WRITE) && (zp->z_pflags &
(ZFS_IMMUTABLE | ZFS_READONLY | ZFS_APPENDONLY))) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EPERM));
}
if ((vm_flags & (VM_READ | VM_EXEC)) &&
(zp->z_pflags & ZFS_AV_QUARANTINED)) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EACCES));
}
if (off < 0 || len > MAXOFFSET_T - off) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(ENXIO));
}
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
-EXPORT_SYMBOL(zfs_map);
/*
* convoff - converts the given data (start, whence) to the
@@ -4321,7 +4562,7 @@ convoff(struct inode *ip, flock64_t *lckdat, int whence, offset_t offset)
int error;
if ((lckdat->l_whence == 2) || (whence == 2)) {
- if ((error = zfs_getattr(ip, &vap, 0, CRED()) != 0))
+ if ((error = zfs_getattr(ip, &vap, 0, CRED())))
return (error);
}
@@ -4382,25 +4623,34 @@ zfs_space(struct inode *ip, int cmd, flock64_t *bfp, int flag,
offset_t offset, cred_t *cr)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
uint64_t off, len;
int error;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
if (cmd != F_FREESP) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EINVAL));
}
+ /*
+ * Callers might not be able to properly detect that we are read-only,
+ * so check it explicitly here.
+ */
+ if (zfs_is_readonly(zfsvfs)) {
+ ZFS_EXIT(zfsvfs);
+ return (SET_ERROR(EROFS));
+ }
+
if ((error = convoff(ip, bfp, 0, offset))) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
if (bfp->l_len < 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EINVAL));
}
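The EROFS hunk above is the substantive change in zfs_space(): the read-only test now runs before convoff() and the ACL check, because fallocate()-style callers do not reliably detect a read-only mount on their own. The intended ordering, reduced to a sketch (zfs_is_readonly() and SET_ERROR() are the real ZFS helpers; everything else is elided):

        if (cmd != F_FREESP)
                return (SET_ERROR(EINVAL));
        /* Fail fast, before validating offsets or access rights. */
        if (zfs_is_readonly(zfsvfs))
                return (SET_ERROR(EROFS));
        /* ... convoff(), zfs_zaccess(), zfs_freesp() follow ... */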
@@ -4411,7 +4661,7 @@ zfs_space(struct inode *ip, int cmd, flock64_t *bfp, int flag,
* operates directly on inodes, so we need to check access rights.
*/
if ((error = zfs_zaccess(zp, ACE_WRITE_DATA, 0, B_FALSE, cr))) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
@@ -4420,40 +4670,34 @@ zfs_space(struct inode *ip, int cmd, flock64_t *bfp, int flag,
error = zfs_freesp(zp, off, len, flag, TRUE);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_space);
/*ARGSUSED*/
int
zfs_fid(struct inode *ip, fid_t *fidp)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
uint32_t gen;
uint64_t gen64;
uint64_t object = zp->z_id;
zfid_short_t *zfid;
int size, i, error;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
- if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zsb),
+ if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs),
&gen64, sizeof (uint64_t))) != 0) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
gen = (uint32_t)gen64;
- size = (zsb->z_parent != zsb) ? LONG_FID_LEN : SHORT_FID_LEN;
- if (fidp->fid_len < size) {
- fidp->fid_len = size;
- ZFS_EXIT(zsb);
- return (SET_ERROR(ENOSPC));
- }
+ size = SHORT_FID_LEN;
zfid = (zfid_short_t *)fidp;
@@ -4468,65 +4712,48 @@ zfs_fid(struct inode *ip, fid_t *fidp)
for (i = 0; i < sizeof (zfid->zf_gen); i++)
zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
- if (size == LONG_FID_LEN) {
- uint64_t objsetid = dmu_objset_id(zsb->z_os);
- zfid_long_t *zlfid;
-
- zlfid = (zfid_long_t *)fidp;
-
- for (i = 0; i < sizeof (zlfid->zf_setid); i++)
- zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
-
- /* XXX - this should be the generation number for the objset */
- for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
- zlfid->zf_setgen[i] = 0;
- }
-
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
-EXPORT_SYMBOL(zfs_fid);
/*ARGSUSED*/
int
zfs_getsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
int error;
boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
error = zfs_getacl(zp, vsecp, skipaclchk, cr);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_getsecattr);
/*ARGSUSED*/
int
zfs_setsecattr(struct inode *ip, vsecattr_t *vsecp, int flag, cred_t *cr)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
int error;
boolean_t skipaclchk = (flag & ATTR_NOACLCHECK) ? B_TRUE : B_FALSE;
- zilog_t *zilog = zsb->z_log;
+ zilog_t *zilog = zfsvfs->z_log;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
error = zfs_setacl(zp, vsecp, skipaclchk, cr);
- if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
+ if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
zil_commit(zilog, 0);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
-EXPORT_SYMBOL(zfs_setsecattr);
#ifdef HAVE_UIO_ZEROCOPY
/*
@@ -4544,8 +4771,8 @@ static int
zfs_reqzcbuf(struct inode *ip, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ITOZSB(ip);
- int max_blksz = zsb->z_max_blksz;
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
+ int max_blksz = zfsvfs->z_max_blksz;
uio_t *uio = &xuio->xu_uio;
ssize_t size = uio->uio_resid;
offset_t offset = uio->uio_loffset;
@@ -4558,7 +4785,7 @@ zfs_reqzcbuf(struct inode *ip, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr)
if (xuio->xu_type != UIOTYPE_ZEROCOPY)
return (SET_ERROR(EINVAL));
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
switch (ioflag) {
case UIO_WRITE:
@@ -4568,7 +4795,7 @@ zfs_reqzcbuf(struct inode *ip, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr)
*/
blksz = max_blksz;
if (size < blksz || zp->z_blksz != blksz) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EINVAL));
}
/*
@@ -4633,7 +4860,7 @@ zfs_reqzcbuf(struct inode *ip, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr)
blksz = zcr_blksz_max;
/* avoid potential complexity of dealing with it */
if (blksz > max_blksz) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EINVAL));
}
@@ -4642,18 +4869,18 @@ zfs_reqzcbuf(struct inode *ip, enum uio_rw ioflag, xuio_t *xuio, cred_t *cr)
size = maxsize;
if (size < blksz) {
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EINVAL));
}
break;
default:
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (SET_ERROR(EINVAL));
}
uio->uio_extflg = UIO_XUIO;
XUIO_XUZC_RW(xuio) = ioflag;
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
@@ -4685,6 +4912,39 @@ zfs_retzcbuf(struct inode *ip, xuio_t *xuio, cred_t *cr)
#endif /* HAVE_UIO_ZEROCOPY */
#if defined(_KERNEL) && defined(HAVE_SPL)
+EXPORT_SYMBOL(zfs_open);
+EXPORT_SYMBOL(zfs_close);
+EXPORT_SYMBOL(zfs_read);
+EXPORT_SYMBOL(zfs_write);
+EXPORT_SYMBOL(zfs_access);
+EXPORT_SYMBOL(zfs_lookup);
+EXPORT_SYMBOL(zfs_create);
+EXPORT_SYMBOL(zfs_tmpfile);
+EXPORT_SYMBOL(zfs_remove);
+EXPORT_SYMBOL(zfs_mkdir);
+EXPORT_SYMBOL(zfs_rmdir);
+EXPORT_SYMBOL(zfs_readdir);
+EXPORT_SYMBOL(zfs_fsync);
+EXPORT_SYMBOL(zfs_getattr);
+EXPORT_SYMBOL(zfs_getattr_fast);
+EXPORT_SYMBOL(zfs_setattr);
+EXPORT_SYMBOL(zfs_rename);
+EXPORT_SYMBOL(zfs_symlink);
+EXPORT_SYMBOL(zfs_readlink);
+EXPORT_SYMBOL(zfs_link);
+EXPORT_SYMBOL(zfs_inactive);
+EXPORT_SYMBOL(zfs_space);
+EXPORT_SYMBOL(zfs_fid);
+EXPORT_SYMBOL(zfs_getsecattr);
+EXPORT_SYMBOL(zfs_setsecattr);
+EXPORT_SYMBOL(zfs_getpage);
+EXPORT_SYMBOL(zfs_putpage);
+EXPORT_SYMBOL(zfs_dirty_inode);
+EXPORT_SYMBOL(zfs_map);
+
+/* CSTYLED */
+module_param(zfs_delete_blocks, ulong, 0644);
+MODULE_PARM_DESC(zfs_delete_blocks, "Delete files larger than N blocks async");
module_param(zfs_read_chunk_size, long, 0644);
MODULE_PARM_DESC(zfs_read_chunk_size, "Bytes to read per chunk");
#endif
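Note the pattern change above: instead of an EXPORT_SYMBOL() after each function body, zfs_vnops.c now exports everything in one block at the end of the file, and zfs_delete_blocks joins zfs_read_chunk_size as a run-time tunable. A parameter declared with module_param() and mode 0644 becomes writable through /sys/module/<module>/parameters/<name>; a minimal standalone sketch (a hypothetical demo module, not part of this patch):

        #include <linux/module.h>
        #include <linux/moduleparam.h>

        /* Files larger than this many blocks are deleted asynchronously. */
        static unsigned long demo_delete_blocks = 20480;
        module_param(demo_delete_blocks, ulong, 0644);
        MODULE_PARM_DESC(demo_delete_blocks,
            "Delete files larger than N blocks async");
        MODULE_LICENSE("GPL");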
diff --git a/zfs/module/zfs/zfs_znode.c b/zfs/module/zfs/zfs_znode.c
index e76bdc47b4bd..f508a248f0f7 100644
--- a/zfs/module/zfs/zfs_znode.c
+++ b/zfs/module/zfs/zfs_znode.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
*/
/* Portions Copyright 2007 Jeremy Teo */
@@ -62,6 +62,7 @@
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
+#include <sys/dmu_tx.h>
#include <sys/refcount.h>
#include <sys/stat.h>
#include <sys/zap.h>
@@ -109,7 +110,7 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL);
rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL);
- rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL);
+ rw_init(&zp->z_name_lock, NULL, RW_NOLOCKDEP, NULL);
mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);
rw_init(&zp->z_xattr_lock, NULL, RW_DEFAULT, NULL);
@@ -118,6 +119,7 @@ zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
zp->z_dirlocks = NULL;
zp->z_acl_cached = NULL;
zp->z_xattr_cached = NULL;
+ zp->z_xattr_parent = 0;
zp->z_moved = 0;
return (0);
}
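The constructor change swaps z_name_lock from RW_DEFAULT to RW_NOLOCKDEP, and zeroes the new z_xattr_parent field that zfs_znode_alloc() later fills in for xattr znodes. The SPL's RW_NOLOCKDEP type opts a lock out of lockdep tracking; it is used where legitimate lock orders, such as taking the z_name_lock of both the source and target directories during a rename, would otherwise be reported as potential deadlocks. In outline (SPL rwlock API):

        krwlock_t name_lock;

        /* Not tracked by lockdep: safe-but-unusual orders allowed. */
        rw_init(&name_lock, NULL, RW_NOLOCKDEP, NULL);
        rw_enter(&name_lock, RW_WRITER);
        /* ... */
        rw_exit(&name_lock);
        rw_destroy(&name_lock);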
@@ -227,56 +229,51 @@ zfs_znode_fini(void)
int
zfs_znode_hold_compare(const void *a, const void *b)
{
- const znode_hold_t *zh_a = a;
- const znode_hold_t *zh_b = b;
+ const znode_hold_t *zh_a = (const znode_hold_t *)a;
+ const znode_hold_t *zh_b = (const znode_hold_t *)b;
- if (zh_a->zh_obj < zh_b->zh_obj)
- return (-1);
- else if (zh_a->zh_obj > zh_b->zh_obj)
- return (1);
- else
- return (0);
+ return (AVL_CMP(zh_a->zh_obj, zh_b->zh_obj));
}
boolean_t
-zfs_znode_held(zfs_sb_t *zsb, uint64_t obj)
+zfs_znode_held(zfsvfs_t *zfsvfs, uint64_t obj)
{
znode_hold_t *zh, search;
- int i = ZFS_OBJ_HASH(zsb, obj);
+ int i = ZFS_OBJ_HASH(zfsvfs, obj);
boolean_t held;
search.zh_obj = obj;
- mutex_enter(&zsb->z_hold_locks[i]);
- zh = avl_find(&zsb->z_hold_trees[i], &search, NULL);
+ mutex_enter(&zfsvfs->z_hold_locks[i]);
+ zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL);
held = (zh && MUTEX_HELD(&zh->zh_lock)) ? B_TRUE : B_FALSE;
- mutex_exit(&zsb->z_hold_locks[i]);
+ mutex_exit(&zfsvfs->z_hold_locks[i]);
return (held);
}
static znode_hold_t *
-zfs_znode_hold_enter(zfs_sb_t *zsb, uint64_t obj)
+zfs_znode_hold_enter(zfsvfs_t *zfsvfs, uint64_t obj)
{
znode_hold_t *zh, *zh_new, search;
- int i = ZFS_OBJ_HASH(zsb, obj);
+ int i = ZFS_OBJ_HASH(zfsvfs, obj);
boolean_t found = B_FALSE;
zh_new = kmem_cache_alloc(znode_hold_cache, KM_SLEEP);
zh_new->zh_obj = obj;
search.zh_obj = obj;
- mutex_enter(&zsb->z_hold_locks[i]);
- zh = avl_find(&zsb->z_hold_trees[i], &search, NULL);
+ mutex_enter(&zfsvfs->z_hold_locks[i]);
+ zh = avl_find(&zfsvfs->z_hold_trees[i], &search, NULL);
if (likely(zh == NULL)) {
zh = zh_new;
- avl_add(&zsb->z_hold_trees[i], zh);
+ avl_add(&zfsvfs->z_hold_trees[i], zh);
} else {
ASSERT3U(zh->zh_obj, ==, obj);
found = B_TRUE;
}
refcount_add(&zh->zh_refcount, NULL);
- mutex_exit(&zsb->z_hold_locks[i]);
+ mutex_exit(&zfsvfs->z_hold_locks[i]);
if (found == B_TRUE)
kmem_cache_free(znode_hold_cache, zh_new);
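Most of the churn in this file is the mechanical zsb -> zfsvfs rename, but zfs_znode_hold_enter() is worth re-reading on its own: object numbers hash into one of z_hold_size buckets, each bucket guards an AVL tree of refcounted znode_hold_t entries, and the per-entry zh_lock serializes creation and destruction of a single object without any global lock. The shape of the pattern (simplified; find_or_insert() is a stand-in for the avl_find()/avl_add() sequence above):

        /* Serialize work on one object number, not the whole objset. */
        znode_hold_t *zh = find_or_insert(&zfsvfs->z_hold_trees[hash], obj);
        mutex_enter(&zh->zh_lock);
        /* ... instantiate or tear down the znode for obj ... */
        mutex_exit(&zh->zh_lock);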
@@ -289,28 +286,28 @@ zfs_znode_hold_enter(zfs_sb_t *zsb, uint64_t obj)
}
static void
-zfs_znode_hold_exit(zfs_sb_t *zsb, znode_hold_t *zh)
+zfs_znode_hold_exit(zfsvfs_t *zfsvfs, znode_hold_t *zh)
{
- int i = ZFS_OBJ_HASH(zsb, zh->zh_obj);
+ int i = ZFS_OBJ_HASH(zfsvfs, zh->zh_obj);
boolean_t remove = B_FALSE;
- ASSERT(zfs_znode_held(zsb, zh->zh_obj));
+ ASSERT(zfs_znode_held(zfsvfs, zh->zh_obj));
ASSERT3S(refcount_count(&zh->zh_refcount), >, 0);
mutex_exit(&zh->zh_lock);
- mutex_enter(&zsb->z_hold_locks[i]);
+ mutex_enter(&zfsvfs->z_hold_locks[i]);
if (refcount_remove(&zh->zh_refcount, NULL) == 0) {
- avl_remove(&zsb->z_hold_trees[i], zh);
+ avl_remove(&zfsvfs->z_hold_trees[i], zh);
remove = B_TRUE;
}
- mutex_exit(&zsb->z_hold_locks[i]);
+ mutex_exit(&zfsvfs->z_hold_locks[i]);
if (remove == B_TRUE)
kmem_cache_free(znode_hold_cache, zh);
}
int
-zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx)
+zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
#ifdef HAVE_SMB_SHARE
zfs_acl_ids_t acl_ids;
@@ -358,17 +355,17 @@ zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx)
}
static void
-zfs_znode_sa_init(zfs_sb_t *zsb, znode_t *zp,
+zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp,
dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl)
{
- ASSERT(zfs_znode_held(zsb, zp->z_id));
+ ASSERT(zfs_znode_held(zfsvfs, zp->z_id));
mutex_enter(&zp->z_lock);
ASSERT(zp->z_sa_hdl == NULL);
ASSERT(zp->z_acl_cached == NULL);
if (sa_hdl == NULL) {
- VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, zp,
+ VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp,
SA_HDL_SHARED, &zp->z_sa_hdl));
} else {
zp->z_sa_hdl = sa_hdl;
@@ -411,14 +408,14 @@ void
zfs_inode_destroy(struct inode *ip)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
- mutex_enter(&zsb->z_znodes_lock);
+ mutex_enter(&zfsvfs->z_znodes_lock);
if (list_link_active(&zp->z_link_node)) {
- list_remove(&zsb->z_all_znodes, zp);
- zsb->z_nr_znodes--;
+ list_remove(&zfsvfs->z_all_znodes, zp);
+ zfsvfs->z_nr_znodes--;
}
- mutex_exit(&zsb->z_znodes_lock);
+ mutex_exit(&zfsvfs->z_znodes_lock);
if (zp->z_acl_cached) {
zfs_acl_free(zp->z_acl_cached);
@@ -434,7 +431,7 @@ zfs_inode_destroy(struct inode *ip)
}
static void
-zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip)
+zfs_inode_set_ops(zfsvfs_t *zfsvfs, struct inode *ip)
{
uint64_t rdev = 0;
@@ -460,7 +457,7 @@ zfs_inode_set_ops(zfs_sb_t *zsb, struct inode *ip)
*/
case S_IFCHR:
case S_IFBLK:
- sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zsb), &rdev,
+ (void) sa_lookup(ITOZ(ip)->z_sa_hdl, SA_ZPL_RDEV(zfsvfs), &rdev,
sizeof (rdev));
/*FALLTHROUGH*/
case S_IFIFO:
@@ -489,7 +486,15 @@ zfs_set_inode_flags(znode_t *zp, struct inode *ip)
* Linux and Solaris have different sets of file attributes, so we
* restrict this conversion to the intersection of the two.
*/
+#ifdef HAVE_INODE_SET_FLAGS
+ unsigned int flags = 0;
+ if (zp->z_pflags & ZFS_IMMUTABLE)
+ flags |= S_IMMUTABLE;
+ if (zp->z_pflags & ZFS_APPENDONLY)
+ flags |= S_APPEND;
+ inode_set_flags(ip, flags, S_IMMUTABLE|S_APPEND);
+#else
if (zp->z_pflags & ZFS_IMMUTABLE)
ip->i_flags |= S_IMMUTABLE;
else
@@ -499,6 +504,7 @@ zfs_set_inode_flags(znode_t *zp, struct inode *ip)
ip->i_flags |= S_APPEND;
else
ip->i_flags &= ~S_APPEND;
+#endif
}
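zfs_set_inode_flags() gains a compat split: kernels that provide inode_set_flags() get an update with the memory ordering that lockless i_flags readers expect, while older kernels keep the direct bit manipulation. Once the bits are set, the stock VFS helpers honor them; for example (standard kernel macros, sketch only, with open_flags standing in for a caller's open flags):

        if (IS_IMMUTABLE(ip))
                return (-EPERM);                /* no writes at all */
        if (IS_APPEND(ip) && !(open_flags & O_APPEND))
                return (-EPERM);                /* append-only file */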
/*
@@ -508,63 +514,30 @@ zfs_set_inode_flags(znode_t *zp, struct inode *ip)
* inode has the correct field it should be used, and the ZFS code
* updated to access the inode. This can be done incrementally.
*/
-static void
-zfs_inode_update_impl(znode_t *zp, boolean_t new)
+void
+zfs_inode_update(znode_t *zp)
{
- zfs_sb_t *zsb;
+ zfsvfs_t *zfsvfs;
struct inode *ip;
uint32_t blksize;
u_longlong_t i_blocks;
- uint64_t atime[2], mtime[2], ctime[2];
ASSERT(zp != NULL);
- zsb = ZTOZSB(zp);
+ zfsvfs = ZTOZSB(zp);
ip = ZTOI(zp);
/* Skip .zfs control nodes which do not exist on disk. */
if (zfsctl_is_node(ip))
return;
- sa_lookup(zp->z_sa_hdl, SA_ZPL_ATIME(zsb), &atime, 16);
- sa_lookup(zp->z_sa_hdl, SA_ZPL_MTIME(zsb), &mtime, 16);
- sa_lookup(zp->z_sa_hdl, SA_ZPL_CTIME(zsb), &ctime, 16);
-
dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &blksize, &i_blocks);
spin_lock(&ip->i_lock);
- ip->i_generation = zp->z_gen;
- ip->i_uid = SUID_TO_KUID(zp->z_uid);
- ip->i_gid = SGID_TO_KGID(zp->z_gid);
- set_nlink(ip, zp->z_links);
- ip->i_mode = zp->z_mode;
- zfs_set_inode_flags(zp, ip);
- ip->i_blkbits = SPA_MINBLOCKSHIFT;
ip->i_blocks = i_blocks;
-
- /*
- * Only read atime from SA if we are newly created inode (or rezget),
- * otherwise i_atime might be dirty.
- */
- if (new)
- ZFS_TIME_DECODE(&ip->i_atime, atime);
- ZFS_TIME_DECODE(&ip->i_mtime, mtime);
- ZFS_TIME_DECODE(&ip->i_ctime, ctime);
-
i_size_write(ip, zp->z_size);
spin_unlock(&ip->i_lock);
}
-static void
-zfs_inode_update_new(znode_t *zp)
-{
- zfs_inode_update_impl(zp, B_TRUE);
-}
-
-void
-zfs_inode_update(znode_t *zp)
-{
- zfs_inode_update_impl(zp, B_FALSE);
-}
/*
* Construct a znode+inode and initialize.
@@ -574,19 +547,23 @@ zfs_inode_update(znode_t *zp)
* return the znode
*/
static znode_t *
-zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
+zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
dmu_object_type_t obj_type, uint64_t obj, sa_handle_t *hdl)
{
znode_t *zp;
struct inode *ip;
uint64_t mode;
uint64_t parent;
- sa_bulk_attr_t bulk[8];
+ uint64_t tmp_gen;
+ uint64_t links;
+ uint64_t z_uid, z_gid;
+ uint64_t atime[2], mtime[2], ctime[2];
+ sa_bulk_attr_t bulk[11];
int count = 0;
- ASSERT(zsb != NULL);
+ ASSERT(zfsvfs != NULL);
- ip = new_inode(zsb->z_sb);
+ ip = new_inode(zfsvfs->z_sb);
if (ip == NULL)
return (NULL);
@@ -610,31 +587,49 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
zp->z_range_lock.zr_blksz = &zp->z_blksz;
zp->z_range_lock.zr_max_blksz = &ZTOZSB(zp)->z_max_blksz;
- zfs_znode_sa_init(zsb, zp, db, obj_type, hdl);
+ zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL, &mode, 8);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL, &zp->z_gen, 8);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL, &zp->z_size, 8);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL, &zp->z_links, 8);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &tmp_gen, 8);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
+ &zp->z_size, 8);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
&zp->z_pflags, 8);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL,
&parent, 8);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL, &zp->z_uid, 8);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL, &zp->z_gid, 8);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &z_uid, 8);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL, &z_gid, 8);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &atime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, &mtime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, &ctime, 16);
- if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0) {
+ if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || tmp_gen == 0) {
if (hdl == NULL)
sa_handle_destroy(zp->z_sa_hdl);
zp->z_sa_hdl = NULL;
goto error;
}
- zp->z_mode = mode;
+ zp->z_mode = ip->i_mode = mode;
+ ip->i_generation = (uint32_t)tmp_gen;
+ ip->i_blkbits = SPA_MINBLOCKSHIFT;
+ set_nlink(ip, (uint32_t)links);
+ zfs_uid_write(ip, z_uid);
+ zfs_gid_write(ip, z_gid);
+ zfs_set_inode_flags(zp, ip);
+
+ /* Cache the xattr parent id */
+ if (zp->z_pflags & ZFS_XATTR)
+ zp->z_xattr_parent = parent;
+
+ ZFS_TIME_DECODE(&ip->i_atime, atime);
+ ZFS_TIME_DECODE(&ip->i_mtime, mtime);
+ ZFS_TIME_DECODE(&ip->i_ctime, ctime);
ip->i_ino = obj;
- zfs_inode_update_new(zp);
- zfs_inode_set_ops(zsb, ip);
+ zfs_inode_update(zp);
+ zfs_inode_set_ops(zfsvfs, ip);
/*
* The only way insert_inode_locked() can fail is if the ip->i_ino
@@ -646,11 +641,11 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
*/
VERIFY3S(insert_inode_locked(ip), ==, 0);
- mutex_enter(&zsb->z_znodes_lock);
- list_insert_tail(&zsb->z_all_znodes, zp);
- zsb->z_nr_znodes++;
+ mutex_enter(&zfsvfs->z_znodes_lock);
+ list_insert_tail(&zfsvfs->z_all_znodes, zp);
+ zfsvfs->z_nr_znodes++;
membar_producer();
- mutex_exit(&zsb->z_znodes_lock);
+ mutex_exit(&zfsvfs->z_znodes_lock);
unlock_new_inode(ip);
return (zp);
@@ -667,9 +662,9 @@ zfs_znode_alloc(zfs_sb_t *zsb, dmu_buf_t *db, int blksz,
void
zfs_mark_inode_dirty(struct inode *ip)
{
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
- if (zfs_is_readonly(zsb) || dmu_objset_is_snapshot(zsb->z_os))
+ if (zfs_is_readonly(zfsvfs) || dmu_objset_is_snapshot(zfsvfs->z_os))
return;
mark_inode_dirty(ip);
@@ -703,11 +698,12 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
uint64_t mode, size, links, parent, pflags;
uint64_t dzp_pflags = 0;
uint64_t rdev = 0;
- zfs_sb_t *zsb = ZTOZSB(dzp);
+ zfsvfs_t *zfsvfs = ZTOZSB(dzp);
dmu_buf_t *db;
timestruc_t now;
uint64_t gen, obj;
int bonuslen;
+ int dnodesize;
sa_handle_t *sa_hdl;
dmu_object_type_t obj_type;
sa_bulk_attr_t *sa_attrs;
@@ -715,19 +711,25 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
zfs_acl_locator_cb_t locate = { 0 };
znode_hold_t *zh;
- if (zsb->z_replay) {
+ if (zfsvfs->z_replay) {
obj = vap->va_nodeid;
now = vap->va_ctime; /* see zfs_replay_create() */
gen = vap->va_nblocks; /* ditto */
+ dnodesize = vap->va_fsid; /* ditto */
} else {
obj = 0;
gethrestime(&now);
gen = dmu_tx_get_txg(tx);
+ dnodesize = dmu_objset_dnodesize(zfsvfs->z_os);
}
- obj_type = zsb->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
+ if (dnodesize == 0)
+ dnodesize = DNODE_MIN_SIZE;
+
+ obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
+
bonuslen = (obj_type == DMU_OT_SA) ?
- DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE;
+ DN_BONUS_SIZE(dnodesize) : ZFS_OLD_ZNODE_PHYS_SIZE;
/*
* Create a new DMU object.
@@ -739,29 +741,29 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
* assertions below.
*/
if (S_ISDIR(vap->va_mode)) {
- if (zsb->z_replay) {
- VERIFY0(zap_create_claim_norm(zsb->z_os, obj,
- zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
- obj_type, bonuslen, tx));
+ if (zfsvfs->z_replay) {
+ VERIFY0(zap_create_claim_norm_dnsize(zfsvfs->z_os, obj,
+ zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
+ obj_type, bonuslen, dnodesize, tx));
} else {
- obj = zap_create_norm(zsb->z_os,
- zsb->z_norm, DMU_OT_DIRECTORY_CONTENTS,
- obj_type, bonuslen, tx);
+ obj = zap_create_norm_dnsize(zfsvfs->z_os,
+ zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
+ obj_type, bonuslen, dnodesize, tx);
}
} else {
- if (zsb->z_replay) {
- VERIFY0(dmu_object_claim(zsb->z_os, obj,
+ if (zfsvfs->z_replay) {
+ VERIFY0(dmu_object_claim_dnsize(zfsvfs->z_os, obj,
DMU_OT_PLAIN_FILE_CONTENTS, 0,
- obj_type, bonuslen, tx));
+ obj_type, bonuslen, dnodesize, tx));
} else {
- obj = dmu_object_alloc(zsb->z_os,
+ obj = dmu_object_alloc_dnsize(zfsvfs->z_os,
DMU_OT_PLAIN_FILE_CONTENTS, 0,
- obj_type, bonuslen, tx);
+ obj_type, bonuslen, dnodesize, tx);
}
}
- zh = zfs_znode_hold_enter(zsb, obj);
- VERIFY(0 == sa_buf_hold(zsb->z_os, obj, NULL, &db));
+ zh = zfs_znode_hold_enter(zfsvfs, obj);
+ VERIFY0(sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));
/*
* If this is the root, fix up the half-initialized parent pointer
@@ -780,16 +782,17 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
flag |= IS_XATTR;
}
- if (zsb->z_use_fuids)
+ if (zfsvfs->z_use_fuids)
pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
else
pflags = 0;
if (S_ISDIR(vap->va_mode)) {
size = 2; /* contents ("." and "..") */
- links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
+ links = 2;
} else {
- size = links = 0;
+ size = 0;
+ links = (flag & IS_TMPFILE) ? 0 : 1;
}
if (S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))
@@ -823,7 +826,7 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
}
/* Now add in all of the "SA" attributes */
- VERIFY(0 == sa_handle_get_from_db(zsb->z_os, db, NULL, SA_HDL_SHARED,
+ VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
&sa_hdl));
/*
@@ -835,74 +838,74 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
sa_attrs = kmem_alloc(sizeof (sa_bulk_attr_t) * ZPL_END, KM_SLEEP);
if (obj_type == DMU_OT_ZNODE) {
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
NULL, &atime, 16);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
NULL, &mtime, 16);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
NULL, &ctime, 16);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
NULL, &crtime, 16);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
NULL, &gen, 8);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
NULL, &mode, 8);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
NULL, &size, 8);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
NULL, &parent, 8);
} else {
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
NULL, &mode, 8);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
NULL, &size, 8);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
NULL, &gen, 8);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs),
NULL, &acl_ids->z_fuid, 8);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs),
NULL, &acl_ids->z_fgid, 8);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
NULL, &parent, 8);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
NULL, &pflags, 8);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
NULL, &atime, 16);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
NULL, &mtime, 16);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
NULL, &ctime, 16);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
NULL, &crtime, 16);
}
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zsb), NULL, &links, 8);
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);
if (obj_type == DMU_OT_ZNODE) {
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zsb), NULL,
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
&empty_xattr, 8);
}
if (obj_type == DMU_OT_ZNODE ||
(S_ISBLK(vap->va_mode) || S_ISCHR(vap->va_mode))) {
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
NULL, &rdev, 8);
}
if (obj_type == DMU_OT_ZNODE) {
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
NULL, &pflags, 8);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zsb), NULL,
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
&acl_ids->z_fuid, 8);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zsb), NULL,
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
&acl_ids->z_fgid, 8);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zsb), NULL, pad,
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
sizeof (uint64_t) * 4);
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zsb), NULL,
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
&acl_phys, sizeof (zfs_acl_phys_t));
} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zsb), NULL,
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
&acl_ids->z_aclp->z_acl_count, 8);
locate.cb_aclp = acl_ids->z_aclp;
- SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zsb),
+ SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
zfs_acl_data_locator, &locate,
acl_ids->z_aclp->z_acl_bytes);
mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
@@ -912,7 +915,18 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);
if (!(flag & IS_ROOT_NODE)) {
- *zpp = zfs_znode_alloc(zsb, db, 0, obj_type, obj, sa_hdl);
+ /*
+ * The call to zfs_znode_alloc() may fail if memory is low
+ * via the call path: alloc_inode() -> inode_init_always() ->
+ * security_inode_alloc() -> inode_alloc_security(). Since
+ * the existing code is written such that zfs_mknode() cannot
+ * fail, retry until sufficient memory has been reclaimed.
+ */
+ do {
+ *zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, obj,
+ sa_hdl);
+ } while (*zpp == NULL);
+
VERIFY(*zpp != NULL);
VERIFY(dzp != NULL);
} else {
@@ -926,14 +940,15 @@ zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
}
(*zpp)->z_pflags = pflags;
- (*zpp)->z_mode = mode;
+ (*zpp)->z_mode = ZTOI(*zpp)->i_mode = mode;
+ (*zpp)->z_dnodesize = dnodesize;
if (obj_type == DMU_OT_ZNODE ||
acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
}
kmem_free(sa_attrs, sizeof (sa_bulk_attr_t) * ZPL_END);
- zfs_znode_hold_exit(zsb, zh);
+ zfs_znode_hold_exit(zfsvfs, zh);
}
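The do/while around zfs_znode_alloc() encodes an invariant rather than papering over a bug: by this point the DMU object exists and its SA attributes have been written inside the open transaction, so zfs_mknode() has no error path left. The only failure zfs_znode_alloc() can hit here is a transient allocation failure inside new_inode(), hence the retry-until-success idiom:

        /* Assumes failures are transient memory pressure that
         * direct reclaim will eventually relieve. */
        do {
                zp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, obj, sa_hdl);
        } while (zp == NULL);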
/*
@@ -944,6 +959,7 @@ void
zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
{
xoptattr_t *xoap;
+ boolean_t update_inode = B_FALSE;
xoap = xva_getxoptattr(xvap);
ASSERT(xoap);
@@ -979,6 +995,8 @@ zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable,
zp->z_pflags, tx);
XVA_SET_RTN(xvap, XAT_IMMUTABLE);
+
+ update_inode = B_TRUE;
}
if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) {
ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink,
@@ -989,6 +1007,8 @@ zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly,
zp->z_pflags, tx);
XVA_SET_RTN(xvap, XAT_APPENDONLY);
+
+ update_inode = B_TRUE;
}
if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) {
ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump,
@@ -1029,10 +1049,13 @@ zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx)
zp->z_pflags, tx);
XVA_SET_RTN(xvap, XAT_SPARSE);
}
+
+ if (update_inode)
+ zfs_set_inode_flags(zp, ZTOI(zp));
}
int
-zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
+zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp)
{
dmu_object_info_t doi;
dmu_buf_t *db;
@@ -1044,11 +1067,11 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
*zpp = NULL;
again:
- zh = zfs_znode_hold_enter(zsb, obj_num);
+ zh = zfs_znode_hold_enter(zfsvfs, obj_num);
- err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
+ err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
if (err) {
- zfs_znode_hold_exit(zsb, zh);
+ zfs_znode_hold_exit(zfsvfs, zh);
return (err);
}
@@ -1058,7 +1081,7 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
(doi.doi_bonus_type == DMU_OT_ZNODE &&
doi.doi_bonus_size < sizeof (znode_phys_t)))) {
sa_buf_rele(db, NULL);
- zfs_znode_hold_exit(zsb, zh);
+ zfs_znode_hold_exit(zfsvfs, zh);
return (SET_ERROR(EINVAL));
}
@@ -1077,37 +1100,33 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
mutex_enter(&zp->z_lock);
ASSERT3U(zp->z_id, ==, obj_num);
- if (zp->z_unlinked) {
- err = SET_ERROR(ENOENT);
- } else {
- /*
- * If igrab() returns NULL the VFS has independently
- * determined the inode should be evicted and has
- * called iput_final() to start the eviction process.
- * The SA handle is still valid but because the VFS
- * requires that the eviction succeed we must drop
- * our locks and references to allow the eviction to
- * complete. The zfs_zget() may then be retried.
- *
- * This unlikely case could be optimized by registering
- * a sops->drop_inode() callback. The callback would
- * need to detect the active SA hold thereby informing
- * the VFS that this inode should not be evicted.
- */
- if (igrab(ZTOI(zp)) == NULL) {
- mutex_exit(&zp->z_lock);
- sa_buf_rele(db, NULL);
- zfs_znode_hold_exit(zsb, zh);
- /* inode might need this to finish evict */
- cond_resched();
- goto again;
- }
- *zpp = zp;
- err = 0;
+ /*
+ * If igrab() returns NULL the VFS has independently
+ * determined the inode should be evicted and has
+ * called iput_final() to start the eviction process.
+ * The SA handle is still valid but because the VFS
+ * requires that the eviction succeed we must drop
+ * our locks and references to allow the eviction to
+ * complete. The zfs_zget() may then be retried.
+ *
+ * This unlikely case could be optimized by registering
+ * a sops->drop_inode() callback. The callback would
+ * need to detect the active SA hold thereby informing
+ * the VFS that this inode should not be evicted.
+ */
+ if (igrab(ZTOI(zp)) == NULL) {
+ mutex_exit(&zp->z_lock);
+ sa_buf_rele(db, NULL);
+ zfs_znode_hold_exit(zfsvfs, zh);
+ /* inode might need this to finish evict */
+ cond_resched();
+ goto again;
}
+ *zpp = zp;
+ err = 0;
mutex_exit(&zp->z_lock);
sa_buf_rele(db, NULL);
- zfs_znode_hold_exit(zsb, zh);
+ zfs_znode_hold_exit(zfsvfs, zh);
return (err);
}
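zfs_zget() no longer short-circuits unlinked znodes with ENOENT here; that decision moves to the callers, and every existing-znode hit now goes through igrab(). The retry matters: once the VFS has started eviction, igrab() returns NULL and the only safe response is to drop every lock and reference, yield the CPU so iput_final() can finish, and restart the lookup from scratch. The skeleton of the pattern (standard kernel primitives):

        if (igrab(inode) == NULL) {
                /* Eviction in flight: unwind all held state ... */
                cond_resched();         /* let iput_final() complete */
                /* ... then retry the whole lookup */
        }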
@@ -1121,29 +1140,32 @@ zfs_zget(zfs_sb_t *zsb, uint64_t obj_num, znode_t **zpp)
* if zfs_znode_alloc() fails it will drop the hold on the
* bonus buffer.
*/
- zp = zfs_znode_alloc(zsb, db, doi.doi_data_block_size,
+ zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size,
doi.doi_bonus_type, obj_num, NULL);
if (zp == NULL) {
err = SET_ERROR(ENOENT);
} else {
*zpp = zp;
}
- zfs_znode_hold_exit(zsb, zh);
+ zfs_znode_hold_exit(zfsvfs, zh);
return (err);
}
int
zfs_rezget(znode_t *zp)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
dmu_object_info_t doi;
dmu_buf_t *db;
uint64_t obj_num = zp->z_id;
uint64_t mode;
- sa_bulk_attr_t bulk[7];
+ uint64_t links;
+ sa_bulk_attr_t bulk[10];
int err;
int count = 0;
uint64_t gen;
+ uint64_t z_uid, z_gid;
+ uint64_t atime[2], mtime[2], ctime[2];
znode_hold_t *zh;
/*
@@ -1156,7 +1178,7 @@ zfs_rezget(znode_t *zp)
if (zp->z_is_ctldir)
return (0);
- zh = zfs_znode_hold_enter(zsb, obj_num);
+ zh = zfs_znode_hold_enter(zfsvfs, obj_num);
mutex_enter(&zp->z_acl_lock);
if (zp->z_acl_cached) {
@@ -1173,9 +1195,9 @@ zfs_rezget(znode_t *zp)
rw_exit(&zp->z_xattr_lock);
ASSERT(zp->z_sa_hdl == NULL);
- err = sa_buf_hold(zsb->z_os, obj_num, NULL, &db);
+ err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db);
if (err) {
- zfs_znode_hold_exit(zsb, zh);
+ zfs_znode_hold_exit(zfsvfs, zh);
return (err);
}
@@ -1185,48 +1207,63 @@ zfs_rezget(znode_t *zp)
(doi.doi_bonus_type == DMU_OT_ZNODE &&
doi.doi_bonus_size < sizeof (znode_phys_t)))) {
sa_buf_rele(db, NULL);
- zfs_znode_hold_exit(zsb, zh);
+ zfs_znode_hold_exit(zfsvfs, zh);
return (SET_ERROR(EINVAL));
}
- zfs_znode_sa_init(zsb, zp, db, doi.doi_bonus_type, NULL);
+ zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL);
/* reload cached values */
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL,
&gen, sizeof (gen));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
&zp->z_size, sizeof (zp->z_size));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zsb), NULL,
- &zp->z_links, sizeof (zp->z_links));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
+ &links, sizeof (links));
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
&zp->z_pflags, sizeof (zp->z_pflags));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zsb), NULL,
- &zp->z_uid, sizeof (zp->z_uid));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zsb), NULL,
- &zp->z_gid, sizeof (zp->z_gid));
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zsb), NULL,
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
+ &z_uid, sizeof (z_uid));
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
+ &z_gid, sizeof (z_gid));
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
&mode, sizeof (mode));
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
+ &atime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL,
+ &mtime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
+ &ctime, 16);
if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) {
zfs_znode_dmu_fini(zp);
- zfs_znode_hold_exit(zsb, zh);
+ zfs_znode_hold_exit(zfsvfs, zh);
return (SET_ERROR(EIO));
}
- zp->z_mode = mode;
+ zp->z_mode = ZTOI(zp)->i_mode = mode;
+ zfs_uid_write(ZTOI(zp), z_uid);
+ zfs_gid_write(ZTOI(zp), z_gid);
- if (gen != zp->z_gen) {
+ ZFS_TIME_DECODE(&ZTOI(zp)->i_atime, atime);
+ ZFS_TIME_DECODE(&ZTOI(zp)->i_mtime, mtime);
+ ZFS_TIME_DECODE(&ZTOI(zp)->i_ctime, ctime);
+
+ if (gen != ZTOI(zp)->i_generation) {
zfs_znode_dmu_fini(zp);
- zfs_znode_hold_exit(zsb, zh);
+ zfs_znode_hold_exit(zfsvfs, zh);
return (SET_ERROR(EIO));
}
- zp->z_unlinked = (zp->z_links == 0);
+ zp->z_unlinked = (ZTOI(zp)->i_nlink == 0);
+ set_nlink(ZTOI(zp), (uint32_t)links);
+ zfs_set_inode_flags(zp, ZTOI(zp));
+
zp->z_blksz = doi.doi_data_block_size;
zp->z_atime_dirty = 0;
- zfs_inode_update_new(zp);
+ zfs_inode_update(zp);
- zfs_znode_hold_exit(zsb, zh);
+ zfs_znode_hold_exit(zfsvfs, zh);
return (0);
}
@@ -1234,26 +1271,26 @@ zfs_rezget(znode_t *zp)
void
zfs_znode_delete(znode_t *zp, dmu_tx_t *tx)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
- objset_t *os = zsb->z_os;
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
+ objset_t *os = zfsvfs->z_os;
uint64_t obj = zp->z_id;
uint64_t acl_obj = zfs_external_acl(zp);
znode_hold_t *zh;
- zh = zfs_znode_hold_enter(zsb, obj);
+ zh = zfs_znode_hold_enter(zfsvfs, obj);
if (acl_obj) {
VERIFY(!zp->z_is_sa);
VERIFY(0 == dmu_object_free(os, acl_obj, tx));
}
VERIFY(0 == dmu_object_free(os, obj, tx));
zfs_znode_dmu_fini(zp);
- zfs_znode_hold_exit(zsb, zh);
+ zfs_znode_hold_exit(zfsvfs, zh);
}
void
zfs_zinactive(znode_t *zp)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
uint64_t z_id = zp->z_id;
znode_hold_t *zh;
@@ -1262,7 +1299,7 @@ zfs_zinactive(znode_t *zp)
/*
* Don't allow a zfs_zget() while we're trying to release this znode.
*/
- zh = zfs_znode_hold_enter(zsb, z_id);
+ zh = zfs_znode_hold_enter(zfsvfs, z_id);
mutex_enter(&zp->z_lock);
@@ -1272,7 +1309,7 @@ zfs_zinactive(znode_t *zp)
*/
if (zp->z_unlinked) {
mutex_exit(&zp->z_lock);
- zfs_znode_hold_exit(zsb, zh);
+ zfs_znode_hold_exit(zfsvfs, zh);
zfs_rmnode(zp);
return;
}
@@ -1280,7 +1317,7 @@ zfs_zinactive(znode_t *zp)
mutex_exit(&zp->z_lock);
zfs_znode_dmu_fini(zp);
- zfs_znode_hold_exit(zsb, zh);
+ zfs_znode_hold_exit(zfsvfs, zh);
}
static inline int
@@ -1320,6 +1357,7 @@ zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
if (flag & ATTR_MTIME) {
ZFS_TIME_ENCODE(&now, mtime);
+ ZFS_TIME_DECODE(&(ZTOI(zp)->i_mtime), mtime);
if (ZTOZSB(zp)->z_use_fuids) {
zp->z_pflags |= (ZFS_ARCHIVE |
ZFS_AV_MODIFIED);
@@ -1328,6 +1366,7 @@ zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2],
if (flag & ATTR_CTIME) {
ZFS_TIME_ENCODE(&now, ctime);
+ ZFS_TIME_DECODE(&(ZTOI(zp)->i_ctime), ctime);
if (ZTOZSB(zp)->z_use_fuids)
zp->z_pflags |= ZFS_ARCHIVE;
}
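zfs_tstamp_update_setup() now decodes the freshly encoded stamps back into the Linux inode, keeping i_mtime/i_ctime coherent with what will land on disk. On disk, each ZPL timestamp is a pair of 64-bit words, and the macros amount to (a sketch of their effect, not the literal definitions):

        uint64_t stamp[2];
        /* ZFS_TIME_ENCODE(tp, stamp): */
        stamp[0] = (uint64_t)tp->tv_sec;        /* seconds */
        stamp[1] = (uint64_t)tp->tv_nsec;       /* nanoseconds */
        /* ZFS_TIME_DECODE(tp, stamp) reverses the assignment. */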
@@ -1375,12 +1414,12 @@ zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx)
* IN: zp - znode of file to free data in.
* end - new end-of-file
*
- * RETURN: 0 on success, error code on failure
+ * RETURN: 0 on success, error code on failure
*/
static int
zfs_extend(znode_t *zp, uint64_t end)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
dmu_tx_t *tx;
rl_t *rl;
uint64_t newblksz;
@@ -1398,11 +1437,11 @@ zfs_extend(znode_t *zp, uint64_t end)
zfs_range_unlock(rl);
return (0);
}
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, zp);
if (end > zp->z_blksz &&
- (!ISP2(zp->z_blksz) || zp->z_blksz < zsb->z_max_blksz)) {
+ (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) {
/*
* We are growing the file past the current block size.
*/
@@ -1494,12 +1533,12 @@ zfs_zero_partial_page(znode_t *zp, uint64_t start, uint64_t len)
* off - start of section to free.
* len - length of section to free.
*
- * RETURN: 0 on success, error code on failure
+ * RETURN: 0 on success, error code on failure
*/
static int
zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
rl_t *rl;
int error;
@@ -1519,7 +1558,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
if (off + len > zp->z_size)
len = zp->z_size - off;
- error = dmu_free_long_range(zsb->z_os, zp->z_id, off, len);
+ error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
/*
* Zero partial page cache entries. This must be done under a
@@ -1573,12 +1612,12 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
* IN: zp - znode of file to free data in.
* end - new end-of-file.
*
- * RETURN: 0 on success, error code on failure
+ * RETURN: 0 on success, error code on failure
*/
static int
zfs_trunc(znode_t *zp, uint64_t end)
{
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
dmu_tx_t *tx;
rl_t *rl;
int error;
@@ -1598,14 +1637,15 @@ zfs_trunc(znode_t *zp, uint64_t end)
return (0);
}
- error = dmu_free_long_range(zsb->z_os, zp->z_id, end, -1);
+ error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, -1);
if (error) {
zfs_range_unlock(rl);
return (error);
}
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, zp);
+ dmu_tx_mark_netfree(tx);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
@@ -1614,12 +1654,12 @@ zfs_trunc(znode_t *zp, uint64_t end)
}
zp->z_size = end;
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zsb),
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs),
NULL, &zp->z_size, sizeof (zp->z_size));
if (end == 0) {
zp->z_pflags &= ~ZFS_SPARSE;
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
NULL, &zp->z_pflags, 8);
}
VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0);
@@ -1640,21 +1680,21 @@ zfs_trunc(znode_t *zp, uint64_t end)
* flag - current file open mode flags.
* log - TRUE if this action should be logged
*
- * RETURN: 0 on success, error code on failure
+ * RETURN: 0 on success, error code on failure
*/
int
zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
{
dmu_tx_t *tx;
- zfs_sb_t *zsb = ZTOZSB(zp);
- zilog_t *zilog = zsb->z_log;
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
+ zilog_t *zilog = zfsvfs->z_log;
uint64_t mode;
uint64_t mtime[2], ctime[2];
sa_bulk_attr_t bulk[3];
int count = 0;
int error;
- if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zsb), &mode,
+ if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode,
sizeof (mode))) != 0)
return (error);
@@ -1675,7 +1715,7 @@ zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
if (error || !log)
goto out;
log:
- tx = dmu_tx_create(zsb->z_os);
+ tx = dmu_tx_create(zfsvfs->z_os);
dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE);
zfs_sa_upgrade_txholds(tx, zp);
error = dmu_tx_assign(tx, TXG_WAIT);
@@ -1684,9 +1724,9 @@ zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log)
goto out;
}
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zsb), NULL, mtime, 16);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zsb), NULL, ctime, 16);
- SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zsb),
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16);
+ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs),
NULL, &zp->z_pflags, 8);
zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime);
error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx);
@@ -1714,7 +1754,7 @@ void
zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
{
struct super_block *sb;
- zfs_sb_t *zsb;
+ zfsvfs_t *zfsvfs;
uint64_t moid, obj, sa_obj, version;
uint64_t sense = ZFS_CASE_SENSITIVE;
uint64_t norm = 0;
@@ -1788,7 +1828,7 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
ASSERT(error == 0);
/*
- * Create root znode. Create minimal znode/inode/zsb/sb
+ * Create root znode. Create minimal znode/inode/zfsvfs/sb
* to allow zfs_mknode to work.
*/
vattr.va_mask = ATTR_MODE|ATTR_UID|ATTR_GID;
@@ -1802,21 +1842,21 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
rootzp->z_atime_dirty = 0;
rootzp->z_is_sa = USE_SA(version, os);
- zsb = kmem_zalloc(sizeof (zfs_sb_t), KM_SLEEP);
- zsb->z_os = os;
- zsb->z_parent = zsb;
- zsb->z_version = version;
- zsb->z_use_fuids = USE_FUIDS(version, os);
- zsb->z_use_sa = USE_SA(version, os);
- zsb->z_norm = norm;
+ zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
+ zfsvfs->z_os = os;
+ zfsvfs->z_parent = zfsvfs;
+ zfsvfs->z_version = version;
+ zfsvfs->z_use_fuids = USE_FUIDS(version, os);
+ zfsvfs->z_use_sa = USE_SA(version, os);
+ zfsvfs->z_norm = norm;
sb = kmem_zalloc(sizeof (struct super_block), KM_SLEEP);
- sb->s_fs_info = zsb;
+ sb->s_fs_info = zfsvfs;
ZTOI(rootzp)->i_sb = sb;
error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END,
- &zsb->z_attr_table);
+ &zfsvfs->z_attr_table);
ASSERT(error == 0);
@@ -1825,20 +1865,21 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
* insensitive.
*/
if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED)
- zsb->z_norm |= U8_TEXTPREP_TOUPPER;
+ zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
- mutex_init(&zsb->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
- list_create(&zsb->z_all_znodes, sizeof (znode_t),
+ mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
+ list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
offsetof(znode_t, z_link_node));
size = MIN(1 << (highbit64(zfs_object_mutex_size)-1), ZFS_OBJ_MTX_MAX);
- zsb->z_hold_size = size;
- zsb->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size, KM_SLEEP);
- zsb->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
+ zfsvfs->z_hold_size = size;
+ zfsvfs->z_hold_trees = vmem_zalloc(sizeof (avl_tree_t) * size,
+ KM_SLEEP);
+ zfsvfs->z_hold_locks = vmem_zalloc(sizeof (kmutex_t) * size, KM_SLEEP);
for (i = 0; i != size; i++) {
- avl_create(&zsb->z_hold_trees[i], zfs_znode_hold_compare,
+ avl_create(&zfsvfs->z_hold_trees[i], zfs_znode_hold_compare,
sizeof (znode_hold_t), offsetof(znode_hold_t, zh_node));
- mutex_init(&zsb->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&zfsvfs->z_hold_locks[i], NULL, MUTEX_DEFAULT, NULL);
}
VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr,
@@ -1856,18 +1897,20 @@ zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx)
/*
* Create shares directory
*/
- error = zfs_create_share_dir(zsb, tx);
+ error = zfs_create_share_dir(zfsvfs, tx);
ASSERT(error == 0);
for (i = 0; i != size; i++) {
- avl_destroy(&zsb->z_hold_trees[i]);
- mutex_destroy(&zsb->z_hold_locks[i]);
+ avl_destroy(&zfsvfs->z_hold_trees[i]);
+ mutex_destroy(&zfsvfs->z_hold_locks[i]);
}
- vmem_free(zsb->z_hold_trees, sizeof (avl_tree_t) * size);
- vmem_free(zsb->z_hold_locks, sizeof (kmutex_t) * size);
+ mutex_destroy(&zfsvfs->z_znodes_lock);
+
+ vmem_free(zfsvfs->z_hold_trees, sizeof (avl_tree_t) * size);
+ vmem_free(zfsvfs->z_hold_locks, sizeof (kmutex_t) * size);
kmem_free(sb, sizeof (struct super_block));
- kmem_free(zsb, sizeof (zfs_sb_t));
+ kmem_free(zfsvfs, sizeof (zfsvfs_t));
}
#endif /* _KERNEL */
@@ -1925,13 +1968,16 @@ zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag)
* or not the object is an extended attribute directory.
*/
static int
-zfs_obj_to_pobj(sa_handle_t *hdl, sa_attr_type_t *sa_table, uint64_t *pobjp,
- int *is_xattrdir)
+zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table,
+ uint64_t *pobjp, int *is_xattrdir)
{
uint64_t parent;
uint64_t pflags;
uint64_t mode;
+ uint64_t parent_mode;
sa_bulk_attr_t bulk[3];
+ sa_handle_t *sa_hdl;
+ dmu_buf_t *sa_db;
int count = 0;
int error;
@@ -1945,9 +1991,32 @@ zfs_obj_to_pobj(sa_handle_t *hdl, sa_attr_type_t *sa_table, uint64_t *pobjp,
if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0)
return (error);
- *pobjp = parent;
+ /*
+ * When a link is removed, its parent pointer is not changed and will
+ * be invalid. There are two cases where a link is removed but the
+ * file stays around: when it goes to the delete queue and when there
+ * are additional links.
+ */
+ error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG);
+ if (error != 0)
+ return (error);
+
+ error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode));
+ zfs_release_sa_handle(sa_hdl, sa_db, FTAG);
+ if (error != 0)
+ return (error);
+
*is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode);
+ /*
+ * Extended attributes can be applied to files, directories, etc.
+ * Otherwise the parent must be a directory.
+ */
+ if (!*is_xattrdir && !S_ISDIR(parent_mode))
+ return (EINVAL);
+
+ *pobjp = parent;
+
return (0);
}
@@ -1996,7 +2065,7 @@ zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl,
if (prevdb)
zfs_release_sa_handle(prevhdl, prevdb, FTAG);
- if ((error = zfs_obj_to_pobj(sa_hdl, sa_table, &pobj,
+ if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj,
&is_xattrdir)) != 0)
break;
@@ -2103,6 +2172,7 @@ zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb,
EXPORT_SYMBOL(zfs_create_fs);
EXPORT_SYMBOL(zfs_obj_to_path);
+/* CSTYLED */
module_param(zfs_object_mutex_size, uint, 0644);
MODULE_PARM_DESC(zfs_object_mutex_size, "Size of znode hold array");
#endif
diff --git a/zfs/module/zfs/zil.c b/zfs/module/zfs/zil.c
index 289b23c7f488..6a1f190f5e6c 100644
--- a/zfs/module/zfs/zil.c
+++ b/zfs/module/zfs/zil.c
@@ -20,7 +20,8 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
+ * Copyright (c) 2014 Integros [integros.com]
*/
/* Portions Copyright 2010 Robert Milkowski */
@@ -40,6 +41,7 @@
#include <sys/dsl_pool.h>
#include <sys/metaslab.h>
#include <sys/trace_zil.h>
+#include <sys/abd.h>
/*
* The zfs intent log (ZIL) saves transaction records of system calls
@@ -100,6 +102,13 @@ int zil_replay_disable = 0;
*/
int zfs_nocacheflush = 0;
+/*
+ * Limit SLOG write size per commit executed with synchronous priority.
+ * Any writes above that limit will be executed with lower (asynchronous)
+ * priority to limit potential SLOG device abuse by a single active ZIL writer.
+ */
+unsigned long zil_slog_bulk = 768 * 1024;
+
static kmem_cache_t *zil_lwb_cache;
static void zil_async_to_sync(zilog_t *zilog, uint64_t foid);
@@ -107,33 +116,17 @@ static void zil_async_to_sync(zilog_t *zilog, uint64_t foid);
#define LWB_EMPTY(lwb) ((BP_GET_LSIZE(&lwb->lwb_blk) - \
sizeof (zil_chain_t)) == (lwb->lwb_sz - lwb->lwb_nused))
-
-/*
- * ziltest is by and large an ugly hack, but very useful in
- * checking replay without tedious work.
- * When running ziltest we want to keep all itx's and so maintain
- * a single list in the zl_itxg[] that uses a high txg: ZILTEST_TXG
- * We subtract TXG_CONCURRENT_STATES to allow for common code.
- */
-#define ZILTEST_TXG (UINT64_MAX - TXG_CONCURRENT_STATES)
-
static int
zil_bp_compare(const void *x1, const void *x2)
{
const dva_t *dva1 = &((zil_bp_node_t *)x1)->zn_dva;
const dva_t *dva2 = &((zil_bp_node_t *)x2)->zn_dva;
- if (DVA_GET_VDEV(dva1) < DVA_GET_VDEV(dva2))
- return (-1);
- if (DVA_GET_VDEV(dva1) > DVA_GET_VDEV(dva2))
- return (1);
-
- if (DVA_GET_OFFSET(dva1) < DVA_GET_OFFSET(dva2))
- return (-1);
- if (DVA_GET_OFFSET(dva1) > DVA_GET_OFFSET(dva2))
- return (1);
+ int cmp = AVL_CMP(DVA_GET_VDEV(dva1), DVA_GET_VDEV(dva2));
+ if (likely(cmp))
+ return (cmp);
- return (0);
+ return (AVL_CMP(DVA_GET_OFFSET(dva1), DVA_GET_OFFSET(dva2)));
}
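This is the first of three comparators in zil.c (zil_bp_compare here, zil_vdev_compare and zil_aitx_compare below) rewritten with AVL_CMP as part of the tree-wide comparator cleanup. The macro collapses the if/else ladder into branch-free arithmetic that still yields -1, 0, or 1:

        /* Effect of AVL_CMP(a, b) from sys/avl.h. */
        #define AVL_CMP(a, b)   (((a) > (b)) - ((a) < (b)))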
static void
@@ -266,7 +259,7 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst,
}
}
- VERIFY(arc_buf_remove_ref(abuf, &abuf));
+ arc_buf_destroy(abuf, &abuf);
}
return (error);
@@ -303,7 +296,7 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf)
if (error == 0) {
if (wbuf != NULL)
bcopy(abuf->b_data, wbuf, arc_buf_size(abuf));
- (void) arc_buf_remove_ref(abuf, &abuf);
+ arc_buf_destroy(abuf, &abuf);
}
return (error);
@@ -463,7 +456,8 @@ zil_free_log_record(zilog_t *zilog, lr_t *lrc, void *tx, uint64_t claim_txg)
}
static lwb_t *
-zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, uint64_t txg, boolean_t fastwrite)
+zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, boolean_t slog, uint64_t txg,
+ boolean_t fastwrite)
{
lwb_t *lwb;
@@ -471,6 +465,7 @@ zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, uint64_t txg, boolean_t fastwrite)
lwb->lwb_zilog = zilog;
lwb->lwb_blk = *bp;
lwb->lwb_fastwrite = fastwrite;
+ lwb->lwb_slog = slog;
lwb->lwb_buf = zio_buf_alloc(BP_GET_LSIZE(bp));
lwb->lwb_max_txg = txg;
lwb->lwb_zio = NULL;
@@ -509,6 +504,27 @@ zilog_dirty(zilog_t *zilog, uint64_t txg)
}
}
+/*
+ * Determine if the zil is dirty in the specified txg. Callers wanting to
+ * ensure that the dirty state does not change must hold the itxg_lock for
+ * the specified txg. Holding the lock will ensure that the zil cannot be
+ * dirtied (zil_itx_assign) or cleaned (zil_clean) while we check its current
+ * state.
+ */
+boolean_t
+zilog_is_dirty_in_txg(zilog_t *zilog, uint64_t txg)
+{
+ dsl_pool_t *dp = zilog->zl_dmu_pool;
+
+ if (txg_list_member(&dp->dp_dirty_zilogs, zilog, txg & TXG_MASK))
+ return (B_TRUE);
+ return (B_FALSE);
+}
+
+/*
+ * Determine if the zil is dirty. The zil is considered dirty if it has
+ * any pending itx records that have not been cleaned by zil_clean().
+ */
boolean_t
zilog_is_dirty(zilog_t *zilog)
{
@@ -535,6 +551,7 @@ zil_create(zilog_t *zilog)
blkptr_t blk;
int error = 0;
boolean_t fastwrite = FALSE;
+ boolean_t slog = FALSE;
/*
* Wait for any previous destroy to complete.
@@ -549,7 +566,7 @@ zil_create(zilog_t *zilog)
/*
* Allocate an initial log block if:
* - there isn't one already
- * - the existing block is the wrong endianess
+ * - the existing block is the wrong endianness
*/
if (BP_IS_HOLE(&blk) || BP_SHOULD_BYTESWAP(&blk)) {
tx = dmu_tx_create(zilog->zl_os);
@@ -563,7 +580,7 @@ zil_create(zilog_t *zilog)
}
error = zio_alloc_zil(zilog->zl_spa, txg, &blk,
- ZIL_MIN_BLKSZ, B_TRUE);
+ ZIL_MIN_BLKSZ, &slog);
fastwrite = TRUE;
if (error == 0)
@@ -574,7 +591,7 @@ zil_create(zilog_t *zilog)
* Allocate a log write buffer (lwb) for the first log block.
*/
if (error == 0)
- lwb = zil_alloc_lwb(zilog, &blk, txg, fastwrite);
+ lwb = zil_alloc_lwb(zilog, &blk, slog, txg, fastwrite);
/*
* If we just allocated the first log block, commit our transaction
@@ -786,12 +803,7 @@ zil_vdev_compare(const void *x1, const void *x2)
const uint64_t v1 = ((zil_vdev_node_t *)x1)->zv_vdev;
const uint64_t v2 = ((zil_vdev_node_t *)x2)->zv_vdev;
- if (v1 < v2)
- return (-1);
- if (v1 > v2)
- return (1);
-
- return (0);
+ return (AVL_CMP(v1, v2));
}
void
@@ -889,6 +901,7 @@ zil_lwb_write_done(zio_t *zio)
* one in zil_commit_writer(). zil_sync() will only remove
* the lwb if lwb_buf is null.
*/
+ abd_put(zio->io_abd);
zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
mutex_enter(&zilog->zl_lock);
lwb->lwb_zio = NULL;
@@ -912,6 +925,7 @@ static void
zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb)
{
zbookmark_phys_t zb;
+ zio_priority_t prio;
SET_BOOKMARK(&zb, lwb->lwb_blk.blk_cksum.zc_word[ZIL_ZC_OBJSET],
ZB_ZIL_OBJECT, ZB_ZIL_LEVEL,
@@ -925,13 +939,19 @@ zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb)
/* Lock so zil_sync() doesn't fastwrite_unmark after zio is created */
mutex_enter(&zilog->zl_lock);
if (lwb->lwb_zio == NULL) {
+ abd_t *lwb_abd = abd_get_from_buf(lwb->lwb_buf,
+ BP_GET_LSIZE(&lwb->lwb_blk));
if (!lwb->lwb_fastwrite) {
metaslab_fastwrite_mark(zilog->zl_spa, &lwb->lwb_blk);
lwb->lwb_fastwrite = 1;
}
+ if (!lwb->lwb_slog || zilog->zl_cur_used <= zil_slog_bulk)
+ prio = ZIO_PRIORITY_SYNC_WRITE;
+ else
+ prio = ZIO_PRIORITY_ASYNC_WRITE;
lwb->lwb_zio = zio_rewrite(zilog->zl_root_zio, zilog->zl_spa,
- 0, &lwb->lwb_blk, lwb->lwb_buf, BP_GET_LSIZE(&lwb->lwb_blk),
- zil_lwb_write_done, lwb, ZIO_PRIORITY_SYNC_WRITE,
+ 0, &lwb->lwb_blk, lwb_abd, BP_GET_LSIZE(&lwb->lwb_blk),
+ zil_lwb_write_done, lwb, prio,
ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_PROPAGATE |
ZIO_FLAG_FASTWRITE, &zb);
}
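Two independent changes meet in zil_lwb_write_init(). First, the lwb buffer is handed to the ZIO layer as an ABD: abd_get_from_buf() borrows an existing linear buffer and abd_put() (in zil_lwb_write_done() above) returns it, so no data is copied. Second, the new zil_slog_bulk tunable demotes a commit's writes from synchronous to asynchronous priority once it has queued that many bytes, preventing one heavy ZIL writer from starving other users of a shared SLOG:

        /* zl_cur_used: bytes queued so far by the current commit. */
        prio = (!lwb->lwb_slog || zilog->zl_cur_used <= zil_slog_bulk) ?
            ZIO_PRIORITY_SYNC_WRITE : ZIO_PRIORITY_ASYNC_WRITE;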
@@ -952,15 +972,6 @@ uint64_t zil_block_buckets[] = {
UINT64_MAX
};
-/*
- * Use the slog as long as the current commit size is less than the
- * limit or the total list size is less than 2X the limit. Limit
- * checking is disabled by setting zil_slog_limit to UINT64_MAX.
- */
-unsigned long zil_slog_limit = 1024 * 1024;
-#define USE_SLOG(zilog) (((zilog)->zl_cur_used < zil_slog_limit) || \
- ((zilog)->zl_itx_list_sz < (zil_slog_limit << 1)))
-
/*
* Start a log block write and advance to the next log block.
* Calls are serialized.
@@ -976,7 +987,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
uint64_t txg;
uint64_t zil_blksz, wsz;
int i, error;
- boolean_t use_slog;
+ boolean_t slog;
if (BP_GET_CHECKSUM(&lwb->lwb_blk) == ZIO_CHECKSUM_ZILOG2) {
zilc = (zil_chain_t *)lwb->lwb_buf;
@@ -1032,10 +1043,8 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
zilog->zl_prev_rotor = (zilog->zl_prev_rotor + 1) & (ZIL_PREV_BLKS - 1);
BP_ZERO(bp);
- use_slog = USE_SLOG(zilog);
- error = zio_alloc_zil(spa, txg, bp, zil_blksz,
- USE_SLOG(zilog));
- if (use_slog) {
+ error = zio_alloc_zil(spa, txg, bp, zil_blksz, &slog);
+ if (slog) {
ZIL_STAT_BUMP(zil_itx_metaslab_slog_count);
ZIL_STAT_INCR(zil_itx_metaslab_slog_bytes, lwb->lwb_nused);
} else {
@@ -1050,7 +1059,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
/*
* Allocate a new log write buffer (lwb).
*/
- nlwb = zil_alloc_lwb(zilog, bp, txg, TRUE);
+ nlwb = zil_alloc_lwb(zilog, bp, slog, txg, TRUE);
/* Record the block for later vdev flushing */
zil_add_block(zilog, &lwb->lwb_blk);
@@ -1087,47 +1096,53 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
static lwb_t *
zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
{
- lr_t *lrc = &itx->itx_lr; /* common log record */
- lr_write_t *lrw = (lr_write_t *)lrc;
+ lr_t *lrcb, *lrc;
+ lr_write_t *lrwb, *lrw;
char *lr_buf;
- uint64_t txg = lrc->lrc_txg;
- uint64_t reclen = lrc->lrc_reclen;
- uint64_t dlen = 0;
+ uint64_t dlen, dnow, lwb_sp, reclen, txg;
if (lwb == NULL)
return (NULL);
ASSERT(lwb->lwb_buf != NULL);
- ASSERT(zilog_is_dirty(zilog) ||
- spa_freeze_txg(zilog->zl_spa) != UINT64_MAX);
- if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY)
+ lrc = &itx->itx_lr; /* Common log record inside itx. */
+ lrw = (lr_write_t *)lrc; /* Write log record inside itx. */
+ if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY) {
dlen = P2ROUNDUP_TYPED(
lrw->lr_length, sizeof (uint64_t), uint64_t);
-
+ } else {
+ dlen = 0;
+ }
+ reclen = lrc->lrc_reclen;
zilog->zl_cur_used += (reclen + dlen);
+ txg = lrc->lrc_txg;
zil_lwb_write_init(zilog, lwb);
+cont:
/*
* If this record won't fit in the current log block, start a new one.
+ * For WR_NEED_COPY optimize layout for minimal number of chunks.
*/
- if (lwb->lwb_nused + reclen + dlen > lwb->lwb_sz) {
+ lwb_sp = lwb->lwb_sz - lwb->lwb_nused;
+ if (reclen > lwb_sp || (reclen + dlen > lwb_sp &&
+ lwb_sp < ZIL_MAX_WASTE_SPACE && (dlen % ZIL_MAX_LOG_DATA == 0 ||
+ lwb_sp < reclen + dlen % ZIL_MAX_LOG_DATA))) {
lwb = zil_lwb_write_start(zilog, lwb);
if (lwb == NULL)
return (NULL);
zil_lwb_write_init(zilog, lwb);
ASSERT(LWB_EMPTY(lwb));
- if (lwb->lwb_nused + reclen + dlen > lwb->lwb_sz) {
- txg_wait_synced(zilog->zl_dmu_pool, txg);
- return (lwb);
- }
+ lwb_sp = lwb->lwb_sz - lwb->lwb_nused;
+ ASSERT3U(reclen + MIN(dlen, sizeof (uint64_t)), <=, lwb_sp);
}
+ dnow = MIN(dlen, lwb_sp - reclen);
lr_buf = lwb->lwb_buf + lwb->lwb_nused;
bcopy(lrc, lr_buf, reclen);
- lrc = (lr_t *)lr_buf;
- lrw = (lr_write_t *)lrc;
+ lrcb = (lr_t *)lr_buf; /* Like lrc, but inside lwb. */
+ lrwb = (lr_write_t *)lrcb; /* Like lrw, but inside lwb. */
ZIL_STAT_BUMP(zil_itx_count);
@@ -1144,10 +1159,13 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
char *dbuf;
int error;
- if (dlen) {
- ASSERT(itx->itx_wr_state == WR_NEED_COPY);
+ if (itx->itx_wr_state == WR_NEED_COPY) {
dbuf = lr_buf + reclen;
- lrw->lr_common.lrc_reclen += dlen;
+ lrcb->lrc_reclen += dnow;
+ if (lrwb->lr_length > dnow)
+ lrwb->lr_length = dnow;
+ lrw->lr_offset += dnow;
+ lrw->lr_length -= dnow;
ZIL_STAT_BUMP(zil_itx_needcopy_count);
ZIL_STAT_INCR(zil_itx_needcopy_bytes,
lrw->lr_length);
@@ -1159,7 +1177,7 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
lrw->lr_length);
}
error = zilog->zl_get_data(
- itx->itx_private, lrw, dbuf, lwb->lwb_zio);
+ itx->itx_private, lrwb, dbuf, lwb->lwb_zio);
if (error == EIO) {
txg_wait_synced(zilog->zl_dmu_pool, txg);
return (lwb);
@@ -1178,12 +1196,18 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
* equal to the itx sequence number because not all transactions
* are synchronous, and sometimes spa_sync() gets there first.
*/
- lrc->lrc_seq = ++zilog->zl_lr_seq; /* we are single threaded */
- lwb->lwb_nused += reclen + dlen;
+ lrcb->lrc_seq = ++zilog->zl_lr_seq; /* we are single threaded */
+ lwb->lwb_nused += reclen + dnow;
lwb->lwb_max_txg = MAX(lwb->lwb_max_txg, txg);
ASSERT3U(lwb->lwb_nused, <=, lwb->lwb_sz);
ASSERT0(P2PHASE(lwb->lwb_nused, sizeof (uint64_t)));
+ dlen -= dnow;
+ if (dlen > 0) {
+ zilog->zl_cur_used += reclen;
+ goto cont;
+ }
+
return (lwb);
}
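With the cont: loop above, zil_lwb_commit no longer has to txg_wait_synced() when a WR_NEED_COPY record is larger than a log block; it copies dnow bytes into the current lwb, advances lr_offset/lr_length, and loops for the remainder. A self-contained sketch of the slicing arithmetic (all values made up for illustration):

/* Sketch: how a large WR_NEED_COPY payload is sliced across log blocks. */
#include <stdint.h>
#include <stdio.h>

#define	MIN(a, b)	((a) < (b) ? (a) : (b))

int
main(void)
{
	uint64_t reclen = 192;		/* log record header, already rounded */
	uint64_t dlen = 90112;		/* payload bytes still to copy */
	uint64_t lwb_sz = 36864;	/* usable log block size */
	uint64_t lwb_nused = 1024;	/* bytes used in the current lwb */

	do {
		uint64_t lwb_sp = lwb_sz - lwb_nused;
		/* Each chunk pays the header again (zl_cur_used += reclen). */
		uint64_t dnow = MIN(dlen, lwb_sp - reclen);

		printf("chunk of %llu bytes\n", (unsigned long long)dnow);
		dlen -= dnow;
		lwb_nused = 0;		/* continue in a fresh lwb */
	} while (dlen > 0);
	return (0);
}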
@@ -1197,7 +1221,6 @@ zil_itx_create(uint64_t txtype, size_t lrsize)
itx = zio_data_buf_alloc(offsetof(itx_t, itx_lr) + lrsize);
itx->itx_lr.lrc_txtype = txtype;
itx->itx_lr.lrc_reclen = lrsize;
- itx->itx_sod = lrsize; /* if write & WR_NEED_COPY will be increased */
itx->itx_lr.lrc_seq = 0; /* defensive */
itx->itx_sync = B_TRUE; /* default is synchronous */
itx->itx_callback = NULL;
@@ -1257,12 +1280,7 @@ zil_aitx_compare(const void *x1, const void *x2)
const uint64_t o1 = ((itx_async_node_t *)x1)->ia_foid;
const uint64_t o2 = ((itx_async_node_t *)x2)->ia_foid;
- if (o1 < o2)
- return (-1);
- if (o1 > o2)
- return (1);
-
- return (0);
+ return (AVL_CMP(o1, o2));
}
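zil_aitx_compare is one of many comparators this update collapses onto AVL_CMP. Assuming the sys/avl.h in this tree defines it the way the rest of the series suggests, the helper replaces the branchy three-way compare with two comparisons and a subtraction, which compilers can lower without branches:

/* Sketch: assumed AVL_CMP definition; yields -1, 0, or 1. */
#define	AVL_CMP(a, b)	(((a) > (b)) - ((a) < (b)))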
/*
@@ -1351,11 +1369,10 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx)
* this itxg. Save the itxs for release below.
* This should be rare.
*/
- atomic_add_64(&zilog->zl_itx_list_sz, -itxg->itxg_sod);
- itxg->itxg_sod = 0;
+ zfs_dbgmsg("zil_itx_assign: missed itx cleanup for "
+ "txg %llu", itxg->itxg_txg);
clean = itxg->itxg_itxs;
}
- ASSERT(itxg->itxg_sod == 0);
itxg->itxg_txg = txg;
itxs = itxg->itxg_itxs = kmem_zalloc(sizeof (itxs_t),
KM_SLEEP);
@@ -1368,11 +1385,10 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx)
}
if (itx->itx_sync) {
list_insert_tail(&itxs->i_sync_list, itx);
- atomic_add_64(&zilog->zl_itx_list_sz, itx->itx_sod);
- itxg->itxg_sod += itx->itx_sod;
} else {
avl_tree_t *t = &itxs->i_async_tree;
- uint64_t foid = ((lr_ooo_t *)&itx->itx_lr)->lr_foid;
+ uint64_t foid =
+ LR_FOID_GET_OBJ(((lr_ooo_t *)&itx->itx_lr)->lr_foid);
itx_async_node_t *ian;
avl_index_t where;
@@ -1418,8 +1434,6 @@ zil_clean(zilog_t *zilog, uint64_t synced_txg)
ASSERT3U(itxg->itxg_txg, <=, synced_txg);
ASSERT(itxg->itxg_txg != 0);
ASSERT(zilog->zl_clean_taskq != NULL);
- atomic_add_64(&zilog->zl_itx_list_sz, -itxg->itxg_sod);
- itxg->itxg_sod = 0;
clean_me = itxg->itxg_itxs;
itxg->itxg_itxs = NULL;
itxg->itxg_txg = 0;
@@ -1443,13 +1457,17 @@ zil_get_commit_list(zilog_t *zilog)
{
uint64_t otxg, txg;
list_t *commit_list = &zilog->zl_itx_commit_list;
- uint64_t push_sod = 0;
if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX) /* ziltest support */
otxg = ZILTEST_TXG;
else
otxg = spa_last_synced_txg(zilog->zl_spa) + 1;
+ /*
+ * This is inherently racy, since there is nothing to prevent
+ * the last synced txg from changing. That's okay since we'll
+ * only commit things in the future.
+ */
for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) {
itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK];
@@ -1459,13 +1477,20 @@ zil_get_commit_list(zilog_t *zilog)
continue;
}
+ /*
+ * If we're adding itx records to the zl_itx_commit_list,
+ * then the zil better be dirty in this "txg". We can assert
+ * that here since we're holding the itxg_lock which will
+ * prevent spa_sync from cleaning it. Once we add the itxs
+ * to the zl_itx_commit_list we must commit it to disk even
+ * if it's unnecessary (i.e. the txg was synced).
+ */
+ ASSERT(zilog_is_dirty_in_txg(zilog, txg) ||
+ spa_freeze_txg(zilog->zl_spa) != UINT64_MAX);
list_move_tail(commit_list, &itxg->itxg_itxs->i_sync_list);
- push_sod += itxg->itxg_sod;
- itxg->itxg_sod = 0;
mutex_exit(&itxg->itxg_lock);
}
- atomic_add_64(&zilog->zl_itx_list_sz, -push_sod);
}
/*
@@ -1484,6 +1509,10 @@ zil_async_to_sync(zilog_t *zilog, uint64_t foid)
else
otxg = spa_last_synced_txg(zilog->zl_spa) + 1;
+ /*
+ * This is inherently racy, since there is nothing to prevent
+ * the last synced txg from changing.
+ */
for (txg = otxg; txg < (otxg + TXG_CONCURRENT_STATES); txg++) {
itxg_t *itxg = &zilog->zl_itxg[txg & TXG_MASK];
@@ -1556,8 +1585,14 @@ zil_commit_writer(zilog_t *zilog)
for (itx = list_head(&zilog->zl_itx_commit_list); itx != NULL;
itx = list_next(&zilog->zl_itx_commit_list, itx)) {
txg = itx->itx_lr.lrc_txg;
- ASSERT(txg);
+ ASSERT3U(txg, !=, 0);
+ /*
+ * This is inherently racy and may result in us writing
+ * out a log block for a txg that was just synced. This is
+ * ok since we'll end up cleaning up that log block the next
+ * time we call zil_sync().
+ */
if (txg > spa_last_synced_txg(spa) || txg > spa_freeze_txg(spa))
lwb = zil_lwb_commit(zilog, itx, lwb);
}
@@ -1918,7 +1953,11 @@ zil_close(zilog_t *zilog)
mutex_exit(&zilog->zl_lock);
if (txg)
txg_wait_synced(zilog->zl_dmu_pool, txg);
- ASSERT(!zilog_is_dirty(zilog));
+
+ if (zilog_is_dirty(zilog))
+ zfs_dbgmsg("zil (%p) is dirty, txg %llu", zilog, txg);
+ if (txg < spa_freeze_txg(zilog->zl_spa))
+ VERIFY(!zilog_is_dirty(zilog));
taskq_destroy(zilog->zl_clean_taskq);
zilog->zl_clean_taskq = NULL;
@@ -2078,7 +2117,7 @@ typedef struct zil_replay_arg {
static int
zil_replay_error(zilog_t *zilog, lr_t *lr, int error)
{
- char name[MAXNAMELEN];
+ char name[ZFS_MAX_DATASET_NAME_LEN];
zilog->zl_replaying_seq--; /* didn't actually replay this one */
@@ -2122,7 +2161,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
*/
if (TX_OOO(txtype)) {
error = dmu_object_info(zilog->zl_os,
- ((lr_ooo_t *)lr)->lr_foid, NULL);
+ LR_FOID_GET_OBJ(((lr_ooo_t *)lr)->lr_foid), NULL);
if (error == ENOENT || error == EEXIST)
return (0);
}
@@ -2274,12 +2313,14 @@ EXPORT_SYMBOL(zil_bp_tree_add);
EXPORT_SYMBOL(zil_set_sync);
EXPORT_SYMBOL(zil_set_logbias);
+/* BEGIN CSTYLED */
module_param(zil_replay_disable, int, 0644);
MODULE_PARM_DESC(zil_replay_disable, "Disable intent logging replay");
module_param(zfs_nocacheflush, int, 0644);
MODULE_PARM_DESC(zfs_nocacheflush, "Disable cache flushes");
-module_param(zil_slog_limit, ulong, 0644);
-MODULE_PARM_DESC(zil_slog_limit, "Max commit bytes to separate log device");
+module_param(zil_slog_bulk, ulong, 0644);
+MODULE_PARM_DESC(zil_slog_bulk, "Limit in bytes slog sync writes per commit");
+/* END CSTYLED */
#endif
diff --git a/zfs/module/zfs/zio.c b/zfs/module/zfs/zio.c
index f45dfe64268f..1d69d8d8ded9 100644
--- a/zfs/module/zfs/zio.c
+++ b/zfs/module/zfs/zio.c
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2011, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
*/
@@ -39,6 +39,10 @@
#include <sys/ddt.h>
#include <sys/blkptr.h>
#include <sys/zfeature.h>
+#include <sys/metaslab_impl.h>
+#include <sys/time.h>
+#include <sys/trace_zio.h>
+#include <sys/abd.h>
/*
* ==========================================================================
@@ -46,9 +50,15 @@
* ==========================================================================
*/
const char *zio_type_name[ZIO_TYPES] = {
+ /*
+ * Note: Linux kernel thread name length is limited
+ * so these names will differ from upstream open zfs.
+ */
"z_null", "z_rd", "z_wr", "z_fr", "z_cl", "z_ioctl"
};
+int zio_dva_throttle_enabled = B_TRUE;
+
/*
* ==========================================================================
* I/O kmem caches
@@ -58,11 +68,19 @@ kmem_cache_t *zio_cache;
kmem_cache_t *zio_link_cache;
kmem_cache_t *zio_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
+#if defined(ZFS_DEBUG) && !defined(_KERNEL)
+uint64_t zio_buf_cache_allocs[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
+uint64_t zio_buf_cache_frees[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
+#endif
+
int zio_delay_max = ZIO_DELAY_MAX;
#define ZIO_PIPELINE_CONTINUE 0x100
#define ZIO_PIPELINE_STOP 0x101
+#define BP_SPANB(indblkshift, level) \
+ (((uint64_t)1) << ((level) * ((indblkshift) - SPA_BLKPTRSHIFT)))
+#define COMPARE_META_LEVEL 0x80000000ul
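BP_SPANB gives the number of level-0 blocks one block pointer covers at a given indirection level; SPA_BLKPTRSHIFT is 7 because a blkptr_t occupies 128 bytes, so a 2^indblkshift-byte indirect block holds 2^(indblkshift - 7) pointers. Worked values for 128 KiB indirect blocks (indblkshift = 17):

/* BP_SPANB(17, 1) == 1ULL << (1 * (17 - 7)) == 1024     blocks per L1 bp */
/* BP_SPANB(17, 2) == 1ULL << (2 * (17 - 7)) == 1048576  blocks per L2 bp */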
/*
* The following actions directly affect the spa's sync-to-convergence logic.
* The values below define the sync pass when we start performing the action.
@@ -95,6 +113,8 @@ int zio_buf_debug_limit = 0;
static inline void __zio_execute(zio_t *zio);
+static void zio_taskq_dispatch(zio_t *, zio_taskq_type_t, boolean_t);
+
void
zio_init(void)
{
@@ -117,7 +137,7 @@ zio_init(void)
size_t align = 0;
size_t cflags = (size > zio_buf_debug_limit) ? KMC_NODEBUG : 0;
-#ifdef _ILP32
+#if defined(_ILP32) && defined(_KERNEL)
/*
* Cache size limited to 1M on 32-bit platforms until ARC
* buffers no longer require virtual address space.
@@ -138,12 +158,21 @@ zio_init(void)
*/
if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE))
continue;
-#endif
+ /*
+ * Here's the problem - on 4K native devices in userland on
+ * Linux using O_DIRECT, buffers must be 4K aligned or I/O
+ * will fail with EINVAL, causing zdb (and others) to coredump.
+ * Since userland probably doesn't need optimized buffer caches,
+ * we just force 4K alignment on everything.
+ */
+ align = 8 * SPA_MINBLOCKSIZE;
+#else
if (size < PAGESIZE) {
align = SPA_MINBLOCKSIZE;
} else if (IS_P2ALIGNED(size, p2 >> 2)) {
align = PAGESIZE;
}
+#endif
if (align != 0) {
char name[36];
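The userland branch above pins align to 8 * SPA_MINBLOCKSIZE = 8 * 512 = 4096 bytes. A minimal illustration of the failure it avoids, outside ZFS entirely (the device path is a placeholder):

/* Sketch: why 4K alignment matters for O_DIRECT reads on 4Kn devices. */
#define	_GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	void *buf;
	int fd = open("/dev/sdX", O_RDONLY | O_DIRECT);	/* placeholder */

	if (fd < 0)
		return (1);
	/* A 512-byte-aligned buffer would fail with EINVAL on a 4Kn disk. */
	if (posix_memalign(&buf, 4096, 4096) != 0)
		return (1);
	(void) pread(fd, buf, 4096, 0);	/* aligned: succeeds */
	free(buf);
	(void) close(fd);
	return (0);
}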
@@ -188,6 +217,13 @@ zio_fini(void)
*/
if (((c + 1) << SPA_MINBLOCKSHIFT) > zfs_max_recordsize)
break;
+#endif
+#if defined(ZFS_DEBUG) && !defined(_KERNEL)
+ if (zio_buf_cache_allocs[c] != zio_buf_cache_frees[c])
+ (void) printf("zio_fini: [%d] %llu != %llu\n",
+ (int)((c + 1) << SPA_MINBLOCKSHIFT),
+ (long long unsigned)zio_buf_cache_allocs[c],
+ (long long unsigned)zio_buf_cache_frees[c]);
#endif
if (zio_buf_cache[c] != last_cache) {
last_cache = zio_buf_cache[c];
@@ -228,6 +264,9 @@ zio_buf_alloc(size_t size)
size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
+#if defined(ZFS_DEBUG) && !defined(_KERNEL)
+ atomic_add_64(&zio_buf_cache_allocs[c], 1);
+#endif
return (kmem_cache_alloc(zio_buf_cache[c], KM_PUSHPAGE));
}
@@ -248,26 +287,15 @@ zio_data_buf_alloc(size_t size)
return (kmem_cache_alloc(zio_data_buf_cache[c], KM_PUSHPAGE));
}
-/*
- * Use zio_buf_alloc_flags when specific allocation flags are needed. e.g.
- * passing KM_NOSLEEP when it is acceptable for an allocation to fail.
- */
-void *
-zio_buf_alloc_flags(size_t size, int flags)
-{
- size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
-
- VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
-
- return (kmem_cache_alloc(zio_buf_cache[c], flags));
-}
-
void
zio_buf_free(void *buf, size_t size)
{
size_t c = (size - 1) >> SPA_MINBLOCKSHIFT;
VERIFY3U(c, <, SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT);
+#if defined(ZFS_DEBUG) && !defined(_KERNEL)
+ atomic_add_64(&zio_buf_cache_frees[c], 1);
+#endif
kmem_cache_free(zio_buf_cache[c], buf);
}
@@ -282,18 +310,30 @@ zio_data_buf_free(void *buf, size_t size)
kmem_cache_free(zio_data_buf_cache[c], buf);
}
+static void
+zio_abd_free(void *abd, size_t size)
+{
+ abd_free((abd_t *)abd);
+}
+
/*
* ==========================================================================
* Push and pop I/O transform buffers
* ==========================================================================
*/
-static void
-zio_push_transform(zio_t *zio, void *data, uint64_t size, uint64_t bufsize,
- zio_transform_func_t *transform)
+void
+zio_push_transform(zio_t *zio, abd_t *data, uint64_t size, uint64_t bufsize,
+ zio_transform_func_t *transform)
{
zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_SLEEP);
- zt->zt_orig_data = zio->io_data;
+ /*
+ * Ensure that anyone expecting this zio to contain a linear ABD isn't
+ * going to get a nasty surprise when they try to access the data.
+ */
+ IMPLY(abd_is_linear(zio->io_abd), abd_is_linear(data));
+
+ zt->zt_orig_abd = zio->io_abd;
zt->zt_orig_size = zio->io_size;
zt->zt_bufsize = bufsize;
zt->zt_transform = transform;
@@ -301,11 +341,11 @@ zio_push_transform(zio_t *zio, void *data, uint64_t size, uint64_t bufsize,
zt->zt_next = zio->io_transform_stack;
zio->io_transform_stack = zt;
- zio->io_data = data;
+ zio->io_abd = data;
zio->io_size = size;
}
-static void
+void
zio_pop_transforms(zio_t *zio)
{
zio_transform_t *zt;
@@ -313,12 +353,12 @@ zio_pop_transforms(zio_t *zio)
while ((zt = zio->io_transform_stack) != NULL) {
if (zt->zt_transform != NULL)
zt->zt_transform(zio,
- zt->zt_orig_data, zt->zt_orig_size);
+ zt->zt_orig_abd, zt->zt_orig_size);
if (zt->zt_bufsize != 0)
- zio_buf_free(zio->io_data, zt->zt_bufsize);
+ abd_free(zio->io_abd);
- zio->io_data = zt->zt_orig_data;
+ zio->io_abd = zt->zt_orig_abd;
zio->io_size = zt->zt_orig_size;
zio->io_transform_stack = zt->zt_next;
@@ -332,21 +372,26 @@ zio_pop_transforms(zio_t *zio)
* ==========================================================================
*/
static void
-zio_subblock(zio_t *zio, void *data, uint64_t size)
+zio_subblock(zio_t *zio, abd_t *data, uint64_t size)
{
ASSERT(zio->io_size > size);
if (zio->io_type == ZIO_TYPE_READ)
- bcopy(zio->io_data, data, size);
+ abd_copy(data, zio->io_abd, size);
}
static void
-zio_decompress(zio_t *zio, void *data, uint64_t size)
+zio_decompress(zio_t *zio, abd_t *data, uint64_t size)
{
- if (zio->io_error == 0 &&
- zio_decompress_data(BP_GET_COMPRESS(zio->io_bp),
- zio->io_data, data, zio->io_size, size) != 0)
- zio->io_error = SET_ERROR(EIO);
+ if (zio->io_error == 0) {
+ void *tmp = abd_borrow_buf(data, size);
+ int ret = zio_decompress_data(BP_GET_COMPRESS(zio->io_bp),
+ zio->io_abd, tmp, zio->io_size, size);
+ abd_return_buf_copy(data, tmp, size);
+
+ if (ret != 0)
+ zio->io_error = SET_ERROR(EIO);
+ }
}
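The rewritten zio_decompress is the canonical pattern for code that needs a linear view of a possibly scattered ABD: borrow a buffer, work on it, then return it with a copy-back. The shape of the pattern, assuming the zfs abd headers (the callback itself is hypothetical):

/* Sketch: borrow/return pattern for linear access to an ABD. */
static void
example_fill(abd_t *data, uint64_t size)
{
	/*
	 * abd_borrow_buf() returns scratch space (contents undefined);
	 * use abd_borrow_buf_copy() when the existing bytes are needed.
	 */
	void *tmp = abd_borrow_buf(data, size);

	/* ... produce 'size' bytes into tmp ... */

	abd_return_buf_copy(data, tmp, size);	/* copy back and release */
}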
/*
@@ -354,52 +399,39 @@ zio_decompress(zio_t *zio, void *data, uint64_t size)
* I/O parent/child relationships and pipeline interlocks
* ==========================================================================
*/
-/*
- * NOTE - Callers to zio_walk_parents() and zio_walk_children must
- * continue calling these functions until they return NULL.
- * Otherwise, the next caller will pick up the list walk in
- * some indeterminate state. (Otherwise every caller would
- * have to pass in a cookie to keep the state represented by
- * io_walk_link, which gets annoying.)
- */
zio_t *
-zio_walk_parents(zio_t *cio)
+zio_walk_parents(zio_t *cio, zio_link_t **zl)
{
- zio_link_t *zl = cio->io_walk_link;
list_t *pl = &cio->io_parent_list;
- zl = (zl == NULL) ? list_head(pl) : list_next(pl, zl);
- cio->io_walk_link = zl;
-
- if (zl == NULL)
+ *zl = (*zl == NULL) ? list_head(pl) : list_next(pl, *zl);
+ if (*zl == NULL)
return (NULL);
- ASSERT(zl->zl_child == cio);
- return (zl->zl_parent);
+ ASSERT((*zl)->zl_child == cio);
+ return ((*zl)->zl_parent);
}
zio_t *
-zio_walk_children(zio_t *pio)
+zio_walk_children(zio_t *pio, zio_link_t **zl)
{
- zio_link_t *zl = pio->io_walk_link;
list_t *cl = &pio->io_child_list;
- zl = (zl == NULL) ? list_head(cl) : list_next(cl, zl);
- pio->io_walk_link = zl;
-
- if (zl == NULL)
+ *zl = (*zl == NULL) ? list_head(cl) : list_next(cl, *zl);
+ if (*zl == NULL)
return (NULL);
- ASSERT(zl->zl_parent == pio);
- return (zl->zl_child);
+ ASSERT((*zl)->zl_parent == pio);
+ return ((*zl)->zl_child);
}
zio_t *
zio_unique_parent(zio_t *cio)
{
- zio_t *pio = zio_walk_parents(cio);
+ zio_link_t *zl = NULL;
+ zio_t *pio = zio_walk_parents(cio, &zl);
- VERIFY(zio_walk_parents(cio) == NULL);
+ VERIFY3P(zio_walk_parents(cio, &zl), ==, NULL);
return (pio);
}
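The reworked walkers drop the per-zio io_walk_link cursor in favor of caller-owned state, so concurrent or abandoned walks can no longer trip over each other. Typical usage after this change (mirroring the call sites later in this patch):

zio_link_t *zl = NULL;
zio_t *pio;

while ((pio = zio_walk_parents(cio, &zl)) != NULL) {
	/* ... visit each parent of cio; stopping early is now safe ... */
}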
@@ -455,7 +487,6 @@ zio_remove_child(zio_t *pio, zio_t *cio, zio_link_t *zl)
mutex_exit(&pio->io_lock);
mutex_exit(&cio->io_lock);
-
kmem_cache_free(zio_link_cache, zl);
}
@@ -469,6 +500,7 @@ zio_wait_for_children(zio_t *zio, enum zio_child child, enum zio_wait_type wait)
ASSERT(zio->io_stall == NULL);
if (*countp != 0) {
zio->io_stage >>= 1;
+ ASSERT3U(zio->io_stage, !=, ZIO_STAGE_OPEN);
zio->io_stall = countp;
waiting = B_TRUE;
}
@@ -493,9 +525,18 @@ zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait)
(*countp)--;
if (*countp == 0 && pio->io_stall == countp) {
+ zio_taskq_type_t type =
+ pio->io_stage < ZIO_STAGE_VDEV_IO_START ? ZIO_TASKQ_ISSUE :
+ ZIO_TASKQ_INTERRUPT;
pio->io_stall = NULL;
mutex_exit(&pio->io_lock);
- __zio_execute(pio);
+ /*
+ * Dispatch the parent zio in its own taskq so that
+ * the child can continue to make progress. This also
+ * prevents overflowing the stack when we have deeply nested
+ * parent-child relationships.
+ */
+ zio_taskq_dispatch(pio, type, B_FALSE);
} else {
mutex_exit(&pio->io_lock);
}
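Finishing the parent inline could recurse through arbitrarily deep parent/child chains; the hunk above bounds stack depth by re-dispatching the parent on a taskq, chosen by pipeline position so issue-side and completion-side work stay on their usual queues. The selection in isolation:

/* Sketch: queue choice mirrors the hunk above. */
zio_taskq_type_t type = (pio->io_stage < ZIO_STAGE_VDEV_IO_START) ?
    ZIO_TASKQ_ISSUE : ZIO_TASKQ_INTERRUPT;

zio_taskq_dispatch(pio, type, B_FALSE);	/* B_FALSE: don't cut in line */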
@@ -508,6 +549,40 @@ zio_inherit_child_errors(zio_t *zio, enum zio_child c)
zio->io_error = zio->io_child_error[c];
}
+int
+zio_bookmark_compare(const void *x1, const void *x2)
+{
+ const zio_t *z1 = x1;
+ const zio_t *z2 = x2;
+
+ if (z1->io_bookmark.zb_objset < z2->io_bookmark.zb_objset)
+ return (-1);
+ if (z1->io_bookmark.zb_objset > z2->io_bookmark.zb_objset)
+ return (1);
+
+ if (z1->io_bookmark.zb_object < z2->io_bookmark.zb_object)
+ return (-1);
+ if (z1->io_bookmark.zb_object > z2->io_bookmark.zb_object)
+ return (1);
+
+ if (z1->io_bookmark.zb_level < z2->io_bookmark.zb_level)
+ return (-1);
+ if (z1->io_bookmark.zb_level > z2->io_bookmark.zb_level)
+ return (1);
+
+ if (z1->io_bookmark.zb_blkid < z2->io_bookmark.zb_blkid)
+ return (-1);
+ if (z1->io_bookmark.zb_blkid > z2->io_bookmark.zb_blkid)
+ return (1);
+
+ if (z1 < z2)
+ return (-1);
+ if (z1 > z2)
+ return (1);
+
+ return (0);
+}
+
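zio_bookmark_compare orders I/Os by logical locality (objset, object, level, blkid) and falls back to the pointer value only to break exact ties, which gives an AVL tree the strict total order it requires. One plausible wiring, using field names the allocation-throttle code elsewhere in this patch appears to rely on (treat them as assumptions here):

/* Sketch: an AVL tree of zios keyed by bookmark. */
avl_tree_t alloc_tree;

avl_create(&alloc_tree, zio_bookmark_compare,
    sizeof (zio_t), offsetof(zio_t, io_alloc_node));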
/*
* ==========================================================================
* Create the various types of I/O (read, write, free, etc)
@@ -515,31 +590,35 @@ zio_inherit_child_errors(zio_t *zio, enum zio_child c)
*/
static zio_t *
zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
- void *data, uint64_t size, zio_done_func_t *done, void *private,
- zio_type_t type, zio_priority_t priority, enum zio_flag flags,
- vdev_t *vd, uint64_t offset, const zbookmark_phys_t *zb,
- enum zio_stage stage, enum zio_stage pipeline)
+ abd_t *data, uint64_t lsize, uint64_t psize, zio_done_func_t *done,
+ void *private, zio_type_t type, zio_priority_t priority,
+ enum zio_flag flags, vdev_t *vd, uint64_t offset,
+ const zbookmark_phys_t *zb, enum zio_stage stage,
+ enum zio_stage pipeline)
{
zio_t *zio;
- ASSERT3U(size, <=, SPA_MAXBLOCKSIZE);
- ASSERT(P2PHASE(size, SPA_MINBLOCKSIZE) == 0);
+ ASSERT3U(psize, <=, SPA_MAXBLOCKSIZE);
+ ASSERT(P2PHASE(psize, SPA_MINBLOCKSIZE) == 0);
ASSERT(P2PHASE(offset, SPA_MINBLOCKSIZE) == 0);
ASSERT(!vd || spa_config_held(spa, SCL_STATE_ALL, RW_READER));
ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER));
ASSERT(vd || stage == ZIO_STAGE_OPEN);
+ IMPLY(lsize != psize, (flags & ZIO_FLAG_RAW) != 0);
+
zio = kmem_cache_alloc(zio_cache, KM_SLEEP);
bzero(zio, sizeof (zio_t));
- mutex_init(&zio->io_lock, NULL, MUTEX_DEFAULT, NULL);
+ mutex_init(&zio->io_lock, NULL, MUTEX_NOLOCKDEP, NULL);
cv_init(&zio->io_cv, NULL, CV_DEFAULT, NULL);
list_create(&zio->io_parent_list, sizeof (zio_link_t),
offsetof(zio_link_t, zl_parent_node));
list_create(&zio->io_child_list, sizeof (zio_link_t),
offsetof(zio_link_t, zl_child_node));
+ metaslab_trace_init(&zio->io_alloc_list);
if (vd != NULL)
zio->io_child_type = ZIO_CHILD_VDEV;
@@ -571,11 +650,13 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio->io_priority = priority;
zio->io_vd = vd;
zio->io_offset = offset;
- zio->io_orig_data = zio->io_data = data;
- zio->io_orig_size = zio->io_size = size;
+ zio->io_orig_abd = zio->io_abd = data;
+ zio->io_orig_size = zio->io_size = psize;
+ zio->io_lsize = lsize;
zio->io_orig_flags = zio->io_flags = flags;
zio->io_orig_stage = zio->io_stage = stage;
zio->io_orig_pipeline = zio->io_pipeline = pipeline;
+ zio->io_pipeline_trace = ZIO_STAGE_OPEN;
zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY);
zio->io_state[ZIO_WAIT_DONE] = (stage >= ZIO_STAGE_DONE);
@@ -599,6 +680,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
static void
zio_destroy(zio_t *zio)
{
+ metaslab_trace_fini(&zio->io_alloc_list);
list_destroy(&zio->io_parent_list);
list_destroy(&zio->io_child_list);
mutex_destroy(&zio->io_lock);
@@ -612,7 +694,7 @@ zio_null(zio_t *pio, spa_t *spa, vdev_t *vd, zio_done_func_t *done,
{
zio_t *zio;
- zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private,
+ zio = zio_create(pio, spa, 0, NULL, NULL, 0, 0, done, private,
ZIO_TYPE_NULL, ZIO_PRIORITY_NOW, flags, vd, 0, NULL,
ZIO_STAGE_OPEN, ZIO_INTERLOCK_PIPELINE);
@@ -676,18 +758,20 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp)
zfs_panic_recover("blkptr at %p DVA %u has invalid "
"VDEV %llu",
bp, i, (longlong_t)vdevid);
+ continue;
}
vd = spa->spa_root_vdev->vdev_child[vdevid];
if (vd == NULL) {
zfs_panic_recover("blkptr at %p DVA %u has invalid "
"VDEV %llu",
bp, i, (longlong_t)vdevid);
+ continue;
}
if (vd->vdev_ops == &vdev_hole_ops) {
zfs_panic_recover("blkptr at %p DVA %u has hole "
"VDEV %llu",
bp, i, (longlong_t)vdevid);
-
+ continue;
}
if (vd->vdev_ops == &vdev_missing_ops) {
/*
@@ -711,7 +795,7 @@ zfs_blkptr_verify(spa_t *spa, const blkptr_t *bp)
zio_t *
zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
- void *data, uint64_t size, zio_done_func_t *done, void *private,
+ abd_t *data, uint64_t size, zio_done_func_t *done, void *private,
zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb)
{
zio_t *zio;
@@ -719,7 +803,7 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
zfs_blkptr_verify(spa, bp);
zio = zio_create(pio, spa, BP_PHYSICAL_BIRTH(bp), bp,
- data, size, done, private,
+ data, size, size, done, private,
ZIO_TYPE_READ, priority, flags, NULL, 0, zb,
ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ?
ZIO_DDT_CHILD_READ_PIPELINE : ZIO_READ_PIPELINE);
@@ -729,10 +813,11 @@ zio_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
zio_t *
zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
- void *data, uint64_t size, const zio_prop_t *zp,
- zio_done_func_t *ready, zio_done_func_t *physdone, zio_done_func_t *done,
- void *private,
- zio_priority_t priority, enum zio_flag flags, const zbookmark_phys_t *zb)
+ abd_t *data, uint64_t lsize, uint64_t psize, const zio_prop_t *zp,
+ zio_done_func_t *ready, zio_done_func_t *children_ready,
+ zio_done_func_t *physdone, zio_done_func_t *done,
+ void *private, zio_priority_t priority, enum zio_flag flags,
+ const zbookmark_phys_t *zb)
{
zio_t *zio;
@@ -745,12 +830,13 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zp->zp_copies > 0 &&
zp->zp_copies <= spa_max_replication(spa));
- zio = zio_create(pio, spa, txg, bp, data, size, done, private,
+ zio = zio_create(pio, spa, txg, bp, data, lsize, psize, done, private,
ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb,
ZIO_STAGE_OPEN, (flags & ZIO_FLAG_DDT_CHILD) ?
ZIO_DDT_CHILD_WRITE_PIPELINE : ZIO_WRITE_PIPELINE);
zio->io_ready = ready;
+ zio->io_children_ready = children_ready;
zio->io_physdone = physdone;
zio->io_prop = *zp;
@@ -768,14 +854,14 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
}
zio_t *
-zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data,
+zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, abd_t *data,
uint64_t size, zio_done_func_t *done, void *private,
zio_priority_t priority, enum zio_flag flags, zbookmark_phys_t *zb)
{
zio_t *zio;
- zio = zio_create(pio, spa, txg, bp, data, size, done, private,
- ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb,
+ zio = zio_create(pio, spa, txg, bp, data, size, size, done, private,
+ ZIO_TYPE_WRITE, priority, flags | ZIO_FLAG_IO_REWRITE, NULL, 0, zb,
ZIO_STAGE_OPEN, ZIO_REWRITE_PIPELINE);
return (zio);
@@ -854,8 +940,8 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
stage |= ZIO_STAGE_ISSUE_ASYNC;
zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
- NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_NOW, flags,
- NULL, 0, NULL, ZIO_STAGE_OPEN, stage);
+ BP_GET_PSIZE(bp), NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_NOW,
+ flags, NULL, 0, NULL, ZIO_STAGE_OPEN, stage);
return (zio);
}
@@ -888,8 +974,9 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
ASSERT(!BP_GET_DEDUP(bp) || !spa_writeable(spa)); /* zdb(1M) */
zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
- done, private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW, flags,
- NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_CLAIM_PIPELINE);
+ BP_GET_PSIZE(bp), done, private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW,
+ flags, NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_CLAIM_PIPELINE);
+ ASSERT0(zio->io_queued_timestamp);
return (zio);
}
@@ -902,7 +989,7 @@ zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
int c;
if (vd->vdev_children == 0) {
- zio = zio_create(pio, spa, 0, NULL, NULL, 0, done, private,
+ zio = zio_create(pio, spa, 0, NULL, NULL, 0, 0, done, private,
ZIO_TYPE_IOCTL, ZIO_PRIORITY_NOW, flags, vd, 0, NULL,
ZIO_STAGE_OPEN, ZIO_IOCTL_PIPELINE);
@@ -920,7 +1007,7 @@ zio_ioctl(zio_t *pio, spa_t *spa, vdev_t *vd, int cmd,
zio_t *
zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
- void *data, int checksum, zio_done_func_t *done, void *private,
+ abd_t *data, int checksum, zio_done_func_t *done, void *private,
zio_priority_t priority, enum zio_flag flags, boolean_t labels)
{
zio_t *zio;
@@ -930,9 +1017,9 @@ zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
ASSERT3U(offset + size, <=, vd->vdev_psize);
- zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, done, private,
- ZIO_TYPE_READ, priority, flags | ZIO_FLAG_PHYSICAL, vd, offset,
- NULL, ZIO_STAGE_OPEN, ZIO_READ_PHYS_PIPELINE);
+ zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, size, done,
+ private, ZIO_TYPE_READ, priority, flags | ZIO_FLAG_PHYSICAL, vd,
+ offset, NULL, ZIO_STAGE_OPEN, ZIO_READ_PHYS_PIPELINE);
zio->io_prop.zp_checksum = checksum;
@@ -941,7 +1028,7 @@ zio_read_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
zio_t *
zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
- void *data, int checksum, zio_done_func_t *done, void *private,
+ abd_t *data, int checksum, zio_done_func_t *done, void *private,
zio_priority_t priority, enum zio_flag flags, boolean_t labels)
{
zio_t *zio;
@@ -951,21 +1038,22 @@ zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE);
ASSERT3U(offset + size, <=, vd->vdev_psize);
- zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, done, private,
- ZIO_TYPE_WRITE, priority, flags | ZIO_FLAG_PHYSICAL, vd, offset,
- NULL, ZIO_STAGE_OPEN, ZIO_WRITE_PHYS_PIPELINE);
+ zio = zio_create(pio, vd->vdev_spa, 0, NULL, data, size, size, done,
+ private, ZIO_TYPE_WRITE, priority, flags | ZIO_FLAG_PHYSICAL, vd,
+ offset, NULL, ZIO_STAGE_OPEN, ZIO_WRITE_PHYS_PIPELINE);
zio->io_prop.zp_checksum = checksum;
- if (zio_checksum_table[checksum].ci_eck) {
+ if (zio_checksum_table[checksum].ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
/*
* zec checksums are necessarily destructive -- they modify
* the end of the write buffer to hold the verifier/checksum.
* Therefore, we must make a local copy in case the data is
* being written to multiple places in parallel.
*/
- void *wbuf = zio_buf_alloc(size);
- bcopy(data, wbuf, size);
+ abd_t *wbuf = abd_alloc_sametype(data, size);
+ abd_copy(wbuf, data, size);
+
zio_push_transform(zio, wbuf, size, size, NULL);
}
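Embedded (zec) checksums overwrite the tail of the buffer with the verifier, so a physical write that may be mirrored to several places must mutate a private copy; under ABDs that is abd_alloc_sametype plus abd_copy, pushed as a transform so zio_pop_transforms reclaims it. The copy idiom in isolation:

/* Sketch: private, same-type copy of an ABD before destructive use. */
abd_t *wbuf = abd_alloc_sametype(data, size);	/* linear iff data is */

abd_copy(wbuf, data, size);
/* ... scribble the verifier into wbuf; 'data' stays pristine ... */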
@@ -977,8 +1065,8 @@ zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
*/
zio_t *
zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
- void *data, uint64_t size, int type, zio_priority_t priority,
- enum zio_flag flags, zio_done_func_t *done, void *private)
+ abd_t *data, uint64_t size, int type, zio_priority_t priority,
+ enum zio_flag flags, zio_done_func_t *done, void *private)
{
enum zio_stage pipeline = ZIO_VDEV_CHILD_PIPELINE;
zio_t *zio;
@@ -1009,9 +1097,31 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
if (flags & ZIO_FLAG_IO_REPAIR)
flags &= ~ZIO_FLAG_SPECULATIVE;
- zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size,
+ /*
+ * If we're creating a child I/O that is not associated with a
+ * top-level vdev, then the child zio is not an allocating I/O.
+ * If this is a retried I/O then we ignore it since we will
+ * have already processed the original allocating I/O.
+ */
+ if (flags & ZIO_FLAG_IO_ALLOCATING &&
+ (vd != vd->vdev_top || (flags & ZIO_FLAG_IO_RETRY))) {
+ ASSERTV(metaslab_class_t *mc = spa_normal_class(pio->io_spa));
+
+ ASSERT(mc->mc_alloc_throttle_enabled);
+ ASSERT(type == ZIO_TYPE_WRITE);
+ ASSERT(priority == ZIO_PRIORITY_ASYNC_WRITE);
+ ASSERT(!(flags & ZIO_FLAG_IO_REPAIR));
+ ASSERT(!(pio->io_flags & ZIO_FLAG_IO_REWRITE) ||
+ pio->io_child_type == ZIO_CHILD_GANG);
+
+ flags &= ~ZIO_FLAG_IO_ALLOCATING;
+ }
+
+ zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size, size,
done, private, type, priority, flags, vd, offset, &pio->io_bookmark,
ZIO_STAGE_VDEV_IO_START >> 1, pipeline);
+ ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV);
zio->io_physdone = pio->io_physdone;
if (vd->vdev_ops->vdev_op_leaf && zio->io_logical != NULL)
@@ -1021,16 +1131,16 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
}
zio_t *
-zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, void *data, uint64_t size,
- int type, zio_priority_t priority, enum zio_flag flags,
- zio_done_func_t *done, void *private)
+zio_vdev_delegated_io(vdev_t *vd, uint64_t offset, abd_t *data, uint64_t size,
+ int type, zio_priority_t priority, enum zio_flag flags,
+ zio_done_func_t *done, void *private)
{
zio_t *zio;
ASSERT(vd->vdev_ops->vdev_op_leaf);
zio = zio_create(NULL, vd->vdev_spa, 0, NULL,
- data, size, done, private, type, priority,
+ data, size, size, done, private, type, priority,
flags | ZIO_FLAG_CANFAIL | ZIO_FLAG_DONT_RETRY | ZIO_FLAG_DELEGATED,
vd, offset, NULL,
ZIO_STAGE_VDEV_IO_START >> 1, ZIO_VDEV_CHILD_PIPELINE);
@@ -1059,8 +1169,11 @@ zio_shrink(zio_t *zio, uint64_t size)
* Note, BP_IS_RAIDZ() assumes no compression.
*/
ASSERT(BP_GET_COMPRESS(zio->io_bp) == ZIO_COMPRESS_OFF);
- if (!BP_IS_RAIDZ(zio->io_bp))
- zio->io_orig_size = zio->io_size = size;
+ if (!BP_IS_RAIDZ(zio->io_bp)) {
+ /* we are not doing a raw write */
+ ASSERT3U(zio->io_size, ==, zio->io_lsize);
+ zio->io_orig_size = zio->io_size = zio->io_lsize = size;
+ }
}
/*
@@ -1079,14 +1192,17 @@ zio_read_bp_init(zio_t *zio)
!(zio->io_flags & ZIO_FLAG_RAW)) {
uint64_t psize =
BP_IS_EMBEDDED(bp) ? BPE_GET_PSIZE(bp) : BP_GET_PSIZE(bp);
- void *cbuf = zio_buf_alloc(psize);
-
- zio_push_transform(zio, cbuf, psize, psize, zio_decompress);
+ zio_push_transform(zio, abd_alloc_sametype(zio->io_abd, psize),
+ psize, psize, zio_decompress);
}
if (BP_IS_EMBEDDED(bp) && BPE_GET_ETYPE(bp) == BP_EMBEDDED_TYPE_DATA) {
+ int psize = BPE_GET_PSIZE(bp);
+ void *data = abd_borrow_buf(zio->io_abd, psize);
+
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
- decode_embedded_bp_compressed(bp, zio->io_data);
+ decode_embedded_bp_compressed(bp, data);
+ abd_return_buf_copy(zio->io_abd, data, psize);
} else {
ASSERT(!BP_IS_EMBEDDED(bp));
}
@@ -1106,21 +1222,6 @@ zio_read_bp_init(zio_t *zio)
static int
zio_write_bp_init(zio_t *zio)
{
- spa_t *spa = zio->io_spa;
- zio_prop_t *zp = &zio->io_prop;
- enum zio_compress compress = zp->zp_compress;
- blkptr_t *bp = zio->io_bp;
- uint64_t lsize = zio->io_size;
- uint64_t psize = lsize;
- int pass = 1;
-
- /*
- * If our children haven't all reached the ready stage,
- * wait for them and then repeat this pipeline stage.
- */
- if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) ||
- zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_READY))
- return (ZIO_PIPELINE_STOP);
if (!IO_IS_ALLOCATING(zio))
return (ZIO_PIPELINE_CONTINUE);
@@ -1128,6 +1229,9 @@ zio_write_bp_init(zio_t *zio)
ASSERT(zio->io_child_type != ZIO_CHILD_DDT);
if (zio->io_bp_override) {
+ blkptr_t *bp = zio->io_bp;
+ zio_prop_t *zp = &zio->io_prop;
+
ASSERT(bp->blk_birth != zio->io_txg);
ASSERT(BP_GET_DEDUP(zio->io_bp_override) == 0);
@@ -1144,6 +1248,7 @@ zio_write_bp_init(zio_t *zio)
*/
if (!BP_IS_HOLE(bp) && zp->zp_nopwrite) {
ASSERT(!zp->zp_dedup);
+ ASSERT3U(BP_GET_CHECKSUM(bp), ==, zp->zp_checksum);
zio->io_flags |= ZIO_FLAG_NOPWRITE;
return (ZIO_PIPELINE_CONTINUE);
}
@@ -1153,16 +1258,64 @@ zio_write_bp_init(zio_t *zio)
if (BP_IS_HOLE(bp) || !zp->zp_dedup)
return (ZIO_PIPELINE_CONTINUE);
- ASSERT(zio_checksum_table[zp->zp_checksum].ci_dedup ||
- zp->zp_dedup_verify);
+ ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags &
+ ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify);
if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) {
BP_SET_DEDUP(bp, 1);
zio->io_pipeline |= ZIO_STAGE_DDT_WRITE;
return (ZIO_PIPELINE_CONTINUE);
}
+
+ /*
+ * We were unable to handle this as an override bp, treat
+ * it as a regular write I/O.
+ */
+ zio->io_bp_override = NULL;
+ *bp = zio->io_bp_orig;
+ zio->io_pipeline = zio->io_orig_pipeline;
+ }
+
+ return (ZIO_PIPELINE_CONTINUE);
+}
+
+static int
+zio_write_compress(zio_t *zio)
+{
+ spa_t *spa = zio->io_spa;
+ zio_prop_t *zp = &zio->io_prop;
+ enum zio_compress compress = zp->zp_compress;
+ blkptr_t *bp = zio->io_bp;
+ uint64_t lsize = zio->io_lsize;
+ uint64_t psize = zio->io_size;
+ int pass = 1;
+
+ EQUIV(lsize != psize, (zio->io_flags & ZIO_FLAG_RAW) != 0);
+
+ /*
+ * If our children haven't all reached the ready stage,
+ * wait for them and then repeat this pipeline stage.
+ */
+ if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) ||
+ zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_READY))
+ return (ZIO_PIPELINE_STOP);
+
+ if (!IO_IS_ALLOCATING(zio))
+ return (ZIO_PIPELINE_CONTINUE);
+
+ if (zio->io_children_ready != NULL) {
+ /*
+ * Now that all our children are ready, run the callback
+ * associated with this zio in case it wants to modify the
+ * data to be written.
+ */
+ ASSERT3U(zp->zp_level, >, 0);
+ zio->io_children_ready(zio);
}
+ ASSERT(zio->io_child_type != ZIO_CHILD_DDT);
+ ASSERT(zio->io_bp_override == NULL);
+
if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg) {
/*
* We're rewriting an existing block, which means we're
@@ -1187,9 +1340,10 @@ zio_write_bp_init(zio_t *zio)
spa_max_replication(spa)) == BP_GET_NDVAS(bp));
}
- if (compress != ZIO_COMPRESS_OFF) {
+ /* If it's a compressed write that is not raw, compress the buffer. */
+ if (compress != ZIO_COMPRESS_OFF && psize == lsize) {
void *cbuf = zio_buf_alloc(lsize);
- psize = zio_compress_data(compress, zio->io_data, cbuf, lsize);
+ psize = zio_compress_data(compress, zio->io_abd, cbuf, lsize);
if (psize == 0 || psize == lsize) {
compress = ZIO_COMPRESS_OFF;
zio_buf_free(cbuf, lsize);
@@ -1227,12 +1381,26 @@ zio_write_bp_init(zio_t *zio)
zio_buf_free(cbuf, lsize);
psize = lsize;
} else {
- bzero((char *)cbuf + psize, rounded - psize);
+ abd_t *cdata = abd_get_from_buf(cbuf, lsize);
+ abd_take_ownership_of_buf(cdata, B_TRUE);
+ abd_zero_off(cdata, psize, rounded - psize);
psize = rounded;
- zio_push_transform(zio, cbuf,
+ zio_push_transform(zio, cdata,
psize, lsize, NULL);
}
}
+
+ /*
+ * We were unable to handle this as an override bp, treat
+ * it as a regular write I/O.
+ */
+ zio->io_bp_override = NULL;
+ *bp = zio->io_bp_orig;
+ zio->io_pipeline = zio->io_orig_pipeline;
+
+ } else {
+ ASSERT3U(psize, !=, 0);
+
}
/*
@@ -1285,7 +1453,6 @@ zio_write_bp_init(zio_t *zio)
zio->io_pipeline |= ZIO_STAGE_NOP_WRITE;
}
}
-
return (ZIO_PIPELINE_CONTINUE);
}
@@ -1382,6 +1549,76 @@ zio_interrupt(zio_t *zio)
zio_taskq_dispatch(zio, ZIO_TASKQ_INTERRUPT, B_FALSE);
}
+void
+zio_delay_interrupt(zio_t *zio)
+{
+ /*
+ * The timeout_generic() function isn't defined in userspace, so
+ * rather than trying to implement the function, the zio delay
+ * functionality has been disabled for userspace builds.
+ */
+
+#ifdef _KERNEL
+ /*
+ * If io_target_timestamp is zero, then no delay has been registered
+ * for this IO, thus jump to the end of this function and "skip" the
+ * delay; issuing it directly to the zio layer.
+ */
+ if (zio->io_target_timestamp != 0) {
+ hrtime_t now = gethrtime();
+
+ if (now >= zio->io_target_timestamp) {
+ /*
+ * This IO has already taken longer than the target
+ * delay to complete, so we don't want to delay it
+ * any longer; we "miss" the delay and issue it
+ * directly to the zio layer. This is likely due to
+ * the target latency being set to a value less than
+ * the underlying hardware can satisfy (e.g. delay
+ * set to 1ms, but the disks take 10ms to complete an
+ * IO request).
+ */
+
+ DTRACE_PROBE2(zio__delay__miss, zio_t *, zio,
+ hrtime_t, now);
+
+ zio_interrupt(zio);
+ } else {
+ taskqid_t tid;
+ hrtime_t diff = zio->io_target_timestamp - now;
+ clock_t expire_at_tick = ddi_get_lbolt() +
+ NSEC_TO_TICK(diff);
+
+ DTRACE_PROBE3(zio__delay__hit, zio_t *, zio,
+ hrtime_t, now, hrtime_t, diff);
+
+ if (NSEC_TO_TICK(diff) == 0) {
+ /* Our delay is less than a jiffy - just spin */
+ zfs_sleep_until(zio->io_target_timestamp);
+ } else {
+ /*
+ * Use taskq_dispatch_delay() in the place of
+ * OpenZFS's timeout_generic().
+ */
+ tid = taskq_dispatch_delay(system_taskq,
+ (task_func_t *)zio_interrupt,
+ zio, TQ_NOSLEEP, expire_at_tick);
+ if (tid == TASKQID_INVALID) {
+ /*
+ * Couldn't allocate a task. Just
+ * finish the zio without a delay.
+ */
+ zio_interrupt(zio);
+ }
+ }
+ }
+ return;
+ }
+#endif
+ DTRACE_PROBE1(zio__delay__skip, zio_t *, zio);
+ zio_interrupt(zio);
+}
+
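The delay path converts the remaining nanoseconds to scheduler ticks to pick a strategy: a sub-jiffy remainder spins in zfs_sleep_until, anything longer is queued with taskq_dispatch_delay as the Linux stand-in for illumos' timeout_generic. A worked conversion, assuming a HZ=250 kernel, 4 ms per jiffy (the HZ value is an assumption):

/* NSEC_TO_TICK(diff) on HZ=250: one tick per 4,000,000 ns. */
hrtime_t diff;

diff = 1500000;			/* 1.5 ms remaining */
/* NSEC_TO_TICK(diff) == 0  -> spin via zfs_sleep_until() */

diff = 10000000;		/* 10 ms remaining */
/* NSEC_TO_TICK(diff) == 2  -> dispatch at ddi_get_lbolt() + 2 ticks */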
/*
* Execute the I/O pipeline until one of the following occurs:
* (1) the I/O completes; (2) the pipeline stalls waiting for
@@ -1446,6 +1683,8 @@ __zio_execute(zio_t *zio)
{
zio->io_executor = curthread;
+ ASSERT3U(zio->io_queued_timestamp, >, 0);
+
while (zio->io_stage < ZIO_STAGE_DONE) {
enum zio_stage pipeline = zio->io_pipeline;
enum zio_stage stage = zio->io_stage;
@@ -1490,6 +1729,7 @@ __zio_execute(zio_t *zio)
}
zio->io_stage = stage;
+ zio->io_pipeline_trace |= zio->io_stage;
rv = zio_pipeline[highbit64(stage) - 1](zio);
if (rv == ZIO_PIPELINE_STOP)
@@ -1514,6 +1754,8 @@ zio_wait(zio_t *zio)
ASSERT(zio->io_executor == NULL);
zio->io_waiter = curthread;
+ ASSERT0(zio->io_queued_timestamp);
+ zio->io_queued_timestamp = gethrtime();
__zio_execute(zio);
@@ -1550,6 +1792,8 @@ zio_nowait(zio_t *zio)
zio_add_child(pio, zio);
}
+ ASSERT0(zio->io_queued_timestamp);
+ zio->io_queued_timestamp = gethrtime();
__zio_execute(zio);
}
@@ -1564,6 +1808,7 @@ zio_reexecute(zio_t *pio)
{
zio_t *cio, *cio_next;
int c, w;
+ zio_link_t *zl = NULL;
ASSERT(pio->io_child_type == ZIO_CHILD_LOGICAL);
ASSERT(pio->io_orig_stage == ZIO_STAGE_OPEN);
@@ -1575,6 +1820,7 @@ zio_reexecute(zio_t *pio)
pio->io_pipeline = pio->io_orig_pipeline;
pio->io_reexecute = 0;
pio->io_flags |= ZIO_FLAG_REEXECUTED;
+ pio->io_pipeline_trace = 0;
pio->io_error = 0;
for (w = 0; w < ZIO_WAIT_TYPES; w++)
pio->io_state[w] = 0;
@@ -1591,8 +1837,8 @@ zio_reexecute(zio_t *pio)
* the remainder of pio's io_child_list, from 'cio_next' onward,
* cannot be affected by any side effects of reexecuting 'cio'.
*/
- for (cio = zio_walk_children(pio); cio != NULL; cio = cio_next) {
- cio_next = zio_walk_children(pio);
+ for (cio = zio_walk_children(pio, &zl); cio != NULL; cio = cio_next) {
+ cio_next = zio_walk_children(pio, &zl);
mutex_enter(&pio->io_lock);
for (w = 0; w < ZIO_WAIT_TYPES; w++)
pio->io_children[cio->io_child_type][w]++;
@@ -1603,10 +1849,12 @@ zio_reexecute(zio_t *pio)
/*
* Now that all children have been reexecuted, execute the parent.
* We don't reexecute "The Godfather" I/O here as it's the
- * responsibility of the caller to wait on him.
+ * responsibility of the caller to wait on it.
*/
- if (!(pio->io_flags & ZIO_FLAG_GODFATHER))
+ if (!(pio->io_flags & ZIO_FLAG_GODFATHER)) {
+ pio->io_queued_timestamp = gethrtime();
__zio_execute(pio);
+ }
}
void
@@ -1740,26 +1988,38 @@ zio_resume_wait(spa_t *spa)
* ==========================================================================
*/
+static void
+zio_gang_issue_func_done(zio_t *zio)
+{
+ abd_put(zio->io_abd);
+}
+
static zio_t *
-zio_read_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data)
+zio_read_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data,
+ uint64_t offset)
{
if (gn != NULL)
return (pio);
- return (zio_read(pio, pio->io_spa, bp, data, BP_GET_PSIZE(bp),
- NULL, NULL, pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio),
+ return (zio_read(pio, pio->io_spa, bp, abd_get_offset(data, offset),
+ BP_GET_PSIZE(bp), zio_gang_issue_func_done,
+ NULL, pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio),
&pio->io_bookmark));
}
-zio_t *
-zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data)
+static zio_t *
+zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data,
+ uint64_t offset)
{
zio_t *zio;
if (gn != NULL) {
+ abd_t *gbh_abd =
+ abd_get_from_buf(gn->gn_gbh, SPA_GANGBLOCKSIZE);
zio = zio_rewrite(pio, pio->io_spa, pio->io_txg, bp,
- gn->gn_gbh, SPA_GANGBLOCKSIZE, NULL, NULL, pio->io_priority,
- ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
+ gbh_abd, SPA_GANGBLOCKSIZE, zio_gang_issue_func_done, NULL,
+ pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio),
+ &pio->io_bookmark);
/*
* As we rewrite each gang header, the pipeline will compute
* a new gang block header checksum for it; but no one will
@@ -1770,8 +2030,12 @@ zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data)
* this is just good hygiene.)
*/
if (gn != pio->io_gang_leader->io_gang_tree) {
+ abd_t *buf = abd_get_offset(data, offset);
+
zio_checksum_compute(zio, BP_GET_CHECKSUM(bp),
- data, BP_GET_PSIZE(bp));
+ buf, BP_GET_PSIZE(bp));
+
+ abd_put(buf);
}
/*
* If we are here to damage data for testing purposes,
@@ -1781,7 +2045,8 @@ zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data)
zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES;
} else {
zio = zio_rewrite(pio, pio->io_spa, pio->io_txg, bp,
- data, BP_GET_PSIZE(bp), NULL, NULL, pio->io_priority,
+ abd_get_offset(data, offset), BP_GET_PSIZE(bp),
+ zio_gang_issue_func_done, NULL, pio->io_priority,
ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
}
@@ -1789,16 +2054,18 @@ zio_rewrite_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data)
}
/* ARGSUSED */
-zio_t *
-zio_free_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data)
+static zio_t *
+zio_free_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data,
+ uint64_t offset)
{
return (zio_free_sync(pio, pio->io_spa, pio->io_txg, bp,
ZIO_GANG_CHILD_FLAGS(pio)));
}
/* ARGSUSED */
-zio_t *
-zio_claim_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, void *data)
+static zio_t *
+zio_claim_gang(zio_t *pio, blkptr_t *bp, zio_gang_node_t *gn, abd_t *data,
+ uint64_t offset)
{
return (zio_claim(pio, pio->io_spa, pio->io_txg, bp,
NULL, NULL, ZIO_GANG_CHILD_FLAGS(pio)));
@@ -1862,13 +2129,14 @@ static void
zio_gang_tree_assemble(zio_t *gio, blkptr_t *bp, zio_gang_node_t **gnpp)
{
zio_gang_node_t *gn = zio_gang_node_alloc(gnpp);
+ abd_t *gbh_abd = abd_get_from_buf(gn->gn_gbh, SPA_GANGBLOCKSIZE);
ASSERT(gio->io_gang_leader == gio);
ASSERT(BP_IS_GANG(bp));
- zio_nowait(zio_read(gio, gio->io_spa, bp, gn->gn_gbh,
- SPA_GANGBLOCKSIZE, zio_gang_tree_assemble_done, gn,
- gio->io_priority, ZIO_GANG_CHILD_FLAGS(gio), &gio->io_bookmark));
+ zio_nowait(zio_read(gio, gio->io_spa, bp, gbh_abd, SPA_GANGBLOCKSIZE,
+ zio_gang_tree_assemble_done, gn, gio->io_priority,
+ ZIO_GANG_CHILD_FLAGS(gio), &gio->io_bookmark));
}
static void
@@ -1885,13 +2153,16 @@ zio_gang_tree_assemble_done(zio_t *zio)
if (zio->io_error)
return;
+ /* this ABD was created from a linear buf in zio_gang_tree_assemble */
if (BP_SHOULD_BYTESWAP(bp))
- byteswap_uint64_array(zio->io_data, zio->io_size);
+ byteswap_uint64_array(abd_to_buf(zio->io_abd), zio->io_size);
- ASSERT(zio->io_data == gn->gn_gbh);
+ ASSERT3P(abd_to_buf(zio->io_abd), ==, gn->gn_gbh);
ASSERT(zio->io_size == SPA_GANGBLOCKSIZE);
ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC);
+ abd_put(zio->io_abd);
+
for (g = 0; g < SPA_GBH_NBLKPTRS; g++) {
blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g];
if (!BP_IS_GANG(gbp))
@@ -1901,7 +2172,8 @@ zio_gang_tree_assemble_done(zio_t *zio)
}
static void
-zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, void *data)
+zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, abd_t *data,
+ uint64_t offset)
{
zio_t *gio = pio->io_gang_leader;
zio_t *zio;
@@ -1915,7 +2187,7 @@ zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, void *data)
* If you're a gang header, your data is in gn->gn_gbh.
* If you're a gang member, your data is in 'data' and gn == NULL.
*/
- zio = zio_gang_issue_func[gio->io_type](pio, bp, gn, data);
+ zio = zio_gang_issue_func[gio->io_type](pio, bp, gn, data, offset);
if (gn != NULL) {
ASSERT(gn->gn_gbh->zg_tail.zec_magic == ZEC_MAGIC);
@@ -1924,13 +2196,14 @@ zio_gang_tree_issue(zio_t *pio, zio_gang_node_t *gn, blkptr_t *bp, void *data)
blkptr_t *gbp = &gn->gn_gbh->zg_blkptr[g];
if (BP_IS_HOLE(gbp))
continue;
- zio_gang_tree_issue(zio, gn->gn_child[g], gbp, data);
- data = (char *)data + BP_GET_PSIZE(gbp);
+ zio_gang_tree_issue(zio, gn->gn_child[g], gbp, data,
+ offset);
+ offset += BP_GET_PSIZE(gbp);
}
}
if (gn == gio->io_gang_tree)
- ASSERT3P((char *)gio->io_data + gio->io_size, ==, data);
+ ASSERT3U(gio->io_size, ==, offset);
if (zio != pio)
zio_nowait(zio);
@@ -1963,7 +2236,8 @@ zio_gang_issue(zio_t *zio)
ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
if (zio->io_child_error[ZIO_CHILD_GANG] == 0)
- zio_gang_tree_issue(zio, zio->io_gang_tree, bp, zio->io_data);
+ zio_gang_tree_issue(zio, zio->io_gang_tree, bp, zio->io_abd,
+ 0);
else
zio_gang_tree_free(&zio->io_gang_tree);
@@ -2003,15 +2277,23 @@ zio_write_gang_member_ready(zio_t *zio)
mutex_exit(&pio->io_lock);
}
+static void
+zio_write_gang_done(zio_t *zio)
+{
+ abd_put(zio->io_abd);
+}
+
static int
zio_write_gang_block(zio_t *pio)
{
spa_t *spa = pio->io_spa;
+ metaslab_class_t *mc = spa_normal_class(spa);
blkptr_t *bp = pio->io_bp;
zio_t *gio = pio->io_gang_leader;
zio_t *zio;
zio_gang_node_t *gn, **gnpp;
zio_gbh_phys_t *gbh;
+ abd_t *gbh_abd;
uint64_t txg = pio->io_txg;
uint64_t resid = pio->io_size;
uint64_t lsize;
@@ -2020,10 +2302,45 @@ zio_write_gang_block(zio_t *pio)
zio_prop_t zp;
int g, error;
- error = metaslab_alloc(spa, spa_normal_class(spa), SPA_GANGBLOCKSIZE,
- bp, gbh_copies, txg, pio == gio ? NULL : gio->io_bp,
- METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER);
+ int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER;
+ if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
+ ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
+ ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA));
+
+ flags |= METASLAB_ASYNC_ALLOC;
+ VERIFY(refcount_held(&mc->mc_alloc_slots, pio));
+
+ /*
+ * The logical zio has already placed a reservation for
+ * 'copies' allocation slots but gang blocks may require
+ * additional copies. These additional copies
+ * (i.e. gbh_copies - copies) are guaranteed to succeed
+ * since metaslab_class_throttle_reserve() always allows
+ * additional reservations for gang blocks.
+ */
+ VERIFY(metaslab_class_throttle_reserve(mc, gbh_copies - copies,
+ pio, flags));
+ }
+
+ error = metaslab_alloc(spa, mc, SPA_GANGBLOCKSIZE,
+ bp, gbh_copies, txg, pio == gio ? NULL : gio->io_bp, flags,
+ &pio->io_alloc_list, pio);
if (error) {
+ if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
+ ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
+ ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA));
+
+ /*
+ * If we failed to allocate the gang block header then
+ * we remove any additional allocation reservations that
+ * we placed here. The original reservation will
+ * be removed when the logical I/O goes to the ready
+ * stage.
+ */
+ metaslab_class_throttle_unreserve(mc,
+ gbh_copies - copies, pio);
+ }
+
pio->io_error = error;
return (ZIO_PIPELINE_CONTINUE);
}
@@ -2038,17 +2355,21 @@ zio_write_gang_block(zio_t *pio)
gn = zio_gang_node_alloc(gnpp);
gbh = gn->gn_gbh;
bzero(gbh, SPA_GANGBLOCKSIZE);
+ gbh_abd = abd_get_from_buf(gbh, SPA_GANGBLOCKSIZE);
/*
* Create the gang header.
*/
- zio = zio_rewrite(pio, spa, txg, bp, gbh, SPA_GANGBLOCKSIZE, NULL, NULL,
- pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
+ zio = zio_rewrite(pio, spa, txg, bp, gbh_abd, SPA_GANGBLOCKSIZE,
+ zio_write_gang_done, NULL, pio->io_priority,
+ ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
/*
* Create and nowait the gang children.
*/
for (g = 0; resid != 0; resid -= lsize, g++) {
+ zio_t *cio;
+
lsize = P2ROUNDUP(resid / (SPA_GBH_NBLKPTRS - g),
SPA_MINBLOCKSIZE);
ASSERT(lsize >= SPA_MINBLOCKSIZE && lsize <= resid);
@@ -2062,11 +2383,25 @@ zio_write_gang_block(zio_t *pio)
zp.zp_dedup_verify = B_FALSE;
zp.zp_nopwrite = B_FALSE;
- zio_nowait(zio_write(zio, spa, txg, &gbh->zg_blkptr[g],
- (char *)pio->io_data + (pio->io_size - resid), lsize, &zp,
- zio_write_gang_member_ready, NULL, NULL, &gn->gn_child[g],
- pio->io_priority, ZIO_GANG_CHILD_FLAGS(pio),
- &pio->io_bookmark));
+ cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g],
+ abd_get_offset(pio->io_abd, pio->io_size - resid), lsize,
+ lsize, &zp, zio_write_gang_member_ready, NULL, NULL,
+ zio_write_gang_done, &gn->gn_child[g], pio->io_priority,
+ ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
+
+ if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
+ ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
+ ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA));
+
+ /*
+ * Gang children won't throttle but we should
+ * account for their work, so reserve an allocation
+ * slot for them here.
+ */
+ VERIFY(metaslab_class_throttle_reserve(mc,
+ zp.zp_copies, cio, flags));
+ }
+ zio_nowait(cio);
}
/*
@@ -2085,12 +2420,22 @@ zio_write_gang_block(zio_t *pio)
}
/*
- * The zio_nop_write stage in the pipeline determines if allocating
- * a new bp is necessary. By leveraging a cryptographically secure checksum,
- * such as SHA256, we can compare the checksums of the new data and the old
- * to determine if allocating a new block is required. The nopwrite
- * feature can handle writes in either syncing or open context (i.e. zil
- * writes) and as a result is mutually exclusive with dedup.
+ * The zio_nop_write stage in the pipeline determines if allocating a
+ * new bp is necessary. The nopwrite feature can handle writes in
+ * either syncing or open context (i.e. zil writes) and as a result is
+ * mutually exclusive with dedup.
+ *
+ * By leveraging a cryptographically secure checksum, such as SHA256, we
+ * can compare the checksums of the new data and the old to determine if
+ * allocating a new block is required. Note that our requirements for
+ * cryptographic strength are fairly weak: there can't be any accidental
+ * hash collisions, but we don't need to be secure against intentional
+ * (malicious) collisions. To trigger a nopwrite, you have to be able
+ * to write the file to begin with, and triggering an incorrect (hash
+ * collision) nopwrite is no worse than simply writing to the file.
+ * That said, there are no known attacks against the checksum algorithms
+ * used for nopwrite, assuming that the salt and the checksums
+ * themselves remain secret.
*/
static int
zio_nop_write(zio_t *zio)
@@ -2113,7 +2458,8 @@ zio_nop_write(zio_t *zio)
* allocate a new bp.
*/
if (BP_IS_HOLE(bp_orig) ||
- !zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_dedup ||
+ !(zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_flags &
+ ZCHECKSUM_FLAG_NOPWRITE) ||
BP_GET_CHECKSUM(bp) != BP_GET_CHECKSUM(bp_orig) ||
BP_GET_COMPRESS(bp) != BP_GET_COMPRESS(bp_orig) ||
BP_GET_DEDUP(bp) != BP_GET_DEDUP(bp_orig) ||
@@ -2125,7 +2471,8 @@ zio_nop_write(zio_t *zio)
* avoid allocating a new bp and issuing any I/O.
*/
if (ZIO_CHECKSUM_EQUAL(bp->blk_cksum, bp_orig->blk_cksum)) {
- ASSERT(zio_checksum_table[zp->zp_checksum].ci_dedup);
+ ASSERT(zio_checksum_table[zp->zp_checksum].ci_flags &
+ ZCHECKSUM_FLAG_NOPWRITE);
ASSERT3U(BP_GET_PSIZE(bp), ==, BP_GET_PSIZE(bp_orig));
ASSERT3U(BP_GET_LSIZE(bp), ==, BP_GET_LSIZE(bp_orig));
ASSERT(zp->zp_compress != ZIO_COMPRESS_OFF);
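The ci_dedup boolean has become a ci_flags word, so each checksum now advertises its properties individually, and nopwrite is gated on ZCHECKSUM_FLAG_NOPWRITE rather than on dedup-worthiness. A small predicate in the spirit of these checks (the helper is hypothetical):

/* Sketch: capability test against the checksum table's new ci_flags. */
static boolean_t
checksum_supports_nopwrite(enum zio_checksum c)
{
	return ((zio_checksum_table[c].ci_flags &
	    ZCHECKSUM_FLAG_NOPWRITE) != 0);
}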
@@ -2157,10 +2504,11 @@ zio_ddt_child_read_done(zio_t *zio)
ddp = ddt_phys_select(dde, bp);
if (zio->io_error == 0)
ddt_phys_clear(ddp); /* this ddp doesn't need repair */
- if (zio->io_error == 0 && dde->dde_repair_data == NULL)
- dde->dde_repair_data = zio->io_data;
+
+ if (zio->io_error == 0 && dde->dde_repair_abd == NULL)
+ dde->dde_repair_abd = zio->io_abd;
else
- zio_buf_free(zio->io_data, zio->io_size);
+ abd_free(zio->io_abd);
mutex_exit(&pio->io_lock);
}
@@ -2193,16 +2541,16 @@ zio_ddt_read_start(zio_t *zio)
ddt_bp_create(ddt->ddt_checksum, &dde->dde_key, ddp,
&blk);
zio_nowait(zio_read(zio, zio->io_spa, &blk,
- zio_buf_alloc(zio->io_size), zio->io_size,
- zio_ddt_child_read_done, dde, zio->io_priority,
- ZIO_DDT_CHILD_FLAGS(zio) | ZIO_FLAG_DONT_PROPAGATE,
- &zio->io_bookmark));
+ abd_alloc_for_io(zio->io_size, B_TRUE),
+ zio->io_size, zio_ddt_child_read_done, dde,
+ zio->io_priority, ZIO_DDT_CHILD_FLAGS(zio) |
+ ZIO_FLAG_DONT_PROPAGATE, &zio->io_bookmark));
}
return (ZIO_PIPELINE_CONTINUE);
}
zio_nowait(zio_read(zio, zio->io_spa, bp,
- zio->io_data, zio->io_size, NULL, NULL, zio->io_priority,
+ zio->io_abd, zio->io_size, NULL, NULL, zio->io_priority,
ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark));
return (ZIO_PIPELINE_CONTINUE);
@@ -2232,8 +2580,9 @@ zio_ddt_read_done(zio_t *zio)
zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_FALSE);
return (ZIO_PIPELINE_STOP);
}
- if (dde->dde_repair_data != NULL) {
- bcopy(dde->dde_repair_data, zio->io_data, zio->io_size);
+ if (dde->dde_repair_abd != NULL) {
+ abd_copy(zio->io_abd, dde->dde_repair_abd,
+ zio->io_size);
zio->io_child_error[ZIO_CHILD_DDT] = 0;
}
ddt_repair_done(ddt, dde);
@@ -2250,27 +2599,67 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
{
spa_t *spa = zio->io_spa;
int p;
+ boolean_t do_raw = !!(zio->io_flags & ZIO_FLAG_RAW);
+
+ ASSERT(!(zio->io_bp_override && do_raw));
/*
* Note: we compare the original data, not the transformed data,
* because when zio->io_bp is an override bp, we will not have
* pushed the I/O transforms. That's an important optimization
* because otherwise we'd compress/encrypt all dmu_sync() data twice.
+ * However, we should never get a raw, override zio so in these
+ * cases we can compare the io_abd directly. This is useful because
+ * it allows us to do dedup verification even if we don't have access
+ * to the original data (for instance, if the encryption keys aren't
+ * loaded).
*/
+
for (p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) {
zio_t *lio = dde->dde_lead_zio[p];
- if (lio != NULL) {
+ if (lio != NULL && do_raw) {
+ return (lio->io_size != zio->io_size ||
+ abd_cmp(zio->io_abd, lio->io_abd) != 0);
+ } else if (lio != NULL) {
return (lio->io_orig_size != zio->io_orig_size ||
- bcmp(zio->io_orig_data, lio->io_orig_data,
- zio->io_orig_size) != 0);
+ abd_cmp(zio->io_orig_abd, lio->io_orig_abd) != 0);
}
}
for (p = DDT_PHYS_SINGLE; p <= DDT_PHYS_TRIPLE; p++) {
ddt_phys_t *ddp = &dde->dde_phys[p];
- if (ddp->ddp_phys_birth != 0) {
+ if (ddp->ddp_phys_birth != 0 && do_raw) {
+ blkptr_t blk = *zio->io_bp;
+ uint64_t psize;
+ abd_t *tmpabd;
+ int error;
+
+ ddt_bp_fill(ddp, &blk, ddp->ddp_phys_birth);
+ psize = BP_GET_PSIZE(&blk);
+
+ if (psize != zio->io_size)
+ return (B_TRUE);
+
+ ddt_exit(ddt);
+
+ tmpabd = abd_alloc_for_io(psize, B_TRUE);
+
+ error = zio_wait(zio_read(NULL, spa, &blk, tmpabd,
+ psize, NULL, NULL, ZIO_PRIORITY_SYNC_READ,
+ ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
+ ZIO_FLAG_RAW, &zio->io_bookmark));
+
+ if (error == 0) {
+ if (abd_cmp(tmpabd, zio->io_abd) != 0)
+ error = SET_ERROR(ENOENT);
+ }
+
+ abd_free(tmpabd);
+ ddt_enter(ddt);
+ return (error != 0);
+ } else if (ddp->ddp_phys_birth != 0) {
arc_buf_t *abuf = NULL;
arc_flags_t aflags = ARC_FLAG_WAIT;
blkptr_t blk = *zio->io_bp;
@@ -2278,6 +2667,9 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
ddt_bp_fill(ddp, &blk, ddp->ddp_phys_birth);
+ if (BP_GET_LSIZE(&blk) != zio->io_orig_size)
+ return (B_TRUE);
+
ddt_exit(ddt);
error = arc_read(NULL, spa, &blk,
@@ -2286,11 +2678,10 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
&aflags, &zio->io_bookmark);
if (error == 0) {
- if (arc_buf_size(abuf) != zio->io_orig_size ||
- bcmp(abuf->b_data, zio->io_orig_data,
+ if (abd_cmp_buf(zio->io_orig_abd, abuf->b_data,
zio->io_orig_size) != 0)
- error = SET_ERROR(EEXIST);
- VERIFY(arc_buf_remove_ref(abuf, &abuf));
+ error = SET_ERROR(ENOENT);
+ arc_buf_destroy(abuf, &abuf);
}
ddt_enter(ddt);
@@ -2309,6 +2700,7 @@ zio_ddt_child_write_ready(zio_t *zio)
ddt_entry_t *dde = zio->io_private;
ddt_phys_t *ddp = &dde->dde_phys[p];
zio_t *pio;
+ zio_link_t *zl;
if (zio->io_error)
return;
@@ -2319,7 +2711,8 @@ zio_ddt_child_write_ready(zio_t *zio)
ddt_phys_fill(ddp, zio->io_bp);
- while ((pio = zio_walk_parents(zio)) != NULL)
+ zl = NULL;
+ while ((pio = zio_walk_parents(zio, &zl)) != NULL)
ddt_bp_fill(ddp, pio->io_bp, zio->io_txg);
ddt_exit(ddt);
@@ -2340,7 +2733,8 @@ zio_ddt_child_write_done(zio_t *zio)
dde->dde_lead_zio[p] = NULL;
if (zio->io_error == 0) {
- while (zio_walk_parents(zio) != NULL)
+ zio_link_t *zl = NULL;
+ while (zio_walk_parents(zio, &zl) != NULL)
ddt_phys_addref(ddp);
} else {
ddt_phys_clear(ddp);
@@ -2396,6 +2790,7 @@ zio_ddt_write(zio_t *zio)
ASSERT(BP_GET_DEDUP(bp));
ASSERT(BP_GET_CHECKSUM(bp) == zp->zp_checksum);
ASSERT(BP_IS_HOLE(bp) || zio->io_bp_override);
+ ASSERT(!(zio->io_bp_override && (zio->io_flags & ZIO_FLAG_RAW)));
ddt_enter(ddt);
dde = ddt_lookup(ddt, bp, B_TRUE);
@@ -2408,7 +2803,8 @@ zio_ddt_write(zio_t *zio)
* we can't resolve it, so just convert to an ordinary write.
* (And automatically e-mail a paper to Nature?)
*/
- if (!zio_checksum_table[zp->zp_checksum].ci_dedup) {
+ if (!(zio_checksum_table[zp->zp_checksum].ci_flags &
+ ZCHECKSUM_FLAG_DEDUP)) {
zp->zp_checksum = spa_dedup_checksum(spa);
zio_pop_transforms(zio);
zio->io_stage = ZIO_STAGE_OPEN;
@@ -2447,12 +2843,12 @@ zio_ddt_write(zio_t *zio)
return (ZIO_PIPELINE_CONTINUE);
}
- dio = zio_write(zio, spa, txg, bp, zio->io_orig_data,
- zio->io_orig_size, &czp, NULL, NULL,
- zio_ddt_ditto_write_done, dde, zio->io_priority,
+ dio = zio_write(zio, spa, txg, bp, zio->io_orig_abd,
+ zio->io_orig_size, zio->io_orig_size, &czp, NULL, NULL,
+ NULL, zio_ddt_ditto_write_done, dde, zio->io_priority,
ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark);
- zio_push_transform(dio, zio->io_data, zio->io_size, 0, NULL);
+ zio_push_transform(dio, zio->io_abd, zio->io_size, 0, NULL);
dde->dde_lead_zio[DDT_PHYS_DITTO] = dio;
}
@@ -2469,12 +2865,13 @@ zio_ddt_write(zio_t *zio)
ddt_phys_fill(ddp, bp);
ddt_phys_addref(ddp);
} else {
- cio = zio_write(zio, spa, txg, bp, zio->io_orig_data,
- zio->io_orig_size, zp, zio_ddt_child_write_ready, NULL,
+ cio = zio_write(zio, spa, txg, bp, zio->io_orig_abd,
+ zio->io_orig_size, zio->io_orig_size, zp,
+ zio_ddt_child_write_ready, NULL, NULL,
zio_ddt_child_write_done, dde, zio->io_priority,
ZIO_DDT_CHILD_FLAGS(zio), &zio->io_bookmark);
- zio_push_transform(cio, zio->io_data, zio->io_size, 0, NULL);
+ zio_push_transform(cio, zio->io_abd, zio->io_size, 0, NULL);
dde->dde_lead_zio[p] = cio;
}
@@ -2519,6 +2916,95 @@ zio_ddt_free(zio_t *zio)
* Allocate and free blocks
* ==========================================================================
*/
+
+static zio_t *
+zio_io_to_allocate(spa_t *spa)
+{
+ zio_t *zio;
+
+ ASSERT(MUTEX_HELD(&spa->spa_alloc_lock));
+
+ zio = avl_first(&spa->spa_alloc_tree);
+ if (zio == NULL)
+ return (NULL);
+
+ ASSERT(IO_IS_ALLOCATING(zio));
+
+ /*
+ * Try to place a reservation for this zio. If we're unable to
+ * reserve then we throttle.
+ */
+ if (!metaslab_class_throttle_reserve(spa_normal_class(spa),
+ zio->io_prop.zp_copies, zio, 0)) {
+ return (NULL);
+ }
+
+ avl_remove(&spa->spa_alloc_tree, zio);
+ ASSERT3U(zio->io_stage, <, ZIO_STAGE_DVA_ALLOCATE);
+
+ return (zio);
+}
+
+static int
+zio_dva_throttle(zio_t *zio)
+{
+ spa_t *spa = zio->io_spa;
+ zio_t *nio;
+
+ if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE ||
+ !spa_normal_class(zio->io_spa)->mc_alloc_throttle_enabled ||
+ zio->io_child_type == ZIO_CHILD_GANG ||
+ zio->io_flags & ZIO_FLAG_NODATA) {
+ return (ZIO_PIPELINE_CONTINUE);
+ }
+
+ ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
+
+ ASSERT3U(zio->io_queued_timestamp, >, 0);
+ ASSERT(zio->io_stage == ZIO_STAGE_DVA_THROTTLE);
+
+ mutex_enter(&spa->spa_alloc_lock);
+
+ ASSERT(zio->io_type == ZIO_TYPE_WRITE);
+ avl_add(&spa->spa_alloc_tree, zio);
+
+ nio = zio_io_to_allocate(zio->io_spa);
+ mutex_exit(&spa->spa_alloc_lock);
+
+ if (nio == zio)
+ return (ZIO_PIPELINE_CONTINUE);
+
+ if (nio != NULL) {
+ ASSERT(nio->io_stage == ZIO_STAGE_DVA_THROTTLE);
+ /*
+ * We are passing control to a new zio so make sure that
+ * it is processed by a different thread. We do this to
+ * avoid stack overflows that can occur when parents are
+ * throttled and children are making progress. We allow
+ * it to go to the head of the taskq since it's already
+ * been waiting.
+ */
+ zio_taskq_dispatch(nio, ZIO_TASKQ_ISSUE, B_TRUE);
+ }
+ return (ZIO_PIPELINE_STOP);
+}
+
+void
+zio_allocate_dispatch(spa_t *spa)
+{
+ zio_t *zio;
+
+ mutex_enter(&spa->spa_alloc_lock);
+ zio = zio_io_to_allocate(spa);
+ mutex_exit(&spa->spa_alloc_lock);
+ if (zio == NULL)
+ return;
+
+ ASSERT3U(zio->io_stage, ==, ZIO_STAGE_DVA_THROTTLE);
+ ASSERT0(zio->io_error);
+ zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_TRUE);
+}
+
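The three functions above implement the new allocation throttle: zio_dva_throttle() queues allocating writes on spa_alloc_tree, zio_io_to_allocate() dequeues a zio only once a metaslab reservation succeeds, and zio_allocate_dispatch() kicks the next waiter when capacity frees up. For readers following along, here is a minimal userspace sketch of the same shape, assuming a plain FIFO and a fixed slot budget in place of the AVL tree and metaslab_class_throttle_reserve(); all names are illustrative, not the ZFS API:

#include <stdint.h>
#include <stddef.h>
#include <pthread.h>

struct io {
    struct io *io_next;
    uint64_t io_copies;          /* cf. zp_copies */
};

typedef struct alloc_queue {
    pthread_mutex_t aq_lock;     /* cf. spa_alloc_lock */
    uint64_t aq_slots_max;       /* reservation budget */
    uint64_t aq_slots_used;      /* outstanding reservations */
    struct io *aq_head;          /* FIFO standing in for spa_alloc_tree */
} alloc_queue_t;

static void
dispatch_to_worker(struct io *io)
{
    (void) io;                   /* stand-in for zio_taskq_dispatch() */
}

/* Dequeue the next I/O only if its reservation fits; else throttle. */
static struct io *
io_to_allocate(alloc_queue_t *aq)
{
    struct io *io = aq->aq_head;

    if (io == NULL || aq->aq_slots_used + io->io_copies > aq->aq_slots_max)
        return (NULL);           /* reservation failed: caller stalls */

    aq->aq_slots_used += io->io_copies;
    aq->aq_head = io->io_next;
    return (io);
}

/* On completion: unreserve, then kick the next queued waiter. */
void
allocate_dispatch(alloc_queue_t *aq, uint64_t done_copies)
{
    struct io *next;

    pthread_mutex_lock(&aq->aq_lock);
    aq->aq_slots_used -= done_copies;
    next = io_to_allocate(aq);
    pthread_mutex_unlock(&aq->aq_lock);
    if (next != NULL)
        dispatch_to_worker(next);    /* never run the next I/O inline */
}

As in zio_dva_throttle(), the completing thread never runs the next allocation inline; it hands the zio to a taskq thread, which bounds stack depth when throttled parents and progressing children alternate.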
static int
zio_dva_allocate(zio_t *zio)
{
@@ -2539,19 +3025,19 @@ zio_dva_allocate(zio_t *zio)
ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
- /*
- * The dump device does not support gang blocks so allocation on
- * behalf of the dump device (i.e. ZIO_FLAG_NODATA) must avoid
- * the "fast" gang feature.
- */
- flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0;
- flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ?
- METASLAB_GANG_CHILD : 0;
flags |= (zio->io_flags & ZIO_FLAG_FASTWRITE) ? METASLAB_FASTWRITE : 0;
+ if (zio->io_flags & ZIO_FLAG_NODATA)
+ flags |= METASLAB_DONT_THROTTLE;
+ if (zio->io_flags & ZIO_FLAG_GANG_CHILD)
+ flags |= METASLAB_GANG_CHILD;
+ if (zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE)
+ flags |= METASLAB_ASYNC_ALLOC;
+
error = metaslab_alloc(spa, mc, zio->io_size, bp,
- zio->io_prop.zp_copies, zio->io_txg, NULL, flags);
+ zio->io_prop.zp_copies, zio->io_txg, NULL, flags,
+ &zio->io_alloc_list, zio);
- if (error) {
+ if (error != 0) {
spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, "
"size %llu, error %d", spa_name(spa), zio, zio->io_size,
error);
@@ -2612,28 +3098,26 @@ zio_dva_unallocate(zio_t *zio, zio_gang_node_t *gn, blkptr_t *bp)
*/
int
zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, uint64_t size,
- boolean_t use_slog)
+ boolean_t *slog)
{
int error = 1;
+ zio_alloc_list_t io_alloc_list;
ASSERT(txg > spa_syncing_txg(spa));
- /*
- * ZIL blocks are always contiguous (i.e. not gang blocks) so we
- * set the METASLAB_GANG_AVOID flag so that they don't "fast gang"
- * when allocating them.
- */
- if (use_slog) {
- error = metaslab_alloc(spa, spa_log_class(spa), size,
- new_bp, 1, txg, NULL,
- METASLAB_FASTWRITE | METASLAB_GANG_AVOID);
- }
-
- if (error) {
+ metaslab_trace_init(&io_alloc_list);
+ error = metaslab_alloc(spa, spa_log_class(spa), size, new_bp, 1,
+ txg, NULL, METASLAB_FASTWRITE, &io_alloc_list, NULL);
+ if (error == 0) {
+ *slog = TRUE;
+ } else {
error = metaslab_alloc(spa, spa_normal_class(spa), size,
- new_bp, 1, txg, NULL,
- METASLAB_FASTWRITE);
+ new_bp, 1, txg, NULL, METASLAB_FASTWRITE,
+ &io_alloc_list, NULL);
+ if (error == 0)
+ *slog = FALSE;
}
+ metaslab_trace_fini(&io_alloc_list);
if (error == 0) {
BP_SET_LSIZE(new_bp, size);
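The rewritten zio_alloc_zil() above is a straightforward primary/fallback allocation: try the log (slog) class first, fall back to the normal class, and report through *slog which one satisfied the request. Reduced to its shape, with a hypothetical alloc_from() in which the slog class pretends to be full:

#include <stdbool.h>
#include <stddef.h>

/* Hypothetical allocator: class 0 is the log class and is "full". */
static int
alloc_from(int mclass, void *bp)
{
    (void) bp;
    return (mclass == 0 ? -1 : 0);
}

int
alloc_zil_block(void *bp, bool *slog)
{
    int error = alloc_from(0, bp);      /* try the slog first */

    if (error == 0) {
        *slog = true;
    } else {
        error = alloc_from(1, bp);      /* fall back to normal class */
        if (error == 0)
            *slog = false;
    }
    return (error);
}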
@@ -2687,6 +3171,8 @@ zio_vdev_io_start(zio_t *zio)
uint64_t align;
spa_t *spa = zio->io_spa;
+ zio->io_delay = 0;
+
ASSERT(zio->io_error == 0);
ASSERT(zio->io_child_error[ZIO_CHILD_VDEV] == 0);
@@ -2701,6 +3187,8 @@ zio_vdev_io_start(zio_t *zio)
return (ZIO_PIPELINE_STOP);
}
+ ASSERT3P(zio->io_logical, !=, zio);
+
/*
* We keep track of time-sensitive I/Os so that the scan thread
* can quickly react to certain workloads. In particular, we care
@@ -2727,11 +3215,11 @@ zio_vdev_io_start(zio_t *zio)
P2PHASE(zio->io_size, align) != 0) {
/* Transform logical writes to be a full physical block size. */
uint64_t asize = P2ROUNDUP(zio->io_size, align);
- char *abuf = zio_buf_alloc(asize);
+ abd_t *abuf = abd_alloc_sametype(zio->io_abd, asize);
ASSERT(vd == vd->vdev_top);
if (zio->io_type == ZIO_TYPE_WRITE) {
- bcopy(zio->io_data, abuf, zio->io_size);
- bzero(abuf + zio->io_size, asize - zio->io_size);
+ abd_copy(abuf, zio->io_abd, zio->io_size);
+ abd_zero_off(abuf, zio->io_size, asize - zio->io_size);
}
zio_push_transform(zio, abuf, asize, asize, zio_subblock);
}
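For the subblock transform above: P2ROUNDUP() rounds the logical size up to the vdev's allocation size, and for writes the padding tail must be explicitly zeroed so stale memory never reaches disk. A worked standalone example with the macro spelled out, using plain memcpy()/memset() as stand-ins for abd_copy()/abd_zero_off():

#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>

/* Standard power-of-two round-up, as in the ZFS sysmacros. */
#define P2ROUNDUP(x, align) (-(-(uint64_t)(x) & -(uint64_t)(align)))

int
main(void)
{
    uint64_t size = 3000, align = 4096;
    uint64_t asize = P2ROUNDUP(size, align);    /* rounds up to 4096 */
    char *src = calloc(1, size), *abuf = malloc(asize);

    assert(asize == 4096);
    memcpy(abuf, src, size);                    /* cf. abd_copy() */
    memset(abuf + size, 0, asize - size);       /* cf. abd_zero_off() */
    free(src);
    free(abuf);
    return (0);
}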
@@ -2790,6 +3278,7 @@ zio_vdev_io_start(zio_t *zio)
zio_interrupt(zio);
return (ZIO_PIPELINE_STOP);
}
+ zio->io_delay = gethrtime();
}
vd->vdev_ops->vdev_op_io_start(zio);
@@ -2808,6 +3297,9 @@ zio_vdev_io_done(zio_t *zio)
ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
+ if (zio->io_delay)
+ zio->io_delay = gethrtime() - zio->io_delay;
+
if (vd != NULL && vd->vdev_ops->vdev_op_leaf) {
vdev_queue_io_done(zio);
@@ -2845,7 +3337,7 @@ zio_vdev_io_done(zio_t *zio)
*/
static void
zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr,
- const void *good_buf)
+ const abd_t *good_buf)
{
/* no processing needed */
zfs_ereport_finish_checksum(zcr, good_buf, zcr->zcr_cbdata, B_FALSE);
@@ -2855,14 +3347,14 @@ zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr,
void
zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *ignored)
{
- void *buf = zio_buf_alloc(zio->io_size);
+ void *abd = abd_alloc_sametype(zio->io_abd, zio->io_size);
- bcopy(zio->io_data, buf, zio->io_size);
+ abd_copy(abd, zio->io_abd, zio->io_size);
zcr->zcr_cbinfo = zio->io_size;
- zcr->zcr_cbdata = buf;
+ zcr->zcr_cbdata = abd;
zcr->zcr_finish = zio_vsd_default_cksum_finish;
- zcr->zcr_free = zio_buf_free;
+ zcr->zcr_free = zio_abd_free;
}
static int
@@ -2920,6 +3412,16 @@ zio_vdev_io_assess(zio_t *zio)
vd->vdev_cant_write = B_TRUE;
}
+ /*
+ * If a cache flush returns ENOTSUP or ENOTTY, we know that no future
+ * attempts will ever succeed. In this case we set a persistent bit so
+ * that we don't bother with it in the future.
+ */
+ if ((zio->io_error == ENOTSUP || zio->io_error == ENOTTY) &&
+ zio->io_type == ZIO_TYPE_IOCTL &&
+ zio->io_cmd == DKIOCFLUSHWRITECACHE && vd != NULL)
+ vd->vdev_nowritecache = B_TRUE;
+
if (zio->io_error)
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
@@ -2991,7 +3493,7 @@ zio_checksum_generate(zio_t *zio)
}
}
- zio_checksum_compute(zio, checksum, zio->io_data, zio->io_size);
+ zio_checksum_compute(zio, checksum, zio->io_abd, zio->io_size);
return (ZIO_PIPELINE_CONTINUE);
}
@@ -3074,6 +3576,7 @@ zio_ready(zio_t *zio)
{
blkptr_t *bp = zio->io_bp;
zio_t *pio, *pio_next;
+ zio_link_t *zl = NULL;
if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) ||
zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_READY))
@@ -3091,12 +3594,26 @@ zio_ready(zio_t *zio)
if (bp != NULL && bp != &zio->io_bp_copy)
zio->io_bp_copy = *bp;
- if (zio->io_error)
+ if (zio->io_error != 0) {
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
+ if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
+ ASSERT(IO_IS_ALLOCATING(zio));
+ ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
+ /*
+ * We were unable to allocate anything, unreserve and
+ * issue the next I/O to allocate.
+ */
+ metaslab_class_throttle_unreserve(
+ spa_normal_class(zio->io_spa),
+ zio->io_prop.zp_copies, zio);
+ zio_allocate_dispatch(zio->io_spa);
+ }
+ }
+
mutex_enter(&zio->io_lock);
zio->io_state[ZIO_WAIT_READY] = 1;
- pio = zio_walk_parents(zio);
+ pio = zio_walk_parents(zio, &zl);
mutex_exit(&zio->io_lock);
/*
@@ -3107,7 +3624,7 @@ zio_ready(zio_t *zio)
* all parents must wait for us to be done before they can be done.
*/
for (; pio != NULL; pio = pio_next) {
- pio_next = zio_walk_parents(zio);
+ pio_next = zio_walk_parents(zio, &zl);
zio_notify_parent(pio, zio, ZIO_WAIT_READY);
}
@@ -3115,7 +3632,7 @@ zio_ready(zio_t *zio)
if (BP_IS_GANG(bp)) {
zio->io_flags &= ~ZIO_FLAG_NODATA;
} else {
- ASSERT((uintptr_t)zio->io_data < SPA_MAXBLOCKSIZE);
+ ASSERT((uintptr_t)zio->io_abd < SPA_MAXBLOCKSIZE);
zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES;
}
}
@@ -3127,11 +3644,78 @@ zio_ready(zio_t *zio)
return (ZIO_PIPELINE_CONTINUE);
}
+/*
+ * Update the allocation throttle accounting.
+ */
+static void
+zio_dva_throttle_done(zio_t *zio)
+{
+ zio_t *pio = zio_unique_parent(zio);
+ vdev_t *vd = zio->io_vd;
+ int flags = METASLAB_ASYNC_ALLOC;
+ ASSERTV(zio_t *lio = zio->io_logical);
+
+ ASSERT3P(zio->io_bp, !=, NULL);
+ ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE);
+ ASSERT3U(zio->io_priority, ==, ZIO_PRIORITY_ASYNC_WRITE);
+ ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV);
+ ASSERT(vd != NULL);
+ ASSERT3P(vd, ==, vd->vdev_top);
+ ASSERT(zio_injection_enabled || !(zio->io_flags & ZIO_FLAG_IO_RETRY));
+ ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REPAIR));
+ ASSERT(zio->io_flags & ZIO_FLAG_IO_ALLOCATING);
+ ASSERT(!(lio->io_flags & ZIO_FLAG_IO_REWRITE));
+ ASSERT(!(lio->io_orig_flags & ZIO_FLAG_NODATA));
+
+ /*
+ * Parents of gang children can have two flavors -- ones that
+ * allocated the gang header (will have ZIO_FLAG_IO_REWRITE set)
+ * and ones that allocated the constituent blocks. The allocation
+ * throttle needs to know the allocating parent zio so we must find
+ * it here.
+ */
+ if (pio->io_child_type == ZIO_CHILD_GANG) {
+ /*
+ * If our parent is a rewrite gang child then our grandparent
+ * would have been the one that performed the allocation.
+ */
+ if (pio->io_flags & ZIO_FLAG_IO_REWRITE)
+ pio = zio_unique_parent(pio);
+ flags |= METASLAB_GANG_CHILD;
+ }
+
+ ASSERT(IO_IS_ALLOCATING(pio));
+ ASSERT3P(zio, !=, zio->io_logical);
+ ASSERT(zio->io_logical != NULL);
+ ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REPAIR));
+ ASSERT0(zio->io_flags & ZIO_FLAG_NOPWRITE);
+
+ mutex_enter(&pio->io_lock);
+ metaslab_group_alloc_decrement(zio->io_spa, vd->vdev_id, pio, flags);
+ mutex_exit(&pio->io_lock);
+
+ metaslab_class_throttle_unreserve(spa_normal_class(zio->io_spa),
+ 1, pio);
+
+ /*
+ * Call into the pipeline to see if there is more work that
+ * needs to be done. If there is work to be done it will be
+ * dispatched to another taskq thread.
+ */
+ zio_allocate_dispatch(zio->io_spa);
+}
+
static int
zio_done(zio_t *zio)
{
+ /*
+ * Always attempt to keep stack usage minimal here since
+ * we can be called recursively up to 19 levels deep.
+ */
+ const uint64_t psize = zio->io_size;
zio_t *pio, *pio_next;
int c, w;
+ zio_link_t *zl = NULL;
/*
* If our children haven't all completed,
@@ -3143,6 +3727,33 @@ zio_done(zio_t *zio)
zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_DONE))
return (ZIO_PIPELINE_STOP);
+ /*
+ * If the allocation throttle is enabled, then update the accounting.
+ * We only track child I/Os that are part of an allocating async
+ * write. We must do this since the allocation is performed
+ * by the logical I/O but the actual write is done by child I/Os.
+ */
+ if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING &&
+ zio->io_child_type == ZIO_CHILD_VDEV) {
+ ASSERT(spa_normal_class(
+ zio->io_spa)->mc_alloc_throttle_enabled);
+ zio_dva_throttle_done(zio);
+ }
+
+ /*
+ * If the allocation throttle is enabled, verify that
+ * we have decremented the refcounts for every I/O that was throttled.
+ */
+ if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
+ ASSERT(zio->io_type == ZIO_TYPE_WRITE);
+ ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
+ ASSERT(zio->io_bp != NULL);
+ metaslab_group_alloc_verify(zio->io_spa, zio->io_bp, zio);
+ VERIFY(refcount_not_held(
+ &(spa_normal_class(zio->io_spa)->mc_alloc_slots), zio));
+ }
+
for (c = 0; c < ZIO_CHILD_TYPES; c++)
for (w = 0; w < ZIO_WAIT_TYPES; w++)
ASSERT(zio->io_children[c][w] == 0);
@@ -3182,35 +3793,35 @@ zio_done(zio_t *zio)
while (zio->io_cksum_report != NULL) {
zio_cksum_report_t *zcr = zio->io_cksum_report;
uint64_t align = zcr->zcr_align;
- uint64_t asize = P2ROUNDUP(zio->io_size, align);
- char *abuf = zio->io_data;
+ uint64_t asize = P2ROUNDUP(psize, align);
+ abd_t *adata = zio->io_abd;
- if (asize != zio->io_size) {
- abuf = zio_buf_alloc(asize);
- bcopy(zio->io_data, abuf, zio->io_size);
- bzero(abuf+zio->io_size, asize-zio->io_size);
+ if (asize != psize) {
+ adata = abd_alloc(asize, B_TRUE);
+ abd_copy(adata, zio->io_abd, psize);
+ abd_zero_off(adata, psize, asize - psize);
}
zio->io_cksum_report = zcr->zcr_next;
zcr->zcr_next = NULL;
- zcr->zcr_finish(zcr, abuf);
+ zcr->zcr_finish(zcr, adata);
zfs_ereport_free_checksum(zcr);
- if (asize != zio->io_size)
- zio_buf_free(abuf, asize);
+ if (asize != psize)
+ abd_free(adata);
}
}
zio_pop_transforms(zio); /* note: may set zio->io_error */
- vdev_stat_update(zio, zio->io_size);
+ vdev_stat_update(zio, psize);
/*
* If this I/O is attached to a particular vdev and is slow, exceeding
* 30 seconds to complete, post an error describing the I/O delay.
* We ignore these errors if the device is currently unavailable.
*/
- if (zio->io_delay >= MSEC_TO_TICK(zio_delay_max)) {
+ if (zio->io_delay >= MSEC2NSEC(zio_delay_max)) {
if (zio->io_vd != NULL && !vdev_is_dead(zio->io_vd))
zfs_ereport_post(FM_EREPORT_ZFS_DELAY, zio->io_spa,
zio->io_vd, zio, 0, 0);
@@ -3224,9 +3835,9 @@ zio_done(zio_t *zio)
* device is currently unavailable.
*/
if (zio->io_error != ECKSUM && zio->io_vd != NULL &&
- !vdev_is_dead(zio->io_vd))
+ !vdev_is_dead(zio->io_vd))
zfs_ereport_post(FM_EREPORT_ZFS_IO, zio->io_spa,
- zio->io_vd, zio, 0, 0);
+ zio->io_vd, zio, 0, 0);
if ((zio->io_error == EIO || !(zio->io_flags &
(ZIO_FLAG_SPECULATIVE | ZIO_FLAG_DONT_PROPAGATE))) &&
@@ -3297,7 +3908,7 @@ zio_done(zio_t *zio)
*/
if ((zio->io_flags & ZIO_FLAG_GODFATHER) &&
(zio->io_reexecute & ZIO_REEXECUTE_SUSPEND))
- zio->io_reexecute = 0;
+ zio->io_reexecute &= ~ZIO_REEXECUTE_SUSPEND;
if (zio->io_reexecute) {
/*
@@ -3328,13 +3939,15 @@ zio_done(zio_t *zio)
* trouble (e.g. suspended). This allows "The Godfather"
* I/O to return status without blocking.
*/
- for (pio = zio_walk_parents(zio); pio != NULL; pio = pio_next) {
- zio_link_t *zl = zio->io_walk_link;
- pio_next = zio_walk_parents(zio);
+ zl = NULL;
+ for (pio = zio_walk_parents(zio, &zl); pio != NULL;
+ pio = pio_next) {
+ zio_link_t *remove_zl = zl;
+ pio_next = zio_walk_parents(zio, &zl);
if ((pio->io_flags & ZIO_FLAG_GODFATHER) &&
(zio->io_reexecute & ZIO_REEXECUTE_SUSPEND)) {
- zio_remove_child(pio, zio, zl);
+ zio_remove_child(pio, zio, remove_zl);
zio_notify_parent(pio, zio, ZIO_WAIT_DONE);
}
}
@@ -3401,10 +4014,11 @@ zio_done(zio_t *zio)
zio->io_state[ZIO_WAIT_DONE] = 1;
mutex_exit(&zio->io_lock);
- for (pio = zio_walk_parents(zio); pio != NULL; pio = pio_next) {
- zio_link_t *zl = zio->io_walk_link;
- pio_next = zio_walk_parents(zio);
- zio_remove_child(pio, zio, zl);
+ zl = NULL;
+ for (pio = zio_walk_parents(zio, &zl); pio != NULL; pio = pio_next) {
+ zio_link_t *remove_zl = zl;
+ pio_next = zio_walk_parents(zio, &zl);
+ zio_remove_child(pio, zio, remove_zl);
zio_notify_parent(pio, zio, ZIO_WAIT_DONE);
}
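Both loops above show the idiom forced by the new zio_walk_parents() signature: the iterator cursor (zl) now lives in the caller, so the loop saves the current link (remove_zl), advances the cursor, and only then unlinks, ensuring removal can never invalidate the live cursor. The same pattern in a freestanding sketch, with a singly linked list standing in for zio_link_t:

#include <stdio.h>
#include <stddef.h>

typedef struct node { struct node *next; int val; } node_t;

/* Caller-held cursor walk, cf. zio_walk_parents(zio, &zl). */
static node_t *
walk(node_t *head, node_t **zl)
{
    *zl = (*zl == NULL) ? head : (*zl)->next;
    return (*zl);
}

int
main(void)
{
    node_t c = { NULL, 3 }, b = { &c, 2 }, a = { &b, 1 };
    node_t *head = &a, *zl = NULL, *n, *n_next;

    /*
     * Advance the cursor before acting on the saved node, exactly as
     * zio_done() saves remove_zl before calling zio_remove_child().
     */
    for (n = walk(head, &zl); n != NULL; n = n_next) {
        node_t *saved = n;              /* cf. remove_zl */
        n_next = walk(head, &zl);       /* advance first */
        printf("%d\n", saved->val);     /* safe to unlink 'saved' here */
    }
    return (0);
}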
@@ -3428,9 +4042,10 @@ zio_done(zio_t *zio)
static zio_pipe_stage_t *zio_pipeline[] = {
NULL,
zio_read_bp_init,
+ zio_write_bp_init,
zio_free_bp_init,
zio_issue_async,
- zio_write_bp_init,
+ zio_write_compress,
zio_checksum_generate,
zio_nop_write,
zio_ddt_read_start,
@@ -3439,6 +4054,7 @@ static zio_pipe_stage_t *zio_pipeline[] = {
zio_ddt_free,
zio_gang_assemble,
zio_gang_issue,
+ zio_dva_throttle,
zio_dva_allocate,
zio_dva_free,
zio_dva_claim,
@@ -3450,46 +4066,135 @@ static zio_pipe_stage_t *zio_pipeline[] = {
zio_done
};
-/* dnp is the dnode for zb1->zb_object */
-boolean_t
-zbookmark_is_before(const dnode_phys_t *dnp, const zbookmark_phys_t *zb1,
- const zbookmark_phys_t *zb2)
-{
- uint64_t zb1nextL0, zb2thisobj;
- ASSERT(zb1->zb_objset == zb2->zb_objset);
- ASSERT(zb2->zb_level == 0);
- /* The objset_phys_t isn't before anything. */
- if (dnp == NULL)
- return (B_FALSE);
- zb1nextL0 = (zb1->zb_blkid + 1) <<
- ((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
+/*
+ * Compare two zbookmark_phys_t's to see which we would reach first in a
+ * pre-order traversal of the object tree.
+ *
+ * This is simple in every case aside from the meta-dnode object. For all other
+ * objects, we traverse them in order (object 1 before object 2, and so on).
+ * However, all of these objects are traversed while traversing object 0, since
+ * the data it points to is the list of objects. Thus, we need to convert to a
+ * canonical representation so we can compare meta-dnode bookmarks to
+ * non-meta-dnode bookmarks.
+ *
+ * We do this by calculating "equivalents" for each field of the zbookmark.
+ * zbookmarks outside of the meta-dnode use their own object and level, and
+ * calculate the level 0 equivalent (the first L0 blkid that is contained in the
+ * blocks this bookmark refers to) by multiplying their blkid by their span
+ * (the number of L0 blocks contained within one block at their level).
+ * zbookmarks inside the meta-dnode calculate their object equivalent
+ * (which is L0equiv * dnodes per data block), use 0 for their L0equiv, and use
+ * level + 1<<31 (any value larger than a level could ever be) for their level.
+ * This causes them to always compare before a bookmark in their object
+ * equivalent, compare appropriately to bookmarks in other objects, and to
+ * compare appropriately to other bookmarks in the meta-dnode.
+ */
+int
+zbookmark_compare(uint16_t dbss1, uint8_t ibs1, uint16_t dbss2, uint8_t ibs2,
+ const zbookmark_phys_t *zb1, const zbookmark_phys_t *zb2)
+{
+ /*
+ * These variables represent the "equivalent" values for the zbookmark,
+ * after converting zbookmarks inside the meta dnode to their
+ * normal-object equivalents.
+ */
+ uint64_t zb1obj, zb2obj;
+ uint64_t zb1L0, zb2L0;
+ uint64_t zb1level, zb2level;
- zb2thisobj = zb2->zb_object ? zb2->zb_object :
- zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT);
+ if (zb1->zb_object == zb2->zb_object &&
+ zb1->zb_level == zb2->zb_level &&
+ zb1->zb_blkid == zb2->zb_blkid)
+ return (0);
+
+ /*
+ * BP_SPANB calculates the span in blocks.
+ */
+ zb1L0 = (zb1->zb_blkid) * BP_SPANB(ibs1, zb1->zb_level);
+ zb2L0 = (zb2->zb_blkid) * BP_SPANB(ibs2, zb2->zb_level);
if (zb1->zb_object == DMU_META_DNODE_OBJECT) {
- uint64_t nextobj = zb1nextL0 *
- (dnp->dn_datablkszsec << (SPA_MINBLOCKSHIFT - DNODE_SHIFT));
- return (nextobj <= zb2thisobj);
+ zb1obj = zb1L0 * (dbss1 << (SPA_MINBLOCKSHIFT - DNODE_SHIFT));
+ zb1L0 = 0;
+ zb1level = zb1->zb_level + COMPARE_META_LEVEL;
+ } else {
+ zb1obj = zb1->zb_object;
+ zb1level = zb1->zb_level;
}
- if (zb1->zb_object < zb2thisobj)
- return (B_TRUE);
- if (zb1->zb_object > zb2thisobj)
- return (B_FALSE);
- if (zb2->zb_object == DMU_META_DNODE_OBJECT)
+ if (zb2->zb_object == DMU_META_DNODE_OBJECT) {
+ zb2obj = zb2L0 * (dbss2 << (SPA_MINBLOCKSHIFT - DNODE_SHIFT));
+ zb2L0 = 0;
+ zb2level = zb2->zb_level + COMPARE_META_LEVEL;
+ } else {
+ zb2obj = zb2->zb_object;
+ zb2level = zb2->zb_level;
+ }
+
+ /* Now that we have a canonical representation, do the comparison. */
+ if (zb1obj != zb2obj)
+ return (zb1obj < zb2obj ? -1 : 1);
+ else if (zb1L0 != zb2L0)
+ return (zb1L0 < zb2L0 ? -1 : 1);
+ else if (zb1level != zb2level)
+ return (zb1level > zb2level ? -1 : 1);
+ /*
+ * This can (theoretically) happen if the bookmarks have the same object
+ * and level, but different blkids, if the block sizes are not the same.
+ * There is presently no way to change the indirect block sizes
+ */
+ return (0);
+}
+
+/*
+ * This function checks the following: given that last_block is the place that
+ * our traversal stopped last time, does that guarantee that we've visited
+ * every node under subtree_root? Therefore, we can't just use the raw output
+ * of zbookmark_compare. We have to pass in a modified version of
+ * subtree_root; by incrementing the block id, and then checking whether
+ * last_block is before or equal to that, we can tell whether or not having
+ * visited last_block implies that all of subtree_root's children have been
+ * visited.
+ */
+boolean_t
+zbookmark_subtree_completed(const dnode_phys_t *dnp,
+ const zbookmark_phys_t *subtree_root, const zbookmark_phys_t *last_block)
+{
+ zbookmark_phys_t mod_zb = *subtree_root;
+ mod_zb.zb_blkid++;
+ ASSERT(last_block->zb_level == 0);
+
+ /* The objset_phys_t isn't before anything. */
+ if (dnp == NULL)
return (B_FALSE);
- return (zb1nextL0 <= zb2->zb_blkid);
+
+ /*
+ * We pass in 1ULL << (DNODE_BLOCK_SHIFT - SPA_MINBLOCKSHIFT) for the
+ * data block size in sectors, because that variable is only used if
+ * the bookmark refers to a block in the meta-dnode. Since we don't
+ * know without examining it what object it refers to, and there's no
+ * harm in passing in this value in other cases, we always pass it in.
+ *
+ * We pass in 0 for the indirect block size shift because zb2 must be
+ * level 0. The indirect block size is only used to calculate the span
+ * of the bookmark, but since the bookmark must be level 0, the span is
+ * always 1, so the math works out.
+ *
+ * If you make changes to how the zbookmark_compare code works, be sure
+ * to make sure that this code still works afterwards.
+ */
+ return (zbookmark_compare(dnp->dn_datablkszsec, dnp->dn_indblkshift,
+ 1ULL << (DNODE_BLOCK_SHIFT - SPA_MINBLOCKSHIFT), 0, &mod_zb,
+ last_block) <= 0);
}
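As a quick sanity check of the canonicalization in zbookmark_compare(): with the usual constants (512-byte sectors and dnodes, 16K dnode blocks), each L0 meta-dnode block holds 32 dnodes, so a meta-dnode bookmark at L0 blkid 3 gets object equivalent 96 and sorts before every bookmark in objects 96 and up. A standalone check of that arithmetic, with illustrative values:

#include <assert.h>
#include <stdint.h>

#define SPA_MINBLOCKSHIFT   9
#define DNODE_SHIFT         9
#define DNODE_BLOCK_SHIFT   14

int
main(void)
{
    /* Data block size in sectors for a 16K meta-dnode block. */
    uint64_t dbss = 1ULL << (DNODE_BLOCK_SHIFT - SPA_MINBLOCKSHIFT);
    /* L0 blkid 3 of the meta-dnode maps to object 3 * 32 = 96. */
    uint64_t zb1obj = 3 * (dbss << (SPA_MINBLOCKSHIFT - DNODE_SHIFT));

    assert(dbss == 32);
    assert(zb1obj == 96);
    /* The large meta level then sorts it before bookmarks in obj 96. */
    return (0);
}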
#if defined(_KERNEL) && defined(HAVE_SPL)
EXPORT_SYMBOL(zio_type_name);
EXPORT_SYMBOL(zio_buf_alloc);
EXPORT_SYMBOL(zio_data_buf_alloc);
-EXPORT_SYMBOL(zio_buf_alloc_flags);
EXPORT_SYMBOL(zio_buf_free);
EXPORT_SYMBOL(zio_data_buf_free);
@@ -3510,4 +4215,8 @@ MODULE_PARM_DESC(zfs_sync_pass_dont_compress,
module_param(zfs_sync_pass_rewrite, int, 0644);
MODULE_PARM_DESC(zfs_sync_pass_rewrite,
"Rewrite new bps starting in this pass");
+
+module_param(zio_dva_throttle_enabled, int, 0644);
+MODULE_PARM_DESC(zio_dva_throttle_enabled,
+ "Throttle block allocations in the ZIO pipeline");
#endif
diff --git a/zfs/module/zfs/zio_checksum.c b/zfs/module/zfs/zio_checksum.c
index 3a5c73a6a1e9..6dfcb063162d 100644
--- a/zfs/module/zfs/zio_checksum.c
+++ b/zfs/module/zfs/zio_checksum.c
@@ -20,14 +20,17 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
+ * Copyright 2013 Saso Kiselkov. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/spa.h>
+#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
#include <sys/zil.h>
+#include <sys/abd.h>
#include <zfs_fletcher.h>
/*
@@ -58,28 +61,163 @@
* checksum function of the appropriate strength. When reading a block,
* we compare the expected checksum against the actual checksum, which we
* compute via the checksum function specified by BP_GET_CHECKSUM(bp).
+ *
+ * SALTED CHECKSUMS
+ *
+ * To enable the use of less secure hash algorithms with dedup, we
+ * introduce the notion of salted checksums (MACs, really). A salted
+ * checksum is fed both a random 256-bit value (the salt) and the data
+ * to be checksummed. This salt is kept secret (stored on the pool, but
+ * never shown to the user). Thus even if an attacker knew of collision
+ * weaknesses in the hash algorithm, they won't be able to mount a known
+ * plaintext attack on the DDT, since the actual hash value cannot be
+ * known ahead of time. How the salt is used is algorithm-specific
+ * (some might simply prefix it to the data block, others might need to
+ * utilize a full-blown HMAC). On disk the salt is stored in a ZAP
+ * object in the MOS (DMU_POOL_CHECKSUM_SALT).
+ *
+ * CONTEXT TEMPLATES
+ *
+ * Some hashing algorithms need to perform a substantial amount of
+ * initialization work (e.g. salted checksums above may need to pre-hash
+ * the salt) before being able to process data. Performing this
+ * redundant work for each block would be wasteful, so we instead allow
+ * a checksum algorithm to do the work once (the first time it's used)
+ * and then keep this pre-initialized context as a template inside the
+ * spa_t (spa_cksum_tmpls). If the zio_checksum_info_t contains
+ * non-NULL ci_tmpl_init and ci_tmpl_free callbacks, they are used to
+ * construct and destruct the pre-initialized checksum context. The
+ * pre-initialized context is then reused during each checksum
+ * invocation and passed to the checksum function.
*/
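As a concrete illustration of the simplest salting scheme mentioned above (prefixing the salt to the data), here is a toy construction; it is not the on-disk Skein or Edon-R MAC, and it uses FNV-1a purely as a stand-in hash:

#include <stdint.h>
#include <stddef.h>

static uint64_t
fnv1a(uint64_t h, const uint8_t *p, size_t len)
{
    while (len--) {
        h ^= *p++;
        h *= 0x100000001b3ULL;
    }
    return (h);
}

uint64_t
salted_cksum(const uint8_t salt[32], const void *data, size_t size)
{
    uint64_t h = 0xcbf29ce484222325ULL;     /* FNV offset basis */

    h = fnv1a(h, salt, 32);     /* salt pass: identical for every block */
    return (fnv1a(h, data, size));
}

Because the salt pass is identical for every block, a real implementation hashes it once up front and reuses the result, which is precisely what the per-spa context templates described above provide.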
/*ARGSUSED*/
static void
-zio_checksum_off(const void *buf, uint64_t size, zio_cksum_t *zcp)
+abd_checksum_off(abd_t *abd, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp)
{
ZIO_SET_CHECKSUM(zcp, 0, 0, 0, 0);
}
+/*ARGSUSED*/
+void
+abd_fletcher_2_native(abd_t *abd, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp)
+{
+ fletcher_init(zcp);
+ (void) abd_iterate_func(abd, 0, size,
+ fletcher_2_incremental_native, zcp);
+}
+
+/*ARGSUSED*/
+void
+abd_fletcher_2_byteswap(abd_t *abd, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp)
+{
+ fletcher_init(zcp);
+ (void) abd_iterate_func(abd, 0, size,
+ fletcher_2_incremental_byteswap, zcp);
+}
+
+static inline void
+abd_fletcher_4_impl(abd_t *abd, uint64_t size, zio_abd_checksum_data_t *acdp)
+{
+ fletcher_4_abd_ops.acf_init(acdp);
+ abd_iterate_func(abd, 0, size, fletcher_4_abd_ops.acf_iter, acdp);
+ fletcher_4_abd_ops.acf_fini(acdp);
+}
+
+/*ARGSUSED*/
+void
+abd_fletcher_4_native(abd_t *abd, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp)
+{
+ fletcher_4_ctx_t ctx;
+
+ zio_abd_checksum_data_t acd = {
+ .acd_byteorder = ZIO_CHECKSUM_NATIVE,
+ .acd_zcp = zcp,
+ .acd_ctx = &ctx
+ };
+
+ abd_fletcher_4_impl(abd, size, &acd);
+
+}
+
+/*ARGSUSED*/
+void
+abd_fletcher_4_byteswap(abd_t *abd, uint64_t size,
+ const void *ctx_template, zio_cksum_t *zcp)
+{
+ fletcher_4_ctx_t ctx;
+
+ zio_abd_checksum_data_t acd = {
+ .acd_byteorder = ZIO_CHECKSUM_BYTESWAP,
+ .acd_zcp = zcp,
+ .acd_ctx = &ctx
+ };
+
+ abd_fletcher_4_impl(abd, size, &acd);
+}
+
zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
- {{NULL, NULL}, 0, 0, 0, "inherit"},
- {{NULL, NULL}, 0, 0, 0, "on"},
- {{zio_checksum_off, zio_checksum_off}, 0, 0, 0, "off"},
- {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "label"},
- {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 1, 0, "gang_header"},
- {{fletcher_2_native, fletcher_2_byteswap}, 0, 1, 0, "zilog"},
- {{fletcher_2_native, fletcher_2_byteswap}, 0, 0, 0, "fletcher2"},
- {{fletcher_4_native, fletcher_4_byteswap}, 1, 0, 0, "fletcher4"},
- {{zio_checksum_SHA256, zio_checksum_SHA256}, 1, 0, 1, "sha256"},
- {{fletcher_4_native, fletcher_4_byteswap}, 0, 1, 0, "zilog2"},
+ {{NULL, NULL}, NULL, NULL, 0, "inherit"},
+ {{NULL, NULL}, NULL, NULL, 0, "on"},
+ {{abd_checksum_off, abd_checksum_off},
+ NULL, NULL, 0, "off"},
+ {{abd_checksum_SHA256, abd_checksum_SHA256},
+ NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED,
+ "label"},
+ {{abd_checksum_SHA256, abd_checksum_SHA256},
+ NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_EMBEDDED,
+ "gang_header"},
+ {{abd_fletcher_2_native, abd_fletcher_2_byteswap},
+ NULL, NULL, ZCHECKSUM_FLAG_EMBEDDED, "zilog"},
+ {{abd_fletcher_2_native, abd_fletcher_2_byteswap},
+ NULL, NULL, 0, "fletcher2"},
+ {{abd_fletcher_4_native, abd_fletcher_4_byteswap},
+ NULL, NULL, ZCHECKSUM_FLAG_METADATA, "fletcher4"},
+ {{abd_checksum_SHA256, abd_checksum_SHA256},
+ NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
+ ZCHECKSUM_FLAG_NOPWRITE, "sha256"},
+ {{abd_fletcher_4_native, abd_fletcher_4_byteswap},
+ NULL, NULL, ZCHECKSUM_FLAG_EMBEDDED, "zilog2"},
+ {{abd_checksum_off, abd_checksum_off},
+ NULL, NULL, 0, "noparity"},
+ {{abd_checksum_SHA512_native, abd_checksum_SHA512_byteswap},
+ NULL, NULL, ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
+ ZCHECKSUM_FLAG_NOPWRITE, "sha512"},
+ {{abd_checksum_skein_native, abd_checksum_skein_byteswap},
+ abd_checksum_skein_tmpl_init, abd_checksum_skein_tmpl_free,
+ ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
+ ZCHECKSUM_FLAG_SALTED | ZCHECKSUM_FLAG_NOPWRITE, "skein"},
+ {{abd_checksum_edonr_native, abd_checksum_edonr_byteswap},
+ abd_checksum_edonr_tmpl_init, abd_checksum_edonr_tmpl_free,
+ ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_SALTED |
+ ZCHECKSUM_FLAG_NOPWRITE, "edonr"},
};
+/*
+ * The flag corresponding to the "verify" in dedup=[checksum,]verify
+ * must be cleared first, so callers should use ZIO_CHECKSUM_MASK.
+ */
+spa_feature_t
+zio_checksum_to_feature(enum zio_checksum cksum)
+{
+ VERIFY((cksum & ~ZIO_CHECKSUM_MASK) == 0);
+
+ switch (cksum) {
+ case ZIO_CHECKSUM_SHA512:
+ return (SPA_FEATURE_SHA512);
+ case ZIO_CHECKSUM_SKEIN:
+ return (SPA_FEATURE_SKEIN);
+ case ZIO_CHECKSUM_EDONR:
+ return (SPA_FEATURE_EDONR);
+ default:
+ return (SPA_FEATURE_NONE);
+ }
+}
+
enum zio_checksum
zio_checksum_select(enum zio_checksum child, enum zio_checksum parent)
{
@@ -113,7 +251,8 @@ zio_checksum_dedup_select(spa_t *spa, enum zio_checksum child,
if (child == (ZIO_CHECKSUM_ON | ZIO_CHECKSUM_VERIFY))
return (spa_dedup_checksum(spa) | ZIO_CHECKSUM_VERIFY);
- ASSERT(zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_dedup ||
+ ASSERT((zio_checksum_table[child & ZIO_CHECKSUM_MASK].ci_flags &
+ ZCHECKSUM_FLAG_DEDUP) ||
(child & ZIO_CHECKSUM_VERIFY) || child == ZIO_CHECKSUM_OFF);
return (child);
@@ -124,7 +263,7 @@ zio_checksum_dedup_select(spa_t *spa, enum zio_checksum child,
* a tuple which is guaranteed to be unique for the life of the pool.
*/
static void
-zio_checksum_gang_verifier(zio_cksum_t *zcp, blkptr_t *bp)
+zio_checksum_gang_verifier(zio_cksum_t *zcp, const blkptr_t *bp)
{
const dva_t *dva = BP_IDENTITY(bp);
uint64_t txg = BP_PHYSICAL_BIRTH(bp);
@@ -145,86 +284,144 @@ zio_checksum_label_verifier(zio_cksum_t *zcp, uint64_t offset)
ZIO_SET_CHECKSUM(zcp, offset, 0, 0, 0);
}
+/*
+ * Calls the template init function of a checksum which supports context
+ * templates and installs the template into the spa_t.
+ */
+static void
+zio_checksum_template_init(enum zio_checksum checksum, spa_t *spa)
+{
+ zio_checksum_info_t *ci = &zio_checksum_table[checksum];
+
+ if (ci->ci_tmpl_init == NULL)
+ return;
+ if (spa->spa_cksum_tmpls[checksum] != NULL)
+ return;
+
+ VERIFY(ci->ci_tmpl_free != NULL);
+ mutex_enter(&spa->spa_cksum_tmpls_lock);
+ if (spa->spa_cksum_tmpls[checksum] == NULL) {
+ spa->spa_cksum_tmpls[checksum] =
+ ci->ci_tmpl_init(&spa->spa_cksum_salt);
+ VERIFY(spa->spa_cksum_tmpls[checksum] != NULL);
+ }
+ mutex_exit(&spa->spa_cksum_tmpls_lock);
+}
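zio_checksum_template_init() above is a double-checked initialization: an unlocked fast-path test, then a re-check under spa_cksum_tmpls_lock before running the expensive constructor. The bare pattern, with pthread stand-ins for the kernel mutex and a hypothetical make_template():

#include <pthread.h>
#include <stdlib.h>

static void *tmpl;                      /* cf. spa_cksum_tmpls[] */
static pthread_mutex_t tmpl_lock = PTHREAD_MUTEX_INITIALIZER;

static void *make_template(void) { return (malloc(1)); }

void *
get_template(void)
{
    /*
     * Unlocked first read; benign in the kernel code, where aligned
     * pointer loads are atomic. Strictly portable C would use atomics.
     */
    if (tmpl != NULL)
        return (tmpl);

    pthread_mutex_lock(&tmpl_lock);
    if (tmpl == NULL)                   /* re-check under the lock */
        tmpl = make_template();
    pthread_mutex_unlock(&tmpl_lock);
    return (tmpl);
}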
+
/*
* Generate the checksum.
*/
void
zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
- void *data, uint64_t size)
+ abd_t *abd, uint64_t size)
{
+ static const uint64_t zec_magic = ZEC_MAGIC;
blkptr_t *bp = zio->io_bp;
uint64_t offset = zio->io_offset;
zio_checksum_info_t *ci = &zio_checksum_table[checksum];
zio_cksum_t cksum;
+ spa_t *spa = zio->io_spa;
ASSERT((uint_t)checksum < ZIO_CHECKSUM_FUNCTIONS);
ASSERT(ci->ci_func[0] != NULL);
- if (ci->ci_eck) {
- zio_eck_t *eck;
+ zio_checksum_template_init(checksum, spa);
+
+ if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
+ zio_eck_t eck;
+ size_t eck_offset;
if (checksum == ZIO_CHECKSUM_ZILOG2) {
- zil_chain_t *zilc = data;
+ zil_chain_t zilc;
+ abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t));
- size = P2ROUNDUP_TYPED(zilc->zc_nused, ZIL_MIN_BLKSZ,
+ size = P2ROUNDUP_TYPED(zilc.zc_nused, ZIL_MIN_BLKSZ,
uint64_t);
- eck = &zilc->zc_eck;
+ eck = zilc.zc_eck;
+ eck_offset = offsetof(zil_chain_t, zc_eck);
} else {
- eck = (zio_eck_t *)((char *)data + size) - 1;
+ eck_offset = size - sizeof (zio_eck_t);
+ abd_copy_to_buf_off(&eck, abd, eck_offset,
+ sizeof (zio_eck_t));
}
- if (checksum == ZIO_CHECKSUM_GANG_HEADER)
- zio_checksum_gang_verifier(&eck->zec_cksum, bp);
- else if (checksum == ZIO_CHECKSUM_LABEL)
- zio_checksum_label_verifier(&eck->zec_cksum, offset);
- else
- bp->blk_cksum = eck->zec_cksum;
- eck->zec_magic = ZEC_MAGIC;
- ci->ci_func[0](data, size, &cksum);
- eck->zec_cksum = cksum;
+
+ if (checksum == ZIO_CHECKSUM_GANG_HEADER) {
+ zio_checksum_gang_verifier(&eck.zec_cksum, bp);
+ abd_copy_from_buf_off(abd, &eck.zec_cksum,
+ eck_offset + offsetof(zio_eck_t, zec_cksum),
+ sizeof (zio_cksum_t));
+ } else if (checksum == ZIO_CHECKSUM_LABEL) {
+ zio_checksum_label_verifier(&eck.zec_cksum, offset);
+ abd_copy_from_buf_off(abd, &eck.zec_cksum,
+ eck_offset + offsetof(zio_eck_t, zec_cksum),
+ sizeof (zio_cksum_t));
+ } else {
+ bp->blk_cksum = eck.zec_cksum;
+ }
+
+ abd_copy_from_buf_off(abd, &zec_magic,
+ eck_offset + offsetof(zio_eck_t, zec_magic),
+ sizeof (zec_magic));
+
+ ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum],
+ &cksum);
+
+ abd_copy_from_buf_off(abd, &cksum,
+ eck_offset + offsetof(zio_eck_t, zec_cksum),
+ sizeof (zio_cksum_t));
} else {
- ci->ci_func[0](data, size, &bp->blk_cksum);
+ ci->ci_func[0](abd, size, spa->spa_cksum_tmpls[checksum],
+ &bp->blk_cksum);
}
}
int
-zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info)
+zio_checksum_error_impl(spa_t *spa, const blkptr_t *bp,
+ enum zio_checksum checksum, abd_t *abd, uint64_t size, uint64_t offset,
+ zio_bad_cksum_t *info)
{
- blkptr_t *bp = zio->io_bp;
- uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum :
- (BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp)));
- int byteswap;
- int error;
- uint64_t size = (bp == NULL ? zio->io_size :
- (BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp)));
- uint64_t offset = zio->io_offset;
- void *data = zio->io_data;
zio_checksum_info_t *ci = &zio_checksum_table[checksum];
- zio_cksum_t actual_cksum, expected_cksum, verifier;
+ zio_cksum_t actual_cksum, expected_cksum;
+ zio_eck_t eck;
+ int byteswap;
if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
return (SET_ERROR(EINVAL));
- if (ci->ci_eck) {
- zio_eck_t *eck;
+ zio_checksum_template_init(checksum, spa);
+
+ if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
+ zio_cksum_t verifier;
+ size_t eck_offset;
if (checksum == ZIO_CHECKSUM_ZILOG2) {
- zil_chain_t *zilc = data;
+ zil_chain_t zilc;
uint64_t nused;
- eck = &zilc->zc_eck;
- if (eck->zec_magic == ZEC_MAGIC)
- nused = zilc->zc_nused;
- else if (eck->zec_magic == BSWAP_64(ZEC_MAGIC))
- nused = BSWAP_64(zilc->zc_nused);
- else
+ abd_copy_to_buf(&zilc, abd, sizeof (zil_chain_t));
+
+ eck = zilc.zc_eck;
+ eck_offset = offsetof(zil_chain_t, zc_eck) +
+ offsetof(zio_eck_t, zec_cksum);
+
+ if (eck.zec_magic == ZEC_MAGIC) {
+ nused = zilc.zc_nused;
+ } else if (eck.zec_magic == BSWAP_64(ZEC_MAGIC)) {
+ nused = BSWAP_64(zilc.zc_nused);
+ } else {
return (SET_ERROR(ECKSUM));
+ }
- if (nused > size)
+ if (nused > size) {
return (SET_ERROR(ECKSUM));
+ }
size = P2ROUNDUP_TYPED(nused, ZIL_MIN_BLKSZ, uint64_t);
} else {
- eck = (zio_eck_t *)((char *)data + size) - 1;
+ eck_offset = size - sizeof (zio_eck_t);
+ abd_copy_to_buf_off(&eck, abd, eck_offset,
+ sizeof (zio_eck_t));
+ eck_offset += offsetof(zio_eck_t, zec_cksum);
}
if (checksum == ZIO_CHECKSUM_GANG_HEADER)
@@ -234,42 +431,90 @@ zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info)
else
verifier = bp->blk_cksum;
- byteswap = (eck->zec_magic == BSWAP_64(ZEC_MAGIC));
+ byteswap = (eck.zec_magic == BSWAP_64(ZEC_MAGIC));
if (byteswap)
byteswap_uint64_array(&verifier, sizeof (zio_cksum_t));
- expected_cksum = eck->zec_cksum;
- eck->zec_cksum = verifier;
- ci->ci_func[byteswap](data, size, &actual_cksum);
- eck->zec_cksum = expected_cksum;
+ expected_cksum = eck.zec_cksum;
- if (byteswap)
+ abd_copy_from_buf_off(abd, &verifier, eck_offset,
+ sizeof (zio_cksum_t));
+
+ ci->ci_func[byteswap](abd, size,
+ spa->spa_cksum_tmpls[checksum], &actual_cksum);
+
+ abd_copy_from_buf_off(abd, &expected_cksum, eck_offset,
+ sizeof (zio_cksum_t));
+
+ if (byteswap) {
byteswap_uint64_array(&expected_cksum,
sizeof (zio_cksum_t));
+ }
} else {
- ASSERT(!BP_IS_GANG(bp));
byteswap = BP_SHOULD_BYTESWAP(bp);
expected_cksum = bp->blk_cksum;
- ci->ci_func[byteswap](data, size, &actual_cksum);
+ ci->ci_func[byteswap](abd, size,
+ spa->spa_cksum_tmpls[checksum], &actual_cksum);
}
- info->zbc_expected = expected_cksum;
- info->zbc_actual = actual_cksum;
- info->zbc_checksum_name = ci->ci_name;
- info->zbc_byteswapped = byteswap;
- info->zbc_injected = 0;
- info->zbc_has_cksum = 1;
+ if (info != NULL) {
+ info->zbc_expected = expected_cksum;
+ info->zbc_actual = actual_cksum;
+ info->zbc_checksum_name = ci->ci_name;
+ info->zbc_byteswapped = byteswap;
+ info->zbc_injected = 0;
+ info->zbc_has_cksum = 1;
+ }
if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
return (SET_ERROR(ECKSUM));
- if (zio_injection_enabled && !zio->io_error &&
- (error = zio_handle_fault_injection(zio, ECKSUM)) != 0) {
+ return (0);
+}
- info->zbc_injected = 1;
- return (error);
+int
+zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info)
+{
+ blkptr_t *bp = zio->io_bp;
+ uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum :
+ (BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp)));
+ int error;
+ uint64_t size = (bp == NULL ? zio->io_size :
+ (BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp)));
+ uint64_t offset = zio->io_offset;
+ abd_t *data = zio->io_abd;
+ spa_t *spa = zio->io_spa;
+
+ error = zio_checksum_error_impl(spa, bp, checksum, data, size,
+ offset, info);
+
+ if (zio_injection_enabled && error == 0 && zio->io_error == 0) {
+ error = zio_handle_fault_injection(zio, ECKSUM);
+ if (error != 0)
+ info->zbc_injected = 1;
}
- return (0);
+ return (error);
+}
+
+/*
+ * Called by a spa_t that's about to be deallocated. This steps through
+ * all of the checksum context templates and deallocates any that were
+ * initialized using the algorithm-specific template init function.
+ */
+void
+zio_checksum_templates_free(spa_t *spa)
+{
+ enum zio_checksum checksum;
+ for (checksum = 0; checksum < ZIO_CHECKSUM_FUNCTIONS;
+ checksum++) {
+ if (spa->spa_cksum_tmpls[checksum] != NULL) {
+ zio_checksum_info_t *ci = &zio_checksum_table[checksum];
+
+ VERIFY(ci->ci_tmpl_free != NULL);
+ ci->ci_tmpl_free(spa->spa_cksum_tmpls[checksum]);
+ spa->spa_cksum_tmpls[checksum] = NULL;
+ }
+ }
}
diff --git a/zfs/module/zfs/zio_compress.c b/zfs/module/zfs/zio_compress.c
index 6b8d6c39bd91..7e44d16e403e 100644
--- a/zfs/module/zfs/zio_compress.c
+++ b/zfs/module/zfs/zio_compress.c
@@ -28,7 +28,7 @@
*/
/*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@@ -41,24 +41,23 @@
/*
* Compression vectors.
*/
-
zio_compress_info_t zio_compress_table[ZIO_COMPRESS_FUNCTIONS] = {
- {NULL, NULL, 0, "inherit"},
- {NULL, NULL, 0, "on"},
- {NULL, NULL, 0, "uncompressed"},
- {lzjb_compress, lzjb_decompress, 0, "lzjb"},
- {NULL, NULL, 0, "empty"},
- {gzip_compress, gzip_decompress, 1, "gzip-1"},
- {gzip_compress, gzip_decompress, 2, "gzip-2"},
- {gzip_compress, gzip_decompress, 3, "gzip-3"},
- {gzip_compress, gzip_decompress, 4, "gzip-4"},
- {gzip_compress, gzip_decompress, 5, "gzip-5"},
- {gzip_compress, gzip_decompress, 6, "gzip-6"},
- {gzip_compress, gzip_decompress, 7, "gzip-7"},
- {gzip_compress, gzip_decompress, 8, "gzip-8"},
- {gzip_compress, gzip_decompress, 9, "gzip-9"},
- {zle_compress, zle_decompress, 64, "zle"},
- {lz4_compress_zfs, lz4_decompress_zfs, 0, "lz4"},
+ {"inherit", 0, NULL, NULL},
+ {"on", 0, NULL, NULL},
+ {"uncompressed", 0, NULL, NULL},
+ {"lzjb", 0, lzjb_compress, lzjb_decompress},
+ {"empty", 0, NULL, NULL},
+ {"gzip-1", 1, gzip_compress, gzip_decompress},
+ {"gzip-2", 2, gzip_compress, gzip_decompress},
+ {"gzip-3", 3, gzip_compress, gzip_decompress},
+ {"gzip-4", 4, gzip_compress, gzip_decompress},
+ {"gzip-5", 5, gzip_compress, gzip_decompress},
+ {"gzip-6", 6, gzip_compress, gzip_decompress},
+ {"gzip-7", 7, gzip_compress, gzip_decompress},
+ {"gzip-8", 8, gzip_compress, gzip_decompress},
+ {"gzip-9", 9, gzip_compress, gzip_decompress},
+ {"zle", 64, zle_compress, zle_decompress},
+ {"lz4", 0, lz4_compress_zfs, lz4_decompress_zfs}
};
enum zio_compress
@@ -85,12 +84,26 @@ zio_compress_select(spa_t *spa, enum zio_compress child,
return (result);
}
+/*ARGSUSED*/
+static int
+zio_compress_zeroed_cb(void *data, size_t len, void *private)
+{
+ uint64_t *end = (uint64_t *)((char *)data + len);
+ uint64_t *word;
+
+ for (word = data; word < end; word++)
+ if (*word != 0)
+ return (1);
+
+ return (0);
+}
+
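zio_compress_zeroed_cb() works because abd_iterate_func() stops at the first nonzero callback return, turning "is this buffer all zeroes?" into an early-exit scan across scattered pages. The flat-buffer equivalent, assuming a length that is a multiple of eight bytes as the real callback does:

#include <stddef.h>
#include <stdint.h>

static int
zeroed_cb(const void *data, size_t len)
{
    const uint64_t *w = data;
    const uint64_t *end = (const uint64_t *)((const char *)data + len);

    while (w < end)
        if (*w++ != 0)
            return (1);     /* found data: abort the iteration */
    return (0);
}

int
buf_is_zeroed(const void *buf, size_t len)
{
    /* One chunk here; an ABD would invoke the cb per scatter page. */
    return (zeroed_cb(buf, len) == 0);
}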
size_t
-zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len)
+zio_compress_data(enum zio_compress c, abd_t *src, void *dst, size_t s_len)
{
- uint64_t *word, *word_end;
size_t c_len, d_len;
zio_compress_info_t *ci = &zio_compress_table[c];
+ void *tmp;
ASSERT((uint_t)c < ZIO_COMPRESS_FUNCTIONS);
ASSERT((uint_t)c == ZIO_COMPRESS_EMPTY || ci->ci_compress != NULL);
@@ -99,12 +112,7 @@ zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len)
* If the data is all zeroes, we don't even need to allocate
* a block for it. We indicate this by returning zero size.
*/
- word_end = (uint64_t *)((char *)src + s_len);
- for (word = src; word < word_end; word++)
- if (*word != 0)
- break;
-
- if (word == word_end)
+ if (abd_iterate_func(src, 0, s_len, zio_compress_zeroed_cb, NULL) == 0)
return (0);
if (c == ZIO_COMPRESS_EMPTY)
@@ -112,7 +120,11 @@ zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len)
/* Compress at least 12.5% */
d_len = s_len - (s_len >> 3);
- c_len = ci->ci_compress(src, dst, s_len, d_len, ci->ci_level);
+
+ /* No compression algorithms can read from ABDs directly */
+ tmp = abd_borrow_buf_copy(src, s_len);
+ c_len = ci->ci_compress(tmp, dst, s_len, d_len, ci->ci_level);
+ abd_return_buf(src, tmp, s_len);
if (c_len > d_len)
return (s_len);
@@ -122,13 +134,23 @@ zio_compress_data(enum zio_compress c, void *src, void *dst, size_t s_len)
}
int
-zio_decompress_data(enum zio_compress c, void *src, void *dst,
+zio_decompress_data_buf(enum zio_compress c, void *src, void *dst,
size_t s_len, size_t d_len)
{
zio_compress_info_t *ci = &zio_compress_table[c];
-
if ((uint_t)c >= ZIO_COMPRESS_FUNCTIONS || ci->ci_decompress == NULL)
return (SET_ERROR(EINVAL));
return (ci->ci_decompress(src, dst, s_len, d_len, ci->ci_level));
}
+
+int
+zio_decompress_data(enum zio_compress c, abd_t *src, void *dst,
+ size_t s_len, size_t d_len)
+{
+ void *tmp = abd_borrow_buf_copy(src, s_len);
+ int ret = zio_decompress_data_buf(c, tmp, dst, s_len, d_len);
+ abd_return_buf(src, tmp, s_len);
+
+ return (ret);
+}
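The borrow/return pairs above exist because none of the compressors can read scattered ABDs: abd_borrow_buf_copy() hands the caller a contiguous view (copying only if the ABD is scattered), and abd_return_buf() hands it back without a copy-back since the borrower only read it. A sketch of that contract over a trivially flat "ABD" (hypothetical type, and it always copies):

#include <stdlib.h>
#include <string.h>

typedef struct { void *buf; size_t size; } flat_abd_t;

void *
borrow_buf_copy(flat_abd_t *abd, size_t n)
{
    void *tmp = malloc(n);

    memcpy(tmp, abd->buf, n);   /* give the caller a linear view */
    return (tmp);
}

void
return_buf(flat_abd_t *abd, void *tmp, size_t n)
{
    (void) abd; (void) n;       /* read-only borrow: no copy-back */
    free(tmp);
}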
diff --git a/zfs/module/zfs/zio_inject.c b/zfs/module/zfs/zio_inject.c
index 40b507a0b6d8..4a4d431e33bc 100644
--- a/zfs/module/zfs/zio_inject.c
+++ b/zfs/module/zfs/zio_inject.c
@@ -20,7 +20,8 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2017, Intel Corporation.
*/
/*
@@ -41,7 +42,7 @@
*/
#include <sys/arc.h>
-#include <sys/zio_impl.h>
+#include <sys/zio.h>
#include <sys/zfs_ioctl.h>
#include <sys/vdev_impl.h>
#include <sys/dmu_objset.h>
@@ -49,17 +50,75 @@
uint32_t zio_injection_enabled = 0;
+/*
+ * Data describing each zinject handler registered on the system, and
+ * contains the list node linking the handler in the global zinject
+ * handler list.
+ */
typedef struct inject_handler {
int zi_id;
spa_t *zi_spa;
zinject_record_t zi_record;
+ uint64_t *zi_lanes;
+ int zi_next_lane;
list_node_t zi_link;
} inject_handler_t;
+/*
+ * List of all zinject handlers registered on the system, protected by
+ * the inject_lock defined below.
+ */
static list_t inject_handlers;
+
+/*
+ * This protects insertion into, and traversal of, the inject handler
+ * list defined above; as well as the inject_delay_count. Any time a
+ * handler is inserted or removed from the list, this lock should be
+ * taken as a RW_WRITER; and any time traversal is done over the list
+ * (without modification to it) this lock should be taken as a RW_READER.
+ */
static krwlock_t inject_lock;
+
+/*
+ * This holds the number of zinject delay handlers that have been
+ * registered on the system. It is protected by the inject_lock defined
+ * above. Thus modifications to this count must be a RW_WRITER of the
+ * inject_lock, and reads of this count must be (at least) a RW_READER
+ * of the lock.
+ */
+static int inject_delay_count = 0;
+
+/*
+ * This lock is used only in zio_handle_io_delay(), refer to the comment
+ * in that function for more details.
+ */
+static kmutex_t inject_delay_mtx;
+
+/*
+ * Used to assign unique identifying numbers to each new zinject handler.
+ */
static int inject_next_id = 1;
+/*
+ * Test if the requested frequency was triggered
+ */
+static boolean_t
+freq_triggered(uint32_t frequency)
+{
+ /*
+ * zero implies always (100%)
+ */
+ if (frequency == 0)
+ return (B_TRUE);
+
+ /*
+ * Note: we still handle legacy (unscaled) frequency values
+ */
+ uint32_t maximum = (frequency <= 100) ? 100 : ZI_PERCENTAGE_MAX;
+
+ return (spa_get_random(maximum) < frequency);
+}
+
/*
* Returns true if the given record matches the I/O in progress.
*/
@@ -75,8 +134,7 @@ zio_match_handler(zbookmark_phys_t *zb, uint64_t type,
record->zi_object == DMU_META_DNODE_OBJECT) {
if (record->zi_type == DMU_OT_NONE ||
type == record->zi_type)
- return (record->zi_freq == 0 ||
- spa_get_random(100) < record->zi_freq);
+ return (freq_triggered(record->zi_freq));
else
return (B_FALSE);
}
@@ -90,8 +148,7 @@ zio_match_handler(zbookmark_phys_t *zb, uint64_t type,
zb->zb_blkid >= record->zi_start &&
zb->zb_blkid <= record->zi_end &&
error == record->zi_error)
- return (record->zi_freq == 0 ||
- spa_get_random(100) < record->zi_freq);
+ return (freq_triggered(record->zi_freq));
return (B_FALSE);
}
@@ -255,6 +312,12 @@ zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
continue;
if (handler->zi_record.zi_error == error) {
+ /*
+ * limit error injection if requested
+ */
+ if (!freq_triggered(handler->zi_record.zi_freq))
+ continue;
+
/*
* For a failed open, pretend like the device
* has gone away.
@@ -361,32 +424,171 @@ spa_handle_ignored_writes(spa_t *spa)
rw_exit(&inject_lock);
}
-uint64_t
+hrtime_t
zio_handle_io_delay(zio_t *zio)
{
vdev_t *vd = zio->io_vd;
+ inject_handler_t *min_handler = NULL;
+ hrtime_t min_target = 0;
inject_handler_t *handler;
- uint64_t seconds = 0;
-
- if (zio_injection_enabled == 0)
- return (0);
+ hrtime_t idle;
+ hrtime_t busy;
+ hrtime_t target;
rw_enter(&inject_lock, RW_READER);
- for (handler = list_head(&inject_handlers); handler != NULL;
- handler = list_next(&inject_handlers, handler)) {
+ /*
+ * inject_delay_count is a subset of zio_injection_enabled that
+ * is only incremented for delay handlers. These checks are
+ * mainly added to remind the reader why we're not explicitly
+ * checking zio_injection_enabled like the other functions.
+ */
+ IMPLY(inject_delay_count > 0, zio_injection_enabled > 0);
+ IMPLY(zio_injection_enabled == 0, inject_delay_count == 0);
+ /*
+ * If there aren't any inject delay handlers registered, then we
+ * can short circuit and simply return 0 here. A value of zero
+ * informs zio_delay_interrupt() that this request should not be
+ * delayed. This short circuit keeps us from acquiring the
+ * inject_delay_mtx unnecessarily.
+ */
+ if (inject_delay_count == 0) {
+ rw_exit(&inject_lock);
+ return (0);
+ }
+
+ /*
+ * Each inject handler has a number of "lanes" associated with
+ * it. Each lane is able to handle requests independently of one
+ * another, and at a latency defined by the inject handler
+ * record's zi_timer field. Thus if a handler is configured with
+ * a single lane with a 10ms latency, it will delay requests
+ * such that only a single request is completed every 10ms. So,
+ * if more than one request is attempted per each 10ms interval,
+ * the average latency of the requests will be greater than
+ * 10ms; but if only a single request is submitted each 10ms
+ * interval the average latency will be 10ms.
+ *
+ * We need to acquire this mutex to prevent multiple concurrent
+ * threads being assigned to the same lane of a given inject
+ * handler. The mutex allows us to perform the following two
+ * operations atomically:
+ *
+ * 1. determine the minimum handler and minimum target
+ * value of all the possible handlers
+ * 2. update that minimum handler's lane array
+ *
+ * Without atomicity, two (or more) threads could pick the same
+ * lane in step (1), and then conflict with each other in step
+ * (2). This could allow a single lane handler to process
+ * multiple requests simultaneously, which shouldn't be possible.
+ */
+ mutex_enter(&inject_delay_mtx);
+
+ for (handler = list_head(&inject_handlers);
+ handler != NULL; handler = list_next(&inject_handlers, handler)) {
if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO)
continue;
- if (vd->vdev_guid == handler->zi_record.zi_guid) {
- seconds = handler->zi_record.zi_timer;
- break;
+ if (!freq_triggered(handler->zi_record.zi_freq))
+ continue;
+
+ if (vd->vdev_guid != handler->zi_record.zi_guid)
+ continue;
+
+ /*
+ * Defensive; should never happen as the array allocation
+ * occurs prior to inserting this handler on the list.
+ */
+ ASSERT3P(handler->zi_lanes, !=, NULL);
+
+ /*
+ * This should never happen, the zinject command should
+ * prevent a user from setting an IO delay with zero lanes.
+ */
+ ASSERT3U(handler->zi_record.zi_nlanes, !=, 0);
+
+ ASSERT3U(handler->zi_record.zi_nlanes, >,
+ handler->zi_next_lane);
+
+ /*
+ * We want to issue this IO to the lane that will become
+ * idle the soonest, so we compare the soonest this
+ * specific handler can complete the IO with all other
+ * handlers, to find the lowest value of all possible
+ * lanes. We then use this lane to submit the request.
+ *
+ * Since each handler has a constant value for its
+ * delay, we can just use the "next" lane for that
+ * handler; as it will always be the lane with the
+ * lowest value for that particular handler (i.e. the
+ * lane that will become idle the soonest). This saves a
+ * scan of each handler's lanes array.
+ *
+ * There are two cases to consider when determining when
+ * this specific IO request should complete. If this
+ * lane is idle, we want to "submit" the request now so
+ * it will complete after zi_timer milliseconds. Thus,
+ * we set the target to now + zi_timer.
+ *
+ * If the lane is busy, we want this request to complete
+ * zi_timer milliseconds after the lane becomes idle.
+ * Since the 'zi_lanes' array holds the time at which
+ * each lane will become idle, we use that value to
+ * determine when this request should complete.
+ */
+ idle = handler->zi_record.zi_timer + gethrtime();
+ busy = handler->zi_record.zi_timer +
+ handler->zi_lanes[handler->zi_next_lane];
+ target = MAX(idle, busy);
+
+ if (min_handler == NULL) {
+ min_handler = handler;
+ min_target = target;
+ continue;
}
+ ASSERT3P(min_handler, !=, NULL);
+ ASSERT3U(min_target, !=, 0);
+
+ /*
+ * We don't yet increment the "next lane" variable since
+ * we still might find a lower value lane in another
+ * handler during any remaining iterations. Once we're
+ * sure we've selected the absolute minimum, we'll claim
+ * the lane and increment the handler's "next lane"
+ * field below.
+ */
+
+ if (target < min_target) {
+ min_handler = handler;
+ min_target = target;
+ }
+ }
+
+ /*
+ * 'min_handler' will be NULL if no IO delays are registered for
+ * this vdev, otherwise it will point to the handler containing
+ * the lane that will become idle the soonest.
+ */
+ if (min_handler != NULL) {
+ ASSERT3U(min_target, !=, 0);
+ min_handler->zi_lanes[min_handler->zi_next_lane] = min_target;
+
+ /*
+ * If we've used all possible lanes for this handler,
+ * loop back and start using the first lane again;
+ * otherwise, just increment the lane index.
+ */
+ min_handler->zi_next_lane = (min_handler->zi_next_lane + 1) %
+ min_handler->zi_record.zi_nlanes;
}
+
+ mutex_exit(&inject_delay_mtx);
rw_exit(&inject_lock);
- return (seconds);
+
+ return (min_target);
}
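
The lane-selection arithmetic above reduces to a few lines. As a minimal
standalone sketch (user-space stand-ins for the kernel's gethrtime() and the
handler fields; not the kernel code itself):

	#include <stdint.h>
	#include <time.h>

	typedef uint64_t hrtime_t;

	/* Stand-in for the kernel's gethrtime(): monotonic time in ns. */
	static hrtime_t
	gethrtime(void)
	{
		struct timespec ts;

		clock_gettime(CLOCK_MONOTONIC, &ts);
		return ((hrtime_t)ts.tv_sec * 1000000000ULL + ts.tv_nsec);
	}

	#define	MAX(a, b)	((a) > (b) ? (a) : (b))

	/*
	 * Completion target for one handler's next lane: if the lane is
	 * already idle, the IO completes zi_timer after "now"; if it is
	 * still busy, it completes zi_timer after the lane goes idle.
	 */
	static hrtime_t
	lane_target(hrtime_t zi_timer, hrtime_t lane_idle_time)
	{
		hrtime_t idle = zi_timer + gethrtime();
		hrtime_t busy = zi_timer + lane_idle_time;

		return (MAX(idle, busy));
	}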
/*
@@ -410,6 +612,24 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
if ((error = spa_reset(name)) != 0)
return (error);
+ if (record->zi_cmd == ZINJECT_DELAY_IO) {
+ /*
+ * A value of zero for the number of lanes or for the
+ * delay time doesn't make sense.
+ */
+ if (record->zi_timer == 0 || record->zi_nlanes == 0)
+ return (SET_ERROR(EINVAL));
+
+ /*
+ * The number of lanes is directly mapped to the size of
+ * an array used by the handler. Thus, to ensure the
+		 * user doesn't trigger an allocation that's "too large",
+		 * we cap the number of lanes here.
+ */
+ if (record->zi_nlanes >= UINT16_MAX)
+ return (SET_ERROR(EINVAL));
+ }
+
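
These bounds also pin down the throughput a delay handler can model: each
lane completes one IO per zi_timer interval, so nlanes lanes with delay_ms of
injected latency admit roughly nlanes * 1000 / delay_ms IOs per second. A
sketch of that back-of-the-envelope calculation (hypothetical helper, not
part of the patch):

	#include <stdint.h>

	/*
	 * With nlanes lanes and delay_ms of injected latency per IO, the
	 * handler admits at most nlanes * 1000 / delay_ms IOs per second,
	 * e.g. 1 lane at 10ms is ~100 IOps, 4 lanes at 10ms ~400 IOps.
	 * delay_ms is nonzero thanks to the EINVAL check above.
	 */
	static uint64_t
	max_iops(uint32_t nlanes, uint64_t delay_ms)
	{
		return ((uint64_t)nlanes * 1000 / delay_ms);
	}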
if (!(flags & ZINJECT_NULL)) {
/*
* spa_inject_ref() will add an injection reference, which will
@@ -421,13 +641,36 @@ zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);
+ handler->zi_spa = spa;
+ handler->zi_record = *record;
+
+ if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) {
+ handler->zi_lanes = kmem_zalloc(
+ sizeof (*handler->zi_lanes) *
+ handler->zi_record.zi_nlanes, KM_SLEEP);
+ handler->zi_next_lane = 0;
+ } else {
+ handler->zi_lanes = NULL;
+ handler->zi_next_lane = 0;
+ }
+
rw_enter(&inject_lock, RW_WRITER);
+ /*
+ * We can't move this increment into the conditional
+ * above because we need to hold the RW_WRITER lock of
+ * inject_lock, and we don't want to hold that while
+ * allocating the handler's zi_lanes array.
+ */
+ if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) {
+ ASSERT3S(inject_delay_count, >=, 0);
+ inject_delay_count++;
+ ASSERT3S(inject_delay_count, >, 0);
+ }
+
*id = handler->zi_id = inject_next_id++;
- handler->zi_spa = spa;
- handler->zi_record = *record;
list_insert_tail(&inject_handlers, handler);
- atomic_add_32(&zio_injection_enabled, 1);
+ atomic_inc_32(&zio_injection_enabled);
rw_exit(&inject_lock);
}
@@ -503,12 +746,26 @@ zio_clear_fault(int id)
return (SET_ERROR(ENOENT));
}
+ if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) {
+ ASSERT3S(inject_delay_count, >, 0);
+ inject_delay_count--;
+ ASSERT3S(inject_delay_count, >=, 0);
+ }
+
list_remove(&inject_handlers, handler);
rw_exit(&inject_lock);
+ if (handler->zi_record.zi_cmd == ZINJECT_DELAY_IO) {
+ ASSERT3P(handler->zi_lanes, !=, NULL);
+ kmem_free(handler->zi_lanes, sizeof (*handler->zi_lanes) *
+ handler->zi_record.zi_nlanes);
+ } else {
+ ASSERT3P(handler->zi_lanes, ==, NULL);
+ }
+
spa_inject_delref(handler->zi_spa);
kmem_free(handler, sizeof (inject_handler_t));
- atomic_add_32(&zio_injection_enabled, -1);
+ atomic_dec_32(&zio_injection_enabled);
return (0);
}
@@ -517,6 +774,7 @@ void
zio_inject_init(void)
{
rw_init(&inject_lock, NULL, RW_DEFAULT, NULL);
+ mutex_init(&inject_delay_mtx, NULL, MUTEX_DEFAULT, NULL);
list_create(&inject_handlers, sizeof (inject_handler_t),
offsetof(inject_handler_t, zi_link));
}
@@ -525,6 +783,7 @@ void
zio_inject_fini(void)
{
list_destroy(&inject_handlers);
+ mutex_destroy(&inject_delay_mtx);
rw_destroy(&inject_lock);
}
diff --git a/zfs/module/zfs/zpl_ctldir.c b/zfs/module/zfs/zpl_ctldir.c
index 50fb06bdef80..1c5fb34e656a 100644
--- a/zfs/module/zfs/zpl_ctldir.c
+++ b/zfs/module/zfs/zpl_ctldir.c
@@ -52,10 +52,10 @@ zpl_common_open(struct inode *ip, struct file *filp)
static int
zpl_root_iterate(struct file *filp, struct dir_context *ctx)
{
- zfs_sb_t *zsb = ITOZSB(filp->f_path.dentry->d_inode);
+ zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp));
int error = 0;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
if (!dir_emit_dots(filp, ctx))
goto out;
@@ -76,7 +76,7 @@ zpl_root_iterate(struct file *filp, struct dir_context *ctx)
ctx->pos++;
}
out:
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (error);
}
@@ -250,14 +250,14 @@ zpl_snapdir_lookup(struct inode *dip, struct dentry *dentry,
static int
zpl_snapdir_iterate(struct file *filp, struct dir_context *ctx)
{
- zfs_sb_t *zsb = ITOZSB(filp->f_path.dentry->d_inode);
+ zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp));
fstrans_cookie_t cookie;
char snapname[MAXNAMELEN];
boolean_t case_conflict;
uint64_t id, pos;
int error = 0;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
cookie = spl_fstrans_mark();
if (!dir_emit_dots(filp, ctx))
@@ -265,10 +265,10 @@ zpl_snapdir_iterate(struct file *filp, struct dir_context *ctx)
pos = ctx->pos;
while (error == 0) {
- dsl_pool_config_enter(dmu_objset_pool(zsb->z_os), FTAG);
- error = -dmu_snapshot_list_next(zsb->z_os, MAXNAMELEN,
+ dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
+ error = -dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN,
snapname, &id, &pos, &case_conflict);
- dsl_pool_config_exit(dmu_objset_pool(zsb->z_os), FTAG);
+ dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
if (error)
goto out;
@@ -280,7 +280,7 @@ zpl_snapdir_iterate(struct file *filp, struct dir_context *ctx)
}
out:
spl_fstrans_unmark(cookie);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
if (error == -ENOENT)
return (0);
@@ -380,14 +380,15 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *ip = path->dentry->d_inode;
- zfs_sb_t *zsb = ITOZSB(path->dentry->d_inode);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
+
+ ZFS_ENTER(zfsvfs);
+ generic_fillattr(ip, stat);
- ZFS_ENTER(zsb);
- generic_fillattr(path->dentry->d_inode, stat);
stat->nlink = stat->size = 2;
- stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zsb->z_os);
+ stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
stat->atime = current_time(ip);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
@@ -466,19 +467,19 @@ zpl_shares_iterate(struct file *filp, struct dir_context *ctx)
{
fstrans_cookie_t cookie;
cred_t *cr = CRED();
- zfs_sb_t *zsb = ITOZSB(filp->f_path.dentry->d_inode);
+ zfsvfs_t *zfsvfs = ITOZSB(file_inode(filp));
znode_t *dzp;
int error = 0;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
cookie = spl_fstrans_mark();
- if (zsb->z_shares_dir == 0) {
+ if (zfsvfs->z_shares_dir == 0) {
dir_emit_dots(filp, ctx);
goto out;
}
- error = -zfs_zget(zsb, zsb->z_shares_dir, &dzp);
+ error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
if (error)
goto out;
@@ -489,7 +490,7 @@ zpl_shares_iterate(struct file *filp, struct dir_context *ctx)
iput(ZTOI(dzp));
out:
spl_fstrans_unmark(cookie);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
ASSERT3S(error, <=, 0);
return (error);
@@ -515,27 +516,27 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *ip = path->dentry->d_inode;
- zfs_sb_t *zsb = ITOZSB(ip);
+ zfsvfs_t *zfsvfs = ITOZSB(ip);
znode_t *dzp;
int error;
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
- if (zsb->z_shares_dir == 0) {
+ if (zfsvfs->z_shares_dir == 0) {
generic_fillattr(path->dentry->d_inode, stat);
stat->nlink = stat->size = 2;
stat->atime = current_time(ip);
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
return (0);
}
- error = -zfs_zget(zsb, zsb->z_shares_dir, &dzp);
+ error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
if (error == 0) {
error = -zfs_getattr_fast(ZTOI(dzp), stat);
iput(ZTOI(dzp));
}
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
ASSERT3S(error, <=, 0);
return (error);
diff --git a/zfs/module/zfs/zpl_export.c b/zfs/module/zfs/zpl_export.c
index 6f051a0485a1..a264d664cbf1 100644
--- a/zfs/module/zfs/zpl_export.c
+++ b/zfs/module/zfs/zpl_export.c
@@ -37,6 +37,7 @@ zpl_encode_fh(struct inode *ip, __u32 *fh, int *max_len, struct inode *parent)
#else
zpl_encode_fh(struct dentry *dentry, __u32 *fh, int *max_len, int connectable)
{
+ /* CSTYLED */
struct inode *ip = dentry->d_inode;
#endif /* HAVE_ENCODE_FH_WITH_INODE */
fstrans_cookie_t cookie;
diff --git a/zfs/module/zfs/zpl_file.c b/zfs/module/zfs/zpl_file.c
index 8781d8ca9605..4805abe695f2 100644
--- a/zfs/module/zfs/zpl_file.c
+++ b/zfs/module/zfs/zpl_file.c
@@ -78,14 +78,13 @@ zpl_release(struct inode *ip, struct file *filp)
static int
zpl_iterate(struct file *filp, struct dir_context *ctx)
{
- struct dentry *dentry = filp->f_path.dentry;
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;
crhold(cr);
cookie = spl_fstrans_mark();
- error = -zfs_readdir(dentry->d_inode, ctx, cr);
+ error = -zfs_readdir(file_inode(filp), ctx, cr);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);
@@ -136,7 +135,7 @@ static int
zpl_aio_fsync(struct kiocb *kiocb, int datasync)
{
struct file *filp = kiocb->ki_filp;
- return (zpl_fsync(filp, filp->f_path.dentry, datasync));
+ return (zpl_fsync(filp, file_dentry(filp), datasync));
}
#endif
@@ -580,7 +579,7 @@ zpl_readpage(struct file *filp, struct page *pp)
*/
static int
zpl_readpages(struct file *filp, struct address_space *mapping,
- struct list_head *pages, unsigned nr_pages)
+ struct list_head *pages, unsigned nr_pages)
{
return (read_cache_pages(mapping, pages,
(filler_t *)zpl_readpage, filp));
@@ -606,14 +605,14 @@ static int
zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
znode_t *zp = ITOZ(mapping->host);
- zfs_sb_t *zsb = ITOZSB(mapping->host);
+ zfsvfs_t *zfsvfs = ITOZSB(mapping->host);
enum writeback_sync_modes sync_mode;
int result;
- ZFS_ENTER(zsb);
- if (zsb->z_os->os_sync == ZFS_SYNC_ALWAYS)
+ ZFS_ENTER(zfsvfs);
+ if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS)
wbc->sync_mode = WB_SYNC_ALL;
- ZFS_EXIT(zsb);
+ ZFS_EXIT(zfsvfs);
sync_mode = wbc->sync_mode;
/*
@@ -626,11 +625,11 @@ zpl_writepages(struct address_space *mapping, struct writeback_control *wbc)
wbc->sync_mode = WB_SYNC_NONE;
result = write_cache_pages(mapping, wbc, zpl_putpage, mapping);
if (sync_mode != wbc->sync_mode) {
- ZFS_ENTER(zsb);
+ ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
- if (zsb->z_log != NULL)
- zil_commit(zsb->z_log, zp->z_id);
- ZFS_EXIT(zsb);
+ if (zfsvfs->z_log != NULL)
+ zil_commit(zfsvfs->z_log, zp->z_id);
+ ZFS_EXIT(zfsvfs);
/*
* We need to call write_cache_pages() again (we can't just
@@ -716,7 +715,7 @@ zpl_fallocate_common(struct inode *ip, int mode, loff_t offset, loff_t len)
static long
zpl_fallocate(struct file *filp, int mode, loff_t offset, loff_t len)
{
- return zpl_fallocate_common(filp->f_path.dentry->d_inode,
+ return zpl_fallocate_common(file_inode(filp),
mode, offset, len);
}
#endif /* HAVE_FILE_FALLOCATE */
diff --git a/zfs/module/zfs/zpl_inode.c b/zfs/module/zfs/zpl_inode.c
index 18401fe3084e..3b5643d09177 100644
--- a/zfs/module/zfs/zpl_inode.c
+++ b/zfs/module/zfs/zpl_inode.c
@@ -48,7 +48,7 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
pathname_t *ppn = NULL;
pathname_t pn;
int zfs_flags = 0;
- zfs_sb_t *zsb = dentry->d_sb->s_fs_info;
+ zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
if (dlen(dentry) >= ZAP_MAXNAMELEN)
return (ERR_PTR(-ENAMETOOLONG));
@@ -57,10 +57,9 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
cookie = spl_fstrans_mark();
/* If we are a case insensitive fs, we need the real name */
- if (zsb->z_case == ZFS_CASE_INSENSITIVE) {
+ if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
zfs_flags = FIGNORECASE;
- pn.pn_bufsize = ZFS_MAXNAMELEN;
- pn.pn_buf = kmem_zalloc(ZFS_MAXNAMELEN, KM_SLEEP);
+ pn_alloc(&pn);
ppn = &pn;
}
@@ -83,7 +82,7 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
* Fall through if the error is not ENOENT. Also free memory.
*/
if (ppn) {
- kmem_free(pn.pn_buf, ZFS_MAXNAMELEN);
+ pn_free(ppn);
if (error == -ENOENT)
return (NULL);
}
@@ -109,7 +108,7 @@ zpl_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
ci_name.len = strlen(pn.pn_buf);
new_dentry = d_add_ci(dentry, ip, &ci_name);
}
- kmem_free(pn.pn_buf, ZFS_MAXNAMELEN);
+ pn_free(ppn);
return (new_dentry);
} else {
return (d_splice_alias(ip, dentry));
@@ -161,7 +160,7 @@ zpl_create(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
error = zpl_init_acl(ip, dir);
if (error)
- (void) zfs_remove(dir, dname(dentry), cr);
+ (void) zfs_remove(dir, dname(dentry), cr, 0);
}
spl_fstrans_unmark(cookie);
@@ -204,7 +203,7 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
error = zpl_init_acl(ip, dir);
if (error)
- (void) zfs_remove(dir, dname(dentry), cr);
+ (void) zfs_remove(dir, dname(dentry), cr, 0);
}
spl_fstrans_unmark(cookie);
@@ -215,23 +214,62 @@ zpl_mknod(struct inode *dir, struct dentry *dentry, zpl_umode_t mode,
return (error);
}
+#ifdef HAVE_TMPFILE
+static int
+zpl_tmpfile(struct inode *dir, struct dentry *dentry, zpl_umode_t mode)
+{
+ cred_t *cr = CRED();
+ struct inode *ip;
+ vattr_t *vap;
+ int error;
+ fstrans_cookie_t cookie;
+
+ crhold(cr);
+ vap = kmem_zalloc(sizeof (vattr_t), KM_SLEEP);
+ zpl_vap_init(vap, dir, mode, cr);
+
+ cookie = spl_fstrans_mark();
+ error = -zfs_tmpfile(dir, vap, 0, mode, &ip, cr, 0, NULL);
+ if (error == 0) {
+		/* d_tmpfile() calls drop_nlink(), so set the link count first */
+ set_nlink(ip, 1);
+ d_tmpfile(dentry, ip);
+
+ error = zpl_xattr_security_init(ip, dir, &dentry->d_name);
+ if (error == 0)
+ error = zpl_init_acl(ip, dir);
+ /*
+		 * No need to handle the error here; the file is already
+		 * in the unlinked set.
+ */
+ }
+
+ spl_fstrans_unmark(cookie);
+ kmem_free(vap, sizeof (vattr_t));
+ crfree(cr);
+ ASSERT3S(error, <=, 0);
+
+ return (error);
+}
+#endif
+
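
The new .tmpfile hook (wired into zpl_dir_inode_operations below) is what
services O_TMPFILE opens from user space. A minimal consumer-side sketch
(standard Linux open(2) usage; /tank/dir is a hypothetical ZFS directory):

	#define	_GNU_SOURCE
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int
	main(void)
	{
		/*
		 * Create an unnamed file in /tank/dir; it is reclaimed on
		 * close unless linked into the namespace via linkat(2).
		 */
		int fd = open("/tank/dir", O_TMPFILE | O_RDWR, 0600);

		if (fd < 0) {
			perror("O_TMPFILE");
			return (1);
		}
		(void) write(fd, "scratch", 7);
		(void) close(fd);
		return (0);
	}

On the kernel side this lands in zfs_tmpfile(), which creates the file
directly in the unlinked set, which is why the error path above needs no
explicit cleanup.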
static int
zpl_unlink(struct inode *dir, struct dentry *dentry)
{
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;
- zfs_sb_t *zsb = dentry->d_sb->s_fs_info;
+ zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
crhold(cr);
cookie = spl_fstrans_mark();
- error = -zfs_remove(dir, dname(dentry), cr);
+ error = -zfs_remove(dir, dname(dentry), cr, 0);
/*
* For a CI FS we must invalidate the dentry to prevent the
* creation of negative entries.
*/
- if (error == 0 && zsb->z_case == ZFS_CASE_INSENSITIVE)
+ if (error == 0 && zfsvfs->z_case == ZFS_CASE_INSENSITIVE)
d_invalidate(dentry);
spl_fstrans_unmark(cookie);
@@ -276,12 +314,12 @@ zpl_mkdir(struct inode *dir, struct dentry *dentry, zpl_umode_t mode)
}
static int
-zpl_rmdir(struct inode * dir, struct dentry *dentry)
+zpl_rmdir(struct inode *dir, struct dentry *dentry)
{
cred_t *cr = CRED();
int error;
fstrans_cookie_t cookie;
- zfs_sb_t *zsb = dentry->d_sb->s_fs_info;
+ zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
crhold(cr);
cookie = spl_fstrans_mark();
@@ -291,7 +329,7 @@ zpl_rmdir(struct inode * dir, struct dentry *dentry)
* For a CI FS we must invalidate the dentry to prevent the
* creation of negative entries.
*/
- if (error == 0 && zsb->z_case == ZFS_CASE_INSENSITIVE)
+ if (error == 0 && zfsvfs->z_case == ZFS_CASE_INSENSITIVE)
d_invalidate(dentry);
spl_fstrans_unmark(cookie);
@@ -347,7 +385,8 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
vap->va_ctime = ia->ia_ctime;
if (vap->va_mask & ATTR_ATIME)
- ip->i_atime = ia->ia_atime;
+ ip->i_atime = timespec_trunc(ia->ia_atime,
+ ip->i_sb->s_time_gran);
cookie = spl_fstrans_mark();
error = -zfs_setattr(ip, vap, 0, cr);
@@ -413,7 +452,7 @@ zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
error = zpl_xattr_security_init(ip, dir, &dentry->d_name);
if (error)
- (void) zfs_remove(dir, dname(dentry), cr);
+ (void) zfs_remove(dir, dname(dentry), cr, 0);
}
spl_fstrans_unmark(cookie);
@@ -561,7 +600,7 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
igrab(ip); /* Use ihold() if available */
cookie = spl_fstrans_mark();
- error = -zfs_link(dir, ip, dname(dentry), cr);
+ error = -zfs_link(dir, ip, dname(dentry), cr, 0);
if (error) {
iput(ip);
goto out;
@@ -625,7 +664,8 @@ zpl_revalidate(struct dentry *dentry, struct nameidata *nd)
zpl_revalidate(struct dentry *dentry, unsigned int flags)
{
#endif /* HAVE_D_REVALIDATE_NAMEIDATA */
- zfs_sb_t *zsb = dentry->d_sb->s_fs_info;
+ /* CSTYLED */
+ zfsvfs_t *zfsvfs = dentry->d_sb->s_fs_info;
int error;
if (flags & LOOKUP_RCU)
@@ -635,12 +675,12 @@ zpl_revalidate(struct dentry *dentry, unsigned int flags)
* Automounted snapshots rely on periodic dentry revalidation
* to defer snapshots from being automatically unmounted.
*/
- if (zsb->z_issnap) {
- if (time_after(jiffies, zsb->z_snap_defer_time +
+ if (zfsvfs->z_issnap) {
+ if (time_after(jiffies, zfsvfs->z_snap_defer_time +
MAX(zfs_expire_snapshot * HZ / 2, HZ))) {
- zsb->z_snap_defer_time = jiffies;
- zfsctl_snapshot_unmount_delay(zsb->z_os->os_spa,
- dmu_objset_id(zsb->z_os), zfs_expire_snapshot);
+ zfsvfs->z_snap_defer_time = jiffies;
+ zfsctl_snapshot_unmount_delay(zfsvfs->z_os->os_spa,
+ dmu_objset_id(zfsvfs->z_os), zfs_expire_snapshot);
}
}
@@ -651,7 +691,7 @@ zpl_revalidate(struct dentry *dentry, unsigned int flags)
*/
if (dentry->d_inode == NULL) {
spin_lock(&dentry->d_lock);
- error = time_before(dentry->d_time, zsb->z_rollback_time);
+ error = time_before(dentry->d_time, zfsvfs->z_rollback_time);
spin_unlock(&dentry->d_lock);
if (error)
@@ -710,6 +750,9 @@ const struct inode_operations zpl_dir_inode_operations = {
.rename = zpl_rename2,
#else
.rename = zpl_rename,
+#endif
+#ifdef HAVE_TMPFILE
+ .tmpfile = zpl_tmpfile,
#endif
.setattr = zpl_setattr,
.getattr = zpl_getattr,
diff --git a/zfs/module/zfs/zpl_super.c b/zfs/module/zfs/zpl_super.c
index 91c36c9e3675..b6ef60277664 100644
--- a/zfs/module/zfs/zpl_super.c
+++ b/zfs/module/zfs/zpl_super.c
@@ -184,211 +184,15 @@ zpl_statfs(struct dentry *dentry, struct kstatfs *statp)
return (error);
}
-enum {
- TOKEN_RO,
- TOKEN_RW,
- TOKEN_SETUID,
- TOKEN_NOSETUID,
- TOKEN_EXEC,
- TOKEN_NOEXEC,
- TOKEN_DEVICES,
- TOKEN_NODEVICES,
- TOKEN_DIRXATTR,
- TOKEN_SAXATTR,
- TOKEN_XATTR,
- TOKEN_NOXATTR,
- TOKEN_ATIME,
- TOKEN_NOATIME,
- TOKEN_RELATIME,
- TOKEN_NORELATIME,
- TOKEN_NBMAND,
- TOKEN_NONBMAND,
- TOKEN_MNTPOINT,
- TOKEN_LAST,
-};
-
-static const match_table_t zpl_tokens = {
- { TOKEN_RO, MNTOPT_RO },
- { TOKEN_RW, MNTOPT_RW },
- { TOKEN_SETUID, MNTOPT_SETUID },
- { TOKEN_NOSETUID, MNTOPT_NOSETUID },
- { TOKEN_EXEC, MNTOPT_EXEC },
- { TOKEN_NOEXEC, MNTOPT_NOEXEC },
- { TOKEN_DEVICES, MNTOPT_DEVICES },
- { TOKEN_NODEVICES, MNTOPT_NODEVICES },
- { TOKEN_DIRXATTR, MNTOPT_DIRXATTR },
- { TOKEN_SAXATTR, MNTOPT_SAXATTR },
- { TOKEN_XATTR, MNTOPT_XATTR },
- { TOKEN_NOXATTR, MNTOPT_NOXATTR },
- { TOKEN_ATIME, MNTOPT_ATIME },
- { TOKEN_NOATIME, MNTOPT_NOATIME },
- { TOKEN_RELATIME, MNTOPT_RELATIME },
- { TOKEN_NORELATIME, MNTOPT_NORELATIME },
- { TOKEN_NBMAND, MNTOPT_NBMAND },
- { TOKEN_NONBMAND, MNTOPT_NONBMAND },
- { TOKEN_MNTPOINT, MNTOPT_MNTPOINT "=%s" },
- { TOKEN_LAST, NULL },
-};
-
-static int
-zpl_parse_option(char *option, int token, substring_t *args, zfs_mntopts_t *zmo)
-{
- switch (token) {
- case TOKEN_RO:
- zmo->z_readonly = B_TRUE;
- zmo->z_do_readonly = B_TRUE;
- break;
- case TOKEN_RW:
- zmo->z_readonly = B_FALSE;
- zmo->z_do_readonly = B_TRUE;
- break;
- case TOKEN_SETUID:
- zmo->z_setuid = B_TRUE;
- zmo->z_do_setuid = B_TRUE;
- break;
- case TOKEN_NOSETUID:
- zmo->z_setuid = B_FALSE;
- zmo->z_do_setuid = B_TRUE;
- break;
- case TOKEN_EXEC:
- zmo->z_exec = B_TRUE;
- zmo->z_do_exec = B_TRUE;
- break;
- case TOKEN_NOEXEC:
- zmo->z_exec = B_FALSE;
- zmo->z_do_exec = B_TRUE;
- break;
- case TOKEN_DEVICES:
- zmo->z_devices = B_TRUE;
- zmo->z_do_devices = B_TRUE;
- break;
- case TOKEN_NODEVICES:
- zmo->z_devices = B_FALSE;
- zmo->z_do_devices = B_TRUE;
- break;
- case TOKEN_DIRXATTR:
- zmo->z_xattr = ZFS_XATTR_DIR;
- zmo->z_do_xattr = B_TRUE;
- break;
- case TOKEN_SAXATTR:
- zmo->z_xattr = ZFS_XATTR_SA;
- zmo->z_do_xattr = B_TRUE;
- break;
- case TOKEN_XATTR:
- zmo->z_xattr = ZFS_XATTR_DIR;
- zmo->z_do_xattr = B_TRUE;
- break;
- case TOKEN_NOXATTR:
- zmo->z_xattr = ZFS_XATTR_OFF;
- zmo->z_do_xattr = B_TRUE;
- break;
- case TOKEN_ATIME:
- zmo->z_atime = B_TRUE;
- zmo->z_do_atime = B_TRUE;
- break;
- case TOKEN_NOATIME:
- zmo->z_atime = B_FALSE;
- zmo->z_do_atime = B_TRUE;
- break;
- case TOKEN_RELATIME:
- zmo->z_relatime = B_TRUE;
- zmo->z_do_relatime = B_TRUE;
- break;
- case TOKEN_NORELATIME:
- zmo->z_relatime = B_FALSE;
- zmo->z_do_relatime = B_TRUE;
- break;
- case TOKEN_NBMAND:
- zmo->z_nbmand = B_TRUE;
- zmo->z_do_nbmand = B_TRUE;
- break;
- case TOKEN_NONBMAND:
- zmo->z_nbmand = B_FALSE;
- zmo->z_do_nbmand = B_TRUE;
- break;
- case TOKEN_MNTPOINT:
- zmo->z_mntpoint = match_strdup(&args[0]);
- if (zmo->z_mntpoint == NULL)
- return (-ENOMEM);
-
- break;
- default:
- break;
- }
-
- return (0);
-}
-
-/*
- * Parse the mntopts string storing the results in provided zmo argument.
- * If an error occurs the zmo argument will not be modified. The caller
- * needs to set isremount when recycling an existing zfs_mntopts_t.
- */
-static int
-zpl_parse_options(char *osname, char *mntopts, zfs_mntopts_t *zmo,
- boolean_t isremount)
-{
- zfs_mntopts_t *tmp_zmo;
- int error;
-
- tmp_zmo = zfs_mntopts_alloc();
- tmp_zmo->z_osname = strdup(osname);
-
- if (mntopts) {
- substring_t args[MAX_OPT_ARGS];
- char *tmp_mntopts, *p, *t;
- int token;
-
- t = tmp_mntopts = strdup(mntopts);
-
- while ((p = strsep(&t, ",")) != NULL) {
- if (!*p)
- continue;
-
- args[0].to = args[0].from = NULL;
- token = match_token(p, zpl_tokens, args);
- error = zpl_parse_option(p, token, args, tmp_zmo);
- if (error) {
- zfs_mntopts_free(tmp_zmo);
- strfree(tmp_mntopts);
- return (error);
- }
- }
-
- strfree(tmp_mntopts);
- }
-
- if (isremount == B_TRUE) {
- if (zmo->z_osname)
- strfree(zmo->z_osname);
-
- if (zmo->z_mntpoint)
- strfree(zmo->z_mntpoint);
- } else {
- ASSERT3P(zmo->z_osname, ==, NULL);
- ASSERT3P(zmo->z_mntpoint, ==, NULL);
- }
-
- memcpy(zmo, tmp_zmo, sizeof (zfs_mntopts_t));
- kmem_free(tmp_zmo, sizeof (zfs_mntopts_t));
-
- return (0);
-}
-
static int
zpl_remount_fs(struct super_block *sb, int *flags, char *data)
{
- zfs_sb_t *zsb = sb->s_fs_info;
+ zfs_mnt_t zm = { .mnt_osname = NULL, .mnt_data = data };
fstrans_cookie_t cookie;
int error;
- error = zpl_parse_options(zsb->z_mntopts->z_osname, data,
- zsb->z_mntopts, B_TRUE);
- if (error)
- return (error);
-
cookie = spl_fstrans_mark();
- error = -zfs_remount(sb, flags, zsb->z_mntopts);
+ error = -zfs_remount(sb, flags, &zm);
spl_fstrans_unmark(cookie);
ASSERT3S(error, <=, 0);
@@ -396,12 +200,13 @@ zpl_remount_fs(struct super_block *sb, int *flags, char *data)
}
static int
-__zpl_show_options(struct seq_file *seq, zfs_sb_t *zsb)
+__zpl_show_options(struct seq_file *seq, zfsvfs_t *zfsvfs)
{
- seq_printf(seq, ",%s", zsb->z_flags & ZSB_XATTR ? "xattr" : "noxattr");
+ seq_printf(seq, ",%s",
+ zfsvfs->z_flags & ZSB_XATTR ? "xattr" : "noxattr");
#ifdef CONFIG_FS_POSIX_ACL
- switch (zsb->z_acl_type) {
+ switch (zfsvfs->z_acl_type) {
case ZFS_ACLTYPE_POSIXACL:
seq_puts(seq, ",posixacl");
break;
@@ -431,12 +236,12 @@ zpl_show_options(struct seq_file *seq, struct vfsmount *vfsp)
static int
zpl_fill_super(struct super_block *sb, void *data, int silent)
{
- zfs_mntopts_t *zmo = (zfs_mntopts_t *)data;
+ zfs_mnt_t *zm = (zfs_mnt_t *)data;
fstrans_cookie_t cookie;
int error;
cookie = spl_fstrans_mark();
- error = -zfs_domount(sb, zmo, silent);
+ error = -zfs_domount(sb, zm, silent);
spl_fstrans_unmark(cookie);
ASSERT3S(error, <=, 0);
@@ -448,32 +253,18 @@ static struct dentry *
zpl_mount(struct file_system_type *fs_type, int flags,
const char *osname, void *data)
{
- zfs_mntopts_t *zmo = zfs_mntopts_alloc();
- int error;
-
- error = zpl_parse_options((char *)osname, (char *)data, zmo, B_FALSE);
- if (error) {
- zfs_mntopts_free(zmo);
- return (ERR_PTR(error));
- }
+ zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data };
- return (mount_nodev(fs_type, flags, zmo, zpl_fill_super));
+ return (mount_nodev(fs_type, flags, &zm, zpl_fill_super));
}
#else
static int
zpl_get_sb(struct file_system_type *fs_type, int flags,
const char *osname, void *data, struct vfsmount *mnt)
{
- zfs_mntopts_t *zmo = zfs_mntopts_alloc();
- int error;
-
- error = zpl_parse_options((char *)osname, (char *)data, zmo, B_FALSE);
- if (error) {
- zfs_mntopts_free(zmo);
- return (error);
- }
+ zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data };
- return (get_sb_nodev(fs_type, flags, zmo, zpl_fill_super, mnt));
+ return (get_sb_nodev(fs_type, flags, &zm, zpl_fill_super, mnt));
}
#endif /* HAVE_MOUNT_NODEV */
@@ -494,7 +285,7 @@ zpl_prune_sb(int64_t nr_to_scan, void *arg)
struct super_block *sb = (struct super_block *)arg;
int objects = 0;
- (void) -zfs_sb_prune(sb, nr_to_scan, &objects);
+ (void) -zfs_prune(sb, nr_to_scan, &objects);
}
#ifdef HAVE_NR_CACHED_OBJECTS
diff --git a/zfs/module/zfs/zpl_xattr.c b/zfs/module/zfs/zpl_xattr.c
index b74b53a0e135..0c626b122193 100644
--- a/zfs/module/zfs/zpl_xattr.c
+++ b/zfs/module/zfs/zpl_xattr.c
@@ -50,7 +50,7 @@
* are the security.selinux xattrs which are less than 100 bytes and
* exist for every file when xattr labeling is enabled.
*
- * The Linux xattr implemenation has been written to take advantage of
+ * The Linux xattr implementation has been written to take advantage of
* this typical usage. When the dataset property 'xattr=sa' is set,
* then xattrs will be preferentially stored as System Attributes (SA).
* This allows tiny xattrs (~100 bytes) to be stored with the dnode and
@@ -237,7 +237,7 @@ ssize_t
zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
znode_t *zp = ITOZ(dentry->d_inode);
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
xattr_filldir_t xf = { buffer_size, 0, buffer, dentry };
cred_t *cr = CRED();
fstrans_cookie_t cookie;
@@ -245,10 +245,10 @@ zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
crhold(cr);
cookie = spl_fstrans_mark();
- rrm_enter_read(&(zsb)->z_teardown_lock, FTAG);
+ rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG);
rw_enter(&zp->z_xattr_lock, RW_READER);
- if (zsb->z_use_sa && zp->z_is_sa) {
+ if (zfsvfs->z_use_sa && zp->z_is_sa) {
error = zpl_xattr_list_sa(&xf);
if (error)
goto out;
@@ -262,7 +262,7 @@ zpl_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
out:
rw_exit(&zp->z_xattr_lock);
- rrm_exit(&(zsb)->z_teardown_lock, FTAG);
+ rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG);
spl_fstrans_unmark(cookie);
crfree(cr);
@@ -349,12 +349,12 @@ __zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size,
cred_t *cr)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
int error;
ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
- if (zsb->z_use_sa && zp->z_is_sa) {
+ if (zfsvfs->z_use_sa && zp->z_is_sa) {
error = zpl_xattr_get_sa(ip, name, value, size);
if (error != -ENOENT)
goto out;
@@ -376,14 +376,14 @@ static int
__zpl_xattr_where(struct inode *ip, const char *name, int *where, cred_t *cr)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
int error;
ASSERT(where);
ASSERT(RW_LOCK_HELD(&zp->z_xattr_lock));
*where = XATTR_NOENT;
- if (zsb->z_use_sa && zp->z_is_sa) {
+ if (zfsvfs->z_use_sa && zp->z_is_sa) {
error = zpl_xattr_get_sa(ip, name, NULL, 0);
if (error >= 0)
*where |= XATTR_IN_SA;
@@ -411,18 +411,18 @@ static int
zpl_xattr_get(struct inode *ip, const char *name, void *value, size_t size)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
cred_t *cr = CRED();
fstrans_cookie_t cookie;
int error;
crhold(cr);
cookie = spl_fstrans_mark();
- rrm_enter_read(&(zsb)->z_teardown_lock, FTAG);
+ rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG);
rw_enter(&zp->z_xattr_lock, RW_READER);
error = __zpl_xattr_get(ip, name, value, size, cr);
rw_exit(&zp->z_xattr_lock);
- rrm_exit(&(zsb)->z_teardown_lock, FTAG);
+ rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG);
spl_fstrans_unmark(cookie);
crfree(cr);
@@ -465,7 +465,7 @@ zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
/* Remove a specific name xattr when value is set to NULL. */
if (value == NULL) {
if (xip)
- error = -zfs_remove(dxip, (char *)name, cr);
+ error = -zfs_remove(dxip, (char *)name, cr, 0);
goto out;
}
@@ -495,6 +495,12 @@ zpl_xattr_set_dir(struct inode *ip, const char *name, const void *value,
error = wrote;
out:
+
+ if (error == 0) {
+ ip->i_ctime = current_time(ip);
+ zfs_mark_inode_dirty(ip);
+ }
+
if (vap)
kmem_free(vap, sizeof (vattr_t));
@@ -576,7 +582,7 @@ zpl_xattr_set(struct inode *ip, const char *name, const void *value,
size_t size, int flags)
{
znode_t *zp = ITOZ(ip);
- zfs_sb_t *zsb = ZTOZSB(zp);
+ zfsvfs_t *zfsvfs = ZTOZSB(zp);
cred_t *cr = CRED();
fstrans_cookie_t cookie;
int where;
@@ -584,7 +590,7 @@ zpl_xattr_set(struct inode *ip, const char *name, const void *value,
crhold(cr);
cookie = spl_fstrans_mark();
- rrm_enter_read(&(zsb)->z_teardown_lock, FTAG);
+ rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG);
rw_enter(&ITOZ(ip)->z_xattr_lock, RW_WRITER);
/*
@@ -615,8 +621,8 @@ zpl_xattr_set(struct inode *ip, const char *name, const void *value,
}
/* Preferentially store the xattr as a SA for better performance */
- if (zsb->z_use_sa && zp->z_is_sa &&
- (zsb->z_xattr_sa || (value == NULL && where & XATTR_IN_SA))) {
+ if (zfsvfs->z_use_sa && zp->z_is_sa &&
+ (zfsvfs->z_xattr_sa || (value == NULL && where & XATTR_IN_SA))) {
error = zpl_xattr_set_sa(ip, name, value, size, flags, cr);
if (error == 0) {
/*
@@ -637,7 +643,7 @@ zpl_xattr_set(struct inode *ip, const char *name, const void *value,
zpl_xattr_set_sa(ip, name, NULL, 0, 0, cr);
out:
rw_exit(&ITOZ(ip)->z_xattr_lock);
- rrm_exit(&(zsb)->z_teardown_lock, FTAG);
+ rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG);
spl_fstrans_unmark(cookie);
crfree(cr);
ASSERT3S(error, <=, 0);
@@ -1510,8 +1516,8 @@ zpl_posix_acl_free(void *arg)
}
if (refire)
- taskq_dispatch_delay(system_taskq, zpl_posix_acl_free, NULL,
- TQ_SLEEP, new_time);
+ taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
+ NULL, TQ_SLEEP, new_time);
while (freelist) {
a = freelist;
@@ -1536,7 +1542,7 @@ zpl_posix_acl_release_impl(struct posix_acl *acl)
*prev = a;
/* if it was empty before, schedule the free task */
if (prev == &acl_rel_head)
- taskq_dispatch_delay(system_taskq, zpl_posix_acl_free, NULL,
- TQ_SLEEP, ddi_get_lbolt() + ACL_REL_SCHED);
+ taskq_dispatch_delay(system_delay_taskq, zpl_posix_acl_free,
+ NULL, TQ_SLEEP, ddi_get_lbolt() + ACL_REL_SCHED);
}
#endif
diff --git a/zfs/module/zfs/zrlock.c b/zfs/module/zfs/zrlock.c
index 52f9ee83930e..02795112f042 100644
--- a/zfs/module/zfs/zrlock.c
+++ b/zfs/module/zfs/zrlock.c
@@ -20,7 +20,8 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014 by Delphix. All rights reserved.
+ * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
+ * Copyright 2016 The MathWorks, Inc. All rights reserved.
*/
/*
@@ -70,43 +71,34 @@ zrl_destroy(zrlock_t *zrl)
}
void
-#ifdef ZFS_DEBUG
-zrl_add_debug(zrlock_t *zrl, const char *zc)
-#else
-zrl_add(zrlock_t *zrl)
-#endif
+zrl_add_impl(zrlock_t *zrl, const char *zc)
{
- uint32_t n = (uint32_t)zrl->zr_refcount;
-
- while (n != ZRL_LOCKED) {
- uint32_t cas = atomic_cas_32(
- (uint32_t *)&zrl->zr_refcount, n, n + 1);
- if (cas == n) {
- ASSERT3S((int32_t)n, >=, 0);
+ for (;;) {
+ uint32_t n = (uint32_t)zrl->zr_refcount;
+ while (n != ZRL_LOCKED) {
+ uint32_t cas = atomic_cas_32(
+ (uint32_t *)&zrl->zr_refcount, n, n + 1);
+ if (cas == n) {
+ ASSERT3S((int32_t)n, >=, 0);
#ifdef ZFS_DEBUG
- if (zrl->zr_owner == curthread) {
- DTRACE_PROBE2(zrlock__reentry,
- zrlock_t *, zrl, uint32_t, n);
- }
- zrl->zr_owner = curthread;
- zrl->zr_caller = zc;
+ if (zrl->zr_owner == curthread) {
+ DTRACE_PROBE2(zrlock__reentry,
+ zrlock_t *, zrl, uint32_t, n);
+ }
+ zrl->zr_owner = curthread;
+ zrl->zr_caller = zc;
#endif
- return;
+ return;
+ }
+ n = cas;
}
- n = cas;
- }
- mutex_enter(&zrl->zr_mtx);
- while (zrl->zr_refcount == ZRL_LOCKED) {
- cv_wait(&zrl->zr_cv, &zrl->zr_mtx);
+ mutex_enter(&zrl->zr_mtx);
+ while (zrl->zr_refcount == ZRL_LOCKED) {
+ cv_wait(&zrl->zr_cv, &zrl->zr_mtx);
+ }
+ mutex_exit(&zrl->zr_mtx);
}
- ASSERT3S(zrl->zr_refcount, >=, 0);
- zrl->zr_refcount++;
-#ifdef ZFS_DEBUG
- zrl->zr_owner = curthread;
- zrl->zr_caller = zc;
-#endif
- mutex_exit(&zrl->zr_mtx);
}
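
The restructuring above turns zrl_add() into one instance of a common
pattern: optimistic compare-and-swap increments, with a sentinel value
(ZRL_LOCKED) that diverts waiters to a mutex/condvar slow path, after which
the CAS loop is retried from the top rather than incrementing under the
mutex. A user-space sketch of the same shape (C11 atomics standing in for
atomic_cas_32; the blocking wait is elided):

	#include <stdatomic.h>
	#include <stdint.h>

	#define	LOCKED	UINT32_MAX	/* sentinel, like ZRL_LOCKED */

	static void
	ref_add(_Atomic uint32_t *refcount)
	{
		for (;;) {
			uint32_t n = atomic_load(refcount);

			while (n != LOCKED) {
				/*
				 * On failure, n is reloaded with the value
				 * observed and the inner loop retries.
				 */
				if (atomic_compare_exchange_weak(refcount,
				    &n, n + 1))
					return;
			}
			/*
			 * Here the kernel code blocks on zr_cv until the
			 * sentinel clears, then retries from the top.
			 */
		}
	}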
void
@@ -199,11 +191,7 @@ zrl_owner(zrlock_t *zrl)
#if defined(_KERNEL) && defined(HAVE_SPL)
-#ifdef ZFS_DEBUG
-EXPORT_SYMBOL(zrl_add_debug);
-#else
-EXPORT_SYMBOL(zrl_add);
-#endif
+EXPORT_SYMBOL(zrl_add_impl);
EXPORT_SYMBOL(zrl_remove);
#endif
diff --git a/zfs/module/zfs/zvol.c b/zfs/module/zfs/zvol.c
index e03130cfaa90..5293f95fb020 100644
--- a/zfs/module/zfs/zvol.c
+++ b/zfs/module/zfs/zvol.c
@@ -34,7 +34,44 @@
* Volumes are persistent through reboot and module load. No user command
* needs to be run before opening and using a device.
*
+ * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2016 Actifio, Inc. All rights reserved.
+ * Copyright (c) 2012, 2017 by Delphix. All rights reserved.
+ */
+
+/*
+ * Note on locking of zvol state structures.
+ *
+ * These structures are used to maintain internal state used to emulate block
+ * devices on top of zvols. In particular, management of device minor number
+ * operations - create, remove, rename, and set_snapdev - involves access to
+ * these structures. The zvol_state_lock is primarily used to protect the
+ * zvol_state_list. The zv->zv_state_lock is used to protect the contents
+ * of the zvol_state_t structures, as well as to make sure that when the
+ * time comes to remove the structure from the list, it is not in use, and
+ * therefore, it can be taken off zvol_state_list and freed.
+ *
+ * The zv_suspend_lock was introduced to allow for suspending I/O to a zvol,
+ * e.g. for the duration of receive and rollback operations. This lock can be
+ * held for significant periods of time. Given that it is undesirable to hold
+ * mutexes for long periods of time, the following lock ordering applies:
+ * - take zvol_state_lock if necessary, to protect zvol_state_list
+ * - take zv_suspend_lock if necessary, by the code path in question
+ * - take zv_state_lock to protect zvol_state_t
+ *
+ * The minor operations are issued to spa->spa_zvol_taskq queues, which are
+ * single-threaded (to preserve the order of minor operations), and are
+ * executed through the zvol_task_cb that dispatches the specific operations.
+ * Therefore, these operations are serialized per pool, and we can be certain
+ * that for a given zvol there is only one operation in progress at a time.
+ * This guarantees that the zvol_state_t for a given zvol is allocated and
+ * placed on zvol_state_list before any other minor operation for that zvol,
+ * and that subsequent operations proceed in the order of issue.
+ *
+ * It is also worth keeping in mind that once add_disk() is called, the zvol is
+ * announced to the world, and zvol_open()/zvol_release() can be called at any
+ * time. Incidentally, add_disk() itself calls zvol_open()->zvol_first_open()
+ * and zvol_release()->zvol_last_close() directly as well.
*/
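
Condensed, the ordering documented above is zvol_state_lock ->
zv_suspend_lock -> zv_state_lock, each taken only when needed. A sketch of a
code path that needs all three (SPL locking API as used throughout this
patch; error handling elided):

	static void
	zvol_example_op(zvol_state_t *zv)
	{
		mutex_enter(&zvol_state_lock);	/* 1: zvol_state_list */
		rw_enter(&zv->zv_suspend_lock, RW_READER); /* 2: vs. suspend */
		mutex_enter(&zv->zv_state_lock);	/* 3: zv contents */

		/* ... operate on zv ... */

		mutex_exit(&zv->zv_state_lock);
		rw_exit(&zv->zv_suspend_lock);
		mutex_exit(&zvol_state_lock);
	}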
#include <sys/dbuf.h>
@@ -52,21 +89,29 @@
#include <sys/spa_impl.h>
#include <sys/zvol.h>
#include <linux/blkdev_compat.h>
-#include <linux/version.h>
unsigned int zvol_inhibit_dev = 0;
unsigned int zvol_major = ZVOL_MAJOR;
+unsigned int zvol_threads = 32;
+unsigned int zvol_request_sync = 0;
unsigned int zvol_prefetch_bytes = (128 * 1024);
unsigned long zvol_max_discard_blocks = 16384;
+unsigned int zvol_volmode = ZFS_VOLMODE_GEOM;
+static taskq_t *zvol_taskq;
static kmutex_t zvol_state_lock;
static list_t zvol_state_list;
-static char *zvol_tag = "zvol_tag";
+
+#define ZVOL_HT_SIZE 1024
+static struct hlist_head *zvol_htable;
+#define ZVOL_HT_HEAD(hash) (&zvol_htable[(hash) & (ZVOL_HT_SIZE-1)])
+
+static struct ida zvol_ida;
/*
* The in-core state of each volume.
*/
-typedef struct zvol_state {
+struct zvol_state {
char zv_name[MAXNAMELEN]; /* name */
uint64_t zv_volsize; /* advertised space */
uint64_t zv_volblocksize; /* volume block size */
@@ -76,19 +121,24 @@ typedef struct zvol_state {
uint32_t zv_changed; /* disk changed */
zilog_t *zv_zilog; /* ZIL handle */
zfs_rlock_t zv_range_lock; /* range lock */
- dmu_buf_t *zv_dbuf; /* bonus handle */
+ dnode_t *zv_dn; /* dnode hold */
dev_t zv_dev; /* device id */
struct gendisk *zv_disk; /* generic disk */
struct request_queue *zv_queue; /* request queue */
- spinlock_t zv_lock; /* request queue lock */
list_node_t zv_next; /* next zvol_state_t linkage */
-} zvol_state_t;
+ uint64_t zv_hash; /* name hash */
+ struct hlist_node zv_hlink; /* hash link */
+ kmutex_t zv_state_lock; /* protects zvol_state_t */
+ atomic_t zv_suspend_ref; /* refcount for suspend */
+ krwlock_t zv_suspend_lock; /* suspend lock */
+};
typedef enum {
ZVOL_ASYNC_CREATE_MINORS,
ZVOL_ASYNC_REMOVE_MINORS,
ZVOL_ASYNC_RENAME_MINORS,
ZVOL_ASYNC_SET_SNAPDEV,
+ ZVOL_ASYNC_SET_VOLMODE,
ZVOL_ASYNC_MAX
} zvol_async_op_t;
@@ -98,73 +148,108 @@ typedef struct {
char name1[MAXNAMELEN];
char name2[MAXNAMELEN];
zprop_source_t source;
- uint64_t snapdev;
+ uint64_t value;
} zvol_task_t;
#define ZVOL_RDONLY 0x1
-/*
- * Find the next available range of ZVOL_MINORS minor numbers. The
- * zvol_state_list is kept in ascending minor order so we simply need
- * to scan the list for the first gap in the sequence. This allows us
- * to recycle minor number as devices are created and removed.
- */
-static int
-zvol_find_minor(unsigned *minor)
+static uint64_t
+zvol_name_hash(const char *name)
{
- zvol_state_t *zv;
-
- *minor = 0;
- ASSERT(MUTEX_HELD(&zvol_state_lock));
- for (zv = list_head(&zvol_state_list); zv != NULL;
- zv = list_next(&zvol_state_list, zv), *minor += ZVOL_MINORS) {
- if (MINOR(zv->zv_dev) != MINOR(*minor))
- break;
+ int i;
+ uint64_t crc = -1ULL;
+ uint8_t *p = (uint8_t *)name;
+ ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
+ for (i = 0; i < MAXNAMELEN - 1 && *p; i++, p++) {
+ crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (*p)) & 0xFF];
}
-
- /* All minors are in use */
- if (*minor >= (1 << MINORBITS))
- return (SET_ERROR(ENXIO));
-
- return (0);
+ return (crc);
}
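
The hash feeds the fixed-size table declared above; since ZVOL_HT_SIZE is a
power of two, bucket selection is a single mask. A lookup-side sketch using
the same macros (hypothetical helper):

	static struct hlist_head *
	zvol_example_bucket(const char *name)
	{
		uint64_t hash = zvol_name_hash(name);

		/* Expands to &zvol_htable[hash & (ZVOL_HT_SIZE - 1)]. */
		return (ZVOL_HT_HEAD(hash));
	}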
/*
- * Find a zvol_state_t given the full major+minor dev_t.
+ * Find a zvol_state_t given the full major+minor dev_t. If found,
+ * return with zv_state_lock taken; otherwise, return (NULL) without
+ * taking zv_state_lock.
*/
static zvol_state_t *
zvol_find_by_dev(dev_t dev)
{
zvol_state_t *zv;
- ASSERT(MUTEX_HELD(&zvol_state_lock));
+ mutex_enter(&zvol_state_lock);
for (zv = list_head(&zvol_state_list); zv != NULL;
zv = list_next(&zvol_state_list, zv)) {
- if (zv->zv_dev == dev)
+ mutex_enter(&zv->zv_state_lock);
+ if (zv->zv_dev == dev) {
+ mutex_exit(&zvol_state_lock);
return (zv);
+ }
+ mutex_exit(&zv->zv_state_lock);
}
+ mutex_exit(&zvol_state_lock);
return (NULL);
}
/*
- * Find a zvol_state_t given the name provided at zvol_alloc() time.
+ * Find a zvol_state_t given the name and hash generated by zvol_name_hash.
+ * If found, return with zv_suspend_lock and zv_state_lock taken; otherwise,
+ * return (NULL) without taking the locks. The zv_suspend_lock is always taken
+ * before zv_state_lock. The mode argument indicates the mode (including none)
+ * for zv_suspend_lock to be taken.
*/
static zvol_state_t *
-zvol_find_by_name(const char *name)
+zvol_find_by_name_hash(const char *name, uint64_t hash, int mode)
{
zvol_state_t *zv;
+ struct hlist_node *p;
- ASSERT(MUTEX_HELD(&zvol_state_lock));
- for (zv = list_head(&zvol_state_list); zv != NULL;
- zv = list_next(&zvol_state_list, zv)) {
- if (strncmp(zv->zv_name, name, MAXNAMELEN) == 0)
+ mutex_enter(&zvol_state_lock);
+ hlist_for_each(p, ZVOL_HT_HEAD(hash)) {
+ zv = hlist_entry(p, zvol_state_t, zv_hlink);
+ mutex_enter(&zv->zv_state_lock);
+ if (zv->zv_hash == hash &&
+ strncmp(zv->zv_name, name, MAXNAMELEN) == 0) {
+ /*
+			 * This is the right zvol; take the locks in
+			 * the right order.
+ */
+ if (mode != RW_NONE &&
+ !rw_tryenter(&zv->zv_suspend_lock, mode)) {
+ mutex_exit(&zv->zv_state_lock);
+ rw_enter(&zv->zv_suspend_lock, mode);
+ mutex_enter(&zv->zv_state_lock);
+ /*
+ * zvol cannot be renamed as we continue
+ * to hold zvol_state_lock
+ */
+ ASSERT(zv->zv_hash == hash &&
+ strncmp(zv->zv_name, name, MAXNAMELEN)
+ == 0);
+ }
+ mutex_exit(&zvol_state_lock);
return (zv);
+ }
+ mutex_exit(&zv->zv_state_lock);
}
+ mutex_exit(&zvol_state_lock);
return (NULL);
}
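
The rw_tryenter() dance above is the standard way to acquire a lock out of
the declared order without deadlocking: try it; on failure, drop the lock you
hold, take both in the correct order, and re-validate whatever the dropped
lock protected. Reduced to its skeleton (SPL primitives as used above):

	static void
	take_suspend_then_state(zvol_state_t *zv, int mode)
	{
		ASSERT(MUTEX_HELD(&zv->zv_state_lock));

		if (!rw_tryenter(&zv->zv_suspend_lock, mode)) {
			mutex_exit(&zv->zv_state_lock);
			rw_enter(&zv->zv_suspend_lock, mode);
			mutex_enter(&zv->zv_state_lock);
			/*
			 * Re-check any state guarded by zv_state_lock; it
			 * may have changed while the mutex was dropped.
			 */
		}
	}

In zvol_find_by_name_hash() the re-check is cheap because zvol_state_lock is
still held across the whole sequence, which pins the name and hash.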
+/*
+ * Find a zvol_state_t given the name.
+ * If found, return with zv_suspend_lock and zv_state_lock taken; otherwise,
+ * return (NULL) without taking the locks. The zv_suspend_lock is always taken
+ * before zv_state_lock. The mode argument indicates the mode (including none)
+ * for zv_suspend_lock to be taken.
+ */
+static zvol_state_t *
+zvol_find_by_name(const char *name, int mode)
+{
+ return (zvol_find_by_name_hash(name, zvol_name_hash(name), mode));
+}
+
/*
* Given a path, return TRUE if path is a ZVOL.
@@ -259,22 +344,15 @@ zvol_size_changed(zvol_state_t *zv, uint64_t volsize)
{
struct block_device *bdev;
+ ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+
bdev = bdget_disk(zv->zv_disk, 0);
if (bdev == NULL)
return;
-/*
- * 2.6.28 API change
- * Added check_disk_size_change() helper function.
- */
-#ifdef HAVE_CHECK_DISK_SIZE_CHANGE
+
set_capacity(zv->zv_disk, volsize >> 9);
zv->zv_volsize = volsize;
check_disk_size_change(zv->zv_disk, bdev);
-#else
- zv->zv_volsize = volsize;
- zv->zv_changed = 1;
- (void) check_disk_change(bdev);
-#endif /* HAVE_CHECK_DISK_SIZE_CHANGE */
bdput(bdev);
}
@@ -292,7 +370,7 @@ zvol_check_volsize(uint64_t volsize, uint64_t blocksize)
return (SET_ERROR(EINVAL));
#ifdef _ILP32
- if (volsize - 1 > MAXOFFSET_T)
+ if (volsize - 1 > SPEC_MAXOFFSET_T)
return (SET_ERROR(EOVERFLOW));
#endif
return (0);
@@ -306,21 +384,24 @@ zvol_update_volsize(uint64_t volsize, objset_t *os)
{
dmu_tx_t *tx;
int error;
-
- ASSERT(MUTEX_HELD(&zvol_state_lock));
+ uint64_t txg;
tx = dmu_tx_create(os);
dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
+ dmu_tx_mark_netfree(tx);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
return (SET_ERROR(error));
}
+ txg = dmu_tx_get_txg(tx);
error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1,
&volsize, tx);
dmu_tx_commit(tx);
+ txg_wait_synced(dmu_objset_pool(os), txg);
+
if (error == 0)
error = dmu_free_long_range(os,
ZVOL_OBJ, volsize, DMU_OBJECT_END);
@@ -362,13 +443,18 @@ zvol_set_volsize(const char *name, uint64_t volsize)
if (readonly)
return (SET_ERROR(EROFS));
- mutex_enter(&zvol_state_lock);
- zv = zvol_find_by_name(name);
+ zv = zvol_find_by_name(name, RW_READER);
+
+ ASSERT(zv == NULL || (MUTEX_HELD(&zv->zv_state_lock) &&
+ RW_READ_HELD(&zv->zv_suspend_lock)));
if (zv == NULL || zv->zv_objset == NULL) {
+ if (zv != NULL)
+ rw_exit(&zv->zv_suspend_lock);
if ((error = dmu_objset_own(name, DMU_OST_ZVOL, B_FALSE,
FTAG, &os)) != 0) {
- mutex_exit(&zvol_state_lock);
+ if (zv != NULL)
+ mutex_exit(&zv->zv_state_lock);
return (SET_ERROR(error));
}
owned = B_TRUE;
@@ -385,18 +471,24 @@ zvol_set_volsize(const char *name, uint64_t volsize)
goto out;
error = zvol_update_volsize(volsize, os);
- kmem_free(doi, sizeof (dmu_object_info_t));
if (error == 0 && zv != NULL)
error = zvol_update_live_volsize(zv, volsize);
out:
+ kmem_free(doi, sizeof (dmu_object_info_t));
+
if (owned) {
dmu_objset_disown(os, FTAG);
if (zv != NULL)
zv->zv_objset = NULL;
+ } else {
+ rw_exit(&zv->zv_suspend_lock);
}
- mutex_exit(&zvol_state_lock);
- return (error);
+
+ if (zv != NULL)
+ mutex_exit(&zv->zv_state_lock);
+
+ return (SET_ERROR(error));
}
/*
@@ -446,17 +538,18 @@ zvol_set_volblocksize(const char *name, uint64_t volblocksize)
dmu_tx_t *tx;
int error;
- mutex_enter(&zvol_state_lock);
+ zv = zvol_find_by_name(name, RW_READER);
- zv = zvol_find_by_name(name);
- if (zv == NULL) {
- error = SET_ERROR(ENXIO);
- goto out;
- }
+ if (zv == NULL)
+ return (SET_ERROR(ENXIO));
+
+ ASSERT(MUTEX_HELD(&zv->zv_state_lock) &&
+ RW_READ_HELD(&zv->zv_suspend_lock));
if (zv->zv_flags & ZVOL_RDONLY) {
- error = SET_ERROR(EROFS);
- goto out;
+ mutex_exit(&zv->zv_state_lock);
+ rw_exit(&zv->zv_suspend_lock);
+ return (SET_ERROR(EROFS));
}
tx = dmu_tx_create(zv->zv_objset);
@@ -473,12 +566,31 @@ zvol_set_volblocksize(const char *name, uint64_t volblocksize)
if (error == 0)
zv->zv_volblocksize = volblocksize;
}
-out:
- mutex_exit(&zvol_state_lock);
+
+ mutex_exit(&zv->zv_state_lock);
+ rw_exit(&zv->zv_suspend_lock);
return (SET_ERROR(error));
}
+/*
+ * Replay a TX_TRUNCATE ZIL transaction if asked. TX_TRUNCATE is how we
+ * implement DKIOCFREE/free-long-range.
+ */
+static int
+zvol_replay_truncate(zvol_state_t *zv, lr_truncate_t *lr, boolean_t byteswap)
+{
+ uint64_t offset, length;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ offset = lr->lr_offset;
+ length = lr->lr_length;
+
+ return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length));
+}
+
/*
* Replay a TX_WRITE ZIL transaction that didn't get committed
* after a system failure
@@ -487,26 +599,37 @@ static int
zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap)
{
objset_t *os = zv->zv_objset;
- char *data = (char *)(lr + 1); /* data follows lr_write_t */
- uint64_t off = lr->lr_offset;
- uint64_t len = lr->lr_length;
+ char *data = (char *)(lr + 1); /* data follows lr_write_t */
+ uint64_t offset, length;
dmu_tx_t *tx;
int error;
if (byteswap)
byteswap_uint64_array(lr, sizeof (*lr));
+ offset = lr->lr_offset;
+ length = lr->lr_length;
+
+ /* If it's a dmu_sync() block, write the whole block */
+ if (lr->lr_common.lrc_reclen == sizeof (lr_write_t)) {
+ uint64_t blocksize = BP_GET_LSIZE(&lr->lr_blkptr);
+ if (length < blocksize) {
+ offset -= offset % blocksize;
+ length = blocksize;
+ }
+ }
+
tx = dmu_tx_create(os);
- dmu_tx_hold_write(tx, ZVOL_OBJ, off, len);
+ dmu_tx_hold_write(tx, ZVOL_OBJ, offset, length);
error = dmu_tx_assign(tx, TXG_WAIT);
if (error) {
dmu_tx_abort(tx);
} else {
- dmu_write(os, ZVOL_OBJ, off, len, data, tx);
+ dmu_write(os, ZVOL_OBJ, offset, length, data, tx);
dmu_tx_commit(tx);
}
- return (SET_ERROR(error));
+ return (error);
}
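
The whole-block rewrite above rounds a dmu_sync()-style record out to its
containing block: with an 8K volblocksize, an (offset, length) of
(12288, 512) becomes (8192, 8192). In isolation:

	#include <stdint.h>

	/*
	 * Round a sub-block (offset, length) out to the containing block,
	 * as done above when replaying an indirect write record.
	 */
	static void
	align_to_block(uint64_t *offset, uint64_t *length, uint64_t blocksize)
	{
		if (*length < blocksize) {
			*offset -= *offset % blocksize;
			*length = blocksize;
		}
	}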
static int
@@ -517,7 +640,7 @@ zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap)
/*
* Callback vectors for replaying records.
- * Only TX_WRITE is needed for zvol.
+ * Only TX_WRITE and TX_TRUNCATE are needed for zvol.
*/
zil_replay_func_t zvol_replay_vector[TX_MAX_TYPE] = {
(zil_replay_func_t)zvol_replay_err, /* no such transaction type */
@@ -530,7 +653,7 @@ zil_replay_func_t zvol_replay_vector[TX_MAX_TYPE] = {
(zil_replay_func_t)zvol_replay_err, /* TX_LINK */
(zil_replay_func_t)zvol_replay_err, /* TX_RENAME */
(zil_replay_func_t)zvol_replay_write, /* TX_WRITE */
- (zil_replay_func_t)zvol_replay_err, /* TX_TRUNCATE */
+ (zil_replay_func_t)zvol_replay_truncate, /* TX_TRUNCATE */
(zil_replay_func_t)zvol_replay_err, /* TX_SETATTR */
(zil_replay_func_t)zvol_replay_err, /* TX_ACL */
};
@@ -549,53 +672,44 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
{
uint32_t blocksize = zv->zv_volblocksize;
zilog_t *zilog = zv->zv_zilog;
- boolean_t slogging;
- ssize_t immediate_write_sz;
+ itx_wr_state_t write_state;
if (zil_replaying(zilog, tx))
return;
- immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
- ? 0 : zvol_immediate_write_sz;
- slogging = spa_has_slogs(zilog->zl_spa) &&
- (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
+ if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+ write_state = WR_INDIRECT;
+ else if (!spa_has_slogs(zilog->zl_spa) &&
+ size >= blocksize && blocksize > zvol_immediate_write_sz)
+ write_state = WR_INDIRECT;
+ else if (sync)
+ write_state = WR_COPIED;
+ else
+ write_state = WR_NEED_COPY;
while (size) {
itx_t *itx;
lr_write_t *lr;
- ssize_t len;
- itx_wr_state_t write_state;
+ itx_wr_state_t wr_state = write_state;
+ ssize_t len = size;
- /*
- * Unlike zfs_log_write() we can be called with
- * up to DMU_MAX_ACCESS/2 (5MB) writes.
- */
- if (blocksize > immediate_write_sz && !slogging &&
- size >= blocksize && offset % blocksize == 0) {
- write_state = WR_INDIRECT; /* uses dmu_sync */
- len = blocksize;
- } else if (sync) {
- write_state = WR_COPIED;
- len = MIN(ZIL_MAX_LOG_DATA, size);
- } else {
- write_state = WR_NEED_COPY;
- len = MIN(ZIL_MAX_LOG_DATA, size);
- }
+ if (wr_state == WR_COPIED && size > ZIL_MAX_COPIED_DATA)
+ wr_state = WR_NEED_COPY;
+ else if (wr_state == WR_INDIRECT)
+ len = MIN(blocksize - P2PHASE(offset, blocksize), size);
itx = zil_itx_create(TX_WRITE, sizeof (*lr) +
- (write_state == WR_COPIED ? len : 0));
+ (wr_state == WR_COPIED ? len : 0));
lr = (lr_write_t *)&itx->itx_lr;
- if (write_state == WR_COPIED && dmu_read(zv->zv_objset,
- ZVOL_OBJ, offset, len, lr+1, DMU_READ_NO_PREFETCH) != 0) {
+ if (wr_state == WR_COPIED && dmu_read_by_dnode(zv->zv_dn,
+ offset, len, lr+1, DMU_READ_NO_PREFETCH) != 0) {
zil_itx_destroy(itx);
itx = zil_itx_create(TX_WRITE, sizeof (*lr));
lr = (lr_write_t *)&itx->itx_lr;
- write_state = WR_NEED_COPY;
+ wr_state = WR_NEED_COPY;
}
- itx->itx_wr_state = write_state;
- if (write_state == WR_NEED_COPY)
- itx->itx_sod += len;
+ itx->itx_wr_state = wr_state;
lr->lr_foid = ZVOL_OBJ;
lr->lr_offset = offset;
lr->lr_length = len;
@@ -612,78 +726,131 @@ zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
}
}
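
The write-state selection at the top of zvol_log_write() collapses to a small
decision helper; this restates the logic above one-for-one (same names as in
the patch, shown as a sketch rather than a drop-in replacement):

	static itx_wr_state_t
	pick_write_state(zilog_t *zilog, ssize_t size, uint32_t blocksize,
	    boolean_t sync)
	{
		/* Indirect: data is written once, via dmu_sync(). */
		if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
			return (WR_INDIRECT);
		if (!spa_has_slogs(zilog->zl_spa) &&
		    size >= blocksize && blocksize > zvol_immediate_write_sz)
			return (WR_INDIRECT);
		/*
		 * Copied: data goes into the itx now; need-copy: data is
		 * copied later, and only if the itx actually commits.
		 */
		return (sync ? WR_COPIED : WR_NEED_COPY);
	}

Per iteration, WR_COPIED still demotes to WR_NEED_COPY above the
ZIL_MAX_COPIED_DATA threshold, as the loop body shows.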
-static int
-zvol_write(struct bio *bio)
+typedef struct zv_request {
+ zvol_state_t *zv;
+ struct bio *bio;
+ rl_t *rl;
+} zv_request_t;
+
+static void
+uio_from_bio(uio_t *uio, struct bio *bio)
{
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,14,0)
- zvol_state_t *zv = bio->bi_bdev->bd_disk->private_data;
-#else
- zvol_state_t *zv = bio->bi_disk->private_data;
-#endif
- uint64_t offset = BIO_BI_SECTOR(bio) << 9;
- uint64_t size = BIO_BI_SIZE(bio);
+ uio->uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)];
+ uio->uio_skip = BIO_BI_SKIP(bio);
+ uio->uio_resid = BIO_BI_SIZE(bio);
+ uio->uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio);
+ uio->uio_loffset = BIO_BI_SECTOR(bio) << 9;
+ uio->uio_limit = MAXOFFSET_T;
+ uio->uio_segflg = UIO_BVEC;
+}
+
+static void
+zvol_write(void *arg)
+{
+ zv_request_t *zvr = arg;
+ struct bio *bio = zvr->bio;
+ uio_t uio;
+ zvol_state_t *zv = zvr->zv;
+ uint64_t volsize = zv->zv_volsize;
+ boolean_t sync;
int error = 0;
- dmu_tx_t *tx;
- rl_t *rl;
+ unsigned long start_jif;
+
+ uio_from_bio(&uio, bio);
ASSERT(zv && zv->zv_open_count > 0);
- if (bio_is_flush(bio))
- zil_commit(zv->zv_zilog, ZVOL_OBJ);
+ start_jif = jiffies;
+ blk_generic_start_io_acct(zv->zv_queue, WRITE, bio_sectors(bio),
+ &zv->zv_disk->part0);
- /*
- * Some requests are just for flush and nothing else.
- */
- if (size == 0)
- goto out;
+ sync = bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
- rl = zfs_range_lock(&zv->zv_range_lock, offset, size, RL_WRITER);
+ while (uio.uio_resid > 0 && uio.uio_loffset < volsize) {
+ uint64_t bytes = MIN(uio.uio_resid, DMU_MAX_ACCESS >> 1);
+ uint64_t off = uio.uio_loffset;
+ dmu_tx_t *tx = dmu_tx_create(zv->zv_objset);
- tx = dmu_tx_create(zv->zv_objset);
- dmu_tx_hold_write(tx, ZVOL_OBJ, offset, size);
+ if (bytes > volsize - off) /* don't write past the end */
+ bytes = volsize - off;
- /* This will only fail for ENOSPC */
- error = dmu_tx_assign(tx, TXG_WAIT);
- if (error) {
- dmu_tx_abort(tx);
- zfs_range_unlock(rl);
- goto out;
+ dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes);
+
+ /* This will only fail for ENOSPC */
+ error = dmu_tx_assign(tx, TXG_WAIT);
+ if (error) {
+ dmu_tx_abort(tx);
+ break;
+ }
+ error = dmu_write_uio_dnode(zv->zv_dn, &uio, bytes, tx);
+ if (error == 0)
+ zvol_log_write(zv, tx, off, bytes, sync);
+ dmu_tx_commit(tx);
+
+ if (error)
+ break;
}
+ zfs_range_unlock(zvr->rl);
+ if (sync)
+ zil_commit(zv->zv_zilog, ZVOL_OBJ);
- error = dmu_write_bio(zv->zv_objset, ZVOL_OBJ, bio, tx);
- if (error == 0)
- zvol_log_write(zv, tx, offset, size,
- !!(bio_is_fua(bio)));
+ rw_exit(&zv->zv_suspend_lock);
+ blk_generic_end_io_acct(zv->zv_queue, WRITE, &zv->zv_disk->part0,
+ start_jif);
+ BIO_END_IO(bio, -error);
+ kmem_free(zvr, sizeof (zv_request_t));
+}
- dmu_tx_commit(tx);
- zfs_range_unlock(rl);
+/*
+ * Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE.
+ */
+static void
+zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
+ boolean_t sync)
+{
+ itx_t *itx;
+ lr_truncate_t *lr;
+ zilog_t *zilog = zv->zv_zilog;
- if ((bio_is_fua(bio)) ||
- zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS)
- zil_commit(zv->zv_zilog, ZVOL_OBJ);
+ if (zil_replaying(zilog, tx))
+ return;
-out:
- return (error);
+ itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr));
+ lr = (lr_truncate_t *)&itx->itx_lr;
+ lr->lr_foid = ZVOL_OBJ;
+ lr->lr_offset = off;
+ lr->lr_length = len;
+
+ itx->itx_sync = sync;
+ zil_itx_assign(zilog, itx, tx);
}
-static int
-zvol_discard(struct bio *bio)
+static void
+zvol_discard(void *arg)
{
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,14,0)
- zvol_state_t *zv = bio->bi_bdev->bd_disk->private_data;
-#else
- zvol_state_t *zv = bio->bi_disk->private_data;
-#endif
+ zv_request_t *zvr = arg;
+ struct bio *bio = zvr->bio;
+ zvol_state_t *zv = zvr->zv;
uint64_t start = BIO_BI_SECTOR(bio) << 9;
uint64_t size = BIO_BI_SIZE(bio);
uint64_t end = start + size;
- int error;
- rl_t *rl;
+ boolean_t sync;
+ int error = 0;
+ dmu_tx_t *tx;
+ unsigned long start_jif;
ASSERT(zv && zv->zv_open_count > 0);
- if (end > zv->zv_volsize)
- return (SET_ERROR(EIO));
+ start_jif = jiffies;
+ blk_generic_start_io_acct(zv->zv_queue, WRITE, bio_sectors(bio),
+ &zv->zv_disk->part0);
+
+ sync = bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
+
+ if (end > zv->zv_volsize) {
+ error = SET_ERROR(EIO);
+ goto unlock;
+ }
/*
* Align the request to volume block boundaries when a secure erase is
@@ -698,49 +865,72 @@ zvol_discard(struct bio *bio)
}
if (start >= end)
- return (0);
-
- rl = zfs_range_lock(&zv->zv_range_lock, start, size, RL_WRITER);
+ goto unlock;
- error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, start, size);
-
- /*
- * TODO: maybe we should add the operation to the log.
- */
- zfs_range_unlock(rl);
+ tx = dmu_tx_create(zv->zv_objset);
+ dmu_tx_mark_netfree(tx);
+ error = dmu_tx_assign(tx, TXG_WAIT);
+ if (error != 0) {
+ dmu_tx_abort(tx);
+ } else {
+ zvol_log_truncate(zv, tx, start, size, B_TRUE);
+ dmu_tx_commit(tx);
+ error = dmu_free_long_range(zv->zv_objset,
+ ZVOL_OBJ, start, size);
+ }
+unlock:
+ zfs_range_unlock(zvr->rl);
+ if (error == 0 && sync)
+ zil_commit(zv->zv_zilog, ZVOL_OBJ);
- return (error);
+ rw_exit(&zv->zv_suspend_lock);
+ blk_generic_end_io_acct(zv->zv_queue, WRITE, &zv->zv_disk->part0,
+ start_jif);
+ BIO_END_IO(bio, -error);
+ kmem_free(zvr, sizeof (zv_request_t));
}
-static int
-zvol_read(struct bio *bio)
+static void
+zvol_read(void *arg)
{
-#if LINUX_VERSION_CODE < KERNEL_VERSION(4,14,0)
- zvol_state_t *zv = bio->bi_bdev->bd_disk->private_data;
-#else
- zvol_state_t *zv = bio->bi_disk->private_data;
-#endif
- uint64_t offset = BIO_BI_SECTOR(bio) << 9;
- uint64_t len = BIO_BI_SIZE(bio);
- int error;
- rl_t *rl;
+ zv_request_t *zvr = arg;
+ struct bio *bio = zvr->bio;
+ uio_t uio;
+ zvol_state_t *zv = zvr->zv;
+ uint64_t volsize = zv->zv_volsize;
+ int error = 0;
+ unsigned long start_jif;
- ASSERT(zv && zv->zv_open_count > 0);
+ uio_from_bio(&uio, bio);
- if (len == 0)
- return (0);
+ ASSERT(zv && zv->zv_open_count > 0);
- rl = zfs_range_lock(&zv->zv_range_lock, offset, len, RL_READER);
+ start_jif = jiffies;
+ blk_generic_start_io_acct(zv->zv_queue, READ, bio_sectors(bio),
+ &zv->zv_disk->part0);
- error = dmu_read_bio(zv->zv_objset, ZVOL_OBJ, bio);
+ while (uio.uio_resid > 0 && uio.uio_loffset < volsize) {
+ uint64_t bytes = MIN(uio.uio_resid, DMU_MAX_ACCESS >> 1);
- zfs_range_unlock(rl);
+ /* don't read past the end */
+ if (bytes > volsize - uio.uio_loffset)
+ bytes = volsize - uio.uio_loffset;
- /* convert checksum errors into IO errors */
- if (error == ECKSUM)
- error = SET_ERROR(EIO);
+ error = dmu_read_uio_dnode(zv->zv_dn, &uio, bytes);
+ if (error) {
+ /* convert checksum errors into IO errors */
+ if (error == ECKSUM)
+ error = SET_ERROR(EIO);
+ break;
+ }
+ }
+ zfs_range_unlock(zvr->rl);
- return (error);
+ rw_exit(&zv->zv_suspend_lock);
+ blk_generic_end_io_acct(zv->zv_queue, READ, &zv->zv_disk->part0,
+ start_jif);
+ BIO_END_IO(bio, -error);
+ kmem_free(zvr, sizeof (zv_request_t));
}
static MAKE_REQUEST_FN_RET
@@ -748,46 +938,93 @@ zvol_request(struct request_queue *q, struct bio *bio)
{
zvol_state_t *zv = q->queuedata;
fstrans_cookie_t cookie = spl_fstrans_mark();
- uint64_t offset = BIO_BI_SECTOR(bio);
- unsigned int sectors = bio_sectors(bio);
+ uint64_t offset = BIO_BI_SECTOR(bio) << 9;
+ uint64_t size = BIO_BI_SIZE(bio);
int rw = bio_data_dir(bio);
-#ifdef HAVE_GENERIC_IO_ACCT
- unsigned long start = jiffies;
-#endif
- int error = 0;
+ zv_request_t *zvr;
- if (bio_has_data(bio) && offset + sectors >
- get_capacity(zv->zv_disk)) {
+ if (bio_has_data(bio) && offset + size > zv->zv_volsize) {
printk(KERN_INFO
- "%s: bad access: block=%llu, count=%lu\n",
+ "%s: bad access: offset=%llu, size=%lu\n",
zv->zv_disk->disk_name,
(long long unsigned)offset,
- (long unsigned)sectors);
- error = SET_ERROR(EIO);
- goto out1;
- }
+ (long unsigned)size);
- generic_start_io_acct(rw, sectors, &zv->zv_disk->part0);
+ BIO_END_IO(bio, -SET_ERROR(EIO));
+ goto out;
+ }
if (rw == WRITE) {
+ boolean_t need_sync = B_FALSE;
+
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
- error = SET_ERROR(EROFS);
- goto out2;
+ BIO_END_IO(bio, -SET_ERROR(EROFS));
+ goto out;
+ }
+
+ /*
+ * To be released in the I/O function. See the comment on
+ * zfs_range_lock below.
+ */
+ rw_enter(&zv->zv_suspend_lock, RW_READER);
+
+	/* A bio marked as FLUSH needs to flush before the write */
+ if (bio_is_flush(bio))
+ zil_commit(zv->zv_zilog, ZVOL_OBJ);
+
+ /* Some requests are just for flush and nothing else. */
+ if (size == 0) {
+ rw_exit(&zv->zv_suspend_lock);
+ BIO_END_IO(bio, 0);
+ goto out;
}
+ zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP);
+ zvr->zv = zv;
+ zvr->bio = bio;
+
+ /*
+ * To be released in the I/O function. Since the I/O functions
+ * are asynchronous, we take it here synchronously to make
+ * sure overlapped I/Os are properly ordered.
+ */
+ zvr->rl = zfs_range_lock(&zv->zv_range_lock, offset, size,
+ RL_WRITER);
+	/*
+	 * Sync writes and discards execute zil_commit(), which may need
+	 * to take a RL_READER lock on the whole block being modified
+	 * via its zilog->zl_get_data(); to avoid circular dependency
+	 * issues with taskq threads, execute these requests
+	 * synchronously here in zvol_request().
+	 */
+ need_sync = bio_is_fua(bio) ||
+ zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
if (bio_is_discard(bio) || bio_is_secure_erase(bio)) {
- error = zvol_discard(bio);
- goto out2;
+ if (zvol_request_sync || need_sync ||
+ taskq_dispatch(zvol_taskq, zvol_discard, zvr,
+ TQ_SLEEP) == TASKQID_INVALID)
+ zvol_discard(zvr);
+ } else {
+ if (zvol_request_sync || need_sync ||
+ taskq_dispatch(zvol_taskq, zvol_write, zvr,
+ TQ_SLEEP) == TASKQID_INVALID)
+ zvol_write(zvr);
}
+ } else {
+ zvr = kmem_alloc(sizeof (zv_request_t), KM_SLEEP);
+ zvr->zv = zv;
+ zvr->bio = bio;
- error = zvol_write(bio);
- } else
- error = zvol_read(bio);
+ rw_enter(&zv->zv_suspend_lock, RW_READER);
-out2:
- generic_end_io_acct(rw, &zv->zv_disk->part0, start);
-out1:
- BIO_END_IO(bio, -error);
+ zvr->rl = zfs_range_lock(&zv->zv_range_lock, offset, size,
+ RL_READER);
+ if (zvol_request_sync || taskq_dispatch(zvol_taskq,
+ zvol_read, zvr, TQ_SLEEP) == TASKQID_INVALID)
+ zvol_read(zvr);
+ }
+
+out:
spl_fstrans_unmark(cookie);
#ifdef HAVE_MAKE_REQUEST_FN_RET_INT
return (0);
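The dispatch logic above follows one pattern throughout: try to hand the request to the taskq, and run it inline when synchronous behavior is required or dispatch is refused. A hedged pthread-based sketch of that fallback shape (dispatch() and do_io() are illustrative; a nonzero dispatch() return models TASKQID_INVALID):

#include <pthread.h>
#include <stdio.h>

static void
do_io(void *arg)
{
	printf("request %d handled\n", *(int *)arg);
}

static void *
worker(void *arg)
{
	do_io(arg);
	return (NULL);
}

/* Model taskq_dispatch(): nonzero means the dispatch failed. */
static int
dispatch(pthread_t *t, void *arg)
{
	return (pthread_create(t, NULL, worker, arg));
}

int
main(void)
{
	pthread_t t;
	int req = 1, force_sync = 0;

	/* Run inline when synchronous mode is forced or dispatch fails. */
	if (force_sync || dispatch(&t, &req) != 0)
		do_io(&req);
	else
		pthread_join(t, NULL);
	return (0);
}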
@@ -817,11 +1054,8 @@ static int
zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
{
zvol_state_t *zv = arg;
- objset_t *os = zv->zv_objset;
- uint64_t object = ZVOL_OBJ;
uint64_t offset = lr->lr_offset;
uint64_t size = lr->lr_length;
- blkptr_t *bp = &lr->lr_blkptr;
dmu_buf_t *db;
zgd_t *zgd;
int error;
@@ -831,8 +1065,6 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
zgd->zgd_zilog = zv->zv_zilog;
- zgd->zgd_rl = zfs_range_lock(&zv->zv_range_lock, offset, size,
- RL_READER);
/*
* Write records come in two flavors: immediate and indirect.
@@ -842,22 +1074,28 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
* we don't have to write the data twice.
*/
if (buf != NULL) { /* immediate write */
- error = dmu_read(os, object, offset, size, buf,
+ zgd->zgd_rl = zfs_range_lock(&zv->zv_range_lock, offset, size,
+ RL_READER);
+ error = dmu_read_by_dnode(zv->zv_dn, offset, size, buf,
DMU_READ_NO_PREFETCH);
- } else {
+ } else { /* indirect write */
+		/*
+		 * We have to lock the whole block to ensure that no one can
+		 * change the data while it is written out and its checksum
+		 * is calculated. Unlike zfs_get_data, we need not re-check
+		 * the blocksize after we take the lock because it cannot
+		 * change.
+		 */
size = zv->zv_volblocksize;
offset = P2ALIGN_TYPED(offset, size, uint64_t);
- error = dmu_buf_hold(os, object, offset, zgd, &db,
+ zgd->zgd_rl = zfs_range_lock(&zv->zv_range_lock, offset, size,
+ RL_READER);
+ error = dmu_buf_hold_by_dnode(zv->zv_dn, offset, zgd, &db,
DMU_READ_NO_PREFETCH);
if (error == 0) {
- blkptr_t *obp = dmu_buf_get_blkptr(db);
- if (obp) {
- ASSERT(BP_IS_HOLE(bp));
- *bp = *obp;
- }
+ blkptr_t *bp = &lr->lr_blkptr;
zgd->zgd_db = db;
- zgd->zgd_bp = &lr->lr_blkptr;
+ zgd->zgd_bp = bp;
ASSERT(db != NULL);
ASSERT(db->db_offset == offset);
@@ -877,101 +1115,230 @@ zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
}
/*
- * The zvol_state_t's are inserted in increasing MINOR(dev_t) order.
+ * The zvol_state_t's are inserted into zvol_state_list and zvol_htable.
*/
static void
-zvol_insert(zvol_state_t *zv_insert)
+zvol_insert(zvol_state_t *zv)
{
- zvol_state_t *zv = NULL;
-
ASSERT(MUTEX_HELD(&zvol_state_lock));
- ASSERT3U(MINOR(zv_insert->zv_dev) & ZVOL_MINOR_MASK, ==, 0);
- for (zv = list_head(&zvol_state_list); zv != NULL;
- zv = list_next(&zvol_state_list, zv)) {
- if (MINOR(zv->zv_dev) > MINOR(zv_insert->zv_dev))
- break;
- }
-
- list_insert_before(&zvol_state_list, zv, zv_insert);
+ ASSERT3U(MINOR(zv->zv_dev) & ZVOL_MINOR_MASK, ==, 0);
+ list_insert_head(&zvol_state_list, zv);
+ hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash));
}
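Since zvols are now looked up by name hash as well as by list walk, each zvol is kept on both structures. A self-contained sketch of that dual bookkeeping (the hash function and bucket count here are illustrative, not the ones used by zvol.c):

#include <stdio.h>

#define	HT_SIZE	16

struct zv {
	struct zv *next_all;	/* global list linkage */
	struct zv *next_hash;	/* hash bucket linkage */
	unsigned long hash;
	const char *name;
};

static struct zv *all_head;
static struct zv *buckets[HT_SIZE];

static unsigned long
name_hash(const char *s)
{
	unsigned long h = 5381;	/* djb2, for illustration only */

	while (*s)
		h = h * 33 + (unsigned char)*s++;
	return (h);
}

static void
zv_insert(struct zv *zv)
{
	zv->hash = name_hash(zv->name);
	zv->next_all = all_head;			/* list_insert_head() */
	all_head = zv;
	zv->next_hash = buckets[zv->hash % HT_SIZE];	/* hlist_add_head() */
	buckets[zv->hash % HT_SIZE] = zv;
}

int
main(void)
{
	struct zv a = { .name = "pool/vol0" };

	zv_insert(&a);
	printf("%s -> bucket %lu\n", a.name, a.hash % HT_SIZE);
	return (0);
}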
/*
 * Simply remove the zvol from the list of zvols.
*/
static void
-zvol_remove(zvol_state_t *zv_remove)
+zvol_remove(zvol_state_t *zv)
{
ASSERT(MUTEX_HELD(&zvol_state_lock));
- list_remove(&zvol_state_list, zv_remove);
+ list_remove(&zvol_state_list, zv);
+ hlist_del(&zv->zv_hlink);
}
+/*
+ * Set up zv after we have just taken ownership of zv->objset
+ */
static int
-zvol_first_open(zvol_state_t *zv)
+zvol_setup_zv(zvol_state_t *zv)
{
- objset_t *os;
uint64_t volsize;
int error;
uint64_t ro;
+ objset_t *os = zv->zv_objset;
- /* lie and say we're read-only */
- error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zvol_tag, &os);
+ ASSERT(MUTEX_HELD(&zv->zv_state_lock) &&
+ RW_LOCK_HELD(&zv->zv_suspend_lock));
+
+ error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL);
if (error)
- return (SET_ERROR(-error));
+ return (SET_ERROR(error));
+
+ error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
+ if (error)
+ return (SET_ERROR(error));
+
+ error = dnode_hold(os, ZVOL_OBJ, FTAG, &zv->zv_dn);
+ if (error)
+ return (SET_ERROR(error));
+
+ set_capacity(zv->zv_disk, volsize >> 9);
+ zv->zv_volsize = volsize;
+ zv->zv_zilog = zil_open(os, zvol_get_data);
+
+ if (ro || dmu_objset_is_snapshot(os) ||
+ !spa_writeable(dmu_objset_spa(os))) {
+ set_disk_ro(zv->zv_disk, 1);
+ zv->zv_flags |= ZVOL_RDONLY;
+ } else {
+ set_disk_ro(zv->zv_disk, 0);
+ zv->zv_flags &= ~ZVOL_RDONLY;
+ }
+ return (0);
+}
+
+/*
+ * Shutdown every zv_objset related stuff except zv_objset itself.
+ * The is the reverse of zvol_setup_zv.
+ */
+static void
+zvol_shutdown_zv(zvol_state_t *zv)
+{
+ ASSERT(MUTEX_HELD(&zv->zv_state_lock) &&
+ RW_LOCK_HELD(&zv->zv_suspend_lock));
+
+ zil_close(zv->zv_zilog);
+ zv->zv_zilog = NULL;
+
+ dnode_rele(zv->zv_dn, FTAG);
+ zv->zv_dn = NULL;
+
+ /*
+ * Evict cached data
+ */
+ if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) &&
+ !(zv->zv_flags & ZVOL_RDONLY))
+ txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
+ (void) dmu_objset_evict_dbufs(zv->zv_objset);
+}
+
+/*
+ * return the proper tag for rollback and recv
+ */
+void *
+zvol_tag(zvol_state_t *zv)
+{
+ ASSERT(RW_WRITE_HELD(&zv->zv_suspend_lock));
+ return (zv->zv_open_count > 0 ? zv : NULL);
+}
+
+/*
+ * Suspend the zvol for recv and rollback.
+ */
+zvol_state_t *
+zvol_suspend(const char *name)
+{
+ zvol_state_t *zv;
+
+ zv = zvol_find_by_name(name, RW_WRITER);
+
+ if (zv == NULL)
+ return (NULL);
+
+ /* block all I/O, release in zvol_resume. */
+ ASSERT(MUTEX_HELD(&zv->zv_state_lock) &&
+ RW_WRITE_HELD(&zv->zv_suspend_lock));
+
+ atomic_inc(&zv->zv_suspend_ref);
+
+ if (zv->zv_open_count > 0)
+ zvol_shutdown_zv(zv);
+
+ /*
+ * do not hold zv_state_lock across suspend/resume to
+ * avoid locking up zvol lookups
+ */
+ mutex_exit(&zv->zv_state_lock);
+
+ /* zv_suspend_lock is released in zvol_resume() */
+ return (zv);
+}
+
+int
+zvol_resume(zvol_state_t *zv)
+{
+ int error = 0;
+
+ ASSERT(RW_WRITE_HELD(&zv->zv_suspend_lock));
+
+ mutex_enter(&zv->zv_state_lock);
+
+ if (zv->zv_open_count > 0) {
+ VERIFY0(dmu_objset_hold(zv->zv_name, zv, &zv->zv_objset));
+ VERIFY3P(zv->zv_objset->os_dsl_dataset->ds_owner, ==, zv);
+ VERIFY(dsl_dataset_long_held(zv->zv_objset->os_dsl_dataset));
+ dmu_objset_rele(zv->zv_objset, zv);
+
+ error = zvol_setup_zv(zv);
+ }
+
+ mutex_exit(&zv->zv_state_lock);
+
+ rw_exit(&zv->zv_suspend_lock);
+ /*
+ * We need this because we don't hold zvol_state_lock while releasing
+ * zv_suspend_lock. zvol_remove_minors_impl thus cannot check
+	 * zv_suspend_lock to determine whether it is safe to free, because
+	 * an rwlock is not inherently atomic.
+ */
+ atomic_dec(&zv->zv_suspend_ref);
+
+ return (SET_ERROR(error));
+}
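zvol_suspend()/zvol_resume() bracket the objset handoff with zv_suspend_ref precisely because no lock is held across the pair; the refcount is what lets zvol_remove_minors_impl() see a zvol still in transition. A minimal sketch of that refcount gate, using C11 atomics in place of the kernel's (all names illustrative):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int suspend_ref;

static void
suspend(void)
{
	atomic_fetch_add(&suspend_ref, 1);	/* taken in zvol_suspend() */
}

static void
resume(void)
{
	atomic_fetch_sub(&suspend_ref, 1);	/* dropped after the rwlock */
}

/* The remover skips anything still referenced, as it skips open zvols. */
static int
safe_to_free(int open_count)
{
	return (open_count == 0 && atomic_load(&suspend_ref) == 0);
}

int
main(void)
{
	suspend();
	printf("safe_to_free: %d\n", safe_to_free(0));	/* 0: suspended */
	resume();
	printf("safe_to_free: %d\n", safe_to_free(0));	/* 1 */
	return (0);
}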
- zv->zv_objset = os;
+static int
+zvol_first_open(zvol_state_t *zv)
+{
+ objset_t *os;
+ int error, locked = 0;
- error = dsl_prop_get_integer(zv->zv_name, "readonly", &ro, NULL);
- if (error)
- goto out_owned;
+ ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
+ ASSERT(MUTEX_HELD(&zv->zv_state_lock));
- error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
- if (error)
- goto out_owned;
+ /*
+ * In all other cases the spa_namespace_lock is taken before the
+ * bdev->bd_mutex lock. But in this case the Linux __blkdev_get()
+ * function calls fops->open() with the bdev->bd_mutex lock held.
+ * This deadlock can be easily observed with zvols used as vdevs.
+ *
+ * To avoid a potential lock inversion deadlock we preemptively
+ * try to take the spa_namespace_lock(). Normally it will not
+ * be contended and this is safe because spa_open_common() handles
+ * the case where the caller already holds the spa_namespace_lock.
+ *
+ * When it is contended we risk a lock inversion if we were to
+ * block waiting for the lock. Luckily, the __blkdev_get()
+ * function allows us to return -ERESTARTSYS which will result in
+ * bdev->bd_mutex being dropped, reacquired, and fops->open() being
+ * called again. This process can be repeated safely until both
+ * locks are acquired.
+ */
+ if (!mutex_owned(&spa_namespace_lock)) {
+ locked = mutex_tryenter(&spa_namespace_lock);
+ if (!locked)
+ return (-SET_ERROR(ERESTARTSYS));
+ }
- error = dmu_bonus_hold(os, ZVOL_OBJ, zvol_tag, &zv->zv_dbuf);
+ /* lie and say we're read-only */
+ error = dmu_objset_own(zv->zv_name, DMU_OST_ZVOL, 1, zv, &os);
if (error)
- goto out_owned;
+ goto out_mutex;
- set_capacity(zv->zv_disk, volsize >> 9);
- zv->zv_volsize = volsize;
- zv->zv_zilog = zil_open(os, zvol_get_data);
+ zv->zv_objset = os;
- if (ro || dmu_objset_is_snapshot(os) ||
- !spa_writeable(dmu_objset_spa(os))) {
- set_disk_ro(zv->zv_disk, 1);
- zv->zv_flags |= ZVOL_RDONLY;
- } else {
- set_disk_ro(zv->zv_disk, 0);
- zv->zv_flags &= ~ZVOL_RDONLY;
- }
+ error = zvol_setup_zv(zv);
-out_owned:
if (error) {
- dmu_objset_disown(os, zvol_tag);
+ dmu_objset_disown(os, zv);
zv->zv_objset = NULL;
}
+out_mutex:
+ if (locked)
+ mutex_exit(&spa_namespace_lock);
return (SET_ERROR(-error));
}
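The ERESTARTSYS dance documented above is a standard trylock-and-retry escape from lock inversion: if the outer lock cannot be taken without blocking while the caller already holds another lock, bail out and let the caller drop and retry. A hedged userspace sketch of the shape (the ERESTARTSYS value and helper names are illustrative):

#include <pthread.h>
#include <stdio.h>

#define	ERESTARTSYS	512	/* illustrative; kernel-internal errno */

static pthread_mutex_t lock_a = PTHREAD_MUTEX_INITIALIZER;

/*
 * Called while the caller holds lock B (bdev->bd_mutex in the patch).
 * Blocking on lock A here could deadlock, so only try it.
 */
static int
first_open(int *locked)
{
	if (pthread_mutex_trylock(&lock_a) != 0)
		return (-ERESTARTSYS);	/* caller drops B and retries */
	*locked = 1;
	/* ... open work under both locks would go here ... */
	pthread_mutex_unlock(&lock_a);
	*locked = 0;
	return (0);
}

int
main(void)
{
	int locked = 0, rc;

	while ((rc = first_open(&locked)) == -ERESTARTSYS)
		;	/* the kernel re-takes bd_mutex between attempts */
	printf("open rc=%d\n", rc);
	return (0);
}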
static void
zvol_last_close(zvol_state_t *zv)
{
- zil_close(zv->zv_zilog);
- zv->zv_zilog = NULL;
-
- dmu_buf_rele(zv->zv_dbuf, zvol_tag);
- zv->zv_dbuf = NULL;
+ ASSERT(RW_READ_HELD(&zv->zv_suspend_lock));
+ ASSERT(MUTEX_HELD(&zv->zv_state_lock));
- /*
- * Evict cached data
- */
- if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) &&
- !(zv->zv_flags & ZVOL_RDONLY))
- txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
- (void) dmu_objset_evict_dbufs(zv->zv_objset);
+ zvol_shutdown_zv(zv);
- dmu_objset_disown(zv->zv_objset, zvol_tag);
+ dmu_objset_disown(zv->zv_objset, zv);
zv->zv_objset = NULL;
}
@@ -979,31 +1346,41 @@ static int
zvol_open(struct block_device *bdev, fmode_t flag)
{
zvol_state_t *zv;
- int error = 0, drop_mutex = 0;
+ int error = 0;
+ boolean_t drop_suspend = B_FALSE;
- /*
- * If the caller is already holding the mutex do not take it
- * again, this will happen as part of zvol_create_minor_impl().
- * Once add_disk() is called the device is live and the kernel
- * will attempt to open it to read the partition information.
- */
- if (!mutex_owned(&zvol_state_lock)) {
- mutex_enter(&zvol_state_lock);
- drop_mutex = 1;
- }
+ ASSERT(!mutex_owned(&zvol_state_lock));
+ mutex_enter(&zvol_state_lock);
/*
- * Obtain a copy of private_data under the lock to make sure
- * that either the result of zvol_freeg() setting
+ * Obtain a copy of private_data under the zvol_state_lock to make
+ * sure that either the result of zvol free code path setting
* bdev->bd_disk->private_data to NULL is observed, or zvol_free()
* is not called on this zv because of the positive zv_open_count.
*/
zv = bdev->bd_disk->private_data;
if (zv == NULL) {
- error = -ENXIO;
- goto out_mutex;
+ mutex_exit(&zvol_state_lock);
+ return (SET_ERROR(-ENXIO));
+ }
+
+ /* take zv_suspend_lock before zv_state_lock */
+ rw_enter(&zv->zv_suspend_lock, RW_READER);
+
+ mutex_enter(&zv->zv_state_lock);
+
+ /*
+	 * Make sure the zvol is not suspended during first open
+	 * (hold zv_suspend_lock); otherwise, drop the lock.
+ */
+ if (zv->zv_open_count == 0) {
+ drop_suspend = B_TRUE;
+ } else {
+ rw_exit(&zv->zv_suspend_lock);
}
+ mutex_exit(&zvol_state_lock);
+
if (zv->zv_open_count == 0) {
error = zvol_first_open(zv);
if (error)
@@ -1022,10 +1399,12 @@ zvol_open(struct block_device *bdev, fmode_t flag)
out_open_count:
if (zv->zv_open_count == 0)
zvol_last_close(zv);
-
out_mutex:
- if (drop_mutex)
- mutex_exit(&zvol_state_lock);
+ mutex_exit(&zv->zv_state_lock);
+ if (drop_suspend)
+ rw_exit(&zv->zv_suspend_lock);
+ if (error == -ERESTARTSYS)
+ schedule();
return (SET_ERROR(error));
}
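zvol_open() and zvol_release() now observe a fixed ordering, zv_suspend_lock before zv_state_lock, and retain the suspend lock only across a first open or last close. A sketch of that conditional hold, with pthread primitives standing in for the SPL ones:

#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t suspend_lock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;
static int open_count;

static void
open_zv(void)
{
	int drop_suspend = 0;

	pthread_rwlock_rdlock(&suspend_lock);	/* suspend before state */
	pthread_mutex_lock(&state_lock);

	if (open_count == 0)
		drop_suspend = 1;	/* guard first open against suspend */
	else
		pthread_rwlock_unlock(&suspend_lock);

	open_count++;	/* first-open setup would run here */

	pthread_mutex_unlock(&state_lock);
	if (drop_suspend)
		pthread_rwlock_unlock(&suspend_lock);
}

int
main(void)
{
	open_zv();
	printf("open_count=%d\n", open_count);
	return (0);
}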
@@ -1037,22 +1416,38 @@ static int
#endif
zvol_release(struct gendisk *disk, fmode_t mode)
{
- zvol_state_t *zv = disk->private_data;
- int drop_mutex = 0;
+ zvol_state_t *zv;
+ boolean_t drop_suspend = B_FALSE;
+
+ ASSERT(!mutex_owned(&zvol_state_lock));
+ mutex_enter(&zvol_state_lock);
+ zv = disk->private_data;
ASSERT(zv && zv->zv_open_count > 0);
- if (!mutex_owned(&zvol_state_lock)) {
- mutex_enter(&zvol_state_lock);
- drop_mutex = 1;
- }
+ /* take zv_suspend_lock before zv_state_lock */
+ rw_enter(&zv->zv_suspend_lock, RW_READER);
+
+ mutex_enter(&zv->zv_state_lock);
+ mutex_exit(&zvol_state_lock);
+
+ /*
+ * make sure zvol is not suspended during last close
+ * (hold zv_suspend_lock), otherwise, drop the lock
+ */
+ if (zv->zv_open_count == 1)
+ drop_suspend = B_TRUE;
+ else
+ rw_exit(&zv->zv_suspend_lock);
zv->zv_open_count--;
if (zv->zv_open_count == 0)
zvol_last_close(zv);
- if (drop_mutex)
- mutex_exit(&zvol_state_lock);
+ mutex_exit(&zv->zv_state_lock);
+
+ if (drop_suspend)
+ rw_exit(&zv->zv_suspend_lock);
#ifndef HAVE_BLOCK_DEVICE_OPERATIONS_RELEASE_VOID
return (0);
@@ -1070,16 +1465,26 @@ zvol_ioctl(struct block_device *bdev, fmode_t mode,
switch (cmd) {
case BLKFLSBUF:
- zil_commit(zv->zv_zilog, ZVOL_OBJ);
+ fsync_bdev(bdev);
+ invalidate_bdev(bdev);
+ rw_enter(&zv->zv_suspend_lock, RW_READER);
+
+ if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) &&
+ !(zv->zv_flags & ZVOL_RDONLY))
+ txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
+
+ rw_exit(&zv->zv_suspend_lock);
break;
+
case BLKZNAME:
+ mutex_enter(&zv->zv_state_lock);
error = copy_to_user((void *)arg, zv->zv_name, MAXNAMELEN);
+ mutex_exit(&zv->zv_state_lock);
break;
default:
error = -ENOTTY;
break;
-
}
return (SET_ERROR(error));
@@ -1153,10 +1558,11 @@ zvol_probe(dev_t dev, int *part, void *arg)
zvol_state_t *zv;
struct kobject *kobj;
- mutex_enter(&zvol_state_lock);
zv = zvol_find_by_dev(dev);
kobj = zv ? get_disk(zv->zv_disk) : NULL;
- mutex_exit(&zvol_state_lock);
+ ASSERT(zv == NULL || MUTEX_HELD(&zv->zv_state_lock));
+ if (zv)
+ mutex_exit(&zv->zv_state_lock);
return (kobj);
}
@@ -1232,12 +1638,23 @@ static zvol_state_t *
zvol_alloc(dev_t dev, const char *name)
{
zvol_state_t *zv;
+ uint64_t volmode;
+
+ if (dsl_prop_get_integer(name, "volmode", &volmode, NULL) != 0)
+ return (NULL);
+
+ if (volmode == ZFS_VOLMODE_DEFAULT)
+ volmode = zvol_volmode;
+
+ if (volmode == ZFS_VOLMODE_NONE)
+ return (NULL);
zv = kmem_zalloc(sizeof (zvol_state_t), KM_SLEEP);
- spin_lock_init(&zv->zv_lock);
list_link_init(&zv->zv_next);
+ mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL);
+
zv->zv_queue = blk_alloc_queue(GFP_ATOMIC);
if (zv->zv_queue == NULL)
goto out_kmem;
@@ -1245,6 +1662,12 @@ zvol_alloc(dev_t dev, const char *name)
blk_queue_make_request(zv->zv_queue, zvol_request);
blk_queue_set_write_cache(zv->zv_queue, B_TRUE, B_TRUE);
+ /* Limit read-ahead to a single page to prevent over-prefetching. */
+ blk_queue_set_read_ahead(zv->zv_queue, 1);
+
+ /* Disable write merging in favor of the ZIO pipeline. */
+ queue_flag_set_unlocked(QUEUE_FLAG_NOMERGES, zv->zv_queue);
+
zv->zv_disk = alloc_disk(ZVOL_MINORS);
if (zv->zv_disk == NULL)
goto out_queue;
@@ -1255,8 +1678,25 @@ zvol_alloc(dev_t dev, const char *name)
strlcpy(zv->zv_name, name, MAXNAMELEN);
zfs_rlock_init(&zv->zv_range_lock);
+ rw_init(&zv->zv_suspend_lock, NULL, RW_DEFAULT, NULL);
zv->zv_disk->major = zvol_major;
+ if (volmode == ZFS_VOLMODE_DEV) {
+ /*
+		 * ZFS_VOLMODE_DEV disables partitioning on ZVOL devices: set
+		 * gendisk->minors = 1 as noted in include/linux/genhd.h.
+		 * Also disable extended partition numbers (GENHD_FL_EXT_DEVT)
+		 * and suppress partition scanning (GENHD_FL_NO_PART_SCAN) by
+		 * setting gendisk->flags accordingly.
+ */
+ zv->zv_disk->minors = 1;
+#if defined(GENHD_FL_EXT_DEVT)
+ zv->zv_disk->flags &= ~GENHD_FL_EXT_DEVT;
+#endif
+#if defined(GENHD_FL_NO_PART_SCAN)
+ zv->zv_disk->flags |= GENHD_FL_NO_PART_SCAN;
+#endif
+ }
zv->zv_disk->first_minor = (dev & MINORMASK);
zv->zv_disk->fops = &zvol_ops;
zv->zv_disk->private_data = zv;
@@ -1276,21 +1716,32 @@ zvol_alloc(dev_t dev, const char *name)
/*
* Cleanup then free a zvol_state_t which was created by zvol_alloc().
+ * At this time, the structure is not opened by anyone, is taken off
+ * the zvol_state_list, and has its private data set to NULL.
+ * The zvol_state_lock is not held.
*/
static void
-zvol_free(zvol_state_t *zv)
+zvol_free(void *arg)
{
- ASSERT(MUTEX_HELD(&zvol_state_lock));
+ zvol_state_t *zv = arg;
+
+ ASSERT(!MUTEX_HELD(&zvol_state_lock));
+ ASSERT(!RW_LOCK_HELD(&zv->zv_suspend_lock));
+ ASSERT(!MUTEX_HELD(&zv->zv_state_lock));
ASSERT(zv->zv_open_count == 0);
+ ASSERT(zv->zv_disk->private_data == NULL);
+ rw_destroy(&zv->zv_suspend_lock);
zfs_rlock_destroy(&zv->zv_range_lock);
- zv->zv_disk->private_data = NULL;
-
del_gendisk(zv->zv_disk);
blk_cleanup_queue(zv->zv_queue);
put_disk(zv->zv_disk);
+ ida_simple_remove(&zvol_ida, MINOR(zv->zv_dev) >> ZVOL_MINOR_BITS);
+
+ mutex_destroy(&zv->zv_state_lock);
+
kmem_free(zv, sizeof (zvol_state_t));
}
@@ -1309,18 +1760,28 @@ zvol_create_minor_impl(const char *name)
uint64_t len;
unsigned minor = 0;
int error = 0;
+ int idx;
+ uint64_t hash = zvol_name_hash(name);
- mutex_enter(&zvol_state_lock);
+ if (zvol_inhibit_dev)
+ return (0);
+
+ idx = ida_simple_get(&zvol_ida, 0, 0, kmem_flags_convert(KM_SLEEP));
+ if (idx < 0)
+ return (SET_ERROR(-idx));
+ minor = idx << ZVOL_MINOR_BITS;
- zv = zvol_find_by_name(name);
+ zv = zvol_find_by_name_hash(name, hash, RW_NONE);
if (zv) {
- error = SET_ERROR(EEXIST);
- goto out;
+ ASSERT(MUTEX_HELD(&zv->zv_state_lock));
+ mutex_exit(&zv->zv_state_lock);
+ ida_simple_remove(&zvol_ida, idx);
+ return (SET_ERROR(EEXIST));
}
doi = kmem_alloc(sizeof (dmu_object_info_t), KM_SLEEP);
- error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, zvol_tag, &os);
+ error = dmu_objset_own(name, DMU_OST_ZVOL, B_TRUE, FTAG, &os);
if (error)
goto out_doi;
@@ -1332,15 +1793,12 @@ zvol_create_minor_impl(const char *name)
if (error)
goto out_dmu_objset_disown;
- error = zvol_find_minor(&minor);
- if (error)
- goto out_dmu_objset_disown;
-
zv = zvol_alloc(MKDEV(zvol_major, minor), name);
if (zv == NULL) {
error = SET_ERROR(EAGAIN);
goto out_dmu_objset_disown;
}
+ zv->zv_hash = hash;
if (dmu_objset_is_snapshot(os))
zv->zv_flags |= ZVOL_RDONLY;
@@ -1382,29 +1840,24 @@ zvol_create_minor_impl(const char *name)
*/
len = MIN(MAX(zvol_prefetch_bytes, 0), SPA_MAXBLOCKSIZE);
if (len > 0) {
- dmu_prefetch(os, ZVOL_OBJ, 0, len);
- dmu_prefetch(os, ZVOL_OBJ, volsize - len, len);
+ dmu_prefetch(os, ZVOL_OBJ, 0, 0, len, ZIO_PRIORITY_SYNC_READ);
+ dmu_prefetch(os, ZVOL_OBJ, 0, volsize - len, len,
+ ZIO_PRIORITY_SYNC_READ);
}
zv->zv_objset = NULL;
out_dmu_objset_disown:
- dmu_objset_disown(os, zvol_tag);
+ dmu_objset_disown(os, FTAG);
out_doi:
kmem_free(doi, sizeof (dmu_object_info_t));
-out:
if (error == 0) {
+ mutex_enter(&zvol_state_lock);
zvol_insert(zv);
- /*
- * Drop the lock to prevent deadlock with sys_open() ->
- * zvol_open(), which first takes bd_disk->bd_mutex and then
- * takes zvol_state_lock, whereas this code path first takes
- * zvol_state_lock, and then takes bd_disk->bd_mutex.
- */
mutex_exit(&zvol_state_lock);
add_disk(zv->zv_disk);
} else {
- mutex_exit(&zvol_state_lock);
+ ida_simple_remove(&zvol_ida, idx);
}
return (SET_ERROR(error));
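Minor numbers are now carved out of an IDA index rather than found by scanning the state list: each index reserves a contiguous block of minors so partitions can share the block, and the index is recovered by shifting back. A small sketch of the packing, assuming ZVOL_MINOR_BITS is 4 as defined alongside ZVOL_MINORS in zvol.h:

#include <stdio.h>

#define	ZVOL_MINOR_BITS	4		/* assumption: matches zvol.h */
#define	ZVOL_MINORS	(1 << ZVOL_MINOR_BITS)

int
main(void)
{
	int idx = 3;			/* as returned by ida_simple_get() */
	unsigned minor = idx << ZVOL_MINOR_BITS;

	printf("idx %d -> minors %u..%u, back to idx %u\n",
	    idx, minor, minor + ZVOL_MINORS - 1, minor >> ZVOL_MINOR_BITS);
	return (0);
}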
@@ -1419,9 +1872,15 @@ zvol_rename_minor(zvol_state_t *zv, const char *newname)
int readonly = get_disk_ro(zv->zv_disk);
ASSERT(MUTEX_HELD(&zvol_state_lock));
+ ASSERT(MUTEX_HELD(&zv->zv_state_lock));
strlcpy(zv->zv_name, newname, sizeof (zv->zv_name));
+ /* move to new hashtable entry */
+ zv->zv_hash = zvol_name_hash(zv->zv_name);
+ hlist_del(&zv->zv_hlink);
+ hlist_add_head(&zv->zv_hlink, ZVOL_HT_HEAD(zv->zv_hash));
+
/*
* The block device's read-only state is briefly changed causing
* a KOBJ_CHANGE uevent to be issued. This ensures udev detects
@@ -1434,6 +1893,32 @@ zvol_rename_minor(zvol_state_t *zv, const char *newname)
set_disk_ro(zv->zv_disk, readonly);
}
+typedef struct minors_job {
+ list_t *list;
+ list_node_t link;
+ /* input */
+ char *name;
+ /* output */
+ int error;
+} minors_job_t;
+
+/*
+ * Prefetch zvol dnodes for the minors_job
+ */
+static void
+zvol_prefetch_minors_impl(void *arg)
+{
+ minors_job_t *job = arg;
+ char *dsname = job->name;
+ objset_t *os = NULL;
+
+ job->error = dmu_objset_own(dsname, DMU_OST_ZVOL, B_TRUE, FTAG,
+ &os);
+ if (job->error == 0) {
+ dmu_prefetch(os, ZVOL_OBJ, 0, 0, 0, ZIO_PRIORITY_SYNC_READ);
+ dmu_objset_disown(os, FTAG);
+ }
+}
/*
* Mask errors to continue dmu_objset_find() traversal
@@ -1441,7 +1926,9 @@ zvol_rename_minor(zvol_state_t *zv, const char *newname)
static int
zvol_create_snap_minor_cb(const char *dsname, void *arg)
{
- const char *name = (const char *)arg;
+ minors_job_t *j = arg;
+ list_t *minors_list = j->list;
+ const char *name = j->name;
ASSERT0(MUTEX_HELD(&spa_namespace_lock));
@@ -1452,9 +1939,21 @@ zvol_create_snap_minor_cb(const char *dsname, void *arg)
/* at this point, the dsname should name a snapshot */
if (strchr(dsname, '@') == 0) {
dprintf("zvol_create_snap_minor_cb(): "
- "%s is not a shapshot name\n", dsname);
+ "%s is not a shapshot name\n", dsname);
} else {
- (void) zvol_create_minor_impl(dsname);
+ minors_job_t *job;
+ char *n = strdup(dsname);
+ if (n == NULL)
+ return (0);
+
+ job = kmem_alloc(sizeof (minors_job_t), KM_SLEEP);
+ job->name = n;
+ job->list = minors_list;
+ job->error = 0;
+ list_insert_tail(minors_list, job);
+ /* don't care if dispatch fails, because job->error is 0 */
+ taskq_dispatch(system_taskq, zvol_prefetch_minors_impl, job,
+ TQ_SLEEP);
}
return (0);
@@ -1468,6 +1967,7 @@ zvol_create_minors_cb(const char *dsname, void *arg)
{
uint64_t snapdev;
int error;
+ list_t *minors_list = arg;
ASSERT0(MUTEX_HELD(&spa_namespace_lock));
@@ -1483,23 +1983,32 @@ zvol_create_minors_cb(const char *dsname, void *arg)
* snapshots and create device minor nodes for those.
*/
if (strchr(dsname, '@') == 0) {
- /* create minor for the 'dsname' explicitly */
- error = zvol_create_minor_impl(dsname);
- if ((error == 0 || error == EEXIST) &&
- (snapdev == ZFS_SNAPDEV_VISIBLE)) {
- fstrans_cookie_t cookie = spl_fstrans_mark();
+ minors_job_t *job;
+ char *n = strdup(dsname);
+ if (n == NULL)
+ return (0);
+
+ job = kmem_alloc(sizeof (minors_job_t), KM_SLEEP);
+ job->name = n;
+ job->list = minors_list;
+ job->error = 0;
+ list_insert_tail(minors_list, job);
+ /* don't care if dispatch fails, because job->error is 0 */
+ taskq_dispatch(system_taskq, zvol_prefetch_minors_impl, job,
+ TQ_SLEEP);
+
+ if (snapdev == ZFS_SNAPDEV_VISIBLE) {
/*
* traverse snapshots only, do not traverse children,
* and skip the 'dsname'
*/
error = dmu_objset_find((char *)dsname,
- zvol_create_snap_minor_cb, (void *)dsname,
+ zvol_create_snap_minor_cb, (void *)job,
DS_FIND_SNAPSHOTS);
- spl_fstrans_unmark(cookie);
}
} else {
dprintf("zvol_create_minors_cb(): %s is not a zvol name\n",
- dsname);
+ dsname);
}
return (0);
@@ -1518,7 +2027,7 @@ zvol_create_minors_cb(const char *dsname, void *arg)
* - for each zvol, create a minor node, then check if the zvol's snapshots
* are 'visible', and only then iterate over the snapshots if needed
*
- * If the name represents a snapshot, a check is perfromed if the snapshot is
+ * If the name represents a snapshot, a check is performed if the snapshot is
* 'visible' (which also verifies that the parent is a zvol), and if so,
* a minor node for that snapshot is created.
*/
@@ -1528,10 +2037,24 @@ zvol_create_minors_impl(const char *name)
int error = 0;
fstrans_cookie_t cookie;
char *atp, *parent;
+ list_t minors_list;
+ minors_job_t *job;
if (zvol_inhibit_dev)
return (0);
+ /*
+	 * This is the list for prefetch jobs. Whenever we find a match
+	 * during dmu_objset_find(), we insert a minors_job into the list and
+	 * dispatch it to the taskq to prefetch zvol dnodes in parallel. Note
+	 * we don't need any locks because all list operations are done on
+	 * the current thread.
+	 *
+	 * We will use this list to run zvol_create_minor_impl() after the
+	 * prefetch, so we don't have to traverse with dmu_objset_find() again.
+ */
+ list_create(&minors_list, sizeof (minors_job_t),
+ offsetof(minors_job_t, link));
+
parent = kmem_alloc(MAXPATHLEN, KM_SLEEP);
(void) strlcpy(parent, name, MAXPATHLEN);
@@ -1547,11 +2070,26 @@ zvol_create_minors_impl(const char *name)
} else {
cookie = spl_fstrans_mark();
error = dmu_objset_find(parent, zvol_create_minors_cb,
- NULL, DS_FIND_CHILDREN);
+ &minors_list, DS_FIND_CHILDREN);
spl_fstrans_unmark(cookie);
}
kmem_free(parent, MAXPATHLEN);
+ taskq_wait_outstanding(system_taskq, 0);
+
+ /*
+ * Prefetch is completed, we can do zvol_create_minor_impl
+ * sequentially.
+ */
+ while ((job = list_head(&minors_list)) != NULL) {
+ list_remove(&minors_list, job);
+ if (!job->error)
+ zvol_create_minor_impl(job->name);
+ strfree(job->name);
+ kmem_free(job, sizeof (minors_job_t));
+ }
+
+ list_destroy(&minors_list);
return (SET_ERROR(error));
}
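The create path above thus has a fan-out/fan-in shape: dispatch one prefetch job per dataset to warm the dnodes in parallel, wait for all of them, then create the minors sequentially from the accumulated list. A pthread sketch of that shape (job fields and counts are illustrative):

#include <pthread.h>
#include <stdio.h>

#define	NJOBS	3

struct job {
	const char *name;
	int error;
};

static void *
prefetch(void *arg)
{
	struct job *j = arg;

	/* dmu_objset_own() + dmu_prefetch() would go here */
	printf("prefetched %s\n", j->name);
	j->error = 0;
	return (NULL);
}

int
main(void)
{
	struct job jobs[NJOBS] = {
		{ "pool/a", 0 }, { "pool/b", 0 }, { "pool/c", 0 }
	};
	pthread_t tids[NJOBS];
	int i;

	for (i = 0; i < NJOBS; i++)	/* parallel prefetch */
		pthread_create(&tids[i], NULL, prefetch, &jobs[i]);
	for (i = 0; i < NJOBS; i++)	/* taskq_wait_outstanding() */
		pthread_join(tids[i], NULL);

	for (i = 0; i < NJOBS; i++)	/* sequential minor creation */
		if (!jobs[i].error)
			printf("create minor for %s\n", jobs[i].name);
	return (0);
}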
@@ -1564,60 +2102,117 @@ zvol_remove_minors_impl(const char *name)
{
zvol_state_t *zv, *zv_next;
int namelen = ((name) ? strlen(name) : 0);
+ taskqid_t t, tid = TASKQID_INVALID;
+ list_t free_list;
if (zvol_inhibit_dev)
return;
+ list_create(&free_list, sizeof (zvol_state_t),
+ offsetof(zvol_state_t, zv_next));
+
mutex_enter(&zvol_state_lock);
for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
zv_next = list_next(&zvol_state_list, zv);
+ mutex_enter(&zv->zv_state_lock);
if (name == NULL || strcmp(zv->zv_name, name) == 0 ||
(strncmp(zv->zv_name, name, namelen) == 0 &&
(zv->zv_name[namelen] == '/' ||
zv->zv_name[namelen] == '@'))) {
+ /*
+ * By holding zv_state_lock here, we guarantee that no
+ * one is currently using this zv
+ */
/* If in use, leave alone */
- if (zv->zv_open_count > 0)
+ if (zv->zv_open_count > 0 ||
+ atomic_read(&zv->zv_suspend_ref)) {
+ mutex_exit(&zv->zv_state_lock);
continue;
+ }
zvol_remove(zv);
- zvol_free(zv);
+
+ /*
+ * clear this while holding zvol_state_lock so
+ * zvol_open won't open it
+ */
+ zv->zv_disk->private_data = NULL;
+
+ /* Drop zv_state_lock before zvol_free() */
+ mutex_exit(&zv->zv_state_lock);
+
+ /* try parallel zv_free, if failed do it in place */
+ t = taskq_dispatch(system_taskq, zvol_free, zv,
+ TQ_SLEEP);
+ if (t == TASKQID_INVALID)
+ list_insert_head(&free_list, zv);
+ else
+ tid = t;
+ } else {
+ mutex_exit(&zv->zv_state_lock);
}
}
-
mutex_exit(&zvol_state_lock);
+
+ /*
+ * Drop zvol_state_lock before calling zvol_free()
+ */
+ while ((zv = list_head(&free_list)) != NULL) {
+ list_remove(&free_list, zv);
+ zvol_free(zv);
+ }
+
+ if (tid != TASKQID_INVALID)
+ taskq_wait_outstanding(system_taskq, tid);
}
-/* Remove minor for this specific snapshot only */
+/* Remove minor for this specific volume only */
static void
zvol_remove_minor_impl(const char *name)
{
- zvol_state_t *zv, *zv_next;
+ zvol_state_t *zv = NULL, *zv_next;
if (zvol_inhibit_dev)
return;
- if (strchr(name, '@') == NULL)
- return;
-
mutex_enter(&zvol_state_lock);
for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
zv_next = list_next(&zvol_state_list, zv);
+ mutex_enter(&zv->zv_state_lock);
if (strcmp(zv->zv_name, name) == 0) {
+ /*
+ * By holding zv_state_lock here, we guarantee that no
+ * one is currently using this zv
+ */
+
/* If in use, leave alone */
- if (zv->zv_open_count > 0)
+ if (zv->zv_open_count > 0 ||
+ atomic_read(&zv->zv_suspend_ref)) {
+ mutex_exit(&zv->zv_state_lock);
continue;
+ }
zvol_remove(zv);
- zvol_free(zv);
+
+ /* clear this so zvol_open won't open it */
+ zv->zv_disk->private_data = NULL;
+
+ mutex_exit(&zv->zv_state_lock);
break;
+ } else {
+ mutex_exit(&zv->zv_state_lock);
}
}
+ /* Drop zvol_state_lock before calling zvol_free() */
mutex_exit(&zvol_state_lock);
+
+ if (zv != NULL)
+ zvol_free(zv);
}
/*
@@ -1640,9 +2235,13 @@ zvol_rename_minors_impl(const char *oldname, const char *newname)
for (zv = list_head(&zvol_state_list); zv != NULL; zv = zv_next) {
zv_next = list_next(&zvol_state_list, zv);
+ mutex_enter(&zv->zv_state_lock);
+
/* If in use, leave alone */
- if (zv->zv_open_count > 0)
+ if (zv->zv_open_count > 0) {
+ mutex_exit(&zv->zv_state_lock);
continue;
+ }
if (strcmp(zv->zv_name, oldname) == 0) {
zvol_rename_minor(zv, newname);
@@ -1655,6 +2254,8 @@ zvol_rename_minors_impl(const char *oldname, const char *newname)
zvol_rename_minor(zv, name);
kmem_free(name, strlen(name + 1));
}
+
+ mutex_exit(&zv->zv_state_lock);
}
mutex_exit(&zvol_state_lock);
@@ -1665,7 +2266,8 @@ typedef struct zvol_snapdev_cb_arg {
} zvol_snapdev_cb_arg_t;
static int
-zvol_set_snapdev_cb(const char *dsname, void *param) {
+zvol_set_snapdev_cb(const char *dsname, void *param)
+{
zvol_snapdev_cb_arg_t *arg = param;
if (strchr(dsname, '@') == NULL)
@@ -1696,9 +2298,50 @@ zvol_set_snapdev_impl(char *name, uint64_t snapdev)
spl_fstrans_unmark(cookie);
}
+typedef struct zvol_volmode_cb_arg {
+ uint64_t volmode;
+} zvol_volmode_cb_arg_t;
+
+static void
+zvol_set_volmode_impl(char *name, uint64_t volmode)
+{
+ fstrans_cookie_t cookie = spl_fstrans_mark();
+
+ if (strchr(name, '@') != NULL)
+ return;
+
+ /*
+ * It's unfortunate we need to remove minors before we create new ones:
+ * this is necessary because our backing gendisk (zvol_state->zv_disk)
+	 * could be different when we set, for instance, volmode from "geom"
+ * to "dev" (or vice versa).
+ * A possible optimization is to modify our consumers so we don't get
+ * called when "volmode" does not change.
+ */
+ switch (volmode) {
+ case ZFS_VOLMODE_NONE:
+ (void) zvol_remove_minor_impl(name);
+ break;
+ case ZFS_VOLMODE_GEOM:
+ case ZFS_VOLMODE_DEV:
+ (void) zvol_remove_minor_impl(name);
+ (void) zvol_create_minor_impl(name);
+ break;
+ case ZFS_VOLMODE_DEFAULT:
+ (void) zvol_remove_minor_impl(name);
+ if (zvol_volmode == ZFS_VOLMODE_NONE)
+ break;
+		else	/* if zvol_volmode is invalid, default to "geom" */
+ (void) zvol_create_minor_impl(name);
+ break;
+ }
+
+ spl_fstrans_unmark(cookie);
+}
+
static zvol_task_t *
zvol_task_alloc(zvol_async_op_t op, const char *name1, const char *name2,
- uint64_t snapdev)
+ uint64_t value)
{
zvol_task_t *task;
char *delim;
@@ -1709,7 +2352,7 @@ zvol_task_alloc(zvol_async_op_t op, const char *name1, const char *name2,
task = kmem_zalloc(sizeof (zvol_task_t), KM_SLEEP);
task->op = op;
- task->snapdev = snapdev;
+ task->value = value;
delim = strchr(name1, '/');
strlcpy(task->pool, name1, delim ? (delim - name1 + 1) : MAXNAMELEN);
@@ -1745,7 +2388,10 @@ zvol_task_cb(void *param)
zvol_rename_minors_impl(task->name1, task->name2);
break;
case ZVOL_ASYNC_SET_SNAPDEV:
- zvol_set_snapdev_impl(task->name1, task->snapdev);
+ zvol_set_snapdev_impl(task->name1, task->value);
+ break;
+ case ZVOL_ASYNC_SET_VOLMODE:
+ zvol_set_volmode_impl(task->name1, task->value);
break;
default:
VERIFY(0);
@@ -1755,12 +2401,12 @@ zvol_task_cb(void *param)
zvol_task_free(task);
}
-typedef struct zvol_set_snapdev_arg {
+typedef struct zvol_set_prop_int_arg {
const char *zsda_name;
uint64_t zsda_value;
zprop_source_t zsda_source;
dmu_tx_t *zsda_tx;
-} zvol_set_snapdev_arg_t;
+} zvol_set_prop_int_arg_t;
/*
* Sanity check the dataset for safe use by the sync task. No additional
@@ -1769,7 +2415,7 @@ typedef struct zvol_set_snapdev_arg {
static int
zvol_set_snapdev_check(void *arg, dmu_tx_t *tx)
{
- zvol_set_snapdev_arg_t *zsda = arg;
+ zvol_set_prop_int_arg_t *zsda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dir_t *dd;
int error;
@@ -1783,41 +2429,52 @@ zvol_set_snapdev_check(void *arg, dmu_tx_t *tx)
return (error);
}
+/* ARGSUSED */
static int
zvol_set_snapdev_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
{
- zvol_set_snapdev_arg_t *zsda = arg;
char dsname[MAXNAMELEN];
zvol_task_t *task;
+ uint64_t snapdev;
dsl_dataset_name(ds, dsname);
- dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_SNAPDEV),
- zsda->zsda_source, sizeof (zsda->zsda_value), 1,
- &zsda->zsda_value, zsda->zsda_tx);
-
- task = zvol_task_alloc(ZVOL_ASYNC_SET_SNAPDEV, dsname,
- NULL, zsda->zsda_value);
+ if (dsl_prop_get_int_ds(ds, "snapdev", &snapdev) != 0)
+ return (0);
+ task = zvol_task_alloc(ZVOL_ASYNC_SET_SNAPDEV, dsname, NULL, snapdev);
if (task == NULL)
return (0);
(void) taskq_dispatch(dp->dp_spa->spa_zvol_taskq, zvol_task_cb,
- task, TQ_SLEEP);
+ task, TQ_SLEEP);
return (0);
}
/*
- * Traverse all child snapshot datasets and apply snapdev appropriately.
+ * Traverse all child datasets and apply snapdev appropriately.
+ * We call dsl_prop_set_sync_impl() here to set the value only on the toplevel
+ * dataset and read the effective "snapdev" on every child in the callback
+ * function: this is because the value is not guaranteed to be the same in the
+ * whole dataset hierarchy.
*/
static void
zvol_set_snapdev_sync(void *arg, dmu_tx_t *tx)
{
- zvol_set_snapdev_arg_t *zsda = arg;
+ zvol_set_prop_int_arg_t *zsda = arg;
dsl_pool_t *dp = dmu_tx_pool(tx);
dsl_dir_t *dd;
+ dsl_dataset_t *ds;
+ int error;
VERIFY0(dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL));
zsda->zsda_tx = tx;
+ error = dsl_dataset_hold(dp, zsda->zsda_name, FTAG, &ds);
+ if (error == 0) {
+ dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_SNAPDEV),
+ zsda->zsda_source, sizeof (zsda->zsda_value), 1,
+ &zsda->zsda_value, zsda->zsda_tx);
+ dsl_dataset_rele(ds, FTAG);
+ }
dmu_objset_find_dp(dp, dd->dd_object, zvol_set_snapdev_sync_cb,
zsda, DS_FIND_CHILDREN);
@@ -1827,7 +2484,7 @@ zvol_set_snapdev_sync(void *arg, dmu_tx_t *tx)
int
zvol_set_snapdev(const char *ddname, zprop_source_t source, uint64_t snapdev)
{
- zvol_set_snapdev_arg_t zsda;
+ zvol_set_prop_int_arg_t zsda;
zsda.zsda_name = ddname;
zsda.zsda_source = source;
@@ -1837,6 +2494,93 @@ zvol_set_snapdev(const char *ddname, zprop_source_t source, uint64_t snapdev)
zvol_set_snapdev_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE));
}
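As the comments above note, the sync-task callbacks read the effective property on each child instead of propagating the toplevel value, because inheritance can differ along the hierarchy. A minimal sketch of resolving an effective value through parent links (the struct layout is illustrative, not the DSL's):

#include <stdio.h>

#define	UNSET	(-1)

struct ds {
	struct ds *parent;
	long snapdev;	/* UNSET means "inherit" */
};

/* Effective value: local if set, else the nearest ancestor's. */
static long
effective(const struct ds *d, long def)
{
	for (; d != NULL; d = d->parent)
		if (d->snapdev != UNSET)
			return (d->snapdev);
	return (def);
}

int
main(void)
{
	struct ds top = { NULL, 1 };		/* value set on toplevel */
	struct ds child = { &top, UNSET };	/* inherits */
	struct ds leaf = { &child, 0 };		/* local override */

	printf("child=%ld leaf=%ld\n", effective(&child, 0),
	    effective(&leaf, 0));		/* prints 1 and 0 */
	return (0);
}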
+/*
+ * Sanity check the dataset for safe use by the sync task. No additional
+ * conditions are imposed.
+ */
+static int
+zvol_set_volmode_check(void *arg, dmu_tx_t *tx)
+{
+ zvol_set_prop_int_arg_t *zsda = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dir_t *dd;
+ int error;
+
+ error = dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL);
+ if (error != 0)
+ return (error);
+
+ dsl_dir_rele(dd, FTAG);
+
+ return (error);
+}
+
+/* ARGSUSED */
+static int
+zvol_set_volmode_sync_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg)
+{
+ char dsname[MAXNAMELEN];
+ zvol_task_t *task;
+ uint64_t volmode;
+
+ dsl_dataset_name(ds, dsname);
+ if (dsl_prop_get_int_ds(ds, "volmode", &volmode) != 0)
+ return (0);
+ task = zvol_task_alloc(ZVOL_ASYNC_SET_VOLMODE, dsname, NULL, volmode);
+ if (task == NULL)
+ return (0);
+
+ (void) taskq_dispatch(dp->dp_spa->spa_zvol_taskq, zvol_task_cb,
+ task, TQ_SLEEP);
+ return (0);
+}
+
+/*
+ * Traverse all child datasets and apply volmode appropriately.
+ * We call dsl_prop_set_sync_impl() here to set the value only on the toplevel
+ * dataset and read the effective "volmode" on every child in the callback
+ * function: this is because the value is not guaranteed to be the same in the
+ * whole dataset hierarchy.
+ */
+static void
+zvol_set_volmode_sync(void *arg, dmu_tx_t *tx)
+{
+ zvol_set_prop_int_arg_t *zsda = arg;
+ dsl_pool_t *dp = dmu_tx_pool(tx);
+ dsl_dir_t *dd;
+ dsl_dataset_t *ds;
+ int error;
+
+ VERIFY0(dsl_dir_hold(dp, zsda->zsda_name, FTAG, &dd, NULL));
+ zsda->zsda_tx = tx;
+
+ error = dsl_dataset_hold(dp, zsda->zsda_name, FTAG, &ds);
+ if (error == 0) {
+ dsl_prop_set_sync_impl(ds, zfs_prop_to_name(ZFS_PROP_VOLMODE),
+ zsda->zsda_source, sizeof (zsda->zsda_value), 1,
+ &zsda->zsda_value, zsda->zsda_tx);
+ dsl_dataset_rele(ds, FTAG);
+ }
+
+ dmu_objset_find_dp(dp, dd->dd_object, zvol_set_volmode_sync_cb,
+ zsda, DS_FIND_CHILDREN);
+
+ dsl_dir_rele(dd, FTAG);
+}
+
+int
+zvol_set_volmode(const char *ddname, zprop_source_t source, uint64_t volmode)
+{
+ zvol_set_prop_int_arg_t zsda;
+
+ zsda.zsda_name = ddname;
+ zsda.zsda_source = source;
+ zsda.zsda_value = volmode;
+
+ return (dsl_sync_task(ddname, zvol_set_volmode_check,
+ zvol_set_volmode_sync, &zsda, 0, ZFS_SPACE_CHECK_NONE));
+}
+
void
zvol_create_minors(spa_t *spa, const char *name, boolean_t async)
{
@@ -1848,7 +2592,7 @@ zvol_create_minors(spa_t *spa, const char *name, boolean_t async)
return;
id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
- if ((async == B_FALSE) && (id != 0))
+ if ((async == B_FALSE) && (id != TASKQID_INVALID))
taskq_wait_id(spa->spa_zvol_taskq, id);
}
@@ -1863,7 +2607,7 @@ zvol_remove_minors(spa_t *spa, const char *name, boolean_t async)
return;
id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
- if ((async == B_FALSE) && (id != 0))
+ if ((async == B_FALSE) && (id != TASKQID_INVALID))
taskq_wait_id(spa->spa_zvol_taskq, id);
}
@@ -1879,23 +2623,42 @@ zvol_rename_minors(spa_t *spa, const char *name1, const char *name2,
return;
id = taskq_dispatch(spa->spa_zvol_taskq, zvol_task_cb, task, TQ_SLEEP);
- if ((async == B_FALSE) && (id != 0))
+ if ((async == B_FALSE) && (id != TASKQID_INVALID))
taskq_wait_id(spa->spa_zvol_taskq, id);
}
int
zvol_init(void)
{
- int error;
+ int threads = MIN(MAX(zvol_threads, 1), 1024);
+ int i, error;
list_create(&zvol_state_list, sizeof (zvol_state_t),
offsetof(zvol_state_t, zv_next));
mutex_init(&zvol_state_lock, NULL, MUTEX_DEFAULT, NULL);
+ ida_init(&zvol_ida);
+
+ zvol_taskq = taskq_create(ZVOL_DRIVER, threads, maxclsyspri,
+ threads * 2, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
+ if (zvol_taskq == NULL) {
+ printk(KERN_INFO "ZFS: taskq_create() failed\n");
+ error = -ENOMEM;
+ goto out;
+ }
+
+ zvol_htable = kmem_alloc(ZVOL_HT_SIZE * sizeof (struct hlist_head),
+ KM_SLEEP);
+ if (!zvol_htable) {
+ error = -ENOMEM;
+ goto out_taskq;
+ }
+ for (i = 0; i < ZVOL_HT_SIZE; i++)
+ INIT_HLIST_HEAD(&zvol_htable[i]);
error = register_blkdev(zvol_major, ZVOL_DRIVER);
if (error) {
printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
- goto out;
+ goto out_free;
}
blk_register_region(MKDEV(zvol_major, 0), 1UL << MINORBITS,
@@ -1903,7 +2666,12 @@ zvol_init(void)
return (0);
+out_free:
+ kmem_free(zvol_htable, ZVOL_HT_SIZE * sizeof (struct hlist_head));
+out_taskq:
+ taskq_destroy(zvol_taskq);
out:
+ ida_destroy(&zvol_ida);
mutex_destroy(&zvol_state_lock);
list_destroy(&zvol_state_list);
@@ -1917,19 +2685,34 @@ zvol_fini(void)
blk_unregister_region(MKDEV(zvol_major, 0), 1UL << MINORBITS);
unregister_blkdev(zvol_major, ZVOL_DRIVER);
+ kmem_free(zvol_htable, ZVOL_HT_SIZE * sizeof (struct hlist_head));
+ taskq_destroy(zvol_taskq);
list_destroy(&zvol_state_list);
mutex_destroy(&zvol_state_lock);
+
+ ida_destroy(&zvol_ida);
}
+/* BEGIN CSTYLED */
module_param(zvol_inhibit_dev, uint, 0644);
MODULE_PARM_DESC(zvol_inhibit_dev, "Do not create zvol device nodes");
module_param(zvol_major, uint, 0444);
MODULE_PARM_DESC(zvol_major, "Major number for zvol device");
+module_param(zvol_threads, uint, 0444);
+MODULE_PARM_DESC(zvol_threads, "Max number of threads to handle I/O requests");
+
+module_param(zvol_request_sync, uint, 0644);
+MODULE_PARM_DESC(zvol_request_sync, "Synchronously handle bio requests");
+
module_param(zvol_max_discard_blocks, ulong, 0444);
MODULE_PARM_DESC(zvol_max_discard_blocks, "Max number of blocks to discard");
module_param(zvol_prefetch_bytes, uint, 0644);
MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");
+
+module_param(zvol_volmode, uint, 0644);
+MODULE_PARM_DESC(zvol_volmode, "Default volmode property value");
+/* END CSTYLED */
diff --git a/zfs/module/zpios/pios.c b/zfs/module/zpios/pios.c
index e3a85c1686e6..c70c0d6f1c9e 100644
--- a/zfs/module/zpios/pios.c
+++ b/zfs/module/zpios/pios.c
@@ -1,7 +1,7 @@
/*
* ZPIOS is a heavily modified version of the original PIOS test code.
* It is designed to have the test code running in the Linux kernel
- * against ZFS while still being flexibly controled from user space.
+ * against ZFS while still being flexibly controlled from user space.
*
* Copyright (C) 2008-2010 Lawrence Livermore National Security, LLC.
* Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
@@ -29,10 +29,13 @@
*
* You should have received a copy of the GNU General Public License along
* with ZPIOS. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Copyright (c) 2015, Intel Corporation.
*/
#include <sys/zfs_context.h>
#include <sys/dmu.h>
+#include <sys/spa.h>
#include <sys/txg.h>
#include <sys/dsl_destroy.h>
#include <linux/miscdevice.h>
@@ -129,8 +132,17 @@ zpios_dmu_object_create(run_args_t *run_args, objset_t *os)
{
struct dmu_tx *tx;
uint64_t obj = 0ULL;
+ uint64_t blksize = run_args->block_size;
int rc;
+ if (blksize < SPA_MINBLOCKSIZE ||
+ blksize > spa_maxblocksize(dmu_objset_spa(os)) ||
+ !ISP2(blksize)) {
+ zpios_print(run_args->file,
+ "invalid block size for pool: %d\n", (int)blksize);
+ return (obj);
+ }
+
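The new check rejects block sizes that are out of range or not powers of two. A tiny sketch of that predicate, assuming ISP2() matches the definition in sys/sysmacros.h:

#include <stdio.h>

/* assumption: matches the ISP2() definition in sys/sysmacros.h */
#define	ISP2(x)	(((x) & ((x) - 1)) == 0)

int
main(void)
{
	unsigned sizes[] = { 512, 4096, 6000, 131072 };
	unsigned i;

	for (i = 0; i < sizeof (sizes) / sizeof (sizes[0]); i++)
		printf("%u %s a power of two\n", sizes[i],
		    ISP2(sizes[i]) ? "is" : "is not");
	return (0);
}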
tx = dmu_tx_create(os);
dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, OBJ_SIZE);
rc = dmu_tx_assign(tx, TXG_WAIT);
@@ -142,10 +154,11 @@ zpios_dmu_object_create(run_args_t *run_args, objset_t *os)
}
obj = dmu_object_alloc(os, DMU_OT_UINT64_OTHER, 0, DMU_OT_NONE, 0, tx);
- rc = dmu_object_set_blocksize(os, obj, 128ULL << 10, 0, tx);
+ rc = dmu_object_set_blocksize(os, obj, blksize, 0, tx);
if (rc) {
zpios_print(run_args->file,
- "dmu_object_set_blocksize() failed: %d\n", rc);
+ "dmu_object_set_blocksize to %d failed: %d\n",
+ (int)blksize, rc);
dmu_tx_abort(tx);
return (obj);
}
@@ -166,7 +179,7 @@ zpios_dmu_object_free(run_args_t *run_args, objset_t *os, uint64_t obj)
rc = dmu_tx_assign(tx, TXG_WAIT);
if (rc) {
zpios_print(run_args->file,
- "dmu_tx_assign() failed: %d\n", rc);
+ "dmu_tx_assign() failed: %d\n", rc);
dmu_tx_abort(tx);
return (rc);
}
@@ -174,7 +187,7 @@ zpios_dmu_object_free(run_args_t *run_args, objset_t *os, uint64_t obj)
rc = dmu_object_free(os, obj, tx);
if (rc) {
zpios_print(run_args->file,
- "dmu_object_free() failed: %d\n", rc);
+ "dmu_object_free() failed: %d\n", rc);
dmu_tx_abort(tx);
return (rc);
}
@@ -200,14 +213,14 @@ zpios_dmu_setup(run_args_t *run_args)
rc = dmu_objset_create(name, DMU_OST_OTHER, 0, NULL, NULL);
if (rc) {
zpios_print(run_args->file, "Error dmu_objset_create(%s, ...) "
- "failed: %d\n", name, rc);
+ "failed: %d\n", name, rc);
goto out;
}
rc = dmu_objset_own(name, DMU_OST_OTHER, 0, zpios_tag, &os);
if (rc) {
zpios_print(run_args->file, "Error dmu_objset_own(%s, ...) "
- "failed: %d\n", name, rc);
+ "failed: %d\n", name, rc);
goto out_destroy;
}
@@ -216,7 +229,7 @@ zpios_dmu_setup(run_args_t *run_args)
if (obj == 0) {
rc = -EBADF;
zpios_print(run_args->file, "Error zpios_dmu_"
- "object_create() failed, %d\n", rc);
+ "object_create() failed, %d\n", rc);
goto out_destroy;
}
}
@@ -255,7 +268,7 @@ zpios_dmu_setup(run_args_t *run_args)
rc2 = dsl_destroy_head(name);
if (rc2)
zpios_print(run_args->file, "Error dsl_destroy_head"
- "(%s, ...) failed: %d\n", name, rc2);
+ "(%s, ...) failed: %d\n", name, rc2);
}
out:
t->stop = zpios_timespec_now();
@@ -274,11 +287,6 @@ zpios_setup_run(run_args_t **run_args, zpios_cmd_t *kcmd, struct file *file)
size = sizeof (*ra) + kcmd->cmd_region_count * sizeof (zpios_region_t);
ra = vmem_zalloc(size, KM_SLEEP);
- if (ra == NULL) {
- zpios_print(file, "Unable to vmem_zalloc() %d bytes "
- "for regions\n", size);
- return (-ENOMEM);
- }
*run_args = ra;
strncpy(ra->pool, kcmd->cmd_pool, ZPIOS_NAME_SIZE - 1);
@@ -295,6 +303,7 @@ zpios_setup_run(run_args_t **run_args, zpios_cmd_t *kcmd, struct file *file)
ra->chunk_noise = kcmd->cmd_chunk_noise;
ra->thread_delay = kcmd->cmd_thread_delay;
ra->flags = kcmd->cmd_flags;
+ ra->block_size = kcmd->cmd_block_size;
ra->stats.wr_data = 0;
ra->stats.wr_chunks = 0;
ra->stats.rd_data = 0;
@@ -319,7 +328,7 @@ zpios_setup_run(run_args_t **run_args, zpios_cmd_t *kcmd, struct file *file)
static int
zpios_get_work_item(run_args_t *run_args, dmu_obj_t *obj, __u64 *offset,
- __u32 *chunk_size, zpios_region_t **region, __u32 flags)
+ __u32 *chunk_size, zpios_region_t **region, __u32 flags)
{
int i, j, count = 0;
unsigned int random_int;
@@ -467,7 +476,7 @@ zpios_cleanup_run(run_args_t *run_args)
static int
zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object,
- uint64_t offset, uint64_t size, const void *buf)
+ uint64_t offset, uint64_t size, const void *buf)
{
struct dmu_tx *tx;
int rc, how = TXG_WAIT;
@@ -488,7 +497,7 @@ zpios_dmu_write(run_args_t *run_args, objset_t *os, uint64_t object,
continue;
}
zpios_print(run_args->file,
- "Error in dmu_tx_assign(), %d", rc);
+ "Error in dmu_tx_assign(), %d", rc);
dmu_tx_abort(tx);
return (rc);
}
@@ -579,7 +588,7 @@ zpios_thread_main(void *data)
if (rc) {
zpios_print(run_args->file, "IO error while doing "
- "dmu_write(): %d\n", rc);
+ "dmu_write(): %d\n", rc);
break;
}
@@ -642,13 +651,13 @@ zpios_thread_main(void *data)
t.start = zpios_timespec_now();
rc = zpios_dmu_read(run_args, obj.os, obj.obj,
- offset, chunk_size, buf);
+ offset, chunk_size, buf);
t.stop = zpios_timespec_now();
t.delta = zpios_timespec_sub(t.stop, t.start);
if (rc) {
zpios_print(run_args->file, "IO error while doing "
- "dmu_read(): %d\n", rc);
+ "dmu_read(): %d\n", rc);
break;
}
@@ -720,16 +729,8 @@ zpios_threads_run(run_args_t *run_args)
int i, rc = 0, tc = run_args->thread_count;
tsks = kmem_zalloc(sizeof (struct task_struct *) * tc, KM_SLEEP);
- if (tsks == NULL) {
- rc = -ENOMEM;
- goto cleanup2;
- }
run_args->threads = kmem_zalloc(sizeof (thread_data_t *)*tc, KM_SLEEP);
- if (run_args->threads == NULL) {
- rc = -ENOMEM;
- goto cleanup;
- }
init_waitqueue_head(&run_args->waitq);
run_args->threads_done = 0;
@@ -737,10 +738,6 @@ zpios_threads_run(run_args_t *run_args)
/* Create all the needed threads which will sleep until awoken */
for (i = 0; i < tc; i++) {
thr = kmem_zalloc(sizeof (thread_data_t), KM_SLEEP);
- if (thr == NULL) {
- rc = -ENOMEM;
- goto taskerr;
- }
thr->thread_no = i;
thr->run_args = run_args;
@@ -832,8 +829,6 @@ zpios_threads_run(run_args_t *run_args)
cleanup:
kmem_free(tsks, sizeof (struct task_struct *) * tc);
-cleanup2:
- /* Returns first encountered thread error (if any) */
return (rc);
taskerr:
@@ -929,17 +924,11 @@ zpios_open(struct inode *inode, struct file *file)
zpios_info_t *info;
info = (zpios_info_t *)kmem_alloc(sizeof (*info), KM_SLEEP);
- if (info == NULL)
- return (-ENOMEM);
spin_lock_init(&info->info_lock);
info->info_size = ZPIOS_INFO_BUFFER_SIZE;
info->info_buffer =
- (char *) vmem_alloc(ZPIOS_INFO_BUFFER_SIZE, KM_SLEEP);
- if (info->info_buffer == NULL) {
- kmem_free(info, sizeof (*info));
- return (-ENOMEM);
- }
+ (char *)vmem_alloc(ZPIOS_INFO_BUFFER_SIZE, KM_SLEEP);
info->info_head = info->info_buffer;
file->private_data = (void *)info;
@@ -992,10 +981,6 @@ zpios_buffer_size(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
size = kcfg->cfg_arg1;
buf = (char *)vmem_alloc(size, KM_SLEEP);
- if (buf == NULL) {
- rc = -ENOMEM;
- goto out;
- }
 	/* Zero fill and truncate contents when copying the buffer */
min = ((size < info->info_size) ? size : info->info_size);
@@ -1012,7 +997,7 @@ zpios_buffer_size(struct file *file, zpios_cfg_t *kcfg, unsigned long arg)
if (copy_to_user((struct zpios_cfg_t __user *)arg,
kcfg, sizeof (*kcfg)))
rc = -EFAULT;
-out:
+
spin_unlock(&info->info_lock);
return (rc);
@@ -1050,7 +1035,7 @@ zpios_ioctl_cfg(struct file *file, unsigned long arg)
break;
default:
zpios_print(file, "Bad config command %d\n",
- kcfg.cfg_cmd);
+ kcfg.cfg_cmd);
rc = -EINVAL;
break;
}
@@ -1066,16 +1051,11 @@ zpios_ioctl_cmd(struct file *file, unsigned long arg)
int rc = -EINVAL;
kcmd = kmem_alloc(sizeof (zpios_cmd_t), KM_SLEEP);
- if (kcmd == NULL) {
- zpios_print(file, "Unable to kmem_alloc() %ld byte for "
- "zpios_cmd_t\n", (long int)sizeof (zpios_cmd_t));
- return (-ENOMEM);
- }
rc = copy_from_user(kcmd, (zpios_cfg_t *)arg, sizeof (zpios_cmd_t));
if (rc) {
zpios_print(file, "Unable to copy command structure "
- "from user to kernel memory, %d\n", rc);
+ "from user to kernel memory, %d\n", rc);
goto out_cmd;
}
@@ -1089,19 +1069,12 @@ zpios_ioctl_cmd(struct file *file, unsigned long arg)
/* Allocate memory for any opaque data the caller needed to pass on */
if (kcmd->cmd_data_size > 0) {
data = (void *)vmem_alloc(kcmd->cmd_data_size, KM_SLEEP);
- if (data == NULL) {
- zpios_print(file, "Unable to vmem_alloc() %ld "
- "bytes for data buffer\n",
- (long)kcmd->cmd_data_size);
- rc = -ENOMEM;
- goto out_cmd;
- }
rc = copy_from_user(data, (void *)(arg + offsetof(zpios_cmd_t,
cmd_data_str)), kcmd->cmd_data_size);
if (rc) {
zpios_print(file, "Unable to copy data buffer "
- "from user to kernel memory, %d\n", rc);
+ "from user to kernel memory, %d\n", rc);
goto out_data;
}
}
@@ -1117,7 +1090,7 @@ zpios_ioctl_cmd(struct file *file, unsigned long arg)
cmd_data_str)), data, kcmd->cmd_data_size);
if (rc) {
zpios_print(file, "Unable to copy data buffer "
- "from kernel to user memory, %d\n", rc);
+ "from kernel to user memory, %d\n", rc);
rc = -EFAULT;
}
diff --git a/zfs/zfs_config.h.in b/zfs/zfs_config.h.in
index 94bbeb9e6216..df5fd715e093 100644
--- a/zfs/zfs_config.h.in
+++ b/zfs/zfs_config.h.in
@@ -1,8 +1,5 @@
/* zfs_config.h.in. Generated from configure.ac by autoheader. */
-/* Define to 1 to enabled dmu tx validation */
-#undef DEBUG_DMU_TX
-
/* bio_end_io_t wants 1 arg */
#undef HAVE_1ARG_BIO_END_IO_T
@@ -39,6 +36,39 @@
/* dops->automount() exists */
#undef HAVE_AUTOMOUNT
+/* Define if host toolchain supports AVX */
+#undef HAVE_AVX
+
+/* Define if host toolchain supports AVX2 */
+#undef HAVE_AVX2
+
+/* Define if host toolchain supports AVX512BW */
+#undef HAVE_AVX512BW
+
+/* Define if host toolchain supports AVX512CD */
+#undef HAVE_AVX512CD
+
+/* Define if host toolchain supports AVX512DQ */
+#undef HAVE_AVX512DQ
+
+/* Define if host toolchain supports AVX512ER */
+#undef HAVE_AVX512ER
+
+/* Define if host toolchain supports AVX512F */
+#undef HAVE_AVX512F
+
+/* Define if host toolchain supports AVX512IFMA */
+#undef HAVE_AVX512IFMA
+
+/* Define if host toolchain supports AVX512PF */
+#undef HAVE_AVX512PF
+
+/* Define if host toolchain supports AVX512VBMI */
+#undef HAVE_AVX512VBMI
+
+/* Define if host toolchain supports AVX512VL */
+#undef HAVE_AVX512VL
+
/* struct block_device_operations use bdevs */
#undef HAVE_BDEV_BLOCK_DEVICE_OPERATIONS
@@ -66,12 +96,18 @@
/* BIO_RW_FAILFAST_* are defined */
#undef HAVE_BIO_RW_FAILFAST_DTD
+/* bio_set_dev() exists */
+#undef HAVE_BIO_SET_DEV
+
/* bio_set_op_attrs is available */
#undef HAVE_BIO_SET_OP_ATTRS
/* blkdev_get_by_path() is available */
#undef HAVE_BLKDEV_GET_BY_PATH
+/* blk queue backing_dev_info is dynamic */
+#undef HAVE_BLK_QUEUE_BDI_DYNAMIC
+
/* blk_queue_flush() is available */
#undef HAVE_BLK_QUEUE_FLUSH
@@ -108,9 +144,6 @@
/* iops->check_acl() wants flags */
#undef HAVE_CHECK_ACL_WITH_FLAGS
-/* check_disk_size_change() is available */
-#undef HAVE_CHECK_DISK_SIZE_CHANGE
-
/* clear_inode() is available */
#undef HAVE_CLEAR_INODE
@@ -174,6 +207,9 @@
/* fops->aio_fsync() exists */
#undef HAVE_FILE_AIO_FSYNC
+/* file_dentry() is available */
+#undef HAVE_FILE_DENTRY
+
/* fops->fallocate() exists */
#undef HAVE_FILE_FALLOCATE
@@ -192,6 +228,9 @@
/* iops->follow_link() nameidata */
#undef HAVE_FOLLOW_LINK_NAMEIDATA
+/* kernel has <asm/fpu/api.h> interface */
+#undef HAVE_FPU_API_H
+
/* sops->free_cached_objects() exists */
#undef HAVE_FREE_CACHED_OBJECTS
@@ -204,8 +243,11 @@
/* fops->fsync() with dentry */
#undef HAVE_FSYNC_WITH_DENTRY
-/* generic_start_io_acct()/generic_end_io_acct() avaliable */
-#undef HAVE_GENERIC_IO_ACCT
+/* generic_start_io_acct()/generic_end_io_acct() available */
+#undef HAVE_GENERIC_IO_ACCT_3ARG
+
+/* generic_start_io_acct()/generic_end_io_acct() 4 arg available */
+#undef HAVE_GENERIC_IO_ACCT_4ARG
/* generic_readlink is global */
#undef HAVE_GENERIC_READLINK
@@ -237,6 +279,9 @@
/* inode_owner_or_capable() exists */
#undef HAVE_INODE_OWNER_OR_CAPABLE
+/* inode_set_flags() exists */
+#undef HAVE_INODE_SET_FLAGS
+
/* iops->truncate_range() exists */
#undef HAVE_INODE_TRUNCATE_RANGE
@@ -252,21 +297,33 @@
/* uncached_acl_sentinel() exists */
#undef HAVE_KERNEL_GET_ACL_HANDLE_CACHE
-/* kernel defines KOBJ_NAME_LEN */
-#undef HAVE_KOBJ_NAME_LEN
+/* kernel does stack verification */
+#undef HAVE_KERNEL_OBJTOOL
+
+/* i_(uid|gid)_(read|write) exist */
+#undef HAVE_KUID_HELPERS
/* kernel has large stacks */
#undef HAVE_LARGE_STACKS
+/* Define if you have libattr */
+#undef HAVE_LIBATTR
+
/* Define if you have libblkid */
#undef HAVE_LIBBLKID
+/* Define if you have libtirpc */
+#undef HAVE_LIBTIRPC
+
+/* Define if you have libudev */
+#undef HAVE_LIBUDEV
+
+/* Define if udev_device_get_is_initialized is available */
+#undef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED
+
/* Define if you have libuuid */
#undef HAVE_LIBUUID
-/* Define to 1 if you have the `z' library (-lz). */
-#undef HAVE_LIBZ
-
/* iops->lookup() passes nameidata */
#undef HAVE_LOOKUP_NAMEIDATA
@@ -342,6 +399,9 @@
/* iops->put_link() nameidata */
#undef HAVE_PUT_LINK_NAMEIDATA
+/* qat is enabled and existed */
+#undef HAVE_QAT
+
/* iops->rename() wants flags */
#undef HAVE_RENAME_WANTS_FLAGS
@@ -375,6 +435,24 @@
/* struct super_block has s_shrink */
#undef HAVE_SHRINK
+/* Define if host toolchain supports SSE */
+#undef HAVE_SSE
+
+/* Define if host toolchain supports SSE2 */
+#undef HAVE_SSE2
+
+/* Define if host toolchain supports SSE3 */
+#undef HAVE_SSE3
+
+/* Define if host toolchain supports SSE4.1 */
+#undef HAVE_SSE4_1
+
+/* Define if host toolchain supports SSE4.2 */
+#undef HAVE_SSE4_2
+
+/* Define if host toolchain supports SSSE3 */
+#undef HAVE_SSSE3
+
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
@@ -390,6 +468,9 @@
/* super_setup_bdi_name() exits */
#undef HAVE_SUPER_SETUP_BDI_NAME
+/* super_block->s_user_ns exists */
+#undef HAVE_SUPER_USER_NS
+
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
@@ -402,6 +483,9 @@
/* struct super_block has s_instances list_head */
#undef HAVE_S_INSTANCES_LIST_HEAD
+/* i_op->tmpfile() exists */
+#undef HAVE_TMPFILE
+
/* truncate_setsize() is available */
#undef HAVE_TRUNCATE_SETSIZE
@@ -474,6 +558,9 @@
/* make_request_fn() returns blk_qc_t */
#undef MAKE_REQUEST_FN_RET
+/* hardened module_param_call */
+#undef MODULE_PARAM_CALL_CONST
+
/* Name of package */
#undef PACKAGE
@@ -507,6 +594,9 @@
/* zfs debugging enabled */
#undef ZFS_DEBUG
+/* using global_node_page_state() */
+#undef ZFS_GLOBAL_NODE_PAGE_STATE
+
/* Define to 1 if GPL-only symbols can be used */
#undef ZFS_IS_GPL_COMPATIBLE
--
2.14.1