[PATCH 6/7] powerpc/64: Use optimized checksum routines on little-endian
Tim Gardner
tim.gardner at canonical.com
Fri Mar 24 14:05:39 UTC 2017
From: Paul Mackerras <paulus at ozlabs.org>
BugLink: http://bugs.launchpad.net/bugs/1670247
Currently we have optimized hand-coded assembly checksum routines for
big-endian 64-bit systems, but for little-endian we use the generic C
routines. This modifies the optimized routines to work for
little-endian. With this, we no longer need to enable
CONFIG_GENERIC_CSUM. This also fixes a couple of comments in
checksum_64.S so they accurately reflect what the associated instruction
does.
Signed-off-by: Paul Mackerras <paulus at ozlabs.org>
[mpe: Use the more common __BIG_ENDIAN__]
Signed-off-by: Michael Ellerman <mpe at ellerman.id.au>
(cherry picked from commit d4fde568a34a93897dfb9ae64cfe9dda9d5c908c)
Signed-off-by: Tim Gardner <tim.gardner at canonical.com>
---
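A note on the csum_tcpudp_nofold change in the checksum.h hunk below: the optimized code accumulates the pseudo-header in host byte order, so on little-endian the protocol/length contribution has to be shifted into the byte positions it occupies on the wire, which is what the new (proto + len) << 8 branch does. The user-space sketch below is not part of the patch; the helper names (fold32, sum_be16) and the test values are made up, and it assumes a little-endian host. It folds the sum both ways and shows that the little-endian result is simply the byte-swap of the RFC 1071 big-endian sum.

/*
 * Illustrative sketch, not from the patch.  Assumes a little-endian host;
 * fold32, sum_be16 and the test values are invented for the example.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint16_t fold32(uint64_t s)
{
	/* end-around-carry fold down to 16 bits */
	while (s >> 16)
		s = (s & 0xffff) + (s >> 16);
	return (uint16_t)s;
}

static uint16_t sum_be16(const uint8_t *p, int n)
{
	/* RFC 1071 reference sum over 16-bit big-endian words */
	uint64_t s = 0;
	for (int i = 0; i < n; i += 2)
		s += ((uint32_t)p[i] << 8) | p[i + 1];
	return fold32(s);
}

int main(void)
{
	uint8_t ph[12] = { 192, 168, 1, 10,	/* saddr */
			   10, 0, 0, 1,		/* daddr */
			   0, 6,		/* zero byte, proto = TCP */
			   0x12, 0x34 };	/* length */
	uint32_t saddr, daddr;
	uint64_t s;

	/* Reference: sum the pseudo-header as the wire sees it. */
	uint16_t be = sum_be16(ph, 12);

	/* LE style: load the addresses in host (little-endian) order and
	 * add proto + len shifted as in the #else branch of the hunk below. */
	memcpy(&saddr, ph, 4);
	memcpy(&daddr, ph + 4, 4);
	s = (uint64_t)saddr + daddr + ((uint32_t)(6 + 0x1234) << 8);
	uint16_t le = fold32(s);

	printf("be=%04x le=%04x swab16(be)=%04x\n",
	       be, le, (uint16_t)((be >> 8) | (be << 8)));
	/* prints be=dded le=eddd swab16(be)=eddd */
	return 0;
}

When the folded little-endian value is later stored to memory it is byte-swapped again, so the bytes land in network order without any explicit swap in the hot path.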
arch/powerpc/Kconfig | 2 +-
arch/powerpc/include/asm/checksum.h | 4 ++++
arch/powerpc/lib/Makefile | 2 --
arch/powerpc/lib/checksum_64.S | 12 ++++++++++--
4 files changed, 15 insertions(+), 5 deletions(-)
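The #ifdef added in the two checksum_64.S hunks below rests on the same byte-position argument for a lone trailing byte: on big-endian it is the high-order byte of the zero-padded 16-bit word, hence the existing sldi by 8, while on little-endian it is the low-order byte and can be added to the accumulator directly. A minimal sketch, not from the patch, with an arbitrary test value:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint8_t b = 0xab;			/* lone trailing byte of the buffer */

	uint16_t be_word = (uint16_t)b << 8;	/* BE view of { b, 0x00 }: high byte */
	uint16_t le_word = b;			/* LE view of { b, 0x00 }: low byte */

	assert(be_word == 0xab00 && le_word == 0x00ab);
	return 0;
}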
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2d6b5c5..347059a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -164,7 +164,7 @@ config PPC
select HAVE_ARCH_HARDENED_USERCOPY
config GENERIC_CSUM
- def_bool CPU_LITTLE_ENDIAN
+ def_bool n
config EARLY_PRINTK
bool
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index 5b1a6e3..4e63787 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -70,7 +70,11 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
s += (__force u32)saddr;
s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN__
s += proto + len;
+#else
+ s += (proto + len) << 8;
+#endif
return (__force __wsum) from64to32(s);
#else
__asm__("\n\
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index ad52900..0e47de2 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -21,9 +21,7 @@ obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \
obj64-$(CONFIG_SMP) += locks.o
obj64-$(CONFIG_ALTIVEC) += vmx-helper.o
-ifeq ($(CONFIG_GENERIC_CSUM),)
obj-y += checksum_$(BITS).o checksum_wrappers.o
-endif
obj-$(CONFIG_PPC_EMULATE_SSTEP) += sstep.o ldstfp.o
diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S
index fdec6e6..3419411d 100644
--- a/arch/powerpc/lib/checksum_64.S
+++ b/arch/powerpc/lib/checksum_64.S
@@ -35,7 +35,7 @@ _GLOBAL(__csum_partial)
* work to calculate the correct checksum, we ignore that case
* and take the potential slowdown of unaligned loads.
*/
- rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */
+ rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */
beq .Lcsum_aligned
li r7,4
@@ -167,8 +167,12 @@ _GLOBAL(__csum_partial)
beq .Lcsum_finish
lbz r6,0(r3)
+#ifdef __BIG_ENDIAN__
sldi r9,r6,8 /* Pad the byte out to 16 bits */
adde r0,r0,r9
+#else
+ adde r0,r0,r6
+#endif
.Lcsum_finish:
addze r0,r0 /* add in final carry */
@@ -234,7 +238,7 @@ _GLOBAL(csum_partial_copy_generic)
* If the source and destination are relatively unaligned we only
* align the source. This keeps things simple.
*/
- rldicl. r6,r3,64-1,64-2 /* r6 = (r3 & 0x3) >> 1 */
+ rldicl. r6,r3,64-1,64-2 /* r6 = (r3 >> 1) & 0x3 */
beq .Lcopy_aligned
li r9,4
@@ -396,8 +400,12 @@ dstnr; sth r6,0(r4)
beq .Lcopy_finish
srcnr; lbz r6,0(r3)
+#ifdef __BIG_ENDIAN__
sldi r9,r6,8 /* Pad the byte out to 16 bits */
adde r0,r0,r9
+#else
+ adde r0,r0,r6
+#endif
dstnr; stb r6,0(r4)
.Lcopy_finish:
--
2.7.4