lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:   Wed, 21 Nov 2018 11:59:31 +0000
From:   Dave Rodgman <dave.rodgman@....com>
To:     "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>
CC:     nd <nd@....com>,
        "herbert@...dor.apana.org.au" <herbert@...dor.apana.org.au>,
        "davem@...emloft.net" <davem@...emloft.net>,
        Matt Sealey <Matt.Sealey@....com>,
        "nitingupta910@...il.com" <nitingupta910@...il.com>,
        "rpurdie@...nedhand.com" <rpurdie@...nedhand.com>,
        "markus@...rhumer.com" <markus@...rhumer.com>,
        "minchan@...nel.org" <minchan@...nel.org>,
        "sergey.senozhatsky.work@...il.com" 
        <sergey.senozhatsky.work@...il.com>
Subject: [PATCH 1/6] lib/lzo: clean-up by introducing COPY16

From: Matt Sealey <matt.sealey@....com>

Most compilers should be able to merge adjacent loads/stores of sizes which
are less than but effect a multiple of a machine word size (in effect a
memcpy() of a constant amount). However the semantics of the macro are that
it just does the copy, the pointer increment is in the code, hence we see

    *a = *b
    a += 8
    b += 8
    *a = *b
    a += 8
    b += 8

This introduces a dependency between the two groups of statements which
seems to defeat said compiler optimizers and generate some very strange
sequences of addition and subtraction of address offsets (i.e. it is
overcomplicated).

Since COPY8 is only ever used to copy amounts of 16 bytes (in pairs), just
define COPY16 as COPY8,COPY8. We leave the definition to preserve the need
to do unaligned accesses to machine-sized words per the original code
intent, we just don't use it in the code proper.

COPY16 then gives us code like:

    *a = *b
    *(a+8) = *(b+8)
    a += 16
    b += 16

This seems to allow compilers to generate much better code by using
base register writeback or simply positively incrementing offsets which
seems to positively affect performance. It is, at least, fewer instructions
to do the same job.

Signed-off-by: Matt Sealey <matt.sealey@....com>
---
 lib/lzo/lzo1x_compress.c        |  9 +++------
 lib/lzo/lzo1x_decompress_safe.c | 18 ++++++------------
 lib/lzo/lzodefs.h               |  3 +++
 3 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/lib/lzo/lzo1x_compress.c b/lib/lzo/lzo1x_compress.c
index 236eb21167b5..82fb5571ce5e 100644
--- a/lib/lzo/lzo1x_compress.c
+++ b/lib/lzo/lzo1x_compress.c
@@ -60,8 +60,7 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
 				op += t;
 			} else if (t <= 16) {
 				*op++ = (t - 3);
-				COPY8(op, ii);
-				COPY8(op + 8, ii + 8);
+				COPY16(op, ii);
 				op += t;
 			} else {
 				if (t <= 18) {
@@ -76,8 +75,7 @@ lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
 					*op++ = tt;
 				}
 				do {
-					COPY8(op, ii);
-					COPY8(op + 8, ii + 8);
+					COPY16(op, ii);
 					op += 16;
 					ii += 16;
 					t -= 16;
@@ -255,8 +253,7 @@ int lzo1x_1_compress(const unsigned char *in, size_t in_len,
 			*op++ = tt;
 		}
 		if (t >= 16) do {
-			COPY8(op, ii);
-			COPY8(op + 8, ii + 8);
+			COPY16(op, ii);
 			op += 16;
 			ii += 16;
 			t -= 16;
diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c
index a1c387f6afba..aa95d3066b7d 100644
--- a/lib/lzo/lzo1x_decompress_safe.c
+++ b/lib/lzo/lzo1x_decompress_safe.c
@@ -86,12 +86,9 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 					const unsigned char *ie = ip + t;
 					unsigned char *oe = op + t;
 					do {
-						COPY8(op, ip);
-						op += 8;
-						ip += 8;
-						COPY8(op, ip);
-						op += 8;
-						ip += 8;
+						COPY16(op, ip);
+						op += 16;
+						ip += 16;
 					} while (ip < ie);
 					ip = ie;
 					op = oe;
@@ -187,12 +184,9 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 			unsigned char *oe = op + t;
 			if (likely(HAVE_OP(t + 15))) {
 				do {
-					COPY8(op, m_pos);
-					op += 8;
-					m_pos += 8;
-					COPY8(op, m_pos);
-					op += 8;
-					m_pos += 8;
+					COPY16(op, m_pos);
+					op += 16;
+					m_pos += 16;
 				} while (op < oe);
 				op = oe;
 				if (HAVE_IP(6)) {
diff --git a/lib/lzo/lzodefs.h b/lib/lzo/lzodefs.h
index 4edefd2f540c..9f167eaabb28 100644
--- a/lib/lzo/lzodefs.h
+++ b/lib/lzo/lzodefs.h
@@ -23,6 +23,9 @@
 		COPY4(dst, src); COPY4((dst) + 4, (src) + 4)
 #endif
 
+#define COPY16(dst, src) \
+	do { COPY8(dst, src); COPY8((dst) + 8, (src) + 8); } while (0)
+
 #if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN)
 #error "conflicting endian definitions"
 #elif defined(__x86_64__)
-- 
2.16.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ