lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20201109192859.14267-1-zengzhaoxiu@163.com>
Date:   Tue, 10 Nov 2020 03:28:59 +0800
From:   zengzhaoxiu@....com
To:     Jonathan Corbet <corbet@....net>,
        Mauro Carvalho Chehab <mchehab+huawei@...nel.org>
Cc:     linux-kernel@...r.kernel.org, Zhaoxiu Zeng <zhaoxiu.zeng@...il.com>
Subject: [PATCH 3/3] lib: lzo: Improves decompression performance

From: Zhaoxiu Zeng <zhaoxiu.zeng@...il.com>

This patch:
1. Cleans up the code.
2. Uses copy_from_back() to copy matched bytes from earlier in the output buffer.

I tested on 5.8.18-300.fc33.x86_64.
The performance of the lzo1x_decompress_safe function is improved by about 5%.
Without CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, the performance is improved by about 60%!

Signed-off-by: Zhaoxiu Zeng <zhaoxiu.zeng@...il.com>
---
 lib/lzo/lzo1x_decompress_safe.c | 140 ++++++++++++++------------------
 1 file changed, 59 insertions(+), 81 deletions(-)

diff --git a/lib/lzo/lzo1x_decompress_safe.c b/lib/lzo/lzo1x_decompress_safe.c
index 7892a40cf765..afef64cedc51 100644
--- a/lib/lzo/lzo1x_decompress_safe.c
+++ b/lib/lzo/lzo1x_decompress_safe.c
@@ -17,6 +17,7 @@
 #include <linux/kernel.h>
 #endif
 #include <asm/unaligned.h>
+#include <asm/copy_from_back.h>
 #include <linux/lzo.h>
 #include "lzodefs.h"
 
@@ -43,7 +44,7 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 	const unsigned char *ip;
 	size_t t, next;
 	size_t state = 0;
-	const unsigned char *m_pos;
+	size_t dist;
 	const unsigned char * const ip_end = in + in_len;
 	unsigned char * const op_end = out + *out_len;
 
@@ -117,29 +118,31 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 				}
 				state = 4;
 				continue;
-			} else if (state != 4) {
-				next = t & 3;
-				m_pos = op - 1;
-				m_pos -= t >> 2;
-				m_pos -= *ip++ << 2;
-				TEST_LB(m_pos);
+			}
+
+			next = t & 3;
+			dist  = t >> 2;
+			dist += *ip++ << 2;
+			if (state != 4) {
+				dist += 1;
+				TEST_LB(op - dist);
 				NEED_OP(2);
-				op[0] = m_pos[0];
-				op[1] = m_pos[1];
 				op += 2;
-				goto match_next;
 			} else {
-				next = t & 3;
-				m_pos = op - (1 + M2_MAX_OFFSET);
-				m_pos -= t >> 2;
-				m_pos -= *ip++ << 2;
-				t = 3;
+				dist += (1 + M2_MAX_OFFSET);
+				TEST_LB(op - dist);
+				NEED_OP(3);
+				op += 3;
+				op[-3] = op[-3 - dist];
 			}
+			op[-2] = op[-2 - dist];
+			op[-1] = op[-1 - dist];
+			goto match_next;
 		} else if (t >= 64) {
 			next = t & 3;
-			m_pos = op - 1;
-			m_pos -= (t >> 2) & 7;
-			m_pos -= *ip++ << 3;
+			dist = 1;
+			dist += (t >> 2) & 7;
+			dist += *ip++ << 3;
 			t = (t >> 5) - 1 + (3 - 1);
 		} else if (t >= 32) {
 			t = (t & 31) + (3 - 1);
@@ -159,14 +162,15 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 				t += offset + 31 + *ip++;
 				NEED_IP(2);
 			}
-			m_pos = op - 1;
+			dist = 1;
 			next = get_unaligned_le16(ip);
 			ip += 2;
-			m_pos -= next >> 2;
+			dist += next >> 2;
 			next &= 3;
 		} else {
 			NEED_IP(2);
 			next = get_unaligned_le16(ip);
+
 			if (((next & 0xfffc) == 0xfffc) &&
 			    ((t & 0xf8) == 0x18) &&
 			    likely(bitstream_version)) {
@@ -180,74 +184,48 @@ int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
 				next &= 3;
 				ip += 3;
 				goto match_next;
-			} else {
-				m_pos = op;
-				m_pos -= (t & 8) << 11;
-				t = (t & 7) + (3 - 1);
-				if (unlikely(t == 2)) {
-					size_t offset;
-					const unsigned char *ip_last = ip;
+			}
 
-					while (unlikely(*ip == 0)) {
-						ip++;
-						NEED_IP(1);
-					}
-					offset = ip - ip_last;
-					if (unlikely(offset > MAX_255_COUNT))
-						return LZO_E_ERROR;
+			dist = (t & 8) << 11;
+			t = (t & 7) + (3 - 1);
+			if (unlikely(t == 2)) {
+				size_t offset;
+				const unsigned char *ip_last = ip;
 
-					offset = (offset << 8) - offset;
-					t += offset + 7 + *ip++;
-					NEED_IP(2);
-					next = get_unaligned_le16(ip);
+				while (unlikely(*ip == 0)) {
+					ip++;
+					NEED_IP(1);
 				}
-				ip += 2;
-				m_pos -= next >> 2;
-				next &= 3;
-				if (m_pos == op)
-					goto eof_found;
-				m_pos -= 0x4000;
+				offset = ip - ip_last;
+				if (unlikely(offset > MAX_255_COUNT))
+					return LZO_E_ERROR;
+
+				offset = (offset << 8) - offset;
+				t += offset + 7 + *ip++;
+				NEED_IP(2);
+				next = get_unaligned_le16(ip);
 			}
+			ip += 2;
+			dist += next >> 2;
+			if (dist == 0)
+				goto eof_found;
+			dist += M3_MAX_OFFSET;
+			next &= 3;
 		}
-		TEST_LB(m_pos);
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
-		if (op - m_pos >= 8) {
-			unsigned char *oe = op + t;
-			if (likely(HAVE_OP(t + 15))) {
-				do {
-					COPY8(op, m_pos);
-					op += 8;
-					m_pos += 8;
-					COPY8(op, m_pos);
-					op += 8;
-					m_pos += 8;
-				} while (op < oe);
-				op = oe;
-				if (HAVE_IP(6)) {
-					state = next;
-					COPY4(op, ip);
-					op += next;
-					ip += next;
-					continue;
-				}
-			} else {
-				NEED_OP(t);
-				do {
-					*op++ = *m_pos++;
-				} while (op < oe);
+		TEST_LB(op - dist);
+		if (likely(HAVE_OP(t + FAST_COPY_SAFEGUARD_SIZE))) {
+			/* very common case */
+			op = copy_from_back_fast(op, dist, t);
+			if (HAVE_IP(6)) {
+				state = next;
+				COPY4(op, ip);
+				op += next;
+				ip += next;
+				continue;
 			}
-		} else
-#endif
-		{
-			unsigned char *oe = op + t;
+		} else {
 			NEED_OP(t);
-			op[0] = m_pos[0];
-			op[1] = m_pos[1];
-			op += 2;
-			m_pos += 2;
-			do {
-				*op++ = *m_pos++;
-			} while (op < oe);
+			op = copy_from_back(op, dist, t);
 		}
 match_next:
 		state = next;
-- 
2.28.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ