[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <aC9Y__MxnncE2teF@x1>
Date: Thu, 22 May 2025 14:03:59 -0300
From: Arnaldo Carvalho de Melo <acme@...nel.org>
To: Yuzhuo Jing <yuzhuo@...gle.com>
Cc: Peter Zijlstra <peterz@...radead.org>, Ingo Molnar <mingo@...hat.com>,
Namhyung Kim <namhyung@...nel.org>,
Mark Rutland <mark.rutland@....com>,
Alexander Shishkin <alexander.shishkin@...ux.intel.com>,
Jiri Olsa <jolsa@...nel.org>, Ian Rogers <irogers@...gle.com>,
Adrian Hunter <adrian.hunter@...el.com>,
Liang Kan <kan.liang@...ux.intel.com>,
Nathan Chancellor <nathan@...nel.org>,
Nick Desaulniers <nick.desaulniers+lkml@...il.com>,
Bill Wendling <morbo@...gle.com>,
Justin Stitt <justinstitt@...gle.com>,
"Steven Rostedt (Google)" <rostedt@...dmis.org>,
James Clark <james.clark@...aro.org>,
Tomas Glozar <tglozar@...hat.com>, Leo Yan <leo.yan@....com>,
Guilherme Amadio <amadio@...too.org>,
Yang Jihong <yangjihong@...edance.com>,
"Masami Hiramatsu (Google)" <mhiramat@...nel.org>,
Adhemerval Zanella <adhemerval.zanella@...aro.org>,
Wei Yang <richard.weiyang@...il.com>,
Ard Biesheuvel <ardb@...nel.org>,
"Mike Rapoport (Microsoft)" <rppt@...nel.org>,
Athira Rajeev <atrajeev@...ux.vnet.ibm.com>,
Kajol Jain <kjain@...ux.ibm.com>,
Aditya Gupta <adityag@...ux.ibm.com>,
Charlie Jenkins <charlie@...osinc.com>,
"Steinar H. Gunderson" <sesse@...gle.com>,
"Dr. David Alan Gilbert" <linux@...blig.org>,
Herbert Xu <herbert@...dor.apana.org.au>,
Jeff Johnson <jeff.johnson@....qualcomm.com>,
Al Viro <viro@...iv.linux.org.uk>, linux-kernel@...r.kernel.org,
linux-perf-users@...r.kernel.org, llvm@...ts.linux.dev
Subject: Re: [PATCH v1 2/4] perf tools: Add sha1 utils
On Wed, May 21, 2025 at 03:53:05PM -0700, Yuzhuo Jing wrote:
> Those new files are derived from the kernel tree, namely:
>
> tools/perf/util/sha1.c from lib/crypto/sha1.c
> tools/perf/util/sha1.h from include/crypto/sha1.h
> tools/perf/util/sha1_base.h from include/crypto/sha1_base.h
> tools/perf/util/sha1_generic.c from crypto/sha1_generic.c
>
> The reason that we are not syncing them with the kernel tree like other
> tools header files is because of the deep dependency in
That is ok, we do it in some other cases, but while looking at this
patchset I checked and the source code for sha1_transform() is verbatim
the same, as intended, I wonder if we could add infrastructure to
check-headers.sh to, instead of checking whole files, check if the source
code we got from the kernel is the same, something along the lines of:
⬢ [acme@...lbx perf-tools-next]$ line=$(ctags -x --c-kinds=f lib/crypto/sha1.c | awk '$1 == "sha1_transform" {print $3}')
⬢ [acme@...lbx perf-tools-next]$ sed -n $line,\$p lib/crypto/sha1.c | awk '{print} /\{/ {c++} /\}/ {c--; if (c==0) exit}'
void sha1_transform(__u32 *digest, const char *data, __u32 *array)
{
__u32 A, B, C, D, E;
unsigned int i = 0;
A = digest[0];
B = digest[1];
C = digest[2];
D = digest[3];
E = digest[4];
/* Round 1 - iterations 0-16 take their input from 'data' */
for (; i < 16; ++i)
T_0_15(i, A, B, C, D, E);
/* Round 1 - tail. Input from 512-bit mixing array */
for (; i < 20; ++i)
T_16_19(i, A, B, C, D, E);
/* Round 2 */
for (; i < 40; ++i)
T_20_39(i, A, B, C, D, E);
/* Round 3 */
for (; i < 60; ++i)
T_40_59(i, A, B, C, D, E);
/* Round 4 */
for (; i < 80; ++i)
T_60_79(i, A, B, C, D, E);
digest[0] += A;
digest[1] += B;
digest[2] += C;
digest[3] += D;
digest[4] += E;
}
⬢ [acme@...lbx perf-tools-next]$
But that can be done later :-)
- Arnaldo
> include/crypto/hash.h. It's painful to import the whole kernel crypto
> driver infrastructure into tools.
> The derived files get rid of struct shash_desc definition, and directly
> operates on the struct sha1_state.
> Signed-off-by: Yuzhuo Jing <yuzhuo@...gle.com>
> ---
> tools/perf/util/Build | 2 +
> tools/perf/util/sha1.c | 122 +++++++++++++++++++++++++++++++++
> tools/perf/util/sha1.h | 41 +++++++++++
> tools/perf/util/sha1_base.h | 103 ++++++++++++++++++++++++++++
> tools/perf/util/sha1_generic.c | 49 +++++++++++++
> 5 files changed, 317 insertions(+)
> create mode 100644 tools/perf/util/sha1.c
> create mode 100644 tools/perf/util/sha1.h
> create mode 100644 tools/perf/util/sha1_base.h
> create mode 100644 tools/perf/util/sha1_generic.c
>
> diff --git a/tools/perf/util/Build b/tools/perf/util/Build
> index 7910d908c814..ecee96b3f3fa 100644
> --- a/tools/perf/util/Build
> +++ b/tools/perf/util/Build
> @@ -41,6 +41,8 @@ perf-util-y += rbtree.o
> perf-util-y += libstring.o
> perf-util-y += bitmap.o
> perf-util-y += hweight.o
> +perf-util-y += sha1.o
> +perf-util-y += sha1_generic.o
> perf-util-y += smt.o
> perf-util-y += strbuf.o
> perf-util-y += string.o
> diff --git a/tools/perf/util/sha1.c b/tools/perf/util/sha1.c
> new file mode 100644
> index 000000000000..5ae658afb56b
> --- /dev/null
> +++ b/tools/perf/util/sha1.c
> @@ -0,0 +1,122 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * SHA1 routine optimized to do word accesses rather than byte accesses,
> + * and to avoid unnecessary copies into the context array.
> + *
> + * This was based on the git SHA1 implementation.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/bitops.h>
> +#include <linux/string.h>
> +#include <linux/unaligned.h>
> +
> +#include "sha1.h"
> +
> +/*
> + * If you have 32 registers or more, the compiler can (and should)
> + * try to change the array[] accesses into registers. However, on
> + * machines with less than ~25 registers, that won't really work,
> + * and at least gcc will make an unholy mess of it.
> + *
> + * So to avoid that mess which just slows things down, we force
> + * the stores to memory to actually happen (we might be better off
> + * with a 'W(t)=(val);asm("":"+m" (W(t))' there instead, as
> + * suggested by Artur Skawina - that will also make gcc unable to
> + * try to do the silly "optimize away loads" part because it won't
> + * see what the value will be).
> + *
> + * Ben Herrenschmidt reports that on PPC, the C version comes close
> + * to the optimized asm with this (ie on PPC you don't want that
> + * 'volatile', since there are lots of registers).
> + *
> + * On ARM we get the best code generation by forcing a full memory barrier
> + * between each SHA_ROUND, otherwise gcc happily get wild with spilling and
> + * the stack frame size simply explode and performance goes down the drain.
> + */
> +
> +#ifdef CONFIG_X86
> + #define setW(x, val) (*(volatile __u32 *)&W(x) = (val))
> +#elif defined(CONFIG_ARM)
> + #define setW(x, val) do { W(x) = (val); __asm__("":::"memory"); } while (0)
> +#else
> + #define setW(x, val) (W(x) = (val))
> +#endif
> +
> +/* This "rolls" over the 512-bit array */
> +#define W(x) (array[(x)&15])
> +
> +/*
> + * Where do we get the source from? The first 16 iterations get it from
> + * the input data, the next mix it from the 512-bit array.
> + */
> +#define SHA_SRC(t) get_unaligned_be32((__u32 *)data + t)
> +#define SHA_MIX(t) rol32(W(t+13) ^ W(t+8) ^ W(t+2) ^ W(t), 1)
> +
> +#define SHA_ROUND(t, input, fn, constant, A, B, C, D, E) do { \
> + __u32 TEMP = input(t); setW(t, TEMP); \
> + E += TEMP + rol32(A,5) + (fn) + (constant); \
> + B = ror32(B, 2); \
> + TEMP = E; E = D; D = C; C = B; B = A; A = TEMP; } while (0)
> +
> +#define T_0_15(t, A, B, C, D, E) SHA_ROUND(t, SHA_SRC, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
> +#define T_16_19(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (((C^D)&B)^D) , 0x5a827999, A, B, C, D, E )
> +#define T_20_39(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0x6ed9eba1, A, B, C, D, E )
> +#define T_40_59(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, ((B&C)+(D&(B^C))) , 0x8f1bbcdc, A, B, C, D, E )
> +#define T_60_79(t, A, B, C, D, E) SHA_ROUND(t, SHA_MIX, (B^C^D) , 0xca62c1d6, A, B, C, D, E )
> +
> +/**
> + * sha1_transform - single block SHA1 transform (deprecated)
> + *
> + * @digest: 160 bit digest to update
> + * @data: 512 bits of data to hash
> + * @array: 16 words of workspace (see note)
> + *
> + * This function executes SHA-1's internal compression function. It updates the
> + * 160-bit internal state (@digest) with a single 512-bit data block (@data).
> + *
> + * Don't use this function. SHA-1 is no longer considered secure. And even if
> + * you do have to use SHA-1, this isn't the correct way to hash something with
> + * SHA-1 as this doesn't handle padding and finalization.
> + *
> + * Note: If the hash is security sensitive, the caller should be sure
> + * to clear the workspace. This is left to the caller to avoid
> + * unnecessary clears between chained hashing operations.
> + */
> +void sha1_transform(__u32 *digest, const char *data, __u32 *array)
> +{
> + __u32 A, B, C, D, E;
> + unsigned int i = 0;
> +
> + A = digest[0];
> + B = digest[1];
> + C = digest[2];
> + D = digest[3];
> + E = digest[4];
> +
> + /* Round 1 - iterations 0-16 take their input from 'data' */
> + for (; i < 16; ++i)
> + T_0_15(i, A, B, C, D, E);
> +
> + /* Round 1 - tail. Input from 512-bit mixing array */
> + for (; i < 20; ++i)
> + T_16_19(i, A, B, C, D, E);
> +
> + /* Round 2 */
> + for (; i < 40; ++i)
> + T_20_39(i, A, B, C, D, E);
> +
> + /* Round 3 */
> + for (; i < 60; ++i)
> + T_40_59(i, A, B, C, D, E);
> +
> + /* Round 4 */
> + for (; i < 80; ++i)
> + T_60_79(i, A, B, C, D, E);
> +
> + digest[0] += A;
> + digest[1] += B;
> + digest[2] += C;
> + digest[3] += D;
> + digest[4] += E;
> +}
> diff --git a/tools/perf/util/sha1.h b/tools/perf/util/sha1.h
> new file mode 100644
> index 000000000000..9da4ece49bc6
> --- /dev/null
> +++ b/tools/perf/util/sha1.h
> @@ -0,0 +1,41 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Common values for SHA-1 algorithms
> + */
> +
> +#ifndef _CRYPTO_SHA1_H
> +#define _CRYPTO_SHA1_H
> +
> +#include <linux/types.h>
> +
> +#define SHA1_DIGEST_SIZE 20
> +#define SHA1_BLOCK_SIZE 64
> +
> +#define SHA1_H0 0x67452301UL
> +#define SHA1_H1 0xefcdab89UL
> +#define SHA1_H2 0x98badcfeUL
> +#define SHA1_H3 0x10325476UL
> +#define SHA1_H4 0xc3d2e1f0UL
> +
> +struct sha1_state {
> + u32 state[SHA1_DIGEST_SIZE / 4];
> + u64 count;
> + u8 buffer[SHA1_BLOCK_SIZE];
> +};
> +
> +extern int crypto_sha1_update(struct sha1_state *desc, const u8 *data,
> + unsigned int len);
> +
> +extern int crypto_sha1_finup(struct sha1_state *desc, const u8 *data,
> + unsigned int len, u8 *hash);
> +
> +/*
> + * An implementation of SHA-1's compression function. Don't use in new code!
> + * You shouldn't be using SHA-1, and even if you *have* to use SHA-1, this isn't
> + * the correct way to hash something with SHA-1 (use crypto_shash instead).
> + */
> +#define SHA1_DIGEST_WORDS (SHA1_DIGEST_SIZE / 4)
> +#define SHA1_WORKSPACE_WORDS 16
> +void sha1_transform(__u32 *digest, const char *data, __u32 *W);
> +
> +#endif /* _CRYPTO_SHA1_H */
> diff --git a/tools/perf/util/sha1_base.h b/tools/perf/util/sha1_base.h
> new file mode 100644
> index 000000000000..cea22c5a4952
> --- /dev/null
> +++ b/tools/perf/util/sha1_base.h
> @@ -0,0 +1,103 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * sha1_base.h - core logic for SHA-1 implementations
> + *
> + * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@...aro.org>
> + */
> +
> +#ifndef _CRYPTO_SHA1_BASE_H
> +#define _CRYPTO_SHA1_BASE_H
> +
> +#include <linux/string.h>
> +
> +#include <linux/kernel.h>
> +#include <linux/unaligned.h>
> +
> +#include "sha1.h"
> +
> +typedef void (sha1_block_fn)(struct sha1_state *sst, u8 const *src, int blocks);
> +
> +static inline int sha1_base_init(struct sha1_state *sctx)
> +{
> + sctx->state[0] = SHA1_H0;
> + sctx->state[1] = SHA1_H1;
> + sctx->state[2] = SHA1_H2;
> + sctx->state[3] = SHA1_H3;
> + sctx->state[4] = SHA1_H4;
> + sctx->count = 0;
> +
> + return 0;
> +}
> +
> +static inline int sha1_base_do_update(struct sha1_state *sctx,
> + const u8 *data,
> + unsigned int len,
> + sha1_block_fn *block_fn)
> +{
> + unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
> +
> + sctx->count += len;
> +
> + if (unlikely((partial + len) >= SHA1_BLOCK_SIZE)) {
> + int blocks;
> +
> + if (partial) {
> + int p = SHA1_BLOCK_SIZE - partial;
> +
> + memcpy(sctx->buffer + partial, data, p);
> + data += p;
> + len -= p;
> +
> + block_fn(sctx, sctx->buffer, 1);
> + }
> +
> + blocks = len / SHA1_BLOCK_SIZE;
> + len %= SHA1_BLOCK_SIZE;
> +
> + if (blocks) {
> + block_fn(sctx, data, blocks);
> + data += blocks * SHA1_BLOCK_SIZE;
> + }
> + partial = 0;
> + }
> + if (len)
> + memcpy(sctx->buffer + partial, data, len);
> +
> + return 0;
> +}
> +
> +static inline int sha1_base_do_finalize(struct sha1_state *sctx,
> + sha1_block_fn *block_fn)
> +{
> + const int bit_offset = SHA1_BLOCK_SIZE - sizeof(__be64);
> + __be64 *bits = (__be64 *)(sctx->buffer + bit_offset);
> + unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
> +
> + sctx->buffer[partial++] = 0x80;
> + if (partial > bit_offset) {
> + memset(sctx->buffer + partial, 0x0, SHA1_BLOCK_SIZE - partial);
> + partial = 0;
> +
> + block_fn(sctx, sctx->buffer, 1);
> + }
> +
> + memset(sctx->buffer + partial, 0x0, bit_offset - partial);
> + *bits = cpu_to_be64(sctx->count << 3);
> + block_fn(sctx, sctx->buffer, 1);
> +
> + return 0;
> +}
> +
> +static inline int sha1_base_finish(struct sha1_state *sctx, u8 *out)
> +{
> + __be32 *digest = (__be32 *)out;
> + int i;
> +
> + for (i = 0; i < SHA1_DIGEST_SIZE / (int)sizeof(__be32); i++)
> + put_unaligned_be32(sctx->state[i], digest++);
> +
> + memzero_explicit(sctx, sizeof(*sctx));
> + return 0;
> +}
> +
> +#endif /* _CRYPTO_SHA1_BASE_H */
> diff --git a/tools/perf/util/sha1_generic.c b/tools/perf/util/sha1_generic.c
> new file mode 100644
> index 000000000000..b0a7af370d59
> --- /dev/null
> +++ b/tools/perf/util/sha1_generic.c
> @@ -0,0 +1,49 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Cryptographic API.
> + *
> + * SHA1 Secure Hash Algorithm.
> + *
> + * Derived from cryptoapi implementation, adapted for in-place
> + * scatterlist interface.
> + *
> + * Copyright (c) Alan Smithee.
> + * Copyright (c) Andrew McDonald <andrew@...onald.org.uk>
> + * Copyright (c) Jean-Francois Dive <jef@...uxbe.org>
> + */
> +#include <linux/types.h>
> +#include <linux/string.h>
> +#include <asm/byteorder.h>
> +
> +#include "sha1_base.h"
> +
> +static void sha1_generic_block_fn(struct sha1_state *sst, u8 const *src,
> + int blocks)
> +{
> + u32 temp[SHA1_WORKSPACE_WORDS];
> +
> + while (blocks--) {
> + sha1_transform(sst->state, (const char *)src, temp);
> + src += SHA1_BLOCK_SIZE;
> + }
> + memzero_explicit(temp, sizeof(temp));
> +}
> +
> +int crypto_sha1_update(struct sha1_state *desc, const u8 *data,
> + unsigned int len)
> +{
> + return sha1_base_do_update(desc, data, len, sha1_generic_block_fn);
> +}
> +
> +static int sha1_final(struct sha1_state *desc, u8 *out)
> +{
> + sha1_base_do_finalize(desc, sha1_generic_block_fn);
> + return sha1_base_finish(desc, out);
> +}
> +
> +int crypto_sha1_finup(struct sha1_state *desc, const u8 *data,
> + unsigned int len, u8 *out)
> +{
> + sha1_base_do_update(desc, data, len, sha1_generic_block_fn);
> + return sha1_final(desc, out);
> +}
> --
> 2.49.0.1164.gab81da1b16-goog
Powered by blists - more mailing lists