[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250402224652.1bb38f6b@pumpkin>
Date: Wed, 2 Apr 2025 22:46:52 +0100
From: David Laight <david.laight.linux@...il.com>
To: Uwe Kleine-König <u.kleine-koenig@...libre.com>
Cc: Nicolas Pitre <nico@...xnic.net>, Andrew Morton
<akpm@...ux-foundation.org>, linux-kernel@...r.kernel.org
Subject: Re: [PATCH] math64: Provide an uprounding variant of
mul_u64_u64_div_u64()
On Wed, 2 Apr 2025 17:01:49 +0200
Uwe Kleine-König <u.kleine-koenig@...libre.com> wrote:
How about (tab damaged):
Compile tested only, on x86-x64 (once with the local definitions removed).
Looking at the object code, if u128 is supported then checking n_hi
is always going to be better than a pre-check.
Remember multiply is cheap.
David
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h
index 9931e4c7d73f..6115f3fcb975 100644
--- a/arch/x86/include/asm/div64.h
+++ b/arch/x86/include/asm/div64.h
@@ -84,21 +84,28 @@ static inline u64 mul_u32_u32(u32 a, u32 b)
* Will generate an #DE when the result doesn't fit u64, could fix with an
* __ex_table[] entry when it becomes an issue.
*/
-static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div)
+static inline u64 mul_u64_add_u64_div_u64(u64 a, u64 mul, u64 add, u64 div)
{
u64 q;
- asm ("mulq %2; divq %3" : "=a" (q)
- : "a" (a), "rm" (mul), "rm" (div)
- : "rdx");
+ if (statically_true(!add)) {
+ asm ("mulq %2; divq %3" : "=a" (q)
+ : "a" (a), "rm" (mul), "rm" (div)
+ : "rdx");
+ } else {
+ asm ("mulq %2; addq %4,%rax; addc $0,%rdx; divq %3"
+ : "=a" (q)
+ : "a" (a), "rm" (mul), "rm" (div), "rm" (add)
+ : "rdx");
+ }
return q;
}
-#define mul_u64_u64_div_u64 mul_u64_u64_div_u64
+#define mul_u64_add_u64_div_u64 mul_u64_add_u64_div_u64
static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div)
{
- return mul_u64_u64_div_u64(a, mul, div);
+ return mul_u64_add_u64_div_u64(a, mul, 0, div);
}
#define mul_u64_u32_div mul_u64_u32_div
diff --git a/include/linux/math64.h b/include/linux/math64.h
index 6aaccc1626ab..1544dc37e317 100644
--- a/include/linux/math64.h
+++ b/include/linux/math64.h
@@ -282,7 +282,10 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor)
}
#endif /* mul_u64_u32_div */
-u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div);
+u64 mul_u64_add_u64_div_u64(u64 a, u64 mul, u64 add, u64 div);
+#define mul_u64_u64_div_u64(a, mul, div) mul_u64_add_u64_div_u64(a, mul, 0, div)
+#define mul_u64_u64_div_u64_roundup(a, mul, div) \
+ ({ u64 _tmp = (div); mul_u64_add_u64_div_u64(a, mul, _tmp - 1, _tmp); })
/**
* DIV64_U64_ROUND_UP - unsigned 64bit divide with 64bit divisor rounded up
diff --git a/lib/math/div64.c b/lib/math/div64.c
index 5faa29208bdb..efcc8d729c74 100644
--- a/lib/math/div64.c
+++ b/lib/math/div64.c
@@ -183,16 +183,13 @@ u32 iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder)
}
EXPORT_SYMBOL(iter_div_u64_rem);
-#ifndef mul_u64_u64_div_u64
-u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
+#ifndef mul_u64_add_u64_div_u64
+u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 add, u64 c)
{
- if (ilog2(a) + ilog2(b) <= 62)
- return div64_u64(a * b, c);
-
#if defined(__SIZEOF_INT128__)
/* native 64x64=128 bits multiplication */
- u128 prod = (u128)a * b;
+ u128 prod = (u128)a * b + add;
u64 n_lo = prod, n_hi = prod >> 64;
#else
@@ -201,6 +198,11 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
u32 a_lo = a, a_hi = a >> 32, b_lo = b, b_hi = b >> 32;
u64 x, y, z;
+#if BITS_PER_LONG == 32
+ if (!(a_hi | b_hi))
+ return div64_u64(a_lo * b_lo + add, c);
+#endif
+
x = (u64)a_lo * b_lo;
y = (u64)a_lo * b_hi + (u32)(x >> 32);
z = (u64)a_hi * b_hi + (u32)(y >> 32);
@@ -208,10 +210,13 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
z += (u32)(y >> 32);
x = (y << 32) + (u32)x;
- u64 n_lo = x, n_hi = z;
+ u64 n_lo = x + add, n_hi = z + (n_lo < x);
#endif
+ if (!n_hi)
+ return div64_u64(n_lo, c);
+
/* make sure c is not zero, trigger exception otherwise */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdiv-by-zero"
@@ -265,5 +270,5 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
return res;
}
-EXPORT_SYMBOL(mul_u64_u64_div_u64);
+EXPORT_SYMBOL(mul_u64_add_u64_div_u64);
#endif
Powered by blists - more mailing lists