lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250709152420.4c95c22d@pumpkin>
Date: Wed, 9 Jul 2025 15:24:20 +0100
From: David Laight <david.laight.linux@...il.com>
To: Andrew Morton <akpm@...ux-foundation.org>, linux-kernel@...r.kernel.org
Cc: u.kleine-koenig@...libre.com, Nicolas Pitre <npitre@...libre.com>, Oleg
 Nesterov <oleg@...hat.com>, Peter Zijlstra <peterz@...radead.org>, Biju Das
 <biju.das.jz@...renesas.com>, rostedt@...dmis.org, lirongqing@...du.com
Subject: Re: [PATCH v3 next 09/10] lib: mul_u64_u64_div_u64() Optimise the
 divide code

On Sat, 14 Jun 2025 10:53:45 +0100
David Laight <david.laight.linux@...il.com> wrote:

> Replace the bit by bit algorithm with one that generates 16 bits
> per iteration on 32bit architectures and 32 bits on 64bit ones.

I've spent far too long doing some clock counting exercises on this code.
This is the latest version with some conditional compiles and comments
explaining the various optimisation.
I think the 'best' version is with -DMULDIV_OPT=0xc3

#ifndef MULDIV_OPT
#define MULDIV_OPT 0
#endif

#ifndef BITS_PER_ITER
#define BITS_PER_ITER (__LONG_WIDTH__ >= 64 ? 32 : 16)
#endif

#define unlikely(x)  __builtin_expect((x), 0)
#define likely(x)  __builtin_expect(!!(x), 1)

#if __LONG_WIDTH__ >= 64

/* gcc generates sane code for 64bit. */
static unsigned int tzcntll(unsigned long long x)
{
        return __builtin_ctzll(x);
}

static unsigned int lzcntll(unsigned long long x)
{
        return __builtin_clzll(x);
}
#else
        
/*
 * Assuming that bsf/bsr dont change the output register
 * when the input is zero (should be true now that 486 aren't
 * supported) these simple conditional (and cmov) free functions
 * can be used to count trailing/leading zeros.
 */
static inline unsigned int tzcnt_z(u32 x, unsigned int if_z)
{               
        asm("bsfl %1,%0" : "+r" (if_z) : "r" (x));
        return if_z;
}

static inline unsigned int tzcntll(unsigned long long x)
{
        return tzcnt_z(x, 32 + tzcnt_z(x >> 32, 32));
}

static inline unsigned int bsr_z(u32 x, unsigned int if_z)
{
        asm("bsrl %1,%0" : "+r" (if_z) : "r" (x));
        return if_z;
}

static inline unsigned int bsrll(unsigned long long x)
{
        return 32 + bsr_z(x >> 32, bsr_z(x, -1) - 32);
}

static inline unsigned int lzcntll(unsigned long long x)
{
        return 63 - bsrll(x);
}

#endif

/*
 *  gcc (but not clang) makes a pigs-breakfast of mixed
 *  32/64 bit maths.
 */
#if !defined(__i386__) || defined(__clang__)
static u64 add_u64_u32(u64 a, u32 b)
{
        return a + b;
}

static inline u64 mul_u32_u32(u32 a, u32 b)
{
        return (u64)a * b;
}
#else
static u64 add_u64_u32(u64 a, u32 b)
{
        u32 hi = a >> 32, lo = a;

        asm ("addl %[b], %[lo]; adcl $0, %[hi]"
                         : [lo] "+r" (lo), [hi] "+r" (hi)
                         : [b] "rm" (b) );

        return (u64)hi << 32 | lo;
}

static inline u64 mul_u32_u32(u32 a, u32 b)
{
        u32 high, low;

        asm ("mull %[b]" : "=a" (low), "=d" (high)
                         : [a] "a" (a), [b] "rm" (b) );

        return low | ((u64)high) << 32;
}
#endif

static inline u64 mul_add(u32 a, u32 b, u32 c)
{
        return add_u64_u32(mul_u32_u32(a, b), c);
}

#if defined(__SIZEOF_INT128__)
typedef unsigned __int128 u128;
static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
{
        /* native 64x64=128 bits multiplication */
        u128 prod = (u128)a * b + c;

        *p_lo = prod;
        return prod >> 64;
}

#else
static inline u64 mul_u64_u64_add_u64(u64 *p_lo, u64 a, u64 b, u64 c)
{
        /* perform a 64x64=128 bits multiplication in 32bit chunks */
        u64 x, y, z;

        /* Since (x-1)(x-1) + 2(x-1) == x.x - 1 two u32 can be added to a u64 */
        x = mul_add(a, b, c);
        y = mul_add(a, b >> 32, c >> 32);
        y = add_u64_u32(y, x >> 32);
        z = mul_add(a >> 32, b >> 32, y >> 32);
        y = mul_add(a >> 32, b, y);
        *p_lo = (y << 32) + (u32)x;
        return add_u64_u32(z, y >> 32);
}

#endif

#if BITS_PER_ITER == 32
#define mul_u64_long_add_u64(p_lo, a, b, c) mul_u64_u64_add_u64(p_lo, a, b, c)
#define add_u64_long(a, b) ((a) + (b))

#else
static inline u32 mul_u64_long_add_u64(u64 *p_lo, u64 a, u32 b, u64 c)
{
        u64 n_lo = mul_add(a, b, c);
        u64 n_med = mul_add(a >> 32, b, c >> 32);

        n_med = add_u64_u32(n_med, n_lo >> 32);
        *p_lo = n_med << 32 | (u32)n_lo;
        return n_med >> 32;
}

#define add_u64_long(a, b) add_u64_u32(a, b)

#endif

#if MULDIV_OPT & 0x40
/*
 * If the divisor has BITS_PER_ITER or fewer bits then a simple
 * long division can be done.
 */
#if BITS_PER_ITER == 16
static u64 div_u80_u16(u32 n_hi, u64 n_lo, u32 d)
{
        u64 q = 0;

        if (n_hi) {
                n_hi = n_hi << 16 | (u32)(n_lo >> 48);
                q = (n_hi / d) << 16;
                n_hi = (n_hi % d) << 16 | (u16)(n_lo >> 32);
        } else {
                n_hi = n_lo >> 32;
                if (!n_hi)
                        return (u32)n_lo / d;
        }
        q |= n_hi / d;
        q <<= 32;
        n_hi = (n_hi % d) << 16 | ((u32)n_lo >> 16);
        q |= (n_hi / d) << 16;
        n_hi = (n_hi % d) << 16 | (u16)n_lo;
        q |= n_hi / d;

        return q;
}
#else
static u64 div_u96_u32(u64 n_hi, u64 n_lo, u32 d)
{
        u64 q;

        if (!n_hi)
                return n_lo / d;

        n_hi = n_hi << 32 | n_lo >> 32;
        q = (n_hi / d) << 32;
        n_hi = (n_hi % d) << 32 | (u32)n_lo;
        return q | n_hi / d;
}
#endif
#endif

u64 mul_u64_add_u64_div_u64(u64 a, u64 b, u64 c, u64 d)
{
        unsigned long d_msig, n_long, q_digit;
        unsigned int reps, d_z_hi;
        u64 quotient, n_lo, n_hi;
        u32 overflow;

        n_hi = mul_u64_u64_add_u64(&n_lo, a, b, c);
        if (unlikely(n_hi >= d)) {
                if (!d)
                        // Quotient infinity or NaN
                        return 0;
                // Quotient larger than 64 bits
                return ~(u64)0;
        }

#if !(MULDIV_OPT & 0x80)
        // OPT: Small divisors can be optimised here.
        // OPT: But the test is measurable and a lot of the cases get
        // OPT: picked up by later tests - especially 1, 2 and 0x40.
        // OPT: For 32bit a full 64bit divide will also be non-trivial.
        if (unlikely(!n_hi))
                return div64_u64(n_lo, d);
#endif

        d_z_hi = lzcntll(d);

#if MULDIV_OPT & 0x40
        // Optimise for divisors with less than BITS_PER_ITER significant bits.
        // OPT: A much simpler 'long division' can be done.
        // OPT: The test could be reworked to avoid the txcntll() when d_z_hi
        // OPT: is large enough - but the code starts looking horrid.
        // OPT: This picks up the same divisions as OPT 8, with a faster algorithm.
        u32 d_z_lo = tzcntll(d);
        if (d_z_hi + d_z_lo >= 64 - BITS_PER_ITER) {
                if (d_z_hi < 64 - BITS_PER_ITER) {
                        n_lo = n_lo >> d_z_lo | n_hi << (64 - d_z_lo);
                        n_hi >>= d_z_lo;
                        d >>= d_z_lo;
                }
#if BITS_PER_ITER == 16
                return div_u80_u16(n_hi, n_lo, d);
#else
                return div_u96_u32(n_hi, n_lo, d);
#endif
        }
#endif

        /* Left align the divisor, shifting the dividend to match */
#if MULDIV_OPT & 0x10
        // OPT: Replacing the 'pretty much always taken' branch
        // OPT: with an extra shift (one clock - should be noise)
        // OPT: feels like it ought to be a gain (for 64bit).
        // OPT: Most of the test cases have 64bit divisors - so lose,
        // OPT: but even some with a smaller divisor are hit for a few clocks.
        // OPT: Might be generating a register spill to stack.
        d <<= d_z_hi;
        n_hi = n_hi << d_z_hi | (n_lo >> (63 - d_z_hi) >> 1);
        n_lo <<= d_z_hi;
#else
        if (d_z_hi) {
                d <<= d_z_hi;
                n_hi = n_hi << d_z_hi | n_lo >> (64 - d_z_hi);
                n_lo <<= d_z_hi;
        }
#endif

        reps = 64 / BITS_PER_ITER;
        /* Optimise loop count for small dividends */
#if MULDIV_OPT & 1
        // OPT: Products with lots of leading zeros are almost certainly
        // OPT: very common.
        // OPT: The gain from removing the loop iterations is significant.
        // OPT: Especially on 32bit where two iterations can be removed
        // OPT: with a simple shift and conditional jump.
        if (!(u32)(n_hi >> 32)) {
                reps -= 32 / BITS_PER_ITER;
                n_hi = n_hi << 32 | n_lo >> 32;
                n_lo <<= 32;
        }
#endif
#if MULDIV_OPT & 2 && BITS_PER_ITER == 16
        if (!(u32)(n_hi >> 48)) {
                reps--;
                n_hi = add_u64_u32(n_hi << 16, n_lo >> 48);
                n_lo <<= 16;
        }
#endif

        /*
         * Get the most significant BITS_PER_ITER bits of the divisor.
         * This is used to get a low 'guestimate' of the quotient digit.
         */
        d_msig = (d >> (64 - BITS_PER_ITER));
#if MULDIV_OPT & 8
        // OPT: d_msig only needs rounding up - so can be unchanged if
        // OPT: all its low bits are zero.
        // OPT: However the test itself causes register pressure on x86-32.
        // OPT: The earlier check (0x40) optimises the same cases.
        // OPT: The code it generates is a lot better.
        d_msig += !!(d << BITS_PER_ITER);
#else
        d_msig += 1;
#endif


        /* Invert the dividend so we can use add instead of subtract. */
        n_lo = ~n_lo;
        n_hi = ~n_hi;

        /*
         * Now do a 'long division' with BITS_PER_ITER bit 'digits'.
         * The 'guess' quotient digit can be low and BITS_PER_ITER+1 bits.
         * The worst case is dividing ~0 by 0x8000 which requires two subtracts.
         */
        quotient = 0;
        while (reps--) {
                n_long = ~n_hi >> (64 - 2 * BITS_PER_ITER);
#if !(MULDIV_OPT & 0x20)
                // OPT: If the cpu divide instruction has a long latency an other
                // OPT: instructions can execute while the divide is pending then
                // OPT: you want the divide as early as possible.
                // OPT: I've seen delays if it moved below the shifts, but I suspect
                // OPT: the ivy bridge cpu spreads the u-ops between the execution
                // OPT: units so you don't get the full latency to play with.
                // OPT: gcc doesn't put the divide as early as it might, attempting to
                // OPT: do so by hand failed - and I'm not playing with custom asm.
                q_digit = n_long / d_msig;
#endif
                /* Shift 'n' left to align with the product q_digit * d */
                overflow = n_hi >> (64 - BITS_PER_ITER);
                n_hi = add_u64_u32(n_hi << BITS_PER_ITER, n_lo >> (64 - BITS_PER_ITER));
                n_lo <<= BITS_PER_ITER;
                quotient <<= BITS_PER_ITER;
#if MULDIV_OPT & 4
                // OPT: This optimises for zero digits.
                // OPT: With the compiler/cpu I using today (gcc 13.3 and Sandy bridge)
                // OPT: it needs the divide moved below the conditional.
                // OPT: For x86-64 0x24 and 0x03 are actually pretty similar,
                // OPT: but x86-32 is definitely slower all the time, and the outer
                // OPT: check removes two loop iterations at once.
                if (unlikely(n_long < d_msig)) {
                        // OPT: Without something here the 'unlikely' still generates
                        // OPT: a conditional backwards branch which some cpu will
                        // OPT: statically predict taken.
                        // asm( "nop");
                        continue;
                }
#endif
#if MULDIV_OPT & 0x20
                q_digit = n_long / d_msig;
#endif
                /* Add product to negated divisor */
                overflow += mul_u64_long_add_u64(&n_hi, d, q_digit, n_hi);
                /* Adjust for the q_digit 'guestimate' being low */
                while (unlikely(overflow < 0xffffffff >> (32 - BITS_PER_ITER))) {
                        q_digit++;
                        n_hi += d;
                        overflow += n_hi < d;
                }
                quotient = add_u64_long(quotient, q_digit);
        }

        /*
         * The above only ensures the remainder doesn't overflow,
         * it can still be possible to add (aka subtract) another copy
         * of the divisor.
         */
        if ((n_hi + d) > n_hi)
                quotient++;
        return quotient;
}

Some measurements on an ivy bridge.
These are the test vectors from the test module with a few extra values on the
end that pick different paths through this implementatoin.
The numbers are 'performance counter' deltas for 10 consecutive calls with the
same values.
So the latter values are with the branch predictor 'trained' to the test case.
The first few (larger) values show the cost of mispredicted branches.

Apologies for the very long lines.

$ cc -O2 -o div_perf div_perf.c -DMULDIV_OPT=0xc3  && sudo ./div_perf
0: ok   162   134    78    78    78    78    78    80    80    80 mul_u64_u64_div_u64_new b*7/3 = 19
1: ok    91    91    91    91    91    91    91    91    91    91 mul_u64_u64_div_u64_new ffff0000*ffff0000/f = 1110eeef00000000
2: ok    75    77    75    77    77    77    77    77    77    77 mul_u64_u64_div_u64_new ffffffff*ffffffff/1 = fffffffe00000001
3: ok    89    91    91    91    91    91    89    90    91    91 mul_u64_u64_div_u64_new ffffffff*ffffffff/2 = 7fffffff00000000
4: ok   147   147   128   128   128   128   128   128   128   128 mul_u64_u64_div_u64_new 1ffffffff*ffffffff/2 = fffffffe80000000
5: ok   128   128   128   128   128   128   128   128   128   128 mul_u64_u64_div_u64_new 1ffffffff*ffffffff/3 = aaaaaaa9aaaaaaab
6: ok   121   121   121   121   121   121   121   121   121   121 mul_u64_u64_div_u64_new 1ffffffff*1ffffffff/4 = ffffffff00000000
7: ok   274   234   146   138   138   138   138   138   138   138 mul_u64_u64_div_u64_new ffff000000000000*ffff000000000000/ffff000000000001 = fffeffffffffffff
8: ok   177   148   148   149   149   149   149   149   149   149 mul_u64_u64_div_u64_new 3333333333333333*3333333333333333/5555555555555555 = 1eb851eb851eb851
9: ok   138    90   118    91    91    91    91    92    92    92 mul_u64_u64_div_u64_new 7fffffffffffffff*2/3 = 5555555555555554
10: ok   113   114    86    86    84    86    86    84    87    87 mul_u64_u64_div_u64_new ffffffffffffffff*2/8000000000000000 = 3
11: ok    87    88    88    86    88    88    88    88    90    90 mul_u64_u64_div_u64_new ffffffffffffffff*2/c000000000000000 = 2
12: ok    82    86    84    86    83    86    83    86    83    87 mul_u64_u64_div_u64_new ffffffffffffffff*4000000000000004/8000000000000000 = 8000000000000007
13: ok    82    86    84    86    83    86    83    86    83    86 mul_u64_u64_div_u64_new ffffffffffffffff*4000000000000001/8000000000000000 = 8000000000000001
14: ok   189   187   138   132   132   132   131   131   131   131 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/ffffffffffffffff = 8000000000000001
15: ok   221   175   159   131   131   131   131   131   131   131 mul_u64_u64_div_u64_new fffffffffffffffe*8000000000000001/ffffffffffffffff = 8000000000000000
16: ok   134   132   134   134   134   135   134   134   134   134 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/fffffffffffffffe = 8000000000000001
17: ok   172   134   137   134   134   134   134   134   134   134 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/fffffffffffffffd = 8000000000000002
18: ok   182   182   129   129   129   129   129   129   129   129 mul_u64_u64_div_u64_new 7fffffffffffffff*ffffffffffffffff/c000000000000000 = aaaaaaaaaaaaaaa8
19: ok   130   129   130   129   129   129   129   129   129   129 mul_u64_u64_div_u64_new ffffffffffffffff*7fffffffffffffff/a000000000000000 = ccccccccccccccca
20: ok   130   129   129   129   129   129   129   129   129   129 mul_u64_u64_div_u64_new ffffffffffffffff*7fffffffffffffff/9000000000000000 = e38e38e38e38e38b
21: ok   130   129   129   129   129   129   129   129   129   129 mul_u64_u64_div_u64_new 7fffffffffffffff*7fffffffffffffff/5000000000000000 = ccccccccccccccc9
22: ok   206   140   138   138   138   138   138   138   138   138 mul_u64_u64_div_u64_new ffffffffffffffff*fffffffffffffffe/ffffffffffffffff = fffffffffffffffe
23: ok   174   140   138   138   138   138   138   138   138   138 mul_u64_u64_div_u64_new e6102d256d7ea3ae*70a77d0be4c31201/d63ec35ab3220357 = 78f8bf8cc86c6e18
24: ok   135   137   137   137   137   137   137   137   137   137 mul_u64_u64_div_u64_new f53bae05cb86c6e1*3847b32d2f8d32e0/cfd4f55a647f403c = 42687f79d8998d35
25: ok   134   136   136   136   136   136   136   136   136   136 mul_u64_u64_div_u64_new 9951c5498f941092*1f8c8bfdf287a251/a3c8dc5f81ea3fe2 = 1d887cb25900091f
26: ok   136   134   134   134   134   134   134   134   134   134 mul_u64_u64_div_u64_new 374fee9daa1bb2bb*d0bfbff7b8ae3ef/c169337bd42d5179 = 3bb2dbaffcbb961
27: ok   139   138   138   138   138   138   138   138   138   138 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216
28: ok   130   143    95    95    96    96    96    96    96    96 mul_u64_u64_div_u64_new 2d256d7ea3ae*7d0be4c31201/d63ec35ab3220357 = 1a599d6e
29: ok   169   158   158   138   138   138   138   138   138   138 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216
30: ok   178   164   144   147   147   147   147   147   147   147 mul_u64_u64_div_u64_new 2d256d7ea3ae*7d0be4c31201/63ec35ab3220357 = 387f55cef
31: ok   163   128   128   128   128   128   128   128   128   128 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb000000000000 = dc5f7e8b334db07d
32: ok   163   184   137   136   136   138   138   138   138   138 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216

$ cc -O2 -o div_perf div_perf.c -DMULDIV_OPT=0x03  && sudo ./div_perf
0: ok   125    78    78    79    79    79    79    79    79    79 mul_u64_u64_div_u64_new b*7/3 = 19
1: ok    88    89    89    88    89    89    89    89    89    89 mul_u64_u64_div_u64_new ffff0000*ffff0000/f = 1110eeef00000000
2: ok    75    76    76    76    76    76    74    76    76    76 mul_u64_u64_div_u64_new ffffffff*ffffffff/1 = fffffffe00000001
3: ok    87    89    89    89    89    89    89    88    88    88 mul_u64_u64_div_u64_new ffffffff*ffffffff/2 = 7fffffff00000000
4: ok   305   221   148   144   147   147   147   147   147   147 mul_u64_u64_div_u64_new 1ffffffff*ffffffff/2 = fffffffe80000000
5: ok   179   178   141   141   141   141   141   141   141   141 mul_u64_u64_div_u64_new 1ffffffff*ffffffff/3 = aaaaaaa9aaaaaaab
6: ok   148   200   143   145   145   145   145   145   145   145 mul_u64_u64_div_u64_new 1ffffffff*1ffffffff/4 = ffffffff00000000
7: ok   201   186   140   135   135   135   135   135   135   135 mul_u64_u64_div_u64_new ffff000000000000*ffff000000000000/ffff000000000001 = fffeffffffffffff
8: ok   227   154   145   141   141   141   141   141   141   141 mul_u64_u64_div_u64_new 3333333333333333*3333333333333333/5555555555555555 = 1eb851eb851eb851
9: ok   111   111    89    89    89    89    89    89    89    89 mul_u64_u64_div_u64_new 7fffffffffffffff*2/3 = 5555555555555554
10: ok   149   156   124    90    90    90    90    90    90    90 mul_u64_u64_div_u64_new ffffffffffffffff*2/8000000000000000 = 3
11: ok    91    91    90    90    90    90    90    90    90    90 mul_u64_u64_div_u64_new ffffffffffffffff*2/c000000000000000 = 2
12: ok   197   197   138   134   134   134   134   134   134   134 mul_u64_u64_div_u64_new ffffffffffffffff*4000000000000004/8000000000000000 = 8000000000000007
13: ok   260   136   136   135   135   135   135   135   135   135 mul_u64_u64_div_u64_new ffffffffffffffff*4000000000000001/8000000000000000 = 8000000000000001
14: ok   186   187   164   130   127   127   127   127   127   127 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/ffffffffffffffff = 8000000000000001
15: ok   171   172   173   158   160   128   125   127   125   127 mul_u64_u64_div_u64_new fffffffffffffffe*8000000000000001/ffffffffffffffff = 8000000000000000
16: ok   157   164   129   130   130   130   130   130   130   130 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/fffffffffffffffe = 8000000000000001
17: ok   191   158   130   132   132   130   130   130   130   130 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/fffffffffffffffd = 8000000000000002
18: ok   197   214   163   138   138   138   138   138   138   138 mul_u64_u64_div_u64_new 7fffffffffffffff*ffffffffffffffff/c000000000000000 = aaaaaaaaaaaaaaa8
19: ok   196   196   135   134   134   134   134   134   134   134 mul_u64_u64_div_u64_new ffffffffffffffff*7fffffffffffffff/a000000000000000 = ccccccccccccccca
20: ok   191   216   176   140   138   138   138   138   138   138 mul_u64_u64_div_u64_new ffffffffffffffff*7fffffffffffffff/9000000000000000 = e38e38e38e38e38b
21: ok   232   157   156   145   145   145   145   145   145   145 mul_u64_u64_div_u64_new 7fffffffffffffff*7fffffffffffffff/5000000000000000 = ccccccccccccccc9
22: ok   159   192   133   134   134   134   134   134   134   134 mul_u64_u64_div_u64_new ffffffffffffffff*fffffffffffffffe/ffffffffffffffff = fffffffffffffffe
23: ok   133   134   134   134   134   134   134   134   134   134 mul_u64_u64_div_u64_new e6102d256d7ea3ae*70a77d0be4c31201/d63ec35ab3220357 = 78f8bf8cc86c6e18
24: ok   134   131   131   131   131   134   131   131   131   131 mul_u64_u64_div_u64_new f53bae05cb86c6e1*3847b32d2f8d32e0/cfd4f55a647f403c = 42687f79d8998d35
25: ok   133   130   130   130   130   130   130   130   130   130 mul_u64_u64_div_u64_new 9951c5498f941092*1f8c8bfdf287a251/a3c8dc5f81ea3fe2 = 1d887cb25900091f
26: ok   133   130   130   130   130   130   130   130   130   131 mul_u64_u64_div_u64_new 374fee9daa1bb2bb*d0bfbff7b8ae3ef/c169337bd42d5179 = 3bb2dbaffcbb961
27: ok   133   134   134   134   134   134   134   134   134   133 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216
28: ok   151    93   119    93    93    93    93    93    93    93 mul_u64_u64_div_u64_new 2d256d7ea3ae*7d0be4c31201/d63ec35ab3220357 = 1a599d6e
29: ok   193   137   134   134   134   134   134   134   134   134 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216
30: ok   194   151   150   137   137   137   137   137   137   137 mul_u64_u64_div_u64_new 2d256d7ea3ae*7d0be4c31201/63ec35ab3220357 = 387f55cef
31: ok   137   173   172   137   138   138   138   138   138   138 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb000000000000 = dc5f7e8b334db07d
32: ok   160   149   131   134   134   134   134   134   134   134 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216

$ cc -O2 -o div_perf div_perf.c -DMULDIV_OPT=0x24  && sudo ./div_perf
0: ok   130   106    79    79    78    78    78    78    81    81 mul_u64_u64_div_u64_new b*7/3 = 19
1: ok    88    92    92    89    92    92    92    92    91    91 mul_u64_u64_div_u64_new ffff0000*ffff0000/f = 1110eeef00000000
2: ok    74    78    78    78    78    78    75    79    78    78 mul_u64_u64_div_u64_new ffffffff*ffffffff/1 = fffffffe00000001
3: ok    87    92    92    92    92    92    92    92    92    92 mul_u64_u64_div_u64_new ffffffff*ffffffff/2 = 7fffffff00000000
4: ok   330   275   181   145   147   148   148   148   148   148 mul_u64_u64_div_u64_new 1ffffffff*ffffffff/2 = fffffffe80000000
5: ok   225   175   141   146   146   146   146   146   146   146 mul_u64_u64_div_u64_new 1ffffffff*ffffffff/3 = aaaaaaa9aaaaaaab
6: ok   187   194   193   194   178   144   148   148   148   148 mul_u64_u64_div_u64_new 1ffffffff*1ffffffff/4 = ffffffff00000000
7: ok   202   189   178   139   140   139   140   139   140   139 mul_u64_u64_div_u64_new ffff000000000000*ffff000000000000/ffff000000000001 = fffeffffffffffff
8: ok   228   168   150   143   143   143   143   143   143   143 mul_u64_u64_div_u64_new 3333333333333333*3333333333333333/5555555555555555 = 1eb851eb851eb851
9: ok   112   112    92    89    92    92    92    92    92    87 mul_u64_u64_div_u64_new 7fffffffffffffff*2/3 = 5555555555555554
10: ok   153   184    92    93    95    95    95    95    95    95 mul_u64_u64_div_u64_new ffffffffffffffff*2/8000000000000000 = 3
11: ok   158    93    93    93    95    92    95    95    95    95 mul_u64_u64_div_u64_new ffffffffffffffff*2/c000000000000000 = 2
12: ok   206   178   146   139   139   140   139   140   139   140 mul_u64_u64_div_u64_new ffffffffffffffff*4000000000000004/8000000000000000 = 8000000000000007
13: ok   187   146   140   139   140   139   140   139   140   139 mul_u64_u64_div_u64_new ffffffffffffffff*4000000000000001/8000000000000000 = 8000000000000001
14: ok   167   173   136   136   102    97    97    97    97    97 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/ffffffffffffffff = 8000000000000001
15: ok   166   150   105    98    98    98    97    99    97    99 mul_u64_u64_div_u64_new fffffffffffffffe*8000000000000001/ffffffffffffffff = 8000000000000000
16: ok   209   197   139   136   136   136   136   136   136   136 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/fffffffffffffffe = 8000000000000001
17: ok   170   142   170   137   136   135   136   135   136   135 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/fffffffffffffffd = 8000000000000002
18: ok   238   197   172   140   140   140   139   141   140   139 mul_u64_u64_div_u64_new 7fffffffffffffff*ffffffffffffffff/c000000000000000 = aaaaaaaaaaaaaaa8
19: ok   185   206   139   141   142   142   142   142   142   142 mul_u64_u64_div_u64_new ffffffffffffffff*7fffffffffffffff/a000000000000000 = ccccccccccccccca
20: ok   207   226   146   142   140   140   140   140   140   140 mul_u64_u64_div_u64_new ffffffffffffffff*7fffffffffffffff/9000000000000000 = e38e38e38e38e38b
21: ok   161   153   148   148   148   148   148   148   148   146 mul_u64_u64_div_u64_new 7fffffffffffffff*7fffffffffffffff/5000000000000000 = ccccccccccccccc9
22: ok   172   199   141   140   140   140   140   140   140   140 mul_u64_u64_div_u64_new ffffffffffffffff*fffffffffffffffe/ffffffffffffffff = fffffffffffffffe
23: ok   172   137   140   140   140   140   140   140   140   140 mul_u64_u64_div_u64_new e6102d256d7ea3ae*70a77d0be4c31201/d63ec35ab3220357 = 78f8bf8cc86c6e18
24: ok   135   138   138   138   138   138   138   138   138   138 mul_u64_u64_div_u64_new f53bae05cb86c6e1*3847b32d2f8d32e0/cfd4f55a647f403c = 42687f79d8998d35
25: ok   136   137   137   137   137   137   137   137   137   137 mul_u64_u64_div_u64_new 9951c5498f941092*1f8c8bfdf287a251/a3c8dc5f81ea3fe2 = 1d887cb25900091f
26: ok   136   136   136   136   136   136   136   136   136   136 mul_u64_u64_div_u64_new 374fee9daa1bb2bb*d0bfbff7b8ae3ef/c169337bd42d5179 = 3bb2dbaffcbb961
27: ok   139   140   140   140   140   140   140   140   140   140 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216
28: ok   132    94    97    96    96    96    96    96    96    96 mul_u64_u64_div_u64_new 2d256d7ea3ae*7d0be4c31201/d63ec35ab3220357 = 1a599d6e
29: ok   139   140   140   140   140   140   140   140   140   140 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216
30: ok   159   141   141   141   141   141   141   141   141   141 mul_u64_u64_div_u64_new 2d256d7ea3ae*7d0be4c31201/63ec35ab3220357 = 387f55cef
31: ok   244   186   154   141   139   141   140   139   141   140 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb000000000000 = dc5f7e8b334db07d
32: ok   165   140   140   140   140   140   140   140   140   140 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216

$ cc -O2 -o div_perf div_perf.c -DMULDIV_OPT=0xc3 -m32 && sudo ./div_perf
0: ok   336   131   104   104   102   102   103   103   103   105 mul_u64_u64_div_u64_new b*7/3 = 19
1: ok   195   195   171   171   171   171   171   171   171   171 mul_u64_u64_div_u64_new ffff0000*ffff0000/f = 1110eeef00000000
2: ok   165   162   162   162   162   162   162   162   162   162 mul_u64_u64_div_u64_new ffffffff*ffffffff/1 = fffffffe00000001
3: ok   165   173   173   173   173   173   173   173   173   173 mul_u64_u64_div_u64_new ffffffff*ffffffff/2 = 7fffffff00000000
4: ok   220   216   202   206   202   206   202   206   202   206 mul_u64_u64_div_u64_new 1ffffffff*ffffffff/2 = fffffffe80000000
5: ok   203   205   208   205   209   205   209   205   209   205 mul_u64_u64_div_u64_new 1ffffffff*ffffffff/3 = aaaaaaa9aaaaaaab
6: ok   203   203   199   203   199   203   199   203   199   203 mul_u64_u64_div_u64_new 1ffffffff*1ffffffff/4 = ffffffff00000000
7: ok   574   421   251   242   246   254   246   254   246   254 mul_u64_u64_div_u64_new ffff000000000000*ffff000000000000/ffff000000000001 = fffeffffffffffff
8: ok   370   317   263   262   254   259   258   262   254   259 mul_u64_u64_div_u64_new 3333333333333333*3333333333333333/5555555555555555 = 1eb851eb851eb851
9: ok   214   199   177   177   177   177   177   177   177   177 mul_u64_u64_div_u64_new 7fffffffffffffff*2/3 = 5555555555555554
10: ok   163   150   128   129   129   129   129   129   129   129 mul_u64_u64_div_u64_new ffffffffffffffff*2/8000000000000000 = 3
11: ok   135   133   135   135   135   135   135   135   135   135 mul_u64_u64_div_u64_new ffffffffffffffff*2/c000000000000000 = 2
12: ok   206   208   208   180   183   183   183   183   183   183 mul_u64_u64_div_u64_new ffffffffffffffff*4000000000000004/8000000000000000 = 8000000000000007
13: ok   205   183   183   184   183   183   183   183   183   183 mul_u64_u64_div_u64_new ffffffffffffffff*4000000000000001/8000000000000000 = 8000000000000001
14: ok   331   296   238   229   232   232   232   232   232   232 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/ffffffffffffffff = 8000000000000001
15: ok   324   311   238   239   239   239   239   239   239   242 mul_u64_u64_div_u64_new fffffffffffffffe*8000000000000001/ffffffffffffffff = 8000000000000000
16: ok   300   262   265   233   238   234   238   234   238   234 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/fffffffffffffffe = 8000000000000001
17: ok   295   282   244   244   244   244   244   244   244   244 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/fffffffffffffffd = 8000000000000002
18: ok   245   247   235   222   221   222   219   222   221   222 mul_u64_u64_div_u64_new 7fffffffffffffff*ffffffffffffffff/c000000000000000 = aaaaaaaaaaaaaaa8
19: ok   235   221   222   221   222   219   222   221   222   219 mul_u64_u64_div_u64_new ffffffffffffffff*7fffffffffffffff/a000000000000000 = ccccccccccccccca
20: ok   220   222   221   222   219   222   219   222   221   222 mul_u64_u64_div_u64_new ffffffffffffffff*7fffffffffffffff/9000000000000000 = e38e38e38e38e38b
21: ok   225   221   222   219   221   219   221   219   221   219 mul_u64_u64_div_u64_new 7fffffffffffffff*7fffffffffffffff/5000000000000000 = ccccccccccccccc9
22: ok   309   274   250   238   237   244   239   241   237   244 mul_u64_u64_div_u64_new ffffffffffffffff*fffffffffffffffe/ffffffffffffffff = fffffffffffffffe
23: ok   284   284   250   247   243   247   247   243   247   247 mul_u64_u64_div_u64_new e6102d256d7ea3ae*70a77d0be4c31201/d63ec35ab3220357 = 78f8bf8cc86c6e18
24: ok   239   239   243   240   239   239   239   239   239   239 mul_u64_u64_div_u64_new f53bae05cb86c6e1*3847b32d2f8d32e0/cfd4f55a647f403c = 42687f79d8998d35
25: ok   255   255   255   255   247   243   247   247   243   247 mul_u64_u64_div_u64_new 9951c5498f941092*1f8c8bfdf287a251/a3c8dc5f81ea3fe2 = 1d887cb25900091f
26: ok   327   274   240   242   240   242   240   242   240   242 mul_u64_u64_div_u64_new 374fee9daa1bb2bb*d0bfbff7b8ae3ef/c169337bd42d5179 = 3bb2dbaffcbb961
27: ok   461   313   340   259   257   259   257   259   257   259 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216
28: ok   291   219   180   174   170   173   171   174   171   174 mul_u64_u64_div_u64_new 2d256d7ea3ae*7d0be4c31201/d63ec35ab3220357 = 1a599d6e
29: ok   280   319   258   257   259   257   259   257   259   257 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216
30: ok   379   352   247   239   220   225   225   229   228   229 mul_u64_u64_div_u64_new 2d256d7ea3ae*7d0be4c31201/63ec35ab3220357 = 387f55cef
31: ok   235   219   221   219   221   219   221   219   221   219 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb000000000000 = dc5f7e8b334db07d
32: ok   305   263   257   257   258   258   263   257   257   257 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216

$ cc -O2 -o div_perf div_perf.c -DMULDIV_OPT=0x03 -m32 && sudo ./div_perf
0: ok   292   127   129   125   123   128   125   123   125   121 mul_u64_u64_div_u64_new b*7/3 = 19
1: ok   190   196   151   149   151   149   151   149   151   149 mul_u64_u64_div_u64_new ffff0000*ffff0000/f = 1110eeef00000000
2: ok   141   139   139   139   139   139   139   139   139   139 mul_u64_u64_div_u64_new ffffffff*ffffffff/1 = fffffffe00000001
3: ok   152   149   151   149   151   149   151   149   151   149 mul_u64_u64_div_u64_new ffffffff*ffffffff/2 = 7fffffff00000000
4: ok   667   453   276   270   271   271   271   267   274   272 mul_u64_u64_div_u64_new 1ffffffff*ffffffff/2 = fffffffe80000000
5: ok   366   319   373   337   278   278   278   278   278   278 mul_u64_u64_div_u64_new 1ffffffff*ffffffff/3 = aaaaaaa9aaaaaaab
6: ok   380   349   268   268   271   265   265   265   265   265 mul_u64_u64_div_u64_new 1ffffffff*1ffffffff/4 = ffffffff00000000
7: ok   340   277   255   251   249   255   251   249   255   251 mul_u64_u64_div_u64_new ffff000000000000*ffff000000000000/ffff000000000001 = fffeffffffffffff
8: ok   377   302   253   252   256   256   256   256   256   256 mul_u64_u64_div_u64_new 3333333333333333*3333333333333333/5555555555555555 = 1eb851eb851eb851
9: ok   181   184   157   155   157   155   157   155   157   155 mul_u64_u64_div_u64_new 7fffffffffffffff*2/3 = 5555555555555554
10: ok   304   223   142   139   139   141   139   139   139   139 mul_u64_u64_div_u64_new ffffffffffffffff*2/8000000000000000 = 3
11: ok   153   143   148   142   143   142   143   142   143   142 mul_u64_u64_div_u64_new ffffffffffffffff*2/c000000000000000 = 2
12: ok   428   323   292   246   257   248   253   250   248   253 mul_u64_u64_div_u64_new ffffffffffffffff*4000000000000004/8000000000000000 = 8000000000000007
13: ok   289   256   260   257   255   257   255   257   255   257 mul_u64_u64_div_u64_new ffffffffffffffff*4000000000000001/8000000000000000 = 8000000000000001
14: ok   334   246   234   234   227   233   229   233   229   233 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/ffffffffffffffff = 8000000000000001
15: ok   324   302   273   236   236   236   236   236   236   236 mul_u64_u64_div_u64_new fffffffffffffffe*8000000000000001/ffffffffffffffff = 8000000000000000
16: ok   269   328   285   259   232   230   236   232   232   236 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/fffffffffffffffe = 8000000000000001
17: ok   307   329   330   244   247   246   245   245   245   245 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/fffffffffffffffd = 8000000000000002
18: ok   359   361   324   258   258   258   258   258   258   258 mul_u64_u64_div_u64_new 7fffffffffffffff*ffffffffffffffff/c000000000000000 = aaaaaaaaaaaaaaa8
19: ok   347   325   295   258   260   253   251   251   251   251 mul_u64_u64_div_u64_new ffffffffffffffff*7fffffffffffffff/a000000000000000 = ccccccccccccccca
20: ok   339   312   261   260   255   255   255   255   255   255 mul_u64_u64_div_u64_new ffffffffffffffff*7fffffffffffffff/9000000000000000 = e38e38e38e38e38b
21: ok   411   349   333   276   272   272   272   272   272   272 mul_u64_u64_div_u64_new 7fffffffffffffff*7fffffffffffffff/5000000000000000 = ccccccccccccccc9
22: ok   297   330   290   266   239   236   238   237   238   237 mul_u64_u64_div_u64_new ffffffffffffffff*fffffffffffffffe/ffffffffffffffff = fffffffffffffffe
23: ok   299   311   250   247   250   245   247   250   245   247 mul_u64_u64_div_u64_new e6102d256d7ea3ae*70a77d0be4c31201/d63ec35ab3220357 = 78f8bf8cc86c6e18
24: ok   274   245   238   237   237   237   237   237   237   247 mul_u64_u64_div_u64_new f53bae05cb86c6e1*3847b32d2f8d32e0/cfd4f55a647f403c = 42687f79d8998d35
25: ok   247   247   245   247   250   245   247   250   245   247 mul_u64_u64_div_u64_new 9951c5498f941092*1f8c8bfdf287a251/a3c8dc5f81ea3fe2 = 1d887cb25900091f
26: ok   341   354   288   239   242   240   239   242   240   239 mul_u64_u64_div_u64_new 374fee9daa1bb2bb*d0bfbff7b8ae3ef/c169337bd42d5179 = 3bb2dbaffcbb961
27: ok   408   375   288   312   257   260   259   259   259   259 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216
28: ok   289   259   199   198   201   170   170   174   173   173 mul_u64_u64_div_u64_new 2d256d7ea3ae*7d0be4c31201/d63ec35ab3220357 = 1a599d6e
29: ok   334   257   260   257   260   259   259   259   259   259 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216
30: ok   341   267   244   233   229   233   229   233   229   233 mul_u64_u64_div_u64_new 2d256d7ea3ae*7d0be4c31201/63ec35ab3220357 = 387f55cef
31: ok   323   323   297   297   264   268   267   268   267   268 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb000000000000 = dc5f7e8b334db07d
32: ok   284   262   251   251   253   251   251   251   251   251 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216

$ cc -O2 -o div_perf div_perf.c -DMULDIV_OPT=0x24 -m32 && sudo ./div_perf
0: ok   362   127   125   122   123   122   125   129   126   126 mul_u64_u64_div_u64_new b*7/3 = 19
1: ok   190   175   149   150   154   150   154   150   154   150 mul_u64_u64_div_u64_new ffff0000*ffff0000/f = 1110eeef00000000
2: ok   144   139   139   139   139   139   139   139   139   139 mul_u64_u64_div_u64_new ffffffff*ffffffff/1 = fffffffe00000001
3: ok   146   150   154   154   150   154   150   154   150   154 mul_u64_u64_div_u64_new ffffffff*ffffffff/2 = 7fffffff00000000
4: ok   747   554   319   318   316   319   318   328   314   324 mul_u64_u64_div_u64_new 1ffffffff*ffffffff/2 = fffffffe80000000
5: ok   426   315   312   315   315   315   315   315   315   315 mul_u64_u64_div_u64_new 1ffffffff*ffffffff/3 = aaaaaaa9aaaaaaab
6: ok   352   391   317   316   323   327   323   327   323   324 mul_u64_u64_div_u64_new 1ffffffff*1ffffffff/4 = ffffffff00000000
7: ok   369   328   298   292   298   292   298   292   298   292 mul_u64_u64_div_u64_new ffff000000000000*ffff000000000000/ffff000000000001 = fffeffffffffffff
8: ok   436   348   298   299   298   300   307   297   297   301 mul_u64_u64_div_u64_new 3333333333333333*3333333333333333/5555555555555555 = 1eb851eb851eb851
9: ok   180   183   151   151   151   151   151   151   151   153 mul_u64_u64_div_u64_new 7fffffffffffffff*2/3 = 5555555555555554
10: ok   286   251   188   169   174   170   178   170   178   170 mul_u64_u64_div_u64_new ffffffffffffffff*2/8000000000000000 = 3
11: ok   230   177   172   177   172   177   172   177   172   177 mul_u64_u64_div_u64_new ffffffffffffffff*2/c000000000000000 = 2
12: ok   494   412   371   331   296   298   305   290   296   298 mul_u64_u64_div_u64_new ffffffffffffffff*4000000000000004/8000000000000000 = 8000000000000007
13: ok   330   300   304   305   290   305   294   302   290   296 mul_u64_u64_div_u64_new ffffffffffffffff*4000000000000001/8000000000000000 = 8000000000000001
14: ok   284   241   175   172   176   175   175   172   176   175 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/ffffffffffffffff = 8000000000000001
15: ok   283   263   171   176   175   175   175   175   175   175 mul_u64_u64_div_u64_new fffffffffffffffe*8000000000000001/ffffffffffffffff = 8000000000000000
16: ok   346   247   258   208   202   205   208   202   205   208 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/fffffffffffffffe = 8000000000000001
17: ok   242   272   208   213   209   213   209   213   209   213 mul_u64_u64_div_u64_new ffffffffffffffff*8000000000000001/fffffffffffffffd = 8000000000000002
18: ok   494   337   306   309   309   309   309   309   309   309 mul_u64_u64_div_u64_new 7fffffffffffffff*ffffffffffffffff/c000000000000000 = aaaaaaaaaaaaaaa8
19: ok   392   394   329   305   299   302   294   302   292   305 mul_u64_u64_div_u64_new ffffffffffffffff*7fffffffffffffff/a000000000000000 = ccccccccccccccca
20: ok   372   307   306   310   308   310   308   310   308   310 mul_u64_u64_div_u64_new ffffffffffffffff*7fffffffffffffff/9000000000000000 = e38e38e38e38e38b
21: ok   525   388   310   320   314   313   314   315   312   315 mul_u64_u64_div_u64_new 7fffffffffffffff*7fffffffffffffff/5000000000000000 = ccccccccccccccc9
22: ok   400   293   289   354   284   283   284   284   284   284 mul_u64_u64_div_u64_new ffffffffffffffff*fffffffffffffffe/ffffffffffffffff = fffffffffffffffe
23: ok   320   324   289   290   289   289   290   289   289   290 mul_u64_u64_div_u64_new e6102d256d7ea3ae*70a77d0be4c31201/d63ec35ab3220357 = 78f8bf8cc86c6e18
24: ok   279   289   290   285   285   285   285   285   285   285 mul_u64_u64_div_u64_new f53bae05cb86c6e1*3847b32d2f8d32e0/cfd4f55a647f403c = 42687f79d8998d35
25: ok   288   290   289   289   290   289   289   290   289   289 mul_u64_u64_div_u64_new 9951c5498f941092*1f8c8bfdf287a251/a3c8dc5f81ea3fe2 = 1d887cb25900091f
26: ok   361   372   351   325   288   293   286   293   294   293 mul_u64_u64_div_u64_new 374fee9daa1bb2bb*d0bfbff7b8ae3ef/c169337bd42d5179 = 3bb2dbaffcbb961
27: ok   483   349   302   302   305   300   307   304   307   304 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216
28: ok   339   328   234   233   213   214   213   208   215   208 mul_u64_u64_div_u64_new 2d256d7ea3ae*7d0be4c31201/d63ec35ab3220357 = 1a599d6e
29: ok   406   300   303   300   307   304   307   304   307   304 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216
30: ok   404   421   335   268   265   271   267   271   267   272 mul_u64_u64_div_u64_new 2d256d7ea3ae*7d0be4c31201/63ec35ab3220357 = 387f55cef
31: ok   484   350   310   309   306   309   309   306   309   309 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb000000000000 = dc5f7e8b334db07d
32: ok   368   306   301   307   304   307   304   307   304   307 mul_u64_u64_div_u64_new eac0d03ac10eeaf0*89be05dfa162ed9b/92bb1679a41f0e4b = dc5f5cc9e270d216

	David

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ