linux-kernel - mmap/munmap 20ms latencies, 2.6.37

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <20110111081928.GL18828@hexapodia.org>
Date:	Tue, 11 Jan 2011 00:19:28 -0800
From:	Andy Isaacson <adi@...apodia.org>
To:	linux-kernel@...r.kernel.org
Subject: mmap/munmap 20ms latencies, 2.6.37

I'm seeing high latencies (up to 20 ms with no sign of CPU contention)
from mmap/munmap, using the enclosed test program on various Core i7
processors.  For example on a "Intel(R) Core(TM) i7 CPU 930@...0GHz"
with 12GB RAM running 0c21e3a,

% /tmp/munmapslow -t 6
...
[1294732507.539851] <8636> free(0x7f2608f08000) len=57344 took 4041 us
[1294732507.559841] <8632> free(0x7f260e810000) len=32768 took 8032 us
[1294732507.570254] <8632> malloc(81920) took 2452 us
[1294732507.583834] <8635> malloc(73728) took 8035 us
[1294732507.595840] <8631> malloc(65536) took 4048 us
[1294732507.599838] <8635> malloc(45056) took 8041 us
[1294732507.603834] <8631> malloc(20480) took 4029 us
...
[1294732510.603845] <8634> realloc(0x756170, 23408) took 4035 us
[1294732510.647850] <8634> malloc(73728) took 28047 us
[1294732510.675831] <8631> malloc(53248) took 28028 us
[1294732510.731835] <8631> malloc(81920) took 28035 us
...
[1294732543.685480] <8633> 5244182 heap events, 3739 allocated, 4425 max, 153.57 MiB total
[1294732543.685503] <8633> 78060.59 ms total for 5244182 allocations (15 us per allocation)
[1294732543.685509] <8633> 13779.82 ms total for 5240443 frees (3 us per free)

Most of the time, mmap takes about 15 us and munmap takes less than 5 us
for these small buffers (<20 pages).

Of course if a significant background job fires, a lot of "slow
operations" are seen; but I'm seeing cases where preemption doesn't
appear to be the cause.  (I suppose I could parse /proc/self/sched to
confirm this...)

I'll try trace-cmd tomorrow, but I wonder if anyone already knows what's
going on?

(As you may guess, this is an attempt to isolate a problem seen in a
larger system that's not easy to reduce to a simple testcase.  We're
seeing a 3x higher system time reported via munin than previously, even
though the CPUs went from 20%-40% idle to 100% pegged and overall
throughput *dropped* somewhat due to being CPU limited.  It's not clear
what changed to trigger the increased system time -- but munmap popped
out of strace -c.)

Thanks,
-andy

/*
 * Attempt to reproduce munmap slowness seen on
 * 2.6.35-23-generic
 * Intel(R) Xeon(R) CPU           E5504  @ 2.00GHz
 * with HT turned on.
 *
 * strace -c shows that munmap is taking up to 11 ms per call and
 * that's where we are spending 95% of our time.
 *
 * Plan for testcase:
 *
 * fork N processes.
 * In each process, repeatedly allocate and free various buffer sizes.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <errno.h>

#include <sys/time.h>

#include <unistd.h>
#include <sys/wait.h>
#include <sys/mman.h>

#define MAX_TIME (1000 * 1e-6)

typedef unsigned int u32;

static int pgsz;

double rtc(void)
{
    struct timeval t;

    gettimeofday(&t, 0);
    return t.tv_sec + t.tv_usec/1e6;
}

void die(char *fmt, ...) __attribute__((noreturn));

void die(char *fmt, ...)
{
    va_list ap;

    fprintf(stderr, "[%.6f] <%d> ", rtc(), (int)getpid());
    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);
    exit(1);
}

void msg(char *fmt, ...)
{
    va_list ap;

    printf("[%.6f] <%d> ", rtc(), (int)getpid());
    va_start(ap, fmt);
    vfprintf(stdout, fmt, ap);
    va_end(ap);
}

/*
 * returns a random integer chosen from a uniform distribution on [0, range).
 */
u32 myrand(u32 range)
{
    u32 x, mask;
    int i, MAX = 1000;

    if (range == 1) return 0;

    mask = range - 1;
    while (mask & (mask - 1))
	mask &= (mask - 1);
    mask |= (mask - 1);

    for (i = 0; i < MAX; i++) {
	x = lrand48() & mask;
	if (x < range)
	    return x;
    }
    die("myrand failed after %d iter, range = %lld mask = 0x%llx x = 0x%llx\n",
	    i, (long long)range, (long long) mask, (long long)x);
}

static void **heap;
static int num_heap, max_num_heap;
static long long totsz;

static double elapsed_alloc, elapsed_free;
int num_alloc, num_free;

void heap_stats(void)
{
    msg("%d heap events, %d allocated, %d max, %.2f MiB total\n",
	    num_alloc, num_heap, max_num_heap, totsz / 1024. / 1024);
    msg("%.2f ms total for %d allocations (%.0f us per allocation)\n",
	    elapsed_alloc * 1e3, num_alloc, elapsed_alloc * 1e6 / num_alloc);
    msg("%.2f ms total for %d frees (%.0f us per free)\n",
	    elapsed_free * 1e3, num_free, elapsed_free * 1e6 / num_alloc);
}

void *myalloc(size_t sz)
{
    double t0, t1, e;
    void *p;

    t0 = rtc();
    p = mmap(0, sz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
    if (p == MAP_FAILED)
	die("mmap(%d): %s\n", (int)sz, strerror(errno));
    t1 = rtc();

    e = t1 - t0;

    if (e > MAX_TIME)
	msg("malloc(%d) took %.0f us\n", (int)sz, e * 1e6);

    elapsed_alloc += e;
    num_alloc++;

    return p;
}

void myfree(void *p)
{
    double t0, t1, e;
    int sz = *(int *)p;

    t0 = rtc();
    munmap(p, sz);
    t1 = rtc();

    e = t1 - t0;
    if (e > MAX_TIME)
	msg("free(%p) len=%d took %.0f us\n", p, sz, e * 1e6);
    elapsed_free += e;
    num_free++;
}

void heap_allocate(int sz)
{
    void *n;
    void **h2;
    double t1, t2, e;

    n = myalloc(sz);
    if (!n) die("malloc(%d) failed.\n", sz);

    *(int *)n = sz;

    t1 = rtc();
    h2 = realloc(heap, sizeof(*heap) * ++num_heap);
    t2 = rtc();
    e = t2 - t1;
    if (!h2)
	die("realloc(%p, %d) failed.\n",
	       	heap, (int)(sizeof(*heap) * num_heap));

    if ((t2 - t1) > MAX_TIME)
	msg("realloc(%p, %d) took %.0f us\n",
	       	heap, (int)(sizeof(*heap) * num_heap), e * 1e6);

    if (num_heap > max_num_heap) max_num_heap = num_heap;
    totsz += sz;

    h2[num_heap-1] = n;
    heap = h2;
}

void heap_free(void)
{
    int i;

    if (num_heap <= 0)
	return;

    i = myrand(num_heap--);

    totsz -= *(int *)heap[i];

    if (heap[i])
	myfree(heap[i]);

    for (; i < num_heap; i++)
	heap[i] = heap[i+1];
}

void send_message(void)
{
}

void receive_message(void)
{
}

void worker(void)
{
    long long i;
    long long counts[4] = { 0 };
    int ITERCOUNT = 10 * 1024 * 1024;

    for (i=0; ; i++) {
	int action = myrand(4);

	if (i < 10000 && action == 1 && myrand(2) == 1) action = 0;
	counts[action]++;
	switch(action) {
	case 0:
	    heap_allocate(pgsz * (myrand(20) + 1));
	    break;
	case 1:
	    heap_free();
	    break;
	case 2:
	    send_message();
	    break;
	case 3:
	    receive_message();
	    break;
	}
	if (i % ITERCOUNT == ITERCOUNT - 1) {
	    heap_stats();
	}
    }
}

void usage(const char *cmd)
{
    die("Usage: %s [-t numthreads]\n", cmd);
}

int main(int argc, char **argv)
{
    int c, i;
    int nproc = sysconf(_SC_NPROCESSORS_ONLN);

    pgsz = getpagesize();

    while ((c = getopt(argc, argv, "t:")) != EOF) {
	switch (c) {
	case 't':
	    nproc = strtol(optarg, 0, 0);
	    break;
	default:
	    usage(argv[0]);
	}
    }

    printf("forking %d children\n", nproc);
    for (i=0; i < nproc; i++) {
	switch(fork()) {
	case -1:
	    die("fork: %s\n", strerror(errno));
	case 0: /* child */
	    worker();
	    exit(0);
	default: /* parent */
	    /* nothing */
	    break;
	}
    }

    for (i=0; i < nproc; i++) {
	int x, p;

	p = wait(&x);
	msg("child %d exited\n", p);
    }

    return 0;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/