lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [day] [month] [year] [list]
Message-ID: <CAJ6qgScNJFAO9h7+04WEKSC-znJ2-WFVPTKiJV9Uadf2-XCW1A@mail.gmail.com>
Date:	Sat, 25 Jul 2015 10:19:39 +0800
From:	Jaden Liang <jaden1q84@...il.com>
To:	linux-kernel@...r.kernel.org
Subject: sys_pread64 data error: sys_pread64 return success but buffer data
 wrong with transparent hugepage enabled in multi-thread application

Hi all,

We have a multi-threaded application that reads a file with pread() using
O_DIRECT. The buffer and the file offset are both 4K-aligned. From time to
time the threads also call system() to run a command while the reads are
in progress.

While debugging, we found that sys_pread64 sometimes returns success even
though the returned buffer was NOT modified at all. This happens when
another thread calls fork() at the same time.

After some tracing, we found that this issue is related to the
transparent hugepage feature. If we set the transparent hugepage policy to
"never", as shown below:
    echo never > /sys/kernel/mm/transparent_hugepage/enabled
the buffer data issue disappears.

We are still tracing deeper into the transparent hugepage code to find
out what really happens when transparent hugepages are enabled. We think
it might be related to the COW mechanism in the mm subsystem, but we are
not sure yet.

Below is a small test program that reproduces the issue. The test
program creates 384 threads that pread from one file and check the
results. All the threads call system() to simulate fork().

My environment:
OS: CentOS 7
Kernel Version: 3.10.0-229.7.2.el7.x86_64

Compile:
# gcc pread_test.c -o pread_test -lpthread

Run test:
# dd if=/dev/zero of=pread_test_file bs=1M count=200 oflag=direct
# ./pread_test ./pread_test_file

If the output includes a message like the one below, the pread data error has occurred.
[7fb5e4ff9700][ifkey_exec:153]pread BUG buf:0x7fb5b500c000
offset:10881536 size:92160 ret=92160 errno:Success

Test program source code:

// pread_test.c
/* Feature-test macros must be defined before the first system header. */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
#include <malloc.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>


/*
 * Fallback definition used only when the included headers did not
 * provide O_DIRECT.
 * NOTE(review): 00040000 is the value used on x86 Linux; other
 * architectures may use a different bit -- confirm before building
 * this test anywhere else.
 */
#ifndef O_DIRECT
#define O_DIRECT 00040000 /* direct disk access hint */
#endif

/*
 * Number of elements in the array x.  Only meaningful when x is a real
 * array (not a pointer), because it divides sizeof the whole object by
 * sizeof its first element.
 */
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
#endif

/*
 * errlog(fmt, ...) - printf-style diagnostic written to stderr.
 *
 * Every message is prefixed with "[<thread id in hex>][<function>:<line>]".
 * fmt must be a string literal because it is concatenated with the prefix
 * at compile time.  The macro expands to a single fprintf() expression, so
 * its value is fprintf()'s return value and it can be used as the sole
 * statement of an unbraced if.
 *
 * Each line of the definition ends in a backslash; without them the macro
 * would end after the first line and the remaining text would become a
 * stray declaration.  pthread_self() is cast so that it matches %lx.
 */
#define errlog(fmt, ...) \
    fprintf(stderr, "[%lx][%s:%d]" fmt, (unsigned long)pthread_self(), \
            __func__, __LINE__, ##__VA_ARGS__)

/*
 * Thread fan-out of the test:
 *   MAX_BRICK   - number of thread_test() threads started by main();
 *   MAX_USER    - number of ifkey_test() threads started by each
 *                 file_test() call;
 *   MAX_THREADS - product of the two (24 * 16 = 384).
 */
#define MAX_BRICK 24
#define MAX_USER 16
#define MAX_THREADS (MAX_BRICK * MAX_USER)

/*
 * Operation codes stored in ifkey_t.op.  ifkey_exec() implements only
 * ifop_en_write and ifop_en_read; any other value is reported as
 * unsupported.
 */
enum
{
ifop_en_none,
ifop_en_write,
ifop_en_read,
ifop_en_readmiss,
};

/* One I/O request to be replayed by ifkey_exec(). */
typedef struct
{
/* Operation code, one of the ifop_en_* values. */
int         op;

/* Byte offset in the file at which the transfer starts. */
off_t       offset;

/* Number of bytes to transfer. */
size_t      size;
}ifkey_t;

/*
 * Request list with a single read of 92160 bytes at offset 10881536
 * (ends at byte 10973696).  Run three times by each ifkey_test() thread.
 */
static ifkey_t s_ops[] =
{
{
.op = ifop_en_read,
.offset = 10881536,
.size = 92160,
},
};
/*
 * Request list with a single read of 10952704 bytes starting at
 * offset 0.  Run twice by each ifkey_test() thread.
 */
static ifkey_t s_read[] =
{
{
.op = ifop_en_read,
.offset = 0,
.size = 10952704,
},
};

/*
 * Buffer alignment in units of 512 bytes: ifkey_exec() passes
 * s_sectors * 512 to memalign().  Defaults to 8 (4096 bytes) and can be
 * overridden by the second command-line argument in main().
 */
static int s_sectors = 8;

/*
 * Set to 1 by ifkey_exec() when a read returns the full length but the
 * buffer still holds the fill pattern.  Once set, later ifkey_exec()
 * calls return immediately and main() reports failure.
 * NOTE(review): this plain int is written and read from many threads
 * without any synchronization or atomic type.
 */
static int s_threads_exit = 0;

/*
 * Return the largest end position (offset + size) found among the first
 * `count` entries of `key`.  The result is 0 when `count` is not
 * positive.  ifkey_exec() uses it to size its I/O buffer.
 */
size_t ifkey_get_size(const ifkey_t *key, int count)
{
    size_t furthest = 0;
    int idx = 0;

    while ( idx < count )
    {
        const ifkey_t *cur = &key[idx];

        /* Keep the running maximum of where each request ends. */
        if ( cur->offset + cur->size > furthest )
        {
            furthest = cur->offset + cur->size;
        }

        idx++;
    }

    return furthest;
}

/*
 * Replay the `count` requests in `key` against the open descriptor `fd`.
 *
 * fd    - descriptor used for every pread/pwrite.
 * key   - array of requests; only ifop_en_write and ifop_en_read are
 *         implemented, other op codes are logged as unsupported.
 * count - number of entries in `key`.
 * init  - when non-zero, the whole buffer (filled with 'A') is first
 *         written to the file at offset 0.
 *
 * One buffer, aligned to s_sectors * 512 bytes and large enough to cover
 * the furthest byte any request touches, is allocated per call and freed
 * before returning.  Before each read the destination is filled with 'R';
 * if the read returns the full length yet the first 8 bytes are still
 * 'R', the kernel did not deliver any data: this is logged as
 * "pread BUG" and s_threads_exit is set so that other calls stop early.
 */
static void ifkey_exec(int fd, ifkey_t *key, int count, int init)
{
    void *iobuf = NULL;
    size_t span = 0;
    ssize_t done = 0;
    int idx = 0;

    /* A previous call already detected the problem: do nothing. */
    if ( s_threads_exit )
    {
        return;
    }

    span = ifkey_get_size(key, count);

    iobuf = memalign(s_sectors * 512, span);
    if ( iobuf == NULL )
    {
        errlog("memalign %lu errno:%s\n", span, strerror(errno));
        return;
    }

    memset(iobuf, 'A', span);

    if ( init )
    {
        done = pwrite(fd, iobuf, span, 0);
        if ( done != span )
        {
            errlog("pwrite %ld != %lu\n", done, span);
        }
    }

    for ( idx = 0; idx < count; idx++ )
    {
        ifkey_t *cur = &key[idx];

        if ( cur->op == ifop_en_write )
        {
            memset(iobuf, 'W', cur->size);
            done = pwrite(fd, iobuf, cur->size, cur->offset);
            if ( done != cur->size )
            {
                errlog("pwrite offset:%ld size:%lu errno:%s\n"
                       , cur->offset, cur->size, strerror(errno));
            }
        }
        else if ( cur->op == ifop_en_read )
        {
            /* Poison the destination so an untouched buffer is detectable. */
            memset(iobuf, 'R', cur->size);
            done = syscall(SYS_pread64, fd, iobuf, cur->size, cur->offset);
            if ( done != cur->size )
            {
                errlog("pread offset:%ld size:%lu errno:%s\n"
                       , cur->offset, cur->size, strerror(errno));
            }
            else if ( memcmp("RRRRRRRR", iobuf, 8) == 0 )
            {
                /* Full-length success, yet the poison pattern survived. */
                errlog("pread BUG buf:%p offset:%ld size:%lu "
                       "ret=%ld errno:%s\n"
                       , iobuf, cur->offset, cur->size
                       , done, strerror(errno));

                s_threads_exit = 1;
            }
        }
        else
        {
            errlog("Not support %d op:%d\n", idx, cur->op);
        }
    }

    free(iobuf);
}

/*
 * Worker thread body started by file_test().
 *
 * arg carries the file descriptor packed into the pointer value.  The
 * thread alternates between spawning a trivial shell command with
 * system() and replaying a request list: s_ops three times, then s_read
 * twice, with a system() call before the first replay and after every
 * replay.  Always returns NULL.
 */
void* ifkey_test(void *arg)
{
    static const char shell_cmd[] = "echo aaa >/dev/null";
    enum { OPS_ROUNDS = 3, READ_ROUNDS = 2 };
    int fd = (int)(ulong)arg;
    int round = 0;

    system(shell_cmd);

    for ( round = 0; round < OPS_ROUNDS; round++ )
    {
        ifkey_exec(fd, s_ops, ARRAY_SIZE(s_ops), 0);
        system(shell_cmd);
    }

    for ( round = 0; round < READ_ROUNDS; round++ )
    {
        ifkey_exec(fd, s_read, ARRAY_SIZE(s_read), 0);
        system(shell_cmd);
    }

    return NULL;
}

/*
 * Open `filename` read-only with O_DIRECT and run MAX_USER ifkey_test()
 * threads that all share the resulting descriptor, then wait for them
 * and close the descriptor.
 *
 * filename - path of the file to read.
 *
 * Failures are reported on stderr; nothing is returned.  If a thread
 * cannot be created, no further threads are started and only the threads
 * that were actually created are joined (joining an uninitialized
 * pthread_t would be undefined behaviour).
 */
void file_test(const char *filename)
{
    pthread_t tds[MAX_USER];
    size_t started = 0;
    size_t i = 0;
    int fd = -1;

    fd = open(filename, O_RDONLY | O_DIRECT);
    if ( fd < 0 )
    {
        fprintf(stderr, "%lx open %s failed:%s\n"
                , (unsigned long)pthread_self(), filename, strerror(errno));
        return ;
    }

    for ( started = 0; started < MAX_USER; started++ )
    {
        /* The descriptor travels to the thread inside the pointer value. */
        int rc = pthread_create(&tds[started], NULL, ifkey_test
                                , (void *)(intptr_t)fd);

        if ( rc != 0 )
        {
            /* pthread_create returns the error number; it does not set errno. */
            fprintf(stderr, "%lx pthread_create failed:%s\n"
                    , (unsigned long)pthread_self(), strerror(rc));
            break;
        }
    }

    for ( i = 0; i < started; i++ )
    {
        pthread_join(tds[i], NULL);
    }

    close(fd);
}

/*
 * Thread entry point started by main(): treats arg as the path of the
 * test file, hands it to file_test() and returns NULL.
 */
void* thread_test(void *arg)
{
    file_test((char *)arg);

    return NULL;
}

int main(int argc, char *argv[])
{
int i = 0;
    struct stat stbuf = {0};
pthread_t tds[MAX_BRICK];

if ( argc < 2 )
{
printf("%s filepath [sectors]\n", argv[0]);
return 1;
}

if ( argc >= 3 )
s_sectors = atol(argv[2]);

    if ( stat(argv[1], &stbuf) < 0 )
    {
        errlog("invalid filepath:%s %s\n", argv[1], strerror(errno));
        return -1;
    }
    if ( (S_ISREG(stbuf.st_mode) && stbuf.st_size < 10952704) )
    {
        errlog("invalid file %s size(%ld) < 10952704\n"
                    , argv[1], stbuf.st_size);
        return -1;
    }

printf("threads:%lu align sectors:%d filepath:%s\n"
, MAX_BRICK * MAX_USER, s_sectors, argv[1]);

for ( i = 0; i < MAX_BRICK; i++ )
{
pthread_create(&tds[i], NULL, thread_test, (void*)argv[1]);
}

for ( i = 0; i < MAX_BRICK; i++ )
{
pthread_join(tds[i], NULL);
}

    if ( s_threads_exit )
    {
        printf("ERROR\n");
    }
    else
    {
        printf("OK\n");
    }

return s_threads_exit ? -1 : 0;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ