lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Tue, 4 Aug 2015 19:06:13 +0000
From:	David Muchene <david.muchene@...d.com>
To:	"linux-ext4@...r.kernel.org" <linux-ext4@...r.kernel.org>
Subject: Fsync Performance 


Hi,
I'm not sure if this is the place to ask this; if it isn't, I apologize. We are occasionally seeing fsync take a very long time (sometimes upwards of 3s). We decided to run some fio tests and use systemtap to determine if the disks were the cause of the problem. One result from the tests is that occasionally there is a significant difference between the time spent doing I/O and the total time to complete the fsync. Is there an explanation for this difference, or is the systemtap script bogus? If it is in fact the driver/disks that are taking a long time, does anyone have any suggestions as to how I'd debug that? I appreciate any help you can provide (even if it's pointing me to the relevant documents). 

System Config:
Debian Wheezy,
3.2.6 Kernel, 
Seagate SSDs  ST200FM0073 
Mount options   (rw,relatime,user_xattr,barrier=1,data=ordered)


fio  --time_based --thinktime=125000 --rw=randwrite --write_lat_log=./throwaway/sm_sd --name=sm_sd --fsync=20 --buffered=1 --thinktime_blocks=20 --rate_iops=20 --direct=0 --filename=/avid/mnt/storagemanager0/sm_sd_test --per_job_logs=1 --bs=4k --output=./throwaway/sm_sd --ioengine=libaio --runtime=36000 --iodepth=20 --log_avg_msec=1000 --size=1024m

fio  --time_based --thinktime=16000000 --rw=randwrite --write_lat_log=./throwaway/sd_sm --name=sd_sm --fsync=25 --buffered=1 --thinktime_blocks=25 --rate_iops=25 --direct=0 --filename=/avid/mnt/systemdirector/sm_sd_test --per_job_logs=1 --bs=128k --output=./throwaway/sd_sm --ioengine=libaio --runtime=36000 --iodepth=20 --log_avg_msec=1000 --size=1024m


Results
---------------------------------------------------------------------------
Sync Time (uS),   Max IO Time (uS),   Total IO Time (uS)
        13801,               4402,                 4522
        13923,                  0,                    0
       348697,              88261,                88375
       254846,             170582,               170723
       318862,             317069,               317593

/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////test_sync.stp////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// --- Shared script state (read/written across the probes below) ---
global sync_start = 0 
//submission, io-starting, and io completion...
// Per-bio timestamps keyed by the request's bio pointer:
// dequeue time and completion time, in microseconds.
global sync_blocks_start 
global sync_blocks_end
// Only trace requests issued by this process name.
global PROCNAME = "fio"
// fsync entry/return timestamps (us).
global sync_beg = 0
global sync_end = 0
// do_writepages entry/return timestamps (us) — recorded but not printed.
global wrt_beg  = 0
global wrt_end  = 0
// filemap_write_and_wait_range entry/return timestamps (us) — recorded but not printed.
global fm_beg   = 0
global fm_end   = 0
// Only report fsyncs whose total latency exceeds this many microseconds.
global thresh = 10000
// Longest single-bio service time observed during the current fsync (us).
global max_iotime = 0
// First dequeue / last completion timestamps within the current fsync (us).
global first_io = 0
global last_io = 0


// Return the bi_next field of a struct bio, i.e. the next bio in the
// request's singly linked bio chain (0 when at the end of the chain).
// Embedded C (requires guru mode).  Use kread() so that a stale or
// invalid bio pointer produces a trapped dereference fault instead of
// a kernel crash; CATCH_DEREF_FAULT() supplies the fault handler.
function get_next:long(bio:long)%{
    struct bio *b = (struct bio *)(long)THIS->bio;
    THIS->__retvalue = (long)kread(&(b->bi_next));
    CATCH_DEREF_FAULT();
%}

// Debug helper: print the address of every bio in the chain starting
// at `bio`.  (Currently not called by any probe below.)
// NOTE: the SystemTap script language has no NULL identifier — the
// original `bio_itr != NULL` fails to resolve; compare against 0.
function iter_bio(bio){
    cur = bio
    printf("bio list: [")
    while (cur != 0) {
        printf("%p, ", cur)
        cur = get_next(cur)
    }
    printf("]\n")
}
    

// A request is being handed to the driver.  While an fsync from the
// traced process is in flight, stamp the dequeue time for its bio and
// remember the first such timestamp for the whole fsync.
probe kernel.function("blk_dequeue_request"){
    if (sync_start != 1 || execname() != PROCNAME)
        next
    now = gettimeofday_us()
    sync_blocks_start[$rq->bio] = now
    if (first_io == 0)
        first_io = now
}


// A request completed.  Match it against the dequeue timestamps taken
// above (completion may run in another task's context, so only the
// sync_start flag and map membership are checked, not execname()).
// Track the worst single-request service time and the last completion.
probe kernel.function("blk_account_io_completion"){
    if (sync_start != 1 || !($req->bio in sync_blocks_start))
        next
    done = gettimeofday_us()
    sync_blocks_end[$req->bio] = done
    elapsed = done - sync_blocks_start[$req->bio]
    if (elapsed > max_iotime)
        max_iotime = elapsed
    last_io = done
}

// fsync() entered by the traced process: open the measurement window.
probe syscall.fsync{
    if (execname() != PROCNAME)
        next
    sync_start = 1
    sync_beg = gettimeofday_us()
}
 
// fsync() returned: close the window, compute the span between first
// dequeue and last completion, report if over threshold, then reset
// all per-fsync state.
probe syscall.fsync.return{
    if (execname() != PROCNAME)
        next
    sync_end = gettimeofday_us()
    sync_start = 0
    total_iotime = last_io - first_io
    delete sync_blocks_start
    delete sync_blocks_end
    if ((sync_end - sync_beg) > thresh)
        printf("%d,%d,%d %d,%d, %d, %d\n", sync_beg,sync_end,first_io, last_io, sync_end-sync_beg, max_iotime, total_iotime);
    first_io = 0
    last_io = 0
    max_iotime = 0
    sync_end = 0
}

// Timestamp entry into do_writepages for the traced process
// (collected for ad-hoc inspection; not part of the printed report).
probe kernel.function("do_writepages") {
    if (execname() != PROCNAME)
        next
    wrt_beg = gettimeofday_us()
}

// Timestamp entry into filemap_write_and_wait_range for the traced
// process (collected for ad-hoc inspection; not printed).
probe kernel.function("filemap_write_and_wait_range"){
    if (execname() != PROCNAME)
        next
    fm_beg = gettimeofday_us()
}

// Timestamp return from filemap_write_and_wait_range for the traced
// process (collected for ad-hoc inspection; not printed).
probe kernel.function("filemap_write_and_wait_range").return{
    if (execname() != PROCNAME)
        next
    fm_end = gettimeofday_us()
}

// Timestamp return from do_writepages for the traced process
// (collected for ad-hoc inspection; not printed).
probe kernel.function("do_writepages").return{
    if (execname() != PROCNAME)
        next
    wrt_end = gettimeofday_us()
}

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ