lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <21dd32c6-f1f9-f44a-466a-e18fdc6788a7@virtuozzo.com>
Date:   Thu, 4 Aug 2022 19:30:52 +0300
From:   Pavel Tikhomirov <ptikhomirov@...tuozzo.com>
To:     Chris Mason <clm@...com>, Josef Bacik <josef@...icpanda.com>,
        David Sterba <dsterba@...e.com>
Cc:     linux-btrfs@...r.kernel.org, lkml <linux-kernel@...r.kernel.org>,
        Chen Liang-Chun <featherclc@...il.com>,
        Alexander Mikhalitsyn <alexander.mikhalitsyn@...tuozzo.com>,
        kernel@...nvz.org,
        Dominique MARTINET <dominique.martinet@...ark-techno.com>,
        Yu Kuai <yukuai3@...wei.com>, Theodore Ts'o <tytso@....edu>
Subject: fiemap is slow on btrfs on files with multiple extents

I ran the test below on Fedora 36 (the test creates a very sparse file, 
with 4k of data followed by a 4k hole, repeated for the specified 
length, and uses fiemap to count the extents in this file) and found 
that fiemap hangs for far too long (for instance, compared to the same 
test on ext4). Fiemap with 32768 extents takes ~37264 us and with 65536 
extents it takes ~34123954 us, which is roughly 1000 times longer even 
though the file only doubled in size:

256Mb:

./fiemap-reproduce /testfile $((1<<28))
size: 268435456
actual size: 134217728
fiemap: fm_mapped_extents = 32768
time = 37264 us

./fiemap-reproduce /testfile $((1<<28))
size: 268435456
actual size: 134217728
fiemap: fm_mapped_extents = 32768
time = 37285 us

512Mb:

./fiemap-reproduce /testfile $((1<<29))
size: 536870912
actual size: 268435456
fiemap: fm_mapped_extents = 65536
time = 34123954 us

./fiemap-reproduce /testfile $((1<<29))
size: 536870912
actual size: 268435456
fiemap: fm_mapped_extents = 65536
time = 60404334 us

1Gb (the whole Fedora system sometimes hangs while I measure this):

./fiemap-reproduce /testfile $((1<<30))
size: 1073741824
actual size: 536870912
fiemap: fm_mapped_extents = 131072
time = 231194793 us

./fiemap-reproduce /testfile $((1<<30))
size: 1073741824
actual size: 536870912
fiemap: fm_mapped_extents = 131072
time = 347867789 us

I found a similar problem reported here: 
https://lore.kernel.org/linux-btrfs/Yr4nEoNLkXPKcOBi@atmark-techno.com/#r , 
but I am running the Fedora kernel "5.18.6-200.fc36.x86_64", which does 
not contain commit 5ccc944dce3d ("filemap: Correct the conditions for 
marking a folio as accessed"), so the cause must be something else.

Some more info:

cat /proc/self/mountinfo | grep btrfs
106 1 0:47 /root / rw,relatime shared:1 - btrfs /dev/nvme0n1p3 
rw,compress=zstd:1,ssd,space_cache,subvolid=257,subvol=/root

perf top -ag
Samples: 268K of event 'cycles', 4000 Hz, Event count (approx.): 
77250404934 lost: 0/0 drop: 0/0
   Children      Self  Shared Object                       Symbol
+   74,25%     1,16%  [kernel]                            [k] 
entry_SYSCALL_64_after_hwframe
+   73,14%     0,65%  [kernel]                            [k] do_syscall_64
+   53,05%     3,30%  libc.so.6                           [.] __poll
+   39,53%     0,76%  [kernel]                            [k] __x64_sys_poll
+   34,91%     6,44%  [kernel]                            [k] do_sys_poll
+   29,37%     0,00%  [kernel]                            [k] 
__x64_sys_ioctl
+   29,08%     7,65%  [kernel]                            [k] 
count_range_bits
+   28,44%     0,00%  [kernel]                            [k] do_vfs_ioctl
+   28,43%     0,00%  [kernel]                            [k] extent_fiemap
+   28,43%     0,00%  [kernel]                            [k] 
btrfs_get_extent_fiemap
+   27,87%     0,00%  libc.so.6                           [.] __GI___ioctl
+   25,89%     0,00%  [kernel]                            [k] 
get_extent_skip_holes
+   21,76%    21,29%  [kernel]                            [k] rb_next
+    9,50%     0,48%  [kernel]                            [k] perf_poll
+    8,04%     0,00%  libc.so.6                           [.] 
__libc_start_call_main
+    6,93%     3,26%  [kernel]                            [k] 
select_estimate_accuracy
+    6,69%     2,15%  [kernel]                            [k] ktime_get_ts64
+    5,60%     3,99%  [kernel]                            [k] 
_raw_spin_lock_irqsave
+    5,16%     0,40%  [kernel]                            [k] poll_freewait

Here is the code of fiemap-reproduce.c:

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <fcntl.h>

#include <sys/stat.h>
#include <sys/time.h>
#include <sys/ioctl.h>

#include <linux/fs.h>
#include <linux/fiemap.h>

#define FILE_INTERVAL (1<<13) /* 8 KiB: stride between the single-byte writes in main(); the file size is rounded down to a multiple of it */

/*
 * Return the time elapsed from t1 to t2, in microseconds.
 *
 * The result is negative when t2 is earlier than t1. Both differences are
 * widened to long long before the multiplication so the computation cannot
 * overflow on platforms where time_t is narrower than 64 bits.
 */
long long interval(struct timeval t1, struct timeval t2)
{
         long long sec = (long long)t2.tv_sec - (long long)t1.tv_sec;
         long long usec = (long long)t2.tv_usec - (long long)t1.tv_usec;

         return sec * 1000000LL + usec;
}

/*
 * Usage: fiemap-reproduce <path> [size]
 *
 * Creates (or truncates) <path>, writes one byte at every FILE_INTERVAL
 * offset up to [size] bytes, then issues a single FS_IOC_FIEMAP ioctl over
 * the whole file and prints the number of mapped extents together with the
 * wall-clock time the ioctl took.
 *
 * [size] defaults to FILE_INTERVAL, is raised to FILE_INTERVAL when smaller,
 * and is rounded down to a multiple of FILE_INTERVAL.
 *
 * Returns 0 on success and 1 on a usage error or a failed system call.
 */
int main(int argc, char **argv) {
         /* fm_extent_count stays 0, so no extent array is passed to the ioctl. */
         struct fiemap fiemap = {0};
         struct timeval t1, t2;
         char data = 'a';
         struct stat st;
         /* off_t rather than int, so sizes of 2 GiB and above are representable. */
         off_t off, file_size = FILE_INTERVAL;
         int fd;

         if (argc != 3 && argc != 2) {
                 printf("usage: %s <path> [size]\n", argv[0]);
                 return 1;
         }

         if (argc == 3) {
                 char *end;
                 long long requested = strtoll(argv[2], &end, 10);

                 /* Unlike atoi(), reject input that is not a plain number. */
                 if (end == argv[2] || *end != '\0') {
                         printf("invalid size: %s\n", argv[2]);
                         printf("usage: %s <path> [size]\n", argv[0]);
                         return 1;
                 }
                 file_size = requested;
         }
         if (file_size < FILE_INTERVAL)
                 file_size = FILE_INTERVAL;
         file_size -= file_size % FILE_INTERVAL;

         fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, 0644);
         if (fd < 0) {
                 perror("open");
                 return 1;
         }

         /* One byte per FILE_INTERVAL; the rest of each interval is never written. */
         for (off = 0; off < file_size; off += FILE_INTERVAL) {
                 if (pwrite(fd, &data, 1, off) != 1) {
                         perror("pwrite");
                         close(fd);
                         return 1;
                 }
         }

         /* Extend the file so it ends exactly at file_size. */
         if (ftruncate(fd, file_size)) {
                 perror("ftruncate");
                 close(fd);
                 return 1;
         }

         if (fstat(fd, &st) < 0) {
                 perror("fstat");
                 close(fd);
                 return 1;
         }

         /* Cast explicitly: the widths of off_t and blkcnt_t vary by platform. */
         printf("size: %lld\n", (long long)st.st_size);
         printf("actual size: %lld\n", (long long)st.st_blocks * 512);

         fiemap.fm_length = FIEMAP_MAX_OFFSET;
         gettimeofday(&t1, NULL);
         if (ioctl(fd, FS_IOC_FIEMAP, &fiemap) < 0) {
                 perror("fiemap");
                 close(fd);
                 return 1;
         }
         gettimeofday(&t2, NULL);

         /* fm_mapped_extents is an unsigned 32-bit field. */
         printf("fiemap: fm_mapped_extents = %u\n",
                fiemap.fm_mapped_extents);
         printf("time = %lld us\n", interval(t1, t2));

         close(fd);
         return 0;
}

-- 
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ