[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <21dd32c6-f1f9-f44a-466a-e18fdc6788a7@virtuozzo.com>
Date: Thu, 4 Aug 2022 19:30:52 +0300
From: Pavel Tikhomirov <ptikhomirov@...tuozzo.com>
To: Chris Mason <clm@...com>, Josef Bacik <josef@...icpanda.com>,
David Sterba <dsterba@...e.com>
Cc: linux-btrfs@...r.kernel.org, lkml <linux-kernel@...r.kernel.org>,
Chen Liang-Chun <featherclc@...il.com>,
Alexander Mikhalitsyn <alexander.mikhalitsyn@...tuozzo.com>,
kernel@...nvz.org,
Dominique MARTINET <dominique.martinet@...ark-techno.com>,
Yu Kuai <yukuai3@...wei.com>, Theodore Ts'o <tytso@....edu>
Subject: fiemap is slow on btrfs on files with multiple extents
I ran the test below on Fedora 36 (the test basically creates a "very"
sparse file, with 4k of data followed by a 4k hole again and again for the
specified length, and uses fiemap to count the extents in this file) and hit
the problem that fiemap hangs for too long (for instance, compared to the
ext4 version). Fiemap with 32768 extents takes ~37264 us and with 65536
extents it takes ~34123954 us, which is ~1000 times longer even though the
file has only doubled in size:
256 MiB:
./fiemap-reproduce /testfile $((1<<28))
size: 268435456
actual size: 134217728
fiemap: fm_mapped_extents = 32768
time = 37264 us
./fiemap-reproduce /testfile $((1<<28))
size: 268435456
actual size: 134217728
fiemap: fm_mapped_extents = 32768
time = 37285 us
512 MiB:
./fiemap-reproduce /testfile $((1<<29))
size: 536870912
actual size: 268435456
fiemap: fm_mapped_extents = 65536
time = 34123954 us
./fiemap-reproduce /testfile $((1<<29))
size: 536870912
actual size: 268435456
fiemap: fm_mapped_extents = 65536
time = 60404334 us
1 GiB (the whole Fedora system sometimes hangs when I measure it):
./fiemap-reproduce /testfile $((1<<30))
size: 1073741824
actual size: 536870912
fiemap: fm_mapped_extents = 131072
time = 231194793 us
./fiemap-reproduce /testfile $((1<<30))
size: 1073741824
actual size: 536870912
fiemap: fm_mapped_extents = 131072
time = 347867789 us
A similar problem was reported here:
https://lore.kernel.org/linux-btrfs/Yr4nEoNLkXPKcOBi@atmark-techno.com/#r ,
but in my case I am running the Fedora kernel "5.18.6-200.fc36.x86_64",
which does not contain commit 5ccc944dce3d ("filemap: Correct the
conditions for marking a folio as accessed"), so the cause must be
something else.
Some more info:
cat /proc/self/mountinfo | grep btrfs
106 1 0:47 /root / rw,relatime shared:1 - btrfs /dev/nvme0n1p3
rw,compress=zstd:1,ssd,space_cache,subvolid=257,subvol=/root
perf top -ag
Samples: 268K of event 'cycles', 4000 Hz, Event count (approx.):
77250404934 lost: 0/0 drop: 0/0
Children Self Shared Object Symbol
+ 74,25% 1,16% [kernel] [k]
entry_SYSCALL_64_after_hwframe
+ 73,14% 0,65% [kernel] [k] do_syscall_64
+ 53,05% 3,30% libc.so.6 [.] __poll
+ 39,53% 0,76% [kernel] [k] __x64_sys_poll
+ 34,91% 6,44% [kernel] [k] do_sys_poll
+ 29,37% 0,00% [kernel] [k]
__x64_sys_ioctl
+ 29,08% 7,65% [kernel] [k]
count_range_bits
+ 28,44% 0,00% [kernel] [k] do_vfs_ioctl
+ 28,43% 0,00% [kernel] [k] extent_fiemap
+ 28,43% 0,00% [kernel] [k]
btrfs_get_extent_fiemap
+ 27,87% 0,00% libc.so.6 [.] __GI___ioctl
+ 25,89% 0,00% [kernel] [k]
get_extent_skip_holes
+ 21,76% 21,29% [kernel] [k] rb_next
+ 9,50% 0,48% [kernel] [k] perf_poll
+ 8,04% 0,00% libc.so.6 [.]
__libc_start_call_main
+ 6,93% 3,26% [kernel] [k]
select_estimate_accuracy
+ 6,69% 2,15% [kernel] [k] ktime_get_ts64
+ 5,60% 3,99% [kernel] [k]
_raw_spin_lock_irqsave
+ 5,16% 0,40% [kernel] [k] poll_freewait
Here is the fiemap-reproduce.c code:
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <linux/fs.h>
#include <linux/fiemap.h>
#define FILE_INTERVAL (1<<13) /* 8Kb */
/*
 * Return the elapsed time from t1 to t2 in microseconds.
 *
 * The result is negative when t2 is earlier than t1. Both differences are
 * widened to long long before scaling, so the seconds-to-microseconds
 * multiplication cannot overflow on targets where time_t is 32 bits wide.
 */
long long interval(struct timeval t1, struct timeval t2)
{
	long long sec = (long long)t2.tv_sec - (long long)t1.tv_sec;
	long long usec = (long long)t2.tv_usec - (long long)t1.tv_usec;

	return sec * 1000000LL + usec;
}
/*
 * Create a sparse file at argv[1] and time a single FIEMAP ioctl over it.
 *
 * One byte is written at the start of every FILE_INTERVAL-sized stride, so
 * the remainder of each stride is never written. The optional argv[2] is the
 * file size in bytes; it is raised to at least FILE_INTERVAL and rounded
 * down to a multiple of FILE_INTERVAL.
 *
 * Returns 0 on success, 1 on a usage error or when any system call fails.
 */
int main(int argc, char **argv)
{
	/*
	 * Only fm_length is set; fm_start, fm_flags and fm_extent_count stay
	 * zero. With fm_extent_count == 0 no extent array is requested and
	 * the call is used purely to obtain fm_mapped_extents.
	 */
	struct fiemap fiemap = { .fm_length = FIEMAP_MAX_OFFSET };
	struct timeval t1, t2;
	struct stat st;
	off_t file_size = FILE_INTERVAL;
	off_t off;
	char data = 'a';
	int ret = 1;
	int fd;

	if (argc != 2 && argc != 3) {
		printf("usage: %s <path> [size]\n", argv[0]);
		return 1;
	}

	if (argc == 3) {
		char *end;
		long long requested;

		/* strtoll (unlike atoi) reports junk input and overflow. */
		errno = 0;
		requested = strtoll(argv[2], &end, 10);
		if (end == argv[2] || *end != '\0') {
			fprintf(stderr, "invalid size: %s\n", argv[2]);
			return 1;
		}

		file_size = (off_t)requested;
		/* Reject values that do not survive the narrowing to off_t. */
		if (errno == ERANGE || (long long)file_size != requested) {
			fprintf(stderr, "size out of range: %s\n", argv[2]);
			return 1;
		}
	}

	if (file_size < FILE_INTERVAL) {
		file_size = FILE_INTERVAL;
	}
	file_size -= file_size % FILE_INTERVAL;

	fd = open(argv[1], O_RDWR | O_CREAT | O_TRUNC, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	for (off = 0; off < file_size; off += FILE_INTERVAL) {
		if (pwrite(fd, &data, 1, off) != 1) {
			perror("pwrite");
			goto out;
		}
	}

	/* The last write ends short of file_size; extend the file to it. */
	if (ftruncate(fd, file_size)) {
		perror("ftruncate");
		goto out;
	}

	if (fstat(fd, &st) < 0) {
		perror("fstat");
		goto out;
	}

	/* off_t and blkcnt_t have no printf length modifier; cast explicitly. */
	printf("size: %lld\n", (long long)st.st_size);
	printf("actual size: %lld\n", (long long)st.st_blocks * 512);

	gettimeofday(&t1, NULL);
	if (ioctl(fd, FS_IOC_FIEMAP, &fiemap) < 0) {
		perror("fiemap");
		goto out;
	}
	gettimeofday(&t2, NULL);

	printf("fiemap: fm_mapped_extents = %u\n",
	       fiemap.fm_mapped_extents);
	printf("time = %lld us\n", interval(t1, t2));

	ret = 0;
out:
	close(fd);
	return ret;
}
--
Best regards, Tikhomirov Pavel
Software Developer, Virtuozzo.
Powered by blists - more mailing lists