[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20221222035134.3467659-4-ammar.faizi@intel.com>
Date: Thu, 22 Dec 2022 10:51:29 +0700
From: Ammar Faizi <ammarfaizi2@...weeb.org>
To: Willy Tarreau <w@....eu>, Shuah Khan <shuah@...nel.org>,
"Paul E. McKenney" <paulmck@...nel.org>
Cc: Ammar Faizi <ammarfaizi2@...weeb.org>,
Gilang Fachrezy <gilang4321@...il.com>,
VNLX Kernel Department <kernel@...x.org>,
Alviro Iskandar Setiawan <alviro.iskandar@...weeb.org>,
Kanna Scarlet <knscarlet@...weeb.org>,
Muhammad Rizki <kiizuha@...weeb.org>,
GNU/Weeb Mailing List <gwml@...r.gnuweeb.org>,
Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
Linux Kselftest Mailing List
<linux-kselftest@...r.kernel.org>
Subject: [RFC PATCH v1 3/8] nolibc/sys: Implement `getpagesize(2)` function
From: Ammar Faizi <ammarfaizi2@...weeb.org>
This commit adds an implementation of the getpagesize() function.
A getpagesize() syscall doesn't always exist in the Linux syscall
table. Only specific architectures have this syscall.
Implementation detail:
Some architectures use a fixed page size, like x86. We can hard-code
the page size value on such architectures.
Some other architectures may use different page sizes. For example,
Linux aarch64 supports three values of page size: 4K, 16K, and 64K
which are selected at kernel compilation time. The kernel stores the
used page size in the auxiliary vector, which can be read from
/proc/self/auxv; the page size is the value of the AT_PAGESZ entry.
/proc/self/auxv is available on all architectures.
Once we obtain the page size info, cache the value in a static
variable to avoid traversing the auxiliary vector again in the next
getpagesize() call. The page size should never change during kernel
uptime.
Link: https://lwn.net/Articles/519085
Link: https://github.com/torvalds/linux/blob/v6.1/fs/binfmt_elf.c#L260
Signed-off-by: Ammar Faizi <ammarfaizi2@...weeb.org>
---
tools/include/nolibc/sys.h | 114 +++++++++++++++++++++++++++++++++++++
1 file changed, 114 insertions(+)
diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h
index ca348939eb50..e9e3640c36e1 100644
--- a/tools/include/nolibc/sys.h
+++ b/tools/include/nolibc/sys.h
@@ -18,6 +18,7 @@
#include <linux/fs.h>
#include <linux/loop.h>
#include <linux/time.h>
+#include <linux/auxvec.h>
#include "arch.h"
#include "errno.h"
@@ -407,6 +408,119 @@ int getdents64(int fd, struct linux_dirent64 *dirp, int count)
return ret;
}
+/*
+ * The getpagesize() syscall doesn't always exist on the Linux syscall
+ * table. Only specific architectures have this syscall.
+ *
+ * Implementation detail:
+ * Some architectures use a fixed page size, like x86. We can hard-code
+ * the page size value on such architectures.
+ *
+ * Some other architectures may use different page sizes. For example,
+ * Linux aarch64 supports three values of page size: 4K, 16K, and 64K
+ * which are selected at kernel compilation time. The kernel stores the
+ * used page size in the auxiliary vector, which can be read from
+ * /proc/self/auxv; the page size is the value of the AT_PAGESZ entry.
+ * /proc/self/auxv is available on all architectures.
+ *
+ * Once we obtain the page size info, cache the value in a static
+ * variable to avoid traversing the auxiliary vector again in the next
+ * getpagesize() call. The page size should never change during kernel
+ * uptime.
+ *
+ * Link: https://lwn.net/Articles/519085
+ * Link: https://github.com/torvalds/linux/blob/v6.1/fs/binfmt_elf.c#L260
+ *
+ *
+ * long getpagesize(void);
+ *
+ */
+
+#if defined(__x86_64__) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
+static inline __attribute__((unused))
+long getpagesize(void)
+{
+	/*
+	 * Every x86 variant uses 4 KiB pages, so the answer is a
+	 * constant and the auxiliary vector is never consulted.
+	 */
+	return 1L << 12;
+}
+#else
+static int sys_open(const char *path, int flags, mode_t mode);
+static ssize_t sys_read(int fd, void *buf, size_t count);
+
+/*
+ * Find the AT_PAGESZ entry in /proc/self/auxv.
+ *
+ * Returns the page size on success, or a negative errno value on failure
+ * (-ENOENT when the vector holds no AT_PAGESZ entry).
+ */
+static long sys_getpagesize(void)
+{
+	/*
+	 * An auxv entry is a pair of native words (unsigned long): 8 bytes
+	 * on 32-bit targets, 16 on 64-bit ones. A uint64_t pair would
+	 * swallow two entries per read on 32-bit and never match the key.
+	 */
+	unsigned long buf[2] = {0, 0};
+	long ret;
+	int fd;
+
+	fd = sys_open("/proc/self/auxv", O_RDONLY, 0);
+	if (fd < 0)
+		return fd;
+
+	while (1) {
+		ssize_t x;
+
+		x = sys_read(fd, buf, sizeof(buf));
+		if (x < 0) {
+			ret = x;
+			break;
+		}
+
+		/* EOF or a truncated entry: AT_PAGESZ was not found. */
+		if (__builtin_expect(x != (ssize_t)sizeof(buf), 0)) {
+			ret = -ENOENT;
+			break;
+		}
+
+		/* buf[0] is the key, buf[1] is the value. */
+		if (buf[0] == AT_PAGESZ) {
+			ret = (long)buf[1];
+			break;
+		}
+	}
+
+	sys_close(fd);
+	return ret;
+}
+
+__attribute__((unused))
+static long getpagesize(void)
+{
+ static long cached;
+ long ret;
+
+ /*
+ * No need to read the auxv for the second
+ * getpagesize() call.
+ */
+ if (__builtin_expect(cached != 0, 1))
+ return cached;
+
+ ret = sys_getpagesize();
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ } else {
+ cached = ret;
+ }
+ return ret;
+}
+#endif
+
/*
* pid_t getpgid(pid_t pid);
--
Ammar Faizi
Powered by blists - more mailing lists