[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <b8a8d44e5b81c93598caee82254320507142d4be.1748594841.git.libo.gcs85@bytedance.com>
Date: Fri, 30 May 2025 17:28:03 +0800
From: Bo Li <libo.gcs85@...edance.com>
To: tglx@...utronix.de,
mingo@...hat.com,
bp@...en8.de,
dave.hansen@...ux.intel.com,
x86@...nel.org,
luto@...nel.org,
kees@...nel.org,
akpm@...ux-foundation.org,
david@...hat.com,
juri.lelli@...hat.com,
vincent.guittot@...aro.org,
peterz@...radead.org
Cc: dietmar.eggemann@....com,
hpa@...or.com,
acme@...nel.org,
namhyung@...nel.org,
mark.rutland@....com,
alexander.shishkin@...ux.intel.com,
jolsa@...nel.org,
irogers@...gle.com,
adrian.hunter@...el.com,
kan.liang@...ux.intel.com,
viro@...iv.linux.org.uk,
brauner@...nel.org,
jack@...e.cz,
lorenzo.stoakes@...cle.com,
Liam.Howlett@...cle.com,
vbabka@...e.cz,
rppt@...nel.org,
surenb@...gle.com,
mhocko@...e.com,
rostedt@...dmis.org,
bsegall@...gle.com,
mgorman@...e.de,
vschneid@...hat.com,
jannh@...gle.com,
pfalcato@...e.de,
riel@...riel.com,
harry.yoo@...cle.com,
linux-kernel@...r.kernel.org,
linux-perf-users@...r.kernel.org,
linux-fsdevel@...r.kernel.org,
linux-mm@...ck.org,
duanxiongchun@...edance.com,
yinhongbo@...edance.com,
dengliang.1214@...edance.com,
xieyongji@...edance.com,
chaiwen.cc@...edance.com,
songmuchun@...edance.com,
yuanzhu@...edance.com,
chengguozhu@...edance.com,
sunjiadong.lff@...edance.com,
Bo Li <libo.gcs85@...edance.com>
Subject: [RFC v2 35/35] samples/rpal: add RPAL samples
Add test samples for RPAL (with librpal included). Compile via:
cd samples/rpal && make
And run it using the following command:
./server & ./client
Example output:
EPOLL: Message length: 32 bytes, Total TSC cycles: 16439927066,
Message count: 1000000, Average latency: 16439 cycles
RPAL: Message length: 32 bytes, Total TSC cycles: 2197479484,
Message count: 1000000, Average latency: 2197 cycles
Signed-off-by: Bo Li <libo.gcs85@...edance.com>
---
samples/rpal/Makefile | 17 +
samples/rpal/asm_define.c | 14 +
samples/rpal/client.c | 178 ++
samples/rpal/librpal/asm_define.h | 6 +
samples/rpal/librpal/asm_x86_64_rpal_call.S | 57 +
samples/rpal/librpal/debug.h | 12 +
samples/rpal/librpal/fiber.c | 119 +
samples/rpal/librpal/fiber.h | 64 +
.../rpal/librpal/jump_x86_64_sysv_elf_gas.S | 81 +
.../rpal/librpal/make_x86_64_sysv_elf_gas.S | 82 +
.../rpal/librpal/ontop_x86_64_sysv_elf_gas.S | 84 +
samples/rpal/librpal/private.h | 341 +++
samples/rpal/librpal/rpal.c | 2351 +++++++++++++++++
samples/rpal/librpal/rpal.h | 149 ++
samples/rpal/librpal/rpal_pkru.h | 78 +
samples/rpal/librpal/rpal_queue.c | 239 ++
samples/rpal/librpal/rpal_queue.h | 55 +
samples/rpal/librpal/rpal_x86_64_call_ret.S | 45 +
samples/rpal/offset.sh | 5 +
samples/rpal/server.c | 249 ++
20 files changed, 4226 insertions(+)
create mode 100644 samples/rpal/Makefile
create mode 100644 samples/rpal/asm_define.c
create mode 100644 samples/rpal/client.c
create mode 100644 samples/rpal/librpal/asm_define.h
create mode 100644 samples/rpal/librpal/asm_x86_64_rpal_call.S
create mode 100644 samples/rpal/librpal/debug.h
create mode 100644 samples/rpal/librpal/fiber.c
create mode 100644 samples/rpal/librpal/fiber.h
create mode 100644 samples/rpal/librpal/jump_x86_64_sysv_elf_gas.S
create mode 100644 samples/rpal/librpal/make_x86_64_sysv_elf_gas.S
create mode 100644 samples/rpal/librpal/ontop_x86_64_sysv_elf_gas.S
create mode 100644 samples/rpal/librpal/private.h
create mode 100644 samples/rpal/librpal/rpal.c
create mode 100644 samples/rpal/librpal/rpal.h
create mode 100644 samples/rpal/librpal/rpal_pkru.h
create mode 100644 samples/rpal/librpal/rpal_queue.c
create mode 100644 samples/rpal/librpal/rpal_queue.h
create mode 100644 samples/rpal/librpal/rpal_x86_64_call_ret.S
create mode 100755 samples/rpal/offset.sh
create mode 100644 samples/rpal/server.c
diff --git a/samples/rpal/Makefile b/samples/rpal/Makefile
new file mode 100644
index 000000000000..25627a970028
--- /dev/null
+++ b/samples/rpal/Makefile
@@ -0,0 +1,17 @@
+.PHONY: rpal
+
+all: server client offset
+
+offset: asm_define.c
+ $(shell ./offset.sh)
+
+server: server.c librpal/*.c librpal/*.S
+ $(CC) $^ -lpthread -g -o $@
+ @printf "RPAL" | dd of=./server bs=1 count=4 conv=notrunc seek=12
+
+client: client.c librpal/*.c librpal/*.S
+ $(CC) $^ -lpthread -g -o $@
+ @printf "RPAL" | dd of=./client bs=1 count=4 conv=notrunc seek=12
+
+clean:
+ rm server client
diff --git a/samples/rpal/asm_define.c b/samples/rpal/asm_define.c
new file mode 100644
index 000000000000..6f7731ebc870
--- /dev/null
+++ b/samples/rpal/asm_define.c
@@ -0,0 +1,14 @@
+#include <stddef.h>
+#include "librpal/private.h"
+
+#define DEFINE(sym, val) asm volatile("\n-> " #sym " %0 " #val "\n" : : "i" (val))
+
+static void common(void)
+{
+ DEFINE(RCI_SENDER_TLS_BASE, offsetof(rpal_call_info_t, sender_tls_base));
+ DEFINE(RCI_SENDER_FCTX, offsetof(rpal_call_info_t, sender_fctx));
+ DEFINE(RCI_PKRU, offsetof(rpal_call_info_t, pkru));
+ DEFINE(RC_SENDER_STATE, offsetof(receiver_context_t, sender_state));
+ DEFINE(RET_BEGIN, offsetof(critical_section_t, ret_begin));
+ DEFINE(RET_END, offsetof(critical_section_t, ret_end));
+}
diff --git a/samples/rpal/client.c b/samples/rpal/client.c
new file mode 100644
index 000000000000..2c4a9eb6115e
--- /dev/null
+++ b/samples/rpal/client.c
@@ -0,0 +1,178 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <x86intrin.h>
+#include "librpal/rpal.h"
+
+#define SOCKET_PATH "/tmp/rpal_socket"
+#define BUFFER_SIZE 1025
+#define MSG_NUM 1000000
+#define MSG_LEN 32
+
+char hello[BUFFER_SIZE];
+char buffer[BUFFER_SIZE] = { 0 };
+
+int remote_id;
+uint64_t remote_sidfd;
+
+#define INIT_MSG "INIT"
+#define SUCC_MSG "SUCC"
+#define FAIL_MSG "FAIL"
+
+#define handle_error(s) \
+ do { \
+ perror(s); \
+ exit(EXIT_FAILURE); \
+ } while (0)
+
+int rpal_epoll_add(int epfd, int fd)
+{
+ struct epoll_event ev;
+
+ ev.events = EPOLLRPALIN | EPOLLIN | EPOLLRDHUP | EPOLLET;
+ ev.data.fd = fd;
+
+ return rpal_epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev);
+}
+
+void rpal_client_init(int fd)
+{
+ struct epoll_event ev;
+ char buffer[BUFFER_SIZE];
+ rpal_error_code_t err;
+ uint64_t remote_key, service_key;
+ int epoll_fd;
+ int proc_fd;
+ int ret;
+
+ proc_fd = rpal_init(1, 0, &err);
+ if (proc_fd < 0)
+ handle_error("rpal init fail");
+ rpal_get_service_key(&service_key);
+
+ strcpy(buffer, INIT_MSG);
+ *(uint64_t *)(buffer + strlen(INIT_MSG)) = service_key;
+ ret = write(fd, buffer, strlen(INIT_MSG) + sizeof(uint64_t));
+ if (ret < 0)
+ handle_error("write key");
+
+ ret = read(fd, buffer, BUFFER_SIZE);
+ if (ret < 0)
+ handle_error("read key");
+
+ memcpy(&remote_key, buffer, sizeof(remote_key));
+ if (remote_key == 0)
+ handle_error("remote down");
+
+ ret = rpal_request_service(remote_key);
+ if (ret) {
+ write(fd, FAIL_MSG, strlen(FAIL_MSG));
+ handle_error("request");
+ }
+
+ ret = write(fd, SUCC_MSG, strlen(SUCC_MSG));
+ if (ret < 0)
+ handle_error("handshake");
+
+ remote_id = rpal_get_request_service_id(remote_key);
+ rpal_sender_init(&err);
+
+ epoll_fd = epoll_create(1024);
+ if (epoll_fd == -1) {
+ perror("epoll_create");
+ exit(EXIT_FAILURE);
+ }
+ rpal_epoll_add(epoll_fd, fd);
+
+ sleep(3); // wait for the server to reach epoll_wait before mapping fds
+ ret = rpal_uds_fdmap(((unsigned long)remote_id << 32) | fd,
+ &remote_sidfd);
+ if (ret < 0)
+ handle_error("uds fdmap fail");
+}
+
+int run_rpal_client(int msg_len)
+{
+ ssize_t valread;
+ int sock = 0;
+ struct sockaddr_un serv_addr;
+ int count = MSG_NUM;
+ int ret;
+
+ if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
+ perror("socket creation error");
+ return -1;
+ }
+
+ memset(&serv_addr, 0, sizeof(serv_addr));
+ serv_addr.sun_family = AF_UNIX;
+ strncpy(serv_addr.sun_path, SOCKET_PATH, sizeof(SOCKET_PATH));
+
+ if (connect(sock, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) <
+ 0) {
+ perror("Connection Failed");
+ return -1;
+ }
+ rpal_client_init(sock);
+
+ while (count) {
+ for (int i = 18; i < msg_len; i++)
+ hello[i] = 'a' + i % 26;
+ sprintf(hello, "0x%016lx", __rdtsc());
+ ret = rpal_write_ptrs(remote_id, remote_sidfd, (int64_t *)hello,
+ msg_len / sizeof(int64_t *));
+ valread = read(sock, buffer, BUFFER_SIZE);
+ if (memcmp(hello, buffer, msg_len) != 0)
+ perror("data error");
+ count--;
+ }
+
+ close(sock);
+}
+
+int run_client(int msg_len)
+{
+ ssize_t valread;
+ int sock = 0;
+ struct sockaddr_un serv_addr;
+ int count = MSG_NUM;
+
+ if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
+ perror("socket creation error");
+ return -1;
+ }
+
+ memset(&serv_addr, 0, sizeof(serv_addr));
+ serv_addr.sun_family = AF_UNIX;
+ strncpy(serv_addr.sun_path, SOCKET_PATH, sizeof(SOCKET_PATH));
+
+ if (connect(sock, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) <
+ 0) {
+ perror("Connection Failed");
+ return -1;
+ }
+
+ while (count) {
+ for (int i = 18; i < msg_len; i++)
+ hello[i] = 'a' + i % 26;
+ sprintf(hello, "0x%016lx", __rdtsc());
+ send(sock, hello, msg_len, 0);
+ valread = read(sock, buffer, BUFFER_SIZE);
+ if (memcmp(hello, buffer, msg_len) != 0)
+ perror("data error");
+ count--;
+ }
+
+ close(sock);
+}
+
+int main()
+{
+ run_client(MSG_LEN);
+ run_rpal_client(MSG_LEN);
+
+ return 0;
+}
diff --git a/samples/rpal/librpal/asm_define.h b/samples/rpal/librpal/asm_define.h
new file mode 100644
index 000000000000..bc57586cda58
--- /dev/null
+++ b/samples/rpal/librpal/asm_define.h
@@ -0,0 +1,6 @@
+#define RCI_SENDER_TLS_BASE 0
+#define RCI_SENDER_FCTX 16
+#define RCI_PKRU 8
+#define RC_SENDER_STATE 72
+#define RET_BEGIN 0
+#define RET_END 8
diff --git a/samples/rpal/librpal/asm_x86_64_rpal_call.S b/samples/rpal/librpal/asm_x86_64_rpal_call.S
new file mode 100644
index 000000000000..538e8ac5f09b
--- /dev/null
+++ b/samples/rpal/librpal/asm_x86_64_rpal_call.S
@@ -0,0 +1,57 @@
+#ifdef __x86_64__
+#define __ASSEMBLY__
+#include "asm_define.h"
+
+.text
+.globl rpal_access_warpper
+.type rpal_access_warpper,@function
+.align 16
+
+rpal_access_warpper:
+ pushq %r12
+ pushq %r13
+ pushq %r14
+ pushq %r15
+ pushq %rbx
+ pushq %rbp
+
+ leaq -0x8(%rsp), %rsp
+ stmxcsr (%rsp)
+ fnstcw 0x4(%rsp)
+
+ pushq %rsp // Save rsp which may be unaligned.
+ pushq (%rsp) // Save the original value again
+ andq $-16, %rsp // Align stack to 16bytes - SysV AMD64 ABI.
+
+ movq %rsp, (%rdi)
+ call rpal_access@plt
+retip:
+ movq 8(%rsp), %rsp // Restore the potentially unaligned stack
+ ldmxcsr (%rsp)
+ fldcw 0x4(%rsp)
+ leaq 0x8(%rsp), %rsp
+
+ popq %rbp
+ popq %rbx
+ popq %r15
+ popq %r14
+ popq %r13
+ popq %r12
+ ret
+
+.size rpal_access_warpper,.-rpal_access_warpper
+
+
+
+.globl rpal_get_ret_rip
+.type rpal_get_ret_rip, @function
+.align 16
+rpal_get_ret_rip:
+ leaq retip(%rip), %rax
+ ret
+
+.size rpal_get_ret_rip,.-rpal_get_ret_rip
+
+/* Mark that we don't need executable stack. */
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/samples/rpal/librpal/debug.h b/samples/rpal/librpal/debug.h
new file mode 100644
index 000000000000..10d2fef8d69a
--- /dev/null
+++ b/samples/rpal/librpal/debug.h
@@ -0,0 +1,12 @@
+#ifndef RPAL_DEBUG_H
+#define RPAL_DEBUG_H
+
+typedef enum {
+ RPAL_DEBUG_MANAGEMENT = (1 << 0),
+ RPAL_DEBUG_SENDER = (1 << 1),
+ RPAL_DEBUG_RECVER = (1 << 2),
+ RPAL_DEBUG_FIBER = (1 << 3),
+
+ __RPAL_DEBUG_ALL = ~(0ULL),
+} rpal_debug_flag_t;
+#endif
diff --git a/samples/rpal/librpal/fiber.c b/samples/rpal/librpal/fiber.c
new file mode 100644
index 000000000000..2141ad9ab770
--- /dev/null
+++ b/samples/rpal/librpal/fiber.c
@@ -0,0 +1,119 @@
+#ifdef __x86_64__
+#include "debug.h"
+#include "fiber.h"
+#include "private.h"
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/mman.h>
+
+#define RPAL_CHECK_FAIL -1
+#define STACK_DEBUG 1
+
+static task_t *make_fiber_ctx(task_t *fc)
+{
+ fc->fctx = make_fcontext(fc->sp, 0, NULL);
+ return fc;
+}
+
+static task_t *fiber_ctx_create(void (*fn)(void *ud), void *ud, void *stack,
+ size_t size)
+{
+ task_t *fc;
+ int i;
+
+ if (stack == NULL)
+ return NULL;
+
+ fc = (task_t *)stack;
+ fc->fn = fn;
+ fc->ud = ud;
+ fc->size = size;
+ fc->sp = stack + size;
+ for (i = 0; i < NR_PADDING; ++i) {
+ fc->padding[i] = 0xdeadbeef;
+ }
+
+ return make_fiber_ctx(fc);
+}
+
+task_t *fiber_ctx_alloc(void (*fn)(void *ud), void *ud, size_t size)
+{
+ void *stack;
+ size_t stack_size;
+ size_t total_size;
+ void *lower_guard;
+ void *upper_guard;
+
+ if (PAGE_SIZE == 4096 || STACK_DEBUG) {
+ stack_size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+
+ dbprint(RPAL_DEBUG_FIBER,
+ "fiber_ctx_alloc: stack size adjusted from %lu to %lu\n",
+ size, stack_size);
+
+ // Allocate a stack using mmap with 2 extra pages, 1 at each end
+ // which will be PROT_NONE to act as guard pages to catch overflow
+ // and underflow. This will result in a SIGSEGV but should make it
+ // easier to catch a stack that is too small (or underflows).
+ //
+ // Notes:
+ //
+ // 1. On ARM64 with 64K pages this would be quite wasteful of memory
+ // so it is behind a DEBUG flag to enable/disable on that platform.
+ //
+ // 2. If the requested stack size is not a multiple of a page size
+ // then stack underflow won't always be caught as there is some
+ // extra space up until the next page boundary with the guard page.
+ //
+ // 3. The task_t is placed at the top of the stack so can be overwritten
+ // just before the stack overflows and hits the guard page.
+ //
+
+ total_size = stack_size + (PAGE_SIZE * 2);
+ lower_guard = mmap(NULL, total_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (lower_guard == MAP_FAILED) {
+ errprint("mmap of %lu bytes failed: %s\n", total_size,
+ strerror(errno));
+ return NULL;
+ }
+
+ stack = lower_guard + PAGE_SIZE;
+ upper_guard = stack + stack_size;
+ mprotect(lower_guard, PAGE_SIZE, PROT_NONE);
+ mprotect(upper_guard, PAGE_SIZE, PROT_NONE);
+
+ dbprint(RPAL_DEBUG_FIBER,
+ "Total stack of size %lu bytes allocated @ %p\n",
+ total_size, stack);
+ dbprint(RPAL_DEBUG_FIBER,
+ "Underflow guard page %p - %p overflow guard page %p - %p\n",
+ lower_guard, lower_guard + PAGE_SIZE - 1, upper_guard,
+ upper_guard + PAGE_SIZE - 1);
+ } else {
+ stack = malloc(size);
+ }
+ return fiber_ctx_create(fn, ud, stack, size);
+}
+
+void fiber_ctx_free(task_t *fc)
+{
+ size_t stack_size;
+ size_t total_size;
+ void *addr;
+
+ if (STACK_DEBUG) {
+ stack_size = (fc->size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+ total_size = stack_size + (PAGE_SIZE * 2);
+ addr = fc;
+ addr -= PAGE_SIZE;
+ if (munmap(addr, total_size) != 0) {
+ errprint("munmap of %lu bytes @ %p failed: %s\n",
+ total_size, addr, strerror(errno));
+ }
+ } else {
+ free(fc);
+ }
+}
+#endif
diff --git a/samples/rpal/librpal/fiber.h b/samples/rpal/librpal/fiber.h
new file mode 100644
index 000000000000..b46485ba740f
--- /dev/null
+++ b/samples/rpal/librpal/fiber.h
@@ -0,0 +1,64 @@
+#ifndef FIBER_H
+#define FIBER_H
+
+#include <stdlib.h>
+
+typedef void *fcontext_t;
+typedef struct {
+ fcontext_t fctx;
+ void *ud;
+} transfer_t;
+
+typedef struct fiber_stack {
+ unsigned long padding;
+ unsigned long r12;
+ unsigned long r13;
+ unsigned long r14;
+ unsigned long r15;
+ unsigned long rbx;
+ unsigned long rbp;
+ unsigned long rip;
+} fiber_stack_t;
+
+#define NR_PADDING 8
+typedef struct fiber_ctx {
+ void *sp;
+ size_t size;
+ void (*fn)(void *fc);
+ void *ud;
+ fcontext_t fctx;
+ int padding[NR_PADDING];
+} task_t;
+
+task_t *fiber_ctx_alloc(void (*fn)(void *ud), void *ud, size_t size);
+void fiber_ctx_free(task_t *fc);
+
+/**
+ * @brief Make a context for jump_fcontext.
+ *
+ * @param sp The stack top pointer of context.
+ * @param size The size of the stack; this argument is unused, but a second argument is necessary.
+ * @param fn The function pointer of the context function.
+ *
+ * @return The pointer of the newly made context.
+ */
+extern fcontext_t make_fcontext(void *sp, size_t size, void (*fn)(transfer_t));
+
+/**
+ * @brief jump to target context and execute fn with argument ud
+ *
+ * @param to The pointer of target context.
+ * @param ud The data part of the argument of fn.
+ *
+ * @return the pointer of the previous transfer_t struct, where RAX stores the
+ * previous context and RDX stores the ud passed by the previous caller.
+ */
+extern transfer_t jump_fcontext(fcontext_t const to, void *ud);
+
+/**
+ * @brief To be written.
+ */
+extern transfer_t ontop_fcontext(fcontext_t const to, void *ud,
+ transfer_t (*fn)(transfer_t));
+
+#endif
diff --git a/samples/rpal/librpal/jump_x86_64_sysv_elf_gas.S b/samples/rpal/librpal/jump_x86_64_sysv_elf_gas.S
new file mode 100644
index 000000000000..43d3a8149c58
--- /dev/null
+++ b/samples/rpal/librpal/jump_x86_64_sysv_elf_gas.S
@@ -0,0 +1,81 @@
+/*
+ Copyright Oliver Kowalke 2009.
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at
+ http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+/****************************************************************************************
+ * *
+ * ---------------------------------------------------------------------------------- *
+ * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
+ * ---------------------------------------------------------------------------------- *
+ * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | *
+ * ---------------------------------------------------------------------------------- *
+ * | fc_mxcsr|fc_x87_cw| R12 | R13 | R14 | *
+ * ---------------------------------------------------------------------------------- *
+ * ---------------------------------------------------------------------------------- *
+ * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
+ * ---------------------------------------------------------------------------------- *
+ * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | *
+ * ---------------------------------------------------------------------------------- *
+ * | R15 | RBX | RBP | RIP | *
+ * ---------------------------------------------------------------------------------- *
+ * *
+ ****************************************************************************************/
+#ifdef __x86_64__
+.text
+.globl jump_fcontext
+.type jump_fcontext,@function
+.align 16
+jump_fcontext:
+ leaq -0x38(%rsp), %rsp /* prepare stack */
+
+#if !defined(BOOST_USE_TSX)
+ stmxcsr (%rsp) /* save MMX control- and status-word */
+ fnstcw 0x4(%rsp) /* save x87 control-word */
+#endif
+
+ movq %r12, 0x8(%rsp) /* save R12 */
+ movq %r13, 0x10(%rsp) /* save R13 */
+ movq %r14, 0x18(%rsp) /* save R14 */
+ movq %r15, 0x20(%rsp) /* save R15 */
+ movq %rbx, 0x28(%rsp) /* save RBX */
+ movq %rbp, 0x30(%rsp) /* save RBP */
+
+ /* store RSP (pointing to context-data) in RAX */
+ movq %rsp, %rax
+
+ /* restore RSP (pointing to context-data) from RDI */
+ movq %rdi, %rsp
+
+ movq 0x38(%rsp), %r8 /* restore return-address */
+
+#if !defined(BOOST_USE_TSX)
+ ldmxcsr (%rsp) /* restore MMX control- and status-word */
+ fldcw 0x4(%rsp) /* restore x87 control-word */
+#endif
+
+ movq 0x8(%rsp), %r12 /* restore R12 */
+ movq 0x10(%rsp), %r13 /* restore R13 */
+ movq 0x18(%rsp), %r14 /* restore R14 */
+ movq 0x20(%rsp), %r15 /* restore R15 */
+ movq 0x28(%rsp), %rbx /* restore RBX */
+ movq 0x30(%rsp), %rbp /* restore RBP */
+
+ leaq 0x40(%rsp), %rsp /* prepare stack */
+
+ /* return transfer_t from jump */
+ /* RAX == fctx, RDX == data */
+ movq %rsi, %rdx
+ /* pass transfer_t as first arg in context function */
+ /* RDI == fctx, RSI == data */
+ movq %rax, %rdi
+
+ /* indirect jump to context */
+ jmp *%r8
+.size jump_fcontext,.-jump_fcontext
+
+/* Mark that we don't need executable stack. */
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/samples/rpal/librpal/make_x86_64_sysv_elf_gas.S b/samples/rpal/librpal/make_x86_64_sysv_elf_gas.S
new file mode 100644
index 000000000000..4f3af9247110
--- /dev/null
+++ b/samples/rpal/librpal/make_x86_64_sysv_elf_gas.S
@@ -0,0 +1,82 @@
+/*
+ Copyright Oliver Kowalke 2009.
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at
+ http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+/****************************************************************************************
+ * *
+ * ---------------------------------------------------------------------------------- *
+ * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
+ * ---------------------------------------------------------------------------------- *
+ * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | *
+ * ---------------------------------------------------------------------------------- *
+ * | fc_mxcsr|fc_x87_cw| R12 | R13 | R14 | *
+ * ---------------------------------------------------------------------------------- *
+ * ---------------------------------------------------------------------------------- *
+ * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
+ * ---------------------------------------------------------------------------------- *
+ * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | *
+ * ---------------------------------------------------------------------------------- *
+ * | R15 | RBX | RBP | RIP | *
+ * ---------------------------------------------------------------------------------- *
+ * *
+ ****************************************************************************************/
+#ifdef __x86_64__
+.text
+.globl make_fcontext
+.type make_fcontext,@function
+.align 16
+make_fcontext:
+ /* first arg of make_fcontext() == top of context-stack */
+ movq %rdi, %rax
+
+ /* shift address in RAX to lower 16 byte boundary */
+ andq $-16, %rax
+
+ /* reserve space for context-data on context-stack */
+ /* on context-function entry: (RSP -0x8) % 16 == 0 */
+ leaq -0x40(%rax), %rax
+
+ /* third arg of make_fcontext() == address of context-function */
+ /* stored in RBX */
+ movq %rdx, 0x28(%rax)
+
+ /* save MMX control- and status-word */
+ stmxcsr (%rax)
+ /* save x87 control-word */
+ fnstcw 0x4(%rax)
+
+ /* compute abs address of label trampoline */
+ leaq trampoline(%rip), %rcx
+ /* save address of trampoline as return-address for context-function */
+ /* will be entered after calling jump_fcontext() first time */
+ movq %rcx, 0x38(%rax)
+
+ /* compute abs address of label finish */
+ leaq finish(%rip), %rcx
+ /* save address of finish as return-address for context-function */
+ /* will be entered after context-function returns */
+ movq %rcx, 0x30(%rax)
+
+ ret /* return pointer to context-data */
+
+trampoline:
+ /* store return address on stack */
+ /* fix stack alignment */
+ push %rbp
+ /* jump to context-function */
+ jmp *%rbx
+
+finish:
+ /* exit code is zero */
+ xorq %rdi, %rdi
+ /* exit application */
+ call _exit@PLT
+ hlt
+.size make_fcontext,.-make_fcontext
+
+/* Mark that we don't need executable stack. */
+.section .note.GNU-stack,"",%progbits
+#endif
\ No newline at end of file
diff --git a/samples/rpal/librpal/ontop_x86_64_sysv_elf_gas.S b/samples/rpal/librpal/ontop_x86_64_sysv_elf_gas.S
new file mode 100644
index 000000000000..9dce797c2541
--- /dev/null
+++ b/samples/rpal/librpal/ontop_x86_64_sysv_elf_gas.S
@@ -0,0 +1,84 @@
+/*
+ Copyright Oliver Kowalke 2009.
+ Distributed under the Boost Software License, Version 1.0.
+ (See accompanying file LICENSE_1_0.txt or copy at
+ http://www.boost.org/LICENSE_1_0.txt)
+*/
+
+/****************************************************************************************
+ * *
+ * ---------------------------------------------------------------------------------- *
+ * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | *
+ * ---------------------------------------------------------------------------------- *
+ * | 0x0 | 0x4 | 0x8 | 0xc | 0x10 | 0x14 | 0x18 | 0x1c | *
+ * ---------------------------------------------------------------------------------- *
+ * | fc_mxcsr|fc_x87_cw| R12 | R13 | R14 | *
+ * ---------------------------------------------------------------------------------- *
+ * ---------------------------------------------------------------------------------- *
+ * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | *
+ * ---------------------------------------------------------------------------------- *
+ * | 0x20 | 0x24 | 0x28 | 0x2c | 0x30 | 0x34 | 0x38 | 0x3c | *
+ * ---------------------------------------------------------------------------------- *
+ * | R15 | RBX | RBP | RIP | *
+ * ---------------------------------------------------------------------------------- *
+ * *
+ ****************************************************************************************/
+#ifdef __x86_64__
+.text
+.globl ontop_fcontext
+.type ontop_fcontext,@function
+.align 16
+ontop_fcontext:
+ /* preserve ontop-function in R8 */
+ movq %rdx, %r8
+
+ leaq -0x38(%rsp), %rsp /* prepare stack */
+
+#if !defined(BOOST_USE_TSX)
+ stmxcsr (%rsp) /* save MMX control- and status-word */
+ fnstcw 0x4(%rsp) /* save x87 control-word */
+#endif
+
+ movq %r12, 0x8(%rsp) /* save R12 */
+ movq %r13, 0x10(%rsp) /* save R13 */
+ movq %r14, 0x18(%rsp) /* save R14 */
+ movq %r15, 0x20(%rsp) /* save R15 */
+ movq %rbx, 0x28(%rsp) /* save RBX */
+ movq %rbp, 0x30(%rsp) /* save RBP */
+
+ /* store RSP (pointing to context-data) in RAX */
+ movq %rsp, %rax
+
+ /* restore RSP (pointing to context-data) from RDI */
+ movq %rdi, %rsp
+
+#if !defined(BOOST_USE_TSX)
+ ldmxcsr (%rsp) /* restore MMX control- and status-word */
+ fldcw 0x4(%rsp) /* restore x87 control-word */
+#endif
+
+ movq 0x8(%rsp), %r12 /* restore R12 */
+ movq 0x10(%rsp), %r13 /* restore R13 */
+ movq 0x18(%rsp), %r14 /* restore R14 */
+ movq 0x20(%rsp), %r15 /* restore R15 */
+ movq 0x28(%rsp), %rbx /* restore RBX */
+ movq 0x30(%rsp), %rbp /* restore RBP */
+
+ leaq 0x38(%rsp), %rsp /* prepare stack */
+
+ /* return transfer_t from jump */
+ /* RAX == fctx, RDX == data */
+ movq %rsi, %rdx
+ /* pass transfer_t as first arg in context function */
+ /* RDI == fctx, RSI == data */
+ movq %rax, %rdi
+
+ /* keep return-address on stack */
+
+ /* indirect jump to context */
+ jmp *%r8
+.size ontop_fcontext,.-ontop_fcontext
+
+/* Mark that we don't need executable stack. */
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/samples/rpal/librpal/private.h b/samples/rpal/librpal/private.h
new file mode 100644
index 000000000000..9dc78f449f0f
--- /dev/null
+++ b/samples/rpal/librpal/private.h
@@ -0,0 +1,341 @@
+#ifndef PRIVATE_H
+#define PRIVATE_H
+
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/syscall.h>
+#include <sys/uio.h>
+#ifdef __x86_64__
+#include <immintrin.h>
+#endif
+#include <pthread.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <sys/ioctl.h>
+
+#include "debug.h"
+#include "rpal_queue.h"
+#include "fiber.h"
+#include "rpal.h"
+
+#ifdef __x86_64__
+static inline void write_tls_base(unsigned long tls_base)
+{
+ asm volatile("wrfsbase %0" ::"r"(tls_base) : "memory");
+}
+
+static inline unsigned long read_tls_base(void)
+{
+ unsigned long fsbase;
+ asm volatile("rdfsbase %0" : "=r"(fsbase)::"memory");
+ return fsbase;
+}
+#endif
+
+#define likely(x) __builtin_expect(!!(x), 1)
+#define unlikely(x) __builtin_expect(!!(x), 0)
+
+// | fd_timestamp | pad | rthread_id | server_fd |
+// | 16 | 8 | 8 | 32 |
+#define LOW32_MASK ((1UL << 32) - 1)
+#define MIDL8_MASK ((unsigned long)(((1UL << 8) - 1)) << 32)
+
+#define HIGH16_OFFSET 48
+#define HIGH32_OFFSET 32
+
+#define get_high16(val) ({ (val) >> HIGH16_OFFSET; })
+
+#define get_high32(val) ({ (val) >> HIGH32_OFFSET; })
+
+#define get_midl8(val) ({ ((val) & MIDL8_MASK) >> HIGH32_OFFSET; })
+#define get_low32(val) ({ (val) & LOW32_MASK; })
+
+#define get_fdtimestamp(rpalfd) get_high16(rpalfd)
+#define get_rid(rpalfd) get_midl8(rpalfd)
+#define get_sfd(rpalfd) get_low32(rpalfd)
+
+#define PAGE_SIZE 4096
+#define DEFUALT_STACK_SIZE (PAGE_SIZE * 4)
+#define TRAMPOLINE_SIZE (PAGE_SIZE * 1)
+
+#define BITS_PER_LONG 64
+#define BITS_TO_LONGS(x) \
+ (((x) + 8 * sizeof(unsigned long) - 1) / (8 * sizeof(unsigned long)))
+
+#define KEY_SIZE 16
+
+enum rpal_sender_state {
+ RPAL_SENDER_STATE_RUNNING,
+ RPAL_SENDER_STATE_CALL,
+ RPAL_SENDER_STATE_KERNEL_RET,
+};
+
+enum rpal_epoll_event {
+ RPAL_KERNEL_PENDING = 0x1,
+ RPAL_USER_PENDING = 0x2,
+};
+
+enum rpal_receiver_state {
+ RPAL_RECEIVER_STATE_RUNNING,
+ RPAL_RECEIVER_STATE_KERNEL_RET,
+ RPAL_RECEIVER_STATE_READY,
+ RPAL_RECEIVER_STATE_WAIT,
+ RPAL_RECEIVER_STATE_CALL,
+ RPAL_RECEIVER_STATE_LAZY_SWITCH,
+ RPAL_RECEIVER_STATE_MAX,
+};
+
+enum rpal_command_type {
+ RPAL_CMD_GET_API_VERSION_AND_CAP,
+ RPAL_CMD_GET_SERVICE_KEY,
+ RPAL_CMD_GET_SERVICE_ID,
+ RPAL_CMD_REGISTER_SENDER,
+ RPAL_CMD_UNREGISTER_SENDER,
+ RPAL_CMD_REGISTER_RECEIVER,
+ RPAL_CMD_UNREGISTER_RECEIVER,
+ RPAL_CMD_ENABLE_SERVICE,
+ RPAL_CMD_DISABLE_SERVICE,
+ RPAL_CMD_REQUEST_SERVICE,
+ RPAL_CMD_RELEASE_SERVICE,
+ RPAL_CMD_GET_SERVICE_PKEY,
+ RPAL_CMD_UDS_FDMAP,
+ RPAL_NR_CMD,
+};
+
+/* RPAL ioctl macro */
+#define RPAL_IOCTL_MAGIC 0x33
+#define RPAL_IOCTL_GET_API_VERSION_AND_CAP \
+ _IOWR(RPAL_IOCTL_MAGIC, RPAL_CMD_GET_API_VERSION_AND_CAP, \
+ struct rpal_version_info *)
+#define RPAL_IOCTL_GET_SERVICE_KEY \
+ _IOWR(RPAL_IOCTL_MAGIC, RPAL_CMD_GET_SERVICE_KEY, unsigned long)
+#define RPAL_IOCTL_GET_SERVICE_ID \
+ _IOWR(RPAL_IOCTL_MAGIC, RPAL_CMD_GET_SERVICE_ID, int *)
+#define RPAL_IOCTL_REGISTER_SENDER \
+ _IOWR(RPAL_IOCTL_MAGIC, RPAL_CMD_REGISTER_SENDER, unsigned long)
+#define RPAL_IOCTL_UNREGISTER_SENDER \
+ _IO(RPAL_IOCTL_MAGIC, RPAL_CMD_UNREGISTER_SENDER)
+#define RPAL_IOCTL_REGISTER_RECEIVER \
+ _IOWR(RPAL_IOCTL_MAGIC, RPAL_CMD_REGISTER_RECEIVER, unsigned long)
+#define RPAL_IOCTL_UNREGISTER_RECEIVER \
+ _IO(RPAL_IOCTL_MAGIC, RPAL_CMD_UNREGISTER_RECEIVER)
+#define RPAL_IOCTL_ENABLE_SERVICE \
+ _IOWR(RPAL_IOCTL_MAGIC, RPAL_CMD_ENABLE_SERVICE, unsigned long)
+#define RPAL_IOCTL_DISABLE_SERVICE \
+ _IO(RPAL_IOCTL_MAGIC, RPAL_CMD_DISABLE_SERVICE)
+#define RPAL_IOCTL_REQUEST_SERVICE \
+ _IOWR(RPAL_IOCTL_MAGIC, RPAL_CMD_REQUEST_SERVICE, unsigned long)
+#define RPAL_IOCTL_RELEASE_SERVICE \
+ _IOWR(RPAL_IOCTL_MAGIC, RPAL_CMD_RELEASE_SERVICE, unsigned long)
+#define RPAL_IOCTL_GET_SERVICE_PKEY \
+ _IOWR(RPAL_IOCTL_MAGIC, RPAL_CMD_GET_SERVICE_PKEY, int *)
+#define RPAL_IOCTL_UDS_FDMAP \
+ _IOWR(RPAL_IOCTL_MAGIC, RPAL_CMD_UDS_FDMAP, void *)
+
+typedef enum rpal_receiver_status {
+ RPAL_RECEIVER_UNINITIALIZED,
+ RPAL_RECEIVER_INITIALIZED,
+ RPAL_RECEIVER_AVAILABLE,
+} rpal_receiver_status_t;
+
+enum RPAL_CAPABILITIES {
+ RPAL_CAP_PKU,
+};
+
+#define RPAL_SID_SHIFT 24
+#define RPAL_ID_SHIFT 8
+#define RPAL_RECEIVER_STATE_MASK ((1 << RPAL_ID_SHIFT) - 1)
+#define RPAL_SID_MASK (~((1 << RPAL_SID_SHIFT) - 1))
+#define RPAL_ID_MASK (~(0 | RPAL_RECEIVER_STATE_MASK | RPAL_SID_MASK))
+#define RPAL_MAX_ID ((1 << (RPAL_SID_SHIFT - RPAL_ID_SHIFT)) - 1)
+#define RPAL_BUILD_CALL_STATE(id, sid) \
+ ((sid << RPAL_SID_SHIFT) | (id << RPAL_ID_SHIFT) | RPAL_RECEIVER_STATE_CALL)
+
+typedef struct rpal_capability {
+ int compat_version;
+ int api_version;
+ unsigned long cap;
+} rpal_capability_t;
+
+typedef struct task_context {
+ unsigned long r15;
+ unsigned long r14;
+ unsigned long r13;
+ unsigned long r12;
+ unsigned long rbx;
+ unsigned long rbp;
+ unsigned long rip;
+ unsigned long rsp;
+} task_context_t;
+
+typedef struct receiver_context {
+ task_context_t task_context;
+ int receiver_id;
+ int receiver_state;
+ int sender_state;
+ int ep_pending;
+ int rpal_ep_poll_magic;
+ int epfd;
+ void *ep_events;
+ int maxevents;
+ int timeout;
+ int64_t total_time;
+} receiver_context_t;
+
+typedef struct rpal_call_info {
+ unsigned long sender_tls_base;
+ uint32_t pkru;
+ fcontext_t sender_fctx;
+} rpal_call_info_t;
+
+enum thread_type {
+ RPAL_RECEIVER = 0x1,
+ RPAL_SENDER = 0x2,
+};
+typedef struct rpal_receiver_info {
+ long tid;
+ unsigned long tls_base;
+
+ int epfd;
+ rpal_receiver_status_t status;
+ epoll_uevent_queue_t ueventq;
+ volatile uint64_t uqlock;
+
+ fcontext_t main_ctx;
+ task_t *ep_stack;
+ task_t *trampoline;
+
+ rpal_call_info_t rci;
+
+ volatile receiver_context_t *rc;
+ struct rpal_thread_pool *rtp;
+} rpal_receiver_info_t;
+
+typedef struct fd_table fd_table_t;
+/* Keep it the same as kernel */
+struct rpal_thread_pool {
+ rpal_receiver_info_t *rris;
+ fd_table_t *fdt;
+ uint64_t service_key;
+ int nr_threads;
+ int service_id;
+ int pkey;
+};
+
+struct rpal_request_arg {
+ unsigned long version;
+ uint64_t key;
+ struct rpal_thread_pool **rtp;
+ int *id;
+ int *pkey;
+};
+
+struct rpal_uds_fdmap_arg {
+ int service_id;
+ int cfd;
+ unsigned long *res;
+};
+
+#define RPAL_ERROR_MAGIC 0x98CC98CC
+
+typedef struct rpal_error_context {
+ unsigned long tls_base;
+ uint64_t erip;
+ uint64_t ersp;
+ int state;
+ int magic;
+} rpal_error_context_t;
+
+typedef struct sender_context {
+ task_context_t task_context;
+ rpal_error_context_t ec;
+ int sender_id;
+ int64_t start_time;
+ int64_t total_time;
+} sender_context_t;
+
+#define RPAL_EP_POLL_MAGIC 0xCC98CC98
+
+typedef struct rpal_sender_info {
+ int idx;
+ int tid;
+ int pkey;
+ int inited;
+ sender_context_t sc;
+} rpal_sender_info_t;
+
+typedef struct fdt_node fdt_node_t;
+
+typedef struct fd_event {
+ int epfd;
+ int fd;
+ struct epoll_event epev;
+ uint32_t events;
+ int wait;
+
+ rpal_queue_t q;
+ int pkey; // unused
+ fdt_node_t *node;
+ struct fd_event *next;
+ uint16_t timestamp;
+ uint16_t outdated;
+ uint64_t service_key;
+} fd_event_t;
+
+struct fdt_node {
+ fd_event_t **events;
+ fdt_node_t *next;
+ int *ref_count;
+ uint16_t *timestamps;
+};
+
+// when sender calls fd_event_get, we must check this number to avoid
+// accessing outdated fdt_node definitions
+
+#define FDTAB_MAG1 0x4D414731UL // add fde lazyswitch
+#define FDTAB_MAG2 0x14D414731UL // add fde timestamp
+#define FDTAB_MAG3 0x34D414731UL // add fde outdated
+#define FDTAB_MAG4 0x74D414731UL // add automatic identification rpal mode
+
+enum fde_ref_status {
+ FDE_FREEING = -100,
+ FDE_FREED = -1,
+ FDE_AVAILABLE = 0,
+};
+
+#define DEFAULT_NODE_SHIFT 14 // 2^14 elements per node
+typedef struct fd_table {
+ fdt_node_t *head;
+ fdt_node_t *tail;
+ int max_fd;
+ unsigned int node_shift;
+ unsigned int node_mask;
+ pthread_mutex_t lock;
+ unsigned long magic;
+ fd_event_t *freelist;
+ pthread_mutex_t list_lock;
+} fd_table_t;
+
+typedef struct critical_section {
+ unsigned long ret_begin;
+ unsigned long ret_end;
+} critical_section_t;
+
+struct rpal_service_metadata {
+ unsigned long version;
+ struct rpal_thread_pool *rtp;
+ critical_section_t rcs;
+ int pkey;
+};
+
+#ifndef RPAL_DEBUG
+#define dbprint(category, format, args...) ((void)0)
+#else
+void dbprint(rpal_debug_flag_t category, char *format, ...)
+ __attribute__((format(printf, 2, 3)));
+#endif
+void errprint(const char *format, ...) __attribute__((format(printf, 1, 2)));
+void warnprint(const char *format, ...) __attribute__((format(printf, 1, 2)));
+
+#endif
diff --git a/samples/rpal/librpal/rpal.c b/samples/rpal/librpal/rpal.c
new file mode 100644
index 000000000000..64bd2b93bd67
--- /dev/null
+++ b/samples/rpal/librpal/rpal.c
@@ -0,0 +1,2351 @@
+#include "private.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/eventfd.h>
+#include <linux/futex.h>
+#include <signal.h>
+#include <stdarg.h>
+
+#include "rpal_pkru.h"
+
+/* prints an error message to stderr */
+/* Emit a "[RPAL_ERROR]"-tagged printf-style message on stderr. */
+void errprint(const char *format, ...)
+{
+	va_list ap;
+
+	va_start(ap, format);
+	fprintf(stderr, "[RPAL_ERROR] ");
+	vfprintf(stderr, format, ap);
+	va_end(ap);
+}
+
+/* prints a warning message to stderr */
+/* Emit a "[RPAL_WARNING]"-tagged printf-style message on stderr. */
+void warnprint(const char *format, ...)
+{
+	va_list ap;
+
+	va_start(ap, format);
+	fprintf(stderr, "[RPAL_WARNING] ");
+	vfprintf(stderr, format, ap);
+	va_end(ap);
+}
+
+#ifdef RPAL_DEBUG
+/* Debug printf; emitted only when @category is enabled in RPAL_DEBUG.
+ * Compiled out entirely (see private.h) when RPAL_DEBUG is undefined. */
+void dbprint(rpal_debug_flag_t category, char *format, ...)
+{
+ if (category & RPAL_DEBUG) {
+ va_list args;
+ fprintf(stderr, "[RPAL_DEBUG] ");
+ va_start(args, format);
+ vfprintf(stderr, format, args);
+ va_end(args);
+ }
+}
+#endif
+
+/* Save/restore the SSE (MXCSR) and x87 control words around calls that
+ * may clobber FPU state. */
+#define SAVE_FPU(mxcsr, fpucw) \
+ __asm__ __volatile__("stmxcsr %0;" \
+ "fnstcw %1;" \
+ : "=m"(mxcsr), "=m"(fpucw) \
+ :)
+#define RESTORE_FPU(mxcsr, fpucw) \
+ __asm__ __volatile__("ldmxcsr %0;" \
+ "fldcw %1;" \
+ : \
+ : "m"(mxcsr), "m"(fpucw))
+
+/* Record @ECODE through EPTR (when non-NULL) and log the message.
+ * NOTE(review): expands to two statements and is NOT wrapped in
+ * do { } while (0), so it is unsafe under an unbraced if/else; some
+ * call sites also omit the trailing semicolon, which a do/while
+ * rewrite would break.  Keep all call sites braced. */
+#define ERRREPORT(EPTR, ECODE, ...) \
+ if (EPTR) { \
+ *EPTR = ECODE; \
+ } \
+ errprint(__VA_ARGS__);
+
+#define RPAL_MGT_FILE "/proc/rpal"
+#define MAX_SUPPROTED_CPUS 192 // [sic] "SUPPROTED" -- rename would touch other users
+
+/* Index of the least-significant set bit in @word ("rep; bsf").
+ * The result is undefined when @word == 0 -- callers check for a
+ * nonzero word first (see clear_first_set_bit). */
+static __always_inline unsigned long __ffs(unsigned long word)
+{
+ asm("rep; bsf %1,%0" : "=r"(word) : "rm"(word));
+
+ return word;
+}
+
+/* Set bit @idx in a uint64_t bitmap.
+ * Fix: the original divided by 8 (bits per byte) instead of
+ * BITS_PER_LONG, so for idx >= 8 it indexed the wrong word -- and with
+ * MAX_SENDERS == 256 the word index could reach 31 while the sender
+ * bitmap only has BITS_TO_LONGS(256) == 4 words (out-of-bounds write).
+ * Now consistent with clear_first_set_bit(). */
+static void __set_bit(uint64_t *bitmap, int idx)
+{
+	int bit, i;
+
+	i = idx / BITS_PER_LONG;
+	bit = idx % BITS_PER_LONG;
+	bitmap[i] |= (1UL << bit);
+}
+
+/* Find the lowest set bit in @bitmap (of @size bits), clear it, and
+ * return its index; -1 when no bit is set.  Not atomic -- callers hold
+ * the owning lock (see rpal_sender_info_alloc). */
+static int clear_first_set_bit(uint64_t *bitmap, int size)
+{
+ int idx;
+ int bit, i;
+
+ for (i = 0; i * BITS_PER_LONG < size; i++) {
+ if (bitmap[i]) {
+ bit = __ffs(bitmap[i]);
+ idx = i * BITS_PER_LONG + bit;
+ if (idx >= size) {
+ return -1;
+ }
+ bitmap[i] &= ~(1UL << bit);
+ return idx;
+ }
+ }
+ return -1;
+}
+
+extern void rpal_get_critical_addr(critical_section_t *rcs);
+static critical_section_t rcs = { 0 };
+
+#define MAX_SERVICEID 254 // Intel MPK Limit
+#define MIN_RPAL_KERNEL_API_VERSION 1
+#define TARGET_RPAL_KERNEL_API_VERSION \
+ 1 // RPAL will disable when KERNEL_API < TARGET_RPAL_KERNEL_API_VERSION
+
+enum {
+ RCALL_IN = 0x1 << 0,
+ RCALL_OUT = 0x1 << 1,
+};
+
+enum {
+ FDE_NO_TRIGGER,
+ FDE_TRIGGER_OUT,
+};
+
+#define EPOLLRPALINOUT_BITS (EPOLLRPALIN | EPOLLRPALOUT)
+
+#define DEFAULT_QUEUE_SIZE 32U
+
+typedef struct rpal_requested_service {
+ struct rpal_thread_pool *service;
+ int pkey;
+ uint64_t key;
+} rpal_requeseted_service_t;
+
+static int rpal_mgtfd = -1;
+static int inited;
+int pkru_enabled = 0;
+
+static rpal_capability_t version;
+static pthread_key_t rpal_key;
+static rpal_requeseted_service_t requested_services[MAX_SERVICEID];
+static pthread_mutex_t release_lock;
+
+typedef struct rpal_local {
+ unsigned int tflag;
+ rpal_receiver_info_t *rri;
+ rpal_sender_info_t *rsi;
+} rpal_local_t;
+
+#define SENDERS_PAGE_ORDER 3
+#define RPALTHREAD_PAGE_ORDER 0
+
+typedef struct rpal_thread_metadata {
+ int rpal_receiver_idx;
+ int service_id;
+ const int epcpage_order;
+ uint64_t service_key;
+ struct rpal_thread_pool *rtp;
+ receiver_context_t *rc;
+ pid_t pid;
+ int *eventfds;
+} rpal_thread_metadata_t;
+
+static rpal_thread_metadata_t threads_md = {
+ .service_id = -1,
+ .epcpage_order = RPALTHREAD_PAGE_ORDER,
+};
+
+/* Sender info bound to the calling thread, or NULL when the thread is
+ * not registered as an RPAL sender. */
+static inline rpal_sender_info_t *current_rpal_sender(void)
+{
+	rpal_local_t *tls = pthread_getspecific(rpal_key);
+
+	if (!tls || !(tls->tflag & RPAL_SENDER))
+		return NULL;
+	return tls->rsi;
+}
+
+/* Receiver info bound to the calling thread, or NULL when the thread
+ * is not registered as an RPAL receiver. */
+static inline rpal_receiver_info_t *current_rpal_thread(void)
+{
+	rpal_local_t *tls = pthread_getspecific(rpal_key);
+
+	if (!tls || !(tls->tflag & RPAL_RECEIVER))
+		return NULL;
+	return tls->rri;
+}
+
+/* Bind @sender to the calling thread's TLS record, allocating the
+ * record on first use.  Fails if the thread already holds the sender
+ * role (a thread may be sender and receiver simultaneously). */
+static status_t rpal_register_sender_local(rpal_sender_info_t *sender)
+{
+ rpal_local_t *local;
+ local = pthread_getspecific(rpal_key);
+ if (!local) {
+ local = malloc(sizeof(rpal_local_t));
+ if (!local)
+ return RPAL_FAILURE;
+ memset(local, 0, sizeof(rpal_local_t));
+ pthread_setspecific(rpal_key, local);
+ }
+ if (local->tflag & RPAL_SENDER) {
+ return RPAL_FAILURE;
+ }
+ local->rsi = sender;
+ local->tflag |= RPAL_SENDER;
+ return RPAL_SUCCESS;
+}
+
+/* Drop the sender role from this thread; the TLS record is freed once
+ * no role bit remains. */
+static status_t rpal_unregister_sender_local(void)
+{
+ rpal_local_t *local;
+ local = pthread_getspecific(rpal_key);
+ if (!local || !(local->tflag & RPAL_SENDER))
+ return RPAL_FAILURE;
+
+ local->rsi = NULL;
+ local->tflag &= ~RPAL_SENDER;
+ if (!local->tflag) {
+ pthread_setspecific(rpal_key, NULL);
+ free(local);
+ }
+ return RPAL_SUCCESS;
+}
+
+/* Receiver-side counterpart of rpal_register_sender_local(). */
+static status_t rpal_register_receiver_local(rpal_receiver_info_t *thread)
+{
+ rpal_local_t *local;
+ local = pthread_getspecific(rpal_key);
+ if (!local) {
+ local = malloc(sizeof(rpal_local_t));
+ if (!local)
+ return RPAL_FAILURE;
+ memset(local, 0, sizeof(rpal_local_t));
+ pthread_setspecific(rpal_key, local);
+ }
+ if (local->tflag & RPAL_RECEIVER) {
+ return RPAL_FAILURE;
+ }
+ local->rri = thread;
+ local->tflag |= RPAL_RECEIVER;
+ return RPAL_SUCCESS;
+}
+
+/* Receiver-side counterpart of rpal_unregister_sender_local(). */
+static status_t rpal_unregister_receiver_local(void)
+{
+ rpal_local_t *local;
+ local = pthread_getspecific(rpal_key);
+ if (!local || !(local->tflag & RPAL_RECEIVER))
+ return RPAL_FAILURE;
+
+ local->rri = NULL;
+ local->tflag &= ~RPAL_RECEIVER;
+ if (!local->tflag) {
+ pthread_setspecific(rpal_key, NULL);
+ free(local);
+ }
+ return RPAL_SUCCESS;
+}
+
+#define MAX_SENDERS 256
+typedef struct rpal_senders_metadata {
+ uint64_t bitmap[BITS_TO_LONGS(MAX_SENDERS)];
+ pthread_mutex_t lock;
+ int sdpage_order;
+ rpal_sender_info_t *senders;
+} rpal_senders_metadata_t;
+
+static rpal_senders_metadata_t *senders_md;
+
+/*
+ * Forward @cmd/@arg to the RPAL management device (/proc/rpal).
+ * Returns the ioctl() result, or -1 when the device was never opened.
+ * Fix: dropped a never-used argument-marshalling struct ("args" and
+ * "args_size") left over from an older calling convention.
+ */
+static long rpal_ioctl(unsigned long cmd, unsigned long arg)
+{
+	if (rpal_mgtfd == -1) {
+		errprint("rpal_mgtfd is not opened\n");
+		return -1;
+	}
+
+	return ioctl(rpal_mgtfd, cmd, arg);
+}
+
+/* Register @sender with the kernel; local TLS registration is rolled
+ * back if the ioctl fails.  Returns the ioctl result (negative on
+ * error) or RPAL_FAILURE if the thread is already a sender. */
+static inline long rpal_register_sender(rpal_sender_info_t *sender)
+{
+ long ret;
+
+ if (rpal_register_sender_local(sender) == RPAL_FAILURE)
+ return RPAL_FAILURE;
+
+ ret = rpal_ioctl(RPAL_IOCTL_REGISTER_SENDER,
+ (unsigned long)&sender->sc);
+ if (ret < 0) {
+ rpal_unregister_sender_local();
+ }
+ return ret;
+}
+
+/* Receiver-side counterpart: hands the shared receiver_context to the
+ * kernel, rolling back TLS registration on failure. */
+static inline long rpal_register_receiver(rpal_receiver_info_t *rri)
+{
+ long ret;
+
+ if (rpal_register_receiver_local(rri) == RPAL_FAILURE)
+ return RPAL_FAILURE;
+ ret = rpal_ioctl(RPAL_IOCTL_REGISTER_RECEIVER,
+ (unsigned long)rri->rc);
+ if (ret < 0) {
+ rpal_unregister_receiver_local();
+ }
+ return ret;
+}
+
+/* Unregister the calling thread's sender role, locally then in-kernel. */
+static inline long rpal_unregister_sender(void)
+{
+ if (rpal_unregister_sender_local() == RPAL_FAILURE)
+ return RPAL_FAILURE;
+ return rpal_ioctl(RPAL_IOCTL_UNREGISTER_SENDER, 0);
+}
+
+/* Unregister the calling thread's receiver role, locally then in-kernel. */
+static inline long rpal_unregister_receiver(void)
+{
+ if (rpal_unregister_receiver_local() == RPAL_FAILURE)
+ return RPAL_FAILURE;
+ return rpal_ioctl(RPAL_IOCTL_UNREGISTER_RECEIVER, 0);
+}
+
+/* Protection key assigned to this service by the kernel, or -1 when
+ * MPK is unavailable (PKRU paths are then disabled). */
+static int rpal_get_service_pkey(void)
+{
+ int pkey, ret;
+
+ ret = rpal_ioctl(RPAL_IOCTL_GET_SERVICE_PKEY, (unsigned long)&pkey);
+ if (ret < 0 || pkey == -1) {
+ warnprint("MPK not supported on this host, disabling PKRU\n");
+ return -1;
+ }
+ return pkey;
+}
+
+/* Query this service's id from the kernel; negative on failure. */
+static int __rpal_get_service_id(void)
+{
+ int id, ret;
+
+ ret = rpal_ioctl(RPAL_IOCTL_GET_SERVICE_ID, (unsigned long)&id);
+
+ if (ret < 0)
+ return ret;
+ else
+ return id;
+}
+
+/* Query this service's key from the kernel; 0 on failure. */
+static uint64_t __rpal_get_service_key(void)
+{
+ int ret;
+ uint64_t key;
+
+ ret = rpal_ioctl(RPAL_IOCTL_GET_SERVICE_KEY, (unsigned long)&key);
+ if (ret < 0)
+ return 0;
+ else
+ return key;
+}
+
+/* mmap 2^order shared pages backed by /proc/rpal.  Returns NULL when
+ * the management fd is not open; otherwise the raw mmap() result
+ * (MAP_FAILED on error, as in the original). */
+static void *rpal_get_shared_page(int order)
+{
+	if (rpal_mgtfd == -1)
+		return NULL;
+
+	return mmap(NULL, PAGE_SIZE * (1 << order), PROT_READ | PROT_WRITE,
+		    MAP_SHARED, rpal_mgtfd, 0);
+}
+
+/* munmap a region obtained from rpal_get_shared_page(); logs and
+ * returns the munmap() status. */
+static int rpal_free_shared_page(void *page, int order)
+{
+	int err = munmap(page, PAGE_SIZE * (1 << order));
+
+	if (err)
+		errprint("munmap fail: %d\n", err);
+	return err;
+}
+
+/* Nonzero once library-wide initialization has completed. */
+static inline int rpal_inited(void)
+{
+	return inited == 1;
+}
+
+/* Nonzero when @idx falls outside [0, MAX_SENDERS). */
+static inline int sender_idx_is_invalid(int idx)
+{
+	return idx < 0 || idx >= MAX_SENDERS;
+}
+
+/* Claim a free sender slot: pop the lowest set bit from the shared
+ * bitmap (set bit == free slot) and return its index, which is also
+ * the offset into senders_md->senders.  On success *sender points at
+ * the claimed slot; on exhaustion returns a negative index and leaves
+ * *sender untouched. */
+static int rpal_sender_info_alloc(rpal_sender_info_t **sender)
+{
+ int idx;
+
+ if (!senders_md)
+ return RPAL_FAILURE;
+ pthread_mutex_lock(&senders_md->lock);
+ idx = clear_first_set_bit(senders_md->bitmap, MAX_SENDERS);
+ if (idx < 0) {
+ errprint("sender data alloc failed: %d, bitmap: %lx\n", idx,
+ senders_md->bitmap[0]);
+ goto unlock;
+ }
+ *sender = senders_md->senders + idx;
+
+unlock:
+ pthread_mutex_unlock(&senders_md->lock);
+ return idx;
+}
+
+/* Return sender slot @idx to the pool by re-setting its bitmap bit.
+ * Fix: also bail out when senders_md is NULL -- the alloc path checks
+ * this but the original free path dereferenced it unconditionally. */
+static void rpal_sender_info_free(int idx)
+{
+	if (!senders_md || sender_idx_is_invalid(idx)) {
+		return;
+	}
+	pthread_mutex_lock(&senders_md->lock);
+	__set_bit(senders_md->bitmap, idx);
+	pthread_mutex_unlock(&senders_md->lock);
+}
+
+extern unsigned long rpal_get_ret_rip(void);
+
+/* Nonzero once rpal_sender_init() has completed for @sender's slot. */
+static int rpal_sender_inited(rpal_sender_info_t *sender)
+{
+ return (sender->inited == 1);
+}
+
+/* Turn the calling thread into an RPAL sender: claim a slot, record
+ * tid/pkey and the sender error-return RIP (rpal_get_ret_rip), then
+ * register the sender context with the kernel.  Fails -- without
+ * setting *error -- when the thread is already a sender; callers such
+ * as rpal_read_access_safety() only invoke this when it is not. */
+status_t rpal_sender_init(rpal_error_code_t *error)
+{
+ int idx;
+ int ret = RPAL_FAILURE;
+ rpal_sender_info_t *sender;
+
+ if (!rpal_inited()) {
+ ERRREPORT(error, RPAL_DONT_INITED, "%s: rpal do not init\n",
+ __FUNCTION__);
+ goto error_out;
+ }
+ sender = current_rpal_sender();
+ if (sender) {
+ goto error_out;
+ }
+ idx = rpal_sender_info_alloc(&sender);
+ if (idx < 0) {
+ if (error) {
+ *error = RPAL_ERR_SENDER_INIT;
+ }
+ goto error_out;
+ }
+ sender->idx = idx;
+ sender->sc.sender_id = idx;
+ sender->tid = syscall(SYS_gettid);
+ sender->pkey = rpal_get_service_pkey();
+ sender->sc.ec.erip = rpal_get_ret_rip();
+ ret = rpal_register_sender(sender);
+ if (ret) {
+ ERRREPORT(error, RPAL_ERR_SENDER_REG,
+ "rpal_register_sender error: %d\n", ret);
+ goto sender_register_failed;
+ }
+ sender->inited = 1;
+ return RPAL_SUCCESS;
+
+sender_register_failed:
+ rpal_sender_info_free(idx);
+error_out:
+ return RPAL_FAILURE;
+}
+
+/* Tear down the calling thread's sender role and return its slot to
+ * the pool.  Safe to call when the thread is not a sender. */
+status_t rpal_sender_exit(void)
+{
+ int idx;
+ rpal_sender_info_t *sender;
+
+ sender = current_rpal_sender();
+
+ if (sender) {
+ idx = sender->idx;
+ sender->idx = 0;
+ sender->tid = 0;
+ rpal_unregister_sender();
+ rpal_sender_info_free(idx);
+ sender->pkey = 0;
+ }
+ return RPAL_SUCCESS;
+}
+
+/* Publish this process as an RPAL service: hand the receiver thread
+ * pool and the sender critical-section RIP range to the kernel, then
+ * cache the pkey the kernel assigned.  (The ERRREPORT call below omits
+ * its trailing semicolon -- valid only with the current non-do/while
+ * macro definition; see ERRREPORT's NOTE.) */
+static status_t rpal_enable_service(rpal_error_code_t *error)
+{
+ struct rpal_service_metadata rsm;
+ long ret = 0;
+
+ rsm.version = 0;
+ rsm.rtp = threads_md.rtp;
+ rsm.rcs = rcs;
+ rsm.pkey = -1;
+ ret = rpal_ioctl(RPAL_IOCTL_ENABLE_SERVICE, (unsigned long)&rsm);
+ if (ret) {
+ ERRREPORT(error, RPAL_ERR_ENABLE_SERVICE,
+ "rpal enable service failed: %ld\n", ret)
+ return RPAL_FAILURE;
+ }
+ threads_md.rtp->pkey = rpal_get_service_pkey();
+ return RPAL_SUCCESS;
+}
+
+/* Withdraw this process's RPAL service registration from the kernel. */
+static status_t rpal_disable_service(void)
+{
+	long err = rpal_ioctl(RPAL_IOCTL_DISABLE_SERVICE, 0);
+
+	if (err) {
+		errprint("rpal disable service failed: %ld\n", err);
+		return RPAL_FAILURE;
+	}
+	return RPAL_SUCCESS;
+}
+
+/*
+ * Record a newly requested peer service in slot @id.
+ * Fix: the original used a *weak* compare-exchange outside any retry
+ * loop; a spurious failure leaves @expected NULL and the error path
+ * then dereferences it.  Use a strong CAS (correct for one-shot use)
+ * and guard the dereference anyway.
+ */
+static status_t add_requested_service(struct rpal_thread_pool *rtp, uint64_t key, int id, int pkey)
+{
+	struct rpal_thread_pool *expected = NULL;
+
+	if (!rtp) {
+		errprint("add requested service null\n");
+		return RPAL_FAILURE;
+	}
+
+	if (!__atomic_compare_exchange_n(&requested_services[id].service,
+					 &expected, rtp, 0, __ATOMIC_SEQ_CST,
+					 __ATOMIC_SEQ_CST)) {
+		errprint("rpal service %d already add, expected: %ld\n", id,
+			 expected ? expected->service_key : 0UL);
+		return RPAL_FAILURE;
+	}
+	requested_services[id].key = key;
+	requested_services[id].pkey = pkey;
+	return RPAL_SUCCESS;
+}
+
+/* Map a service key to its requested_services slot index, -1 if unknown. */
+int rpal_get_request_service_id(uint64_t key)
+{
+	int slot;
+
+	for (slot = 0; slot < MAX_SERVICEID; slot++) {
+		if (requested_services[slot].key == key)
+			return slot;
+	}
+	return -1;
+}
+
+/*
+ * Look up a requested service's thread pool by its key; NULL when no
+ * slot matches.  Fix: removed an unused local ("rtp").
+ */
+static struct rpal_thread_pool *get_service_from_key(uint64_t key)
+{
+	int i;
+
+	for (i = 0; i < MAX_SERVICEID; i++) {
+		if (requested_services[i].key == key)
+			return requested_services[i].service;
+	}
+	return NULL;
+}
+
+/* Thread pool of slot @id (no bounds check -- callers pass kernel-
+ * validated ids). */
+static inline struct rpal_thread_pool *get_service_from_id(int id)
+{
+ return requested_services[id].service;
+}
+
+/* Protection key recorded for slot @id. */
+static inline int get_service_pkey_from_id(int id)
+{
+ return requested_services[id].pkey;
+}
+
+/* Atomically detach the thread pool registered under @key and return
+ * it; NULL when @key is unknown.  The slot's key/pkey fields are left
+ * in place. */
+static struct rpal_thread_pool *del_requested_service(uint64_t key)
+{
+ int id;
+ struct rpal_thread_pool *rtp;
+
+ id = rpal_get_request_service_id(key);
+ if (id == -1)
+ return NULL;
+ rtp = __atomic_exchange_n(&requested_services[id].service, NULL,
+ __ATOMIC_RELAXED);
+ return rtp;
+}
+
+/* Ask the kernel for access to the service identified by @key; on
+ * success the kernel fills in the peer's thread pool, id and pkey,
+ * which are cached in requested_services[].  If local caching fails
+ * the kernel-side request is rolled back. */
+int rpal_request_service(uint64_t key)
+{
+ struct rpal_request_arg rra;
+ long ret = RPAL_FAILURE;
+ struct rpal_thread_pool *rtp;
+ int id, pkey;
+
+ if (!rpal_inited()) {
+ errprint("%s: rpal do not init\n", __FUNCTION__);
+ goto error_out;
+ }
+
+ rra.version = 0;
+ rra.key = key;
+ rra.rtp = &rtp;
+ rra.id = &id;
+ rra.pkey = &pkey;
+ ret = rpal_ioctl(RPAL_IOCTL_REQUEST_SERVICE, (unsigned long)&rra);
+ if (ret) {
+ goto error_out;
+ }
+
+ ret = add_requested_service(rtp, key, id, pkey);
+ if (ret == RPAL_FAILURE) {
+ goto add_requested_failed;
+ }
+
+ return RPAL_SUCCESS;
+
+add_requested_failed:
+ rpal_ioctl(RPAL_IOCTL_RELEASE_SERVICE, key);
+error_out:
+ return (int)ret;
+}
+
+static void fdt_freelist_forcefree(fd_table_t *fdt, uint64_t service_key);
+
+/*
+ * Release the peer service identified by @key: detach it from the
+ * local cache, tell the kernel, then force-free any fd_events still
+ * pinned to that service.  Fix: the original assigned the result of
+ * del_requested_service() to a never-used local; keep the call (its
+ * detach side effect matters) and drop the variable.
+ */
+status_t rpal_release_service(uint64_t key)
+{
+	long ret;
+
+	if (!rpal_inited()) {
+		errprint("%s: rpal do not init\n", __FUNCTION__);
+		return RPAL_FAILURE;
+	}
+
+	del_requested_service(key);
+	ret = rpal_ioctl(RPAL_IOCTL_RELEASE_SERVICE, key);
+	if (ret) {
+		errprint("rpal release service failed: %ld\n", ret);
+		return RPAL_FAILURE;
+	}
+	fdt_freelist_forcefree(threads_md.rtp->fdt, key);
+	return RPAL_SUCCESS;
+}
+
+/* If @rri's user event-queue lock is still held by the (now exiting)
+ * service @key, repair the queue and force-release the lock.  The
+ * lock word encodes the holder's service key with bit 63 set while
+ * locked. */
+static void try_clean_lock(rpal_receiver_info_t *rri, uint64_t key)
+{
+ uint64_t lock_state = key | 1UL << 63;
+
+ if (__atomic_load_n(&rri->uqlock, __ATOMIC_RELAXED) == lock_state)
+ uevent_queue_fix(&rri->ueventq);
+
+ if (__atomic_compare_exchange_n(&rri->uqlock, &lock_state, (uint64_t)0,
+ 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
+ dbprint(RPAL_DEBUG_MANAGEMENT,
+ "Serivce (key: %lu) does exit with holding lock\n",
+ key);
+}
+
+/* Batch of service keys read from the kernel during service cleanup;
+ * handed from rpal_clean_service_start() to rpal_clean_service_end(). */
+struct release_info {
+ uint64_t keys[KEY_SIZE];
+ int size;
+};
+
+/* Phase 1 of dead-service cleanup: read the exiting services' keys
+ * from /proc/rpal and force-release any receiver queue locks they
+ * still hold.  On success *ptr receives a malloc'd release_info
+ * (cast to int64_t) that rpal_clean_service_end() consumes and frees. */
+status_t rpal_clean_service_start(int64_t *ptr)
+{
+ rpal_receiver_info_t *rri;
+ struct release_info *info;
+ int i, j;
+ int size;
+
+ if (!ptr) {
+ goto error_out;
+ }
+
+ info = malloc(sizeof(struct release_info));
+ if (info == NULL) {
+ errprint("alloc release_info fail\n");
+ goto error_out;
+ }
+
+ pthread_mutex_lock(&release_lock);
+ size = read(rpal_mgtfd, info->keys, KEY_SIZE * sizeof(uint64_t));
+ if (size <= 0) {
+ errprint("Read keys on rpal_mgtfd failed\n");
+ goto error_unlock;
+ }
+
+ size /= sizeof(uint64_t);
+ info->size = size;
+
+ for (i = 0; i < size; i++) {
+ for (j = 0; j < threads_md.rtp->nr_threads; j++) {
+ rri = threads_md.rtp->rris + j;
+ try_clean_lock(rri, info->keys[i]);
+ }
+ }
+ pthread_mutex_unlock(&release_lock);
+ *ptr = (int64_t)info;
+ return RPAL_SUCCESS;
+
+error_unlock:
+ pthread_mutex_unlock(&release_lock);
+ free(info);
+error_out:
+ return RPAL_FAILURE;
+}
+
+/* Phase 2 of dead-service cleanup: release every key captured by
+ * rpal_clean_service_start() and free the handed-over release_info. */
+void rpal_clean_service_end(int64_t *ptr)
+{
+ int i;
+ struct release_info *info;
+
+ if (ptr == NULL)
+ return;
+ info = (struct release_info *)(*ptr);
+ if (info == NULL)
+ return;
+ for (i = 0; i < info->size; i++) {
+ dbprint(RPAL_DEBUG_MANAGEMENT, "release service: 0x%lx\n",
+ info->keys[i]);
+ rpal_release_service(info->keys[i]);
+ }
+ free(info);
+}
+/* This process's service id, or RPAL_FAILURE before initialization. */
+int rpal_get_service_id(void)
+{
+	return rpal_inited() ? threads_md.service_id : RPAL_FAILURE;
+}
+
+/* Copy this service's key into *service_key; fails when the library
+ * is uninitialized or @service_key is NULL. */
+status_t rpal_get_service_key(uint64_t *service_key)
+{
+	if (!rpal_inited() || !service_key)
+		return RPAL_FAILURE;
+
+	*service_key = threads_md.service_key;
+	return RPAL_SUCCESS;
+}
+
+/*
+ * Append a new chunk of 2^node_shift slots to the table and grow
+ * max_fd accordingly.  Caller holds fdt->lock.
+ * Idiom fixes: zero-initialized arrays use calloc() (overflow-checked,
+ * no separate memset) and element sizes are taken from the pointee
+ * (the original sized the events array with sizeof(fd_event_t **)).
+ * ref_count keeps malloc + memset(0xff) so each int slot starts at -1
+ * (FDE_FREED).  Returns the new chunk, or NULL on allocation failure.
+ */
+static fdt_node_t *fdt_node_alloc(fd_table_t *fdt)
+{
+	fdt_node_t *node;
+	fd_event_t **ev;
+	int *ref_count;
+	uint16_t *timestamps;
+	const size_t nslots = (size_t)1 << fdt->node_shift;
+
+	node = malloc(sizeof(fdt_node_t));
+	if (!node)
+		goto node_alloc_failed;
+
+	ev = calloc(nslots, sizeof(*ev));
+	if (!ev)
+		goto events_alloc_failed;
+
+	ref_count = malloc(nslots * sizeof(*ref_count));
+	if (!ref_count)
+		goto used_alloc_failed;
+	memset(ref_count, 0xff, nslots * sizeof(*ref_count));
+
+	timestamps = calloc(nslots, sizeof(*timestamps));
+	if (!timestamps)
+		goto ts_alloc_failed;
+
+	node->events = ev;
+	node->ref_count = ref_count;
+	node->next = NULL;
+	node->timestamps = timestamps;
+	if (!fdt->head) {
+		fdt->head = node;
+		fdt->tail = node;
+	} else {
+		fdt->tail->next = node;
+		fdt->tail = node;
+	}
+	fdt->max_fd += (1 << fdt->node_shift);
+	return node;
+
+ts_alloc_failed:
+	free(ref_count);
+used_alloc_failed:
+	free(ev);
+events_alloc_failed:
+	free(node);
+node_alloc_failed:
+	errprint("%s Error!!! max_fd: %d\n", __FUNCTION__, fdt->max_fd);
+	return NULL;
+}
+
+/* Release every chunk in the table's node list along with its arrays. */
+static void fdt_node_free_all(fd_table_t *fdt)
+{
+	fdt_node_t *cur = fdt->head;
+
+	while (cur) {
+		fdt_node_t *next = cur->next;
+
+		free(cur->timestamps);
+		free(cur->ref_count);
+		free(cur->events);
+		free(cur);
+		cur = next;
+	}
+}
+
+/* Grow the table until @fd fits, one chunk at a time.  Returns the
+ * last chunk allocated (the one covering @fd), or NULL when a chunk
+ * allocation fails.  Caller holds fdt->lock (see fd_event_install). */
+static fdt_node_t *fdt_node_expand(fd_table_t *fdt, int fd)
+{
+ fdt_node_t *node = NULL;
+ while (fd >= fdt->max_fd) {
+ node = fdt_node_alloc(fdt);
+ if (!node)
+ break;
+ }
+ return node;
+}
+
+/* Walk the chunk list to the chunk covering @fd; NULL when @fd is
+ * beyond the table's current capacity or the list is inconsistent. */
+static fdt_node_t *fdt_node_search(fd_table_t *fdt, int fd)
+{
+	fdt_node_t *node;
+	int hops;
+
+	if (fd >= fdt->max_fd)
+		return NULL;
+
+	node = fdt->head;
+	for (hops = fd >> fdt->node_shift; hops > 0; hops--) {
+		if (!node) {
+			errprint(
+				"fdt node search ERROR! fd: %d, pos: %d, fdt->max_fd: %d\n",
+				fd, hops, fdt->max_fd);
+			return NULL;
+		}
+		node = node->next;
+	}
+	return node;
+}
+
+/*
+ * Allocate an empty fd table.  The expansion lock is created
+ * PTHREAD_PROCESS_SHARED because peer services walk the table.
+ * Fix: ->magic was the only field left indeterminate by the original
+ * (reading it later is UB); zero it here.  Presumably the owner stamps
+ * the real FDTAB_MAG* value elsewhere -- TODO confirm.
+ */
+static fd_table_t *fd_table_alloc(unsigned int node_shift)
+{
+	fd_table_t *fdt;
+	pthread_mutexattr_t mattr;
+
+	fdt = malloc(sizeof(fd_table_t));
+	if (!fdt)
+		return NULL;
+	fdt->head = NULL;
+	fdt->tail = NULL;
+	fdt->max_fd = 0;
+	fdt->node_shift = node_shift;
+	fdt->node_mask = (1 << node_shift) - 1;
+	fdt->magic = 0;
+	fdt->freelist = NULL;
+	pthread_mutex_init(&fdt->list_lock, NULL);
+
+	pthread_mutexattr_init(&mattr);
+	pthread_mutexattr_setpshared(&mattr, PTHREAD_PROCESS_SHARED);
+	pthread_mutex_init(&fdt->lock, &mattr);
+	return fdt;
+}
+
+/* Free the table and all its chunks.  NULL-safe.
+ * NOTE(review): fd_events still on fdt->freelist (and their queues)
+ * are not released here, nor are the two mutexes destroyed --
+ * presumably acceptable at process teardown; confirm against callers. */
+static void fd_table_free(fd_table_t *fdt)
+{
+ if (!fdt)
+ return;
+ fdt_node_free_all(fdt);
+ free(fdt);
+ return;
+}
+
+/* Allocate an fd_event for @fd registered on @epfd, copying the user's
+ * epoll registration and setting up a DEFAULT_QUEUE_SIZE-slot event
+ * queue.  ->outdated is published with release so concurrent readers
+ * (fd_event_get) never see a stale "outdated" flag.  Returns NULL on
+ * allocation/queue-init failure. */
+static inline fd_event_t *fd_event_alloc(int fd, int epfd,
+ struct epoll_event *event)
+{
+ fd_event_t *fde;
+ uint64_t *qdata;
+
+ fde = (fd_event_t *)malloc(sizeof(fd_event_t));
+ if (!fde)
+ return NULL;
+
+ fde->fd = fd;
+ fde->epfd = epfd;
+ fde->epev = *event;
+ fde->events = 0;
+ fde->node = NULL;
+ fde->next = NULL;
+ fde->timestamp = 0;
+ fde->service_key = 0;
+ __atomic_store_n(&fde->outdated, (uint16_t)0, __ATOMIC_RELEASE);
+
+ qdata = malloc(DEFAULT_QUEUE_SIZE * sizeof(uint64_t));
+ if (!qdata) {
+ errprint("malloc queue data failed\n");
+ goto malloc_error;
+ }
+ if (rpal_queue_init(&fde->q, qdata, DEFAULT_QUEUE_SIZE)) {
+ errprint("fde queue alloc failed, fd: %d\n", fd);
+ goto init_error;
+ }
+ return fde;
+
+init_error:
+ free(qdata);
+malloc_error:
+ free(fde);
+ return NULL;
+}
+
+/* Destroy an fd_event: tear down its queue, release the queue storage
+ * returned by rpal_queue_destroy(), then the event itself.  NULL-safe. */
+static inline void fd_event_free(fd_event_t *fde)
+{
+	if (!fde)
+		return;
+
+	free(rpal_queue_destroy(&fde->q));
+	free(fde);
+}
+
+/*
+ * Push @fde onto the table's lazy-free list (LIFO).
+ * Fix: the original's empty-list branch stored @fde as the head
+ * without resetting fde->next, so a stale ->next pointer could leak
+ * into the list; unconditionally relink ->next to the current head
+ * (which also covers the empty case with NULL).
+ */
+static void fdt_freelist_insert(fd_table_t *fdt, fd_event_t *fde)
+{
+	if (!fde)
+		return;
+
+	pthread_mutex_lock(&fdt->list_lock);
+	fde->next = fdt->freelist;
+	fdt->freelist = fde;
+	pthread_mutex_unlock(&fdt->list_lock);
+}
+
+/* Unconditionally reclaim every freelist entry that belongs to the
+ * departed service @service_key: force the slot's refcount to
+ * FDE_FREEING (ignoring in-flight references -- the peer is gone),
+ * unlink, clear the slot, publish FDE_FREED (-1) with release, then
+ * free the fde. */
+static void fdt_freelist_forcefree(fd_table_t *fdt, uint64_t service_key)
+{
+ fd_event_t *prev, *pos, *f_fde;
+ fdt_node_t *node;
+ int idx;
+
+ pthread_mutex_lock(&fdt->list_lock);
+ prev = NULL;
+ pos = fdt->freelist;
+ while (pos) {
+ idx = pos->fd & fdt->node_mask;
+ node = pos->node;
+ if (pos->service_key == service_key) {
+ __atomic_exchange_n(&node->ref_count[idx], FDE_FREEING,
+ __ATOMIC_RELAXED);
+ if (!prev) {
+ fdt->freelist = pos->next;
+ } else {
+ prev->next = pos->next;
+ }
+ f_fde = pos;
+ pos = pos->next;
+ node->events[idx] = NULL;
+ __atomic_store_n(&node->ref_count[idx], -1,
+ __ATOMIC_RELEASE);
+ fd_event_free(f_fde);
+ } else {
+ prev = pos;
+ pos = pos->next;
+ }
+ }
+ pthread_mutex_unlock(&fdt->list_lock);
+ return;
+}
+
+/* Reclaim freelist entries whose slots have no readers left: a CAS
+ * from FDE_AVAILABLE (refcount 0) to FDE_FREEING claims the slot; any
+ * positive refcount means a reader still holds the fde and the entry
+ * stays queued for a later pass.  Claimed slots are cleared and
+ * published as FDE_FREED (-1) with release before the fde is freed. */
+static void fdt_freelist_lazyfree(fd_table_t *fdt)
+{
+ fd_event_t *prev, *pos, *f_fde;
+ fdt_node_t *node;
+ int idx;
+ int expected;
+
+ pthread_mutex_lock(&fdt->list_lock);
+ prev = NULL;
+ pos = fdt->freelist;
+
+ while (pos) {
+ idx = pos->fd & fdt->node_mask;
+ // do lazyfree when ref_count less than 0
+ expected = FDE_AVAILABLE;
+ node = pos->node;
+ if (__atomic_compare_exchange_n(
+ &node->ref_count[idx], &expected, FDE_FREEING, 1,
+ __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
+ if (!prev) {
+ fdt->freelist = pos->next;
+ } else {
+ prev->next = pos->next;
+ }
+ f_fde = pos;
+ pos = pos->next;
+ node->events[idx] = NULL;
+ __atomic_store_n(&node->ref_count[idx], -1,
+ __ATOMIC_RELEASE);
+ fd_event_free(f_fde);
+ } else {
+ if (expected < 0) {
+ errprint("error ref: %d, fd: %d\n", expected,
+ pos->fd);
+ }
+ prev = pos;
+ pos = pos->next;
+ }
+ }
+ pthread_mutex_unlock(&fdt->list_lock);
+ return;
+}
+
+/* Current install-generation timestamp for @fd; 0 when @fd is outside
+ * the table. */
+static uint16_t fde_timestamp_get(fd_table_t *fdt, int fd)
+{
+	fdt_node_t *node = fdt_node_search(fdt, fd);
+
+	if (!node)
+		return 0;
+
+	return node->timestamps[fd & fdt->node_mask];
+}
+
+static void fd_event_put(fd_table_t *fdt, fd_event_t *fde);
+
+/* Take a reference on @fd's fd_event.  A CAS retry loop increments the
+ * slot refcount only while it is non-negative; a negative count means
+ * the slot is freed/freeing and NULL is returned.  An fde marked
+ * ->outdated after the reference was taken is dropped again (put) and
+ * NULL returned. */
+static fd_event_t *fd_event_get(fd_table_t *fdt, int fd)
+{
+ fd_event_t *fde = NULL;
+ fdt_node_t *node;
+ int idx;
+ int val = -1;
+ int expected;
+
+ node = fdt_node_search(fdt, fd);
+ if (!node) {
+ return NULL;
+ }
+ idx = fd & fdt->node_mask;
+
+retry:
+ val = __atomic_load_n(&node->ref_count[idx], __ATOMIC_ACQUIRE);
+ if (val < 0)
+ return NULL;
+ expected = val;
+ val++;
+ if (!__atomic_compare_exchange_n(&node->ref_count[idx], &expected, val,
+ 1, __ATOMIC_SEQ_CST,
+ __ATOMIC_SEQ_CST)) {
+ if (expected >= 0) {
+ goto retry;
+ } else {
+ return NULL;
+ }
+ }
+ fde = node->events[idx];
+ if (!fde) {
+ errprint("error get: %d, fd: %d\n", val, fd);
+ } else {
+ if (__atomic_load_n(&fde->outdated, __ATOMIC_ACQUIRE)) {
+ fd_event_put(fdt, fde);
+ fde = NULL;
+ }
+ }
+ return fde;
+}
+
+/* Drop a reference taken by fd_event_get(); a negative result means a
+ * get/put imbalance and is logged.  NULL-safe. */
+static void fd_event_put(fd_table_t *fdt, fd_event_t *fde)
+{
+ int idx;
+ int val;
+
+ if (!fde)
+ return;
+
+ idx = fde->fd & fdt->node_mask;
+ val = __atomic_sub_fetch(&fde->node->ref_count[idx], 1,
+ __ATOMIC_RELEASE);
+ if (val < 0) {
+ errprint("error put: %d, fd: %d\n", val, fde->fd);
+ }
+ return;
+}
+
+int rpal_access(void *addr, access_fn do_access, int *ret, va_list va);
+
+/* Plain (non-protected) invocation of @do_access; stores its result
+ * through @ret when non-NULL.  @addr is unused here -- the protected
+ * path goes through the assembly rpal_access_warpper below. */
+int rpal_access(void *addr, access_fn do_access, int *ret, va_list va)
+{
+ int func_ret;
+
+ func_ret = do_access(va);
+ if (ret) {
+ *ret = func_ret;
+ }
+ return RPAL_SUCCESS;
+}
+
+/* Assembly trampoline (see librpal asm): runs do_access with the
+ * sender error context armed so a fault in the peer's memory unwinds
+ * back here instead of crashing.  ("warpper" [sic] -- defined in asm,
+ * renaming would break the link.) */
+extern status_t rpal_access_warpper(void *addr, access_fn do_access, int *ret,
+ va_list va);
+
+/* Like rpal_read_access_safety() but also restores the caller's PKRU
+ * afterwards, for accesses that may write peer memory. */
+#define rpal_write_access_safety(ACCESS_FUNC, FUNC_RET, ...) \
+ ({ \
+ status_t __access = RPAL_FAILURE; \
+ uint32_t old_pkru = 0; \
+ old_pkru = rdpkru(); \
+ __access = rpal_read_access_safety(ACCESS_FUNC, FUNC_RET, \
+ ##__VA_ARGS__); \
+ wrpkru(old_pkru); \
+ __access; \
+ })
+
+/* Run @do_access under the sender's fault-recovery context: lazily
+ * initializes the calling thread as a sender, arms sc->ec with
+ * RPAL_ERROR_MAGIC for the duration of the call, and disarms it after.
+ * Variadic arguments are forwarded to @do_access as a va_list. */
+status_t rpal_read_access_safety(access_fn do_access, int *ret, ...)
+{
+ rpal_sender_info_t *sender;
+ sender_context_t *sc;
+ rpal_error_code_t error;
+ status_t access = RPAL_FAILURE;
+ va_list args;
+
+ sender = current_rpal_sender();
+ if (!sender || !rpal_sender_inited(sender)) {
+ dbprint(RPAL_DEBUG_SENDER, "%s: sender(%d) do not init\n",
+ __FUNCTION__, getpid());
+ if (RPAL_FAILURE == rpal_sender_init(&error)) {
+ return RPAL_FAILURE;
+ }
+ sender = current_rpal_sender();
+ }
+ sc = &sender->sc;
+ sc->ec.magic = RPAL_ERROR_MAGIC;
+ va_start(args, ret);
+ access = rpal_access_warpper(&(sc->ec.ersp), do_access, ret, args);
+ va_end(args);
+ sc->ec.magic = 0;
+
+ return access;
+}
+
+/* Ask the kernel to map local unix-socket fd @connfd to the peer
+ * service's fd descriptor word; RPAL_FAILURE on ioctl error. */
+static int64_t __do_rpal_uds_fdmap(int service_id, int connfd)
+{
+	int64_t res;
+	struct rpal_uds_fdmap_arg arg = {
+		.cfd = connfd,
+		.service_id = service_id,
+		.res = &res,
+	};
+
+	if (rpal_ioctl(RPAL_IOCTL_UDS_FDMAP, (unsigned long)&arg) < 0)
+		return RPAL_FAILURE;
+
+	return res;
+}
+
+/* access_fn body for rpal_uds_fdmap(), run under fault protection.
+ * Unpacks (service_id << 32 | connfd), maps the fd via the kernel,
+ * folds the peer fd's install timestamp into the high bits, and tags
+ * our own fde with the peer's service key so release can find it.
+ * Failing to tag the local fde is non-fatal (falls through to "out"). */
+static status_t do_rpal_uds_fdmap(va_list va)
+{
+ int64_t ret;
+ int sfd, cfd, sid;
+ struct rpal_thread_pool *srtp;
+ uint64_t stamp = 0;
+ uint64_t sid_fd;
+ uint64_t *rpalfd;
+ fd_event_t *fde;
+
+ sid_fd = va_arg(va, uint64_t);
+ rpalfd = va_arg(va, uint64_t *);
+
+ if (!rpalfd) {
+ return RPAL_FAILURE;
+ }
+ sid = get_high32(sid_fd);
+ cfd = get_low32(sid_fd);
+
+ ret = __do_rpal_uds_fdmap(sid, cfd);
+ if (ret < 0) {
+ errprint("%s failed %ld, cfd: %d\n", __FUNCTION__, ret, cfd);
+ return RPAL_FAILURE;
+ }
+
+ srtp = get_service_from_id(sid);
+ if (!srtp) {
+ errprint("%s INVALID service_id: %d\n", __FUNCTION__, sid);
+ return RPAL_FAILURE;
+ }
+ sfd = get_sfd(ret);
+ stamp = fde_timestamp_get(srtp->fdt, sfd);
+ ret |= (stamp << HIGH16_OFFSET);
+
+ fde = fd_event_get(threads_md.rtp->fdt, cfd);
+ if (!fde) {
+ errprint("%s get self fde error, fd: %d\n", __FUNCTION__, cfd);
+ goto out;
+ }
+ fde->service_key = srtp->service_key;
+ fd_event_put(threads_md.rtp->fdt, fde);
+out:
+ *rpalfd = ret;
+ return RPAL_SUCCESS;
+}
+
+/* Resolve the peer receiver id behind (service_id << 32 | connfd);
+ * RPAL_FAILURE when the kernel mapping fails. */
+int rpal_get_peer_rid(uint64_t sid_fd)
+{
+	int sid = get_high32(sid_fd);
+	int cfd = get_low32(sid_fd);
+	int64_t mapped;
+
+	mapped = __do_rpal_uds_fdmap(sid, cfd);
+	if (mapped < 0) {
+		errprint("%s failed %ld, cfd: %d\n", __FUNCTION__, mapped,
+			 cfd);
+		return RPAL_FAILURE;
+	}
+
+	return get_rid(mapped);
+}
+
+/* Public entry: map (service_id << 32 | connfd) to an rpal fd word in
+ * *rpalfd.  Temporarily widens PKRU to allow reading the peer's code
+ * region, runs the mapping under the sender fault-recovery context,
+ * then restores the caller's PKRU before returning. */
+status_t rpal_uds_fdmap(uint64_t sid_fd, uint64_t *rpalfd)
+{
+ status_t ret = RPAL_FAILURE;
+ status_t access;
+ uint32_t old_pkru;
+
+ old_pkru = rdpkru();
+ wrpkru(old_pkru & RPAL_PKRU_BASE_CODE_READ);
+ access = rpal_read_access_safety(do_rpal_uds_fdmap, &ret, sid_fd,
+ rpalfd);
+ wrpkru(old_pkru);
+ if (access == RPAL_FAILURE) {
+ return RPAL_FAILURE;
+ }
+ return ret;
+}
+
+/* Install a new fd_event for @fd: allocate the fde, grow or locate the
+ * owning chunk under fdt->lock, then publish it -- slot contents are
+ * written first and the refcount is CAS'd from FDE_FREED to
+ * FDE_AVAILABLE last, so concurrent fd_event_get() never sees a
+ * half-initialized slot.  The slot must currently be FDE_FREED (a
+ * lazyfree pass runs first to reclaim it if possible). */
+static status_t fd_event_install(fd_table_t *fdt, int fd, int epfd,
+ struct epoll_event *event)
+{
+ fdt_node_t *node;
+ fd_event_t *fde;
+ int idx;
+ int expected;
+
+ fde = fd_event_alloc(fd, epfd, event);
+ if (!fde) {
+ goto fde_error;
+ }
+ pthread_mutex_lock(&fdt->lock);
+ if (fd >= fdt->max_fd) {
+ node = fdt_node_expand(fdt, fd);
+ } else {
+ node = fdt_node_search(fdt, fd);
+ }
+ pthread_mutex_unlock(&fdt->lock);
+
+ if (!node) {
+ errprint("fd node search failed, fd: %d\n", fd);
+ goto node_error;
+ }
+ idx = fd & fdt->node_mask;
+ fdt_freelist_lazyfree(fdt);
+ expected = __atomic_load_n(&node->ref_count[idx], __ATOMIC_ACQUIRE);
+ if (expected != FDE_FREED) {
+ goto node_error;
+ }
+ fde->timestamp =
+ __atomic_add_fetch(&node->timestamps[idx], 1, __ATOMIC_RELEASE);
+ fde->node = node;
+ node->events[idx] = fde;
+ if (!__atomic_compare_exchange_n(&node->ref_count[idx], &expected,
+ FDE_AVAILABLE, 1, __ATOMIC_SEQ_CST,
+ __ATOMIC_SEQ_CST)) {
+ errprint("may override fd: %d, val: %d\n", fd, expected);
+ node->events[idx] = NULL;
+ goto node_error;
+ }
+ return RPAL_SUCCESS;
+
+node_error:
+ fd_event_free(fde);
+fde_error:
+ return RPAL_FAILURE;
+}
+
+/* Remove @fd's fd_event.  The fde is first marked ->outdated (release)
+ * so new fd_event_get() callers drop it; then a CAS from FDE_AVAILABLE
+ * to FDE_FREEING frees it immediately if no reader holds a reference,
+ * otherwise it is parked on the freelist for a later lazyfree pass. */
+static status_t fd_event_uninstall(fd_table_t *fdt, int fd)
+{
+ fd_event_t *fde;
+ fdt_node_t *node;
+ int idx;
+ int ret = RPAL_SUCCESS;
+ int expected;
+
+ node = fdt_node_search(fdt, fd);
+ if (!node) {
+ ret = RPAL_FAILURE;
+ goto out;
+ }
+ idx = fd & fdt->node_mask;
+ fde = node->events[idx];
+ if (!fde) {
+ ret = RPAL_FAILURE;
+ goto out;
+ }
+ expected = FDE_AVAILABLE;
+ __atomic_store_n(&fde->outdated, (uint16_t)1, __ATOMIC_RELEASE);
+ if (__atomic_compare_exchange_n(&node->ref_count[idx], &expected,
+ FDE_FREEING, 1, __ATOMIC_SEQ_CST,
+ __ATOMIC_SEQ_CST)) {
+ node->events[idx] = NULL;
+ __atomic_store_n(&node->ref_count[idx], -1, __ATOMIC_RELEASE);
+ fd_event_free(fde);
+ } else {
+ if (expected < FDE_AVAILABLE) {
+ errprint("error cnt: %d, fd: %d\n", expected, fde->fd);
+ }
+ // link this fde for free_head
+ fdt_freelist_insert(fdt, fde);
+ }
+
+out:
+ fdt_freelist_lazyfree(fdt);
+ return ret;
+}
+
+/* Replace @fd's stored epoll registration (EPOLL_CTL_MOD path) while
+ * holding a reference; pending event bits are reset. */
+static status_t fd_event_modify(fd_table_t *fdt, int fd,
+ struct epoll_event *event)
+{
+ fd_event_t *fde;
+
+ fde = fd_event_get(fdt, fd);
+ if (!fde) {
+ errprint("fde MOD fd(%d) ERROR!\n", fd);
+ return RPAL_FAILURE;
+ }
+ fde->fd = fd;
+ fde->epev = *event;
+ fde->events = 0;
+ fd_event_put(fdt, fde);
+ return RPAL_SUCCESS;
+}
+
+/* Initialize receiver slot @id of @rtp: allocate its epoll fiber stack
+ * and trampoline, and bind the kernel-shared receiver_context page
+ * entry.  Returns 0 on success, -1 on allocation failure (with the
+ * stack rolled back). */
+static int rpal_receiver_info_create(struct rpal_thread_pool *rtp, int id)
+{
+ rpal_receiver_info_t *rri = &rtp->rris[id];
+
+ rri->ep_stack = fiber_ctx_alloc(NULL, NULL, DEFUALT_STACK_SIZE);
+ if (!rri->ep_stack)
+ return -1;
+
+ rri->trampoline = fiber_ctx_alloc(NULL, NULL, TRAMPOLINE_SIZE);
+ if (!rri->trampoline) {
+ fiber_ctx_free(rri->ep_stack);
+ return -1;
+ }
+
+ rri->rc = threads_md.rc + id;
+ rri->rc->receiver_id = id;
+ rri->rtp = rtp;
+
+ return 0;
+}
+
+/* Release the fiber stacks allocated by rpal_receiver_info_create(). */
+static void rpal_receiver_info_destroy(rpal_receiver_info_t *rri)
+{
+ fiber_ctx_free(rri->ep_stack);
+ fiber_ctx_free(rri->trampoline);
+ return;
+}
+
+/* Build the service's receiver thread pool: receiver-info array, fd
+ * table, kernel-shared receiver-context pages (stored in rtm->rc) and
+ * per-receiver eventfd slots.  Fails (NULL) when already initialized
+ * or on any allocation failure; partial state is unwound via the goto
+ * chain below, in reverse order of construction. */
+static struct rpal_thread_pool *rpal_thread_pool_create(int nr_threads,
+ rpal_thread_metadata_t *rtm)
+{
+ void *p;
+ int i, j;
+ struct rpal_thread_pool *rtp;
+
+ if (rpal_inited())
+ goto out;
+ rtp = malloc(sizeof(struct rpal_thread_pool));
+ if (rtp == NULL) {
+ goto out;
+ }
+ threads_md.eventfds = malloc(nr_threads * sizeof(int));
+ if (threads_md.eventfds == NULL) {
+ goto eventfds_alloc_fail;
+ }
+ rtp->nr_threads = nr_threads;
+ rtp->pkey = -1;
+ p = malloc(nr_threads * sizeof(rpal_receiver_info_t));
+ if (p == NULL) {
+ goto rri_alloc_fail;
+ }
+ rtp->rris = p;
+ memset(p, 0, nr_threads * sizeof(rpal_receiver_info_t));
+
+ rtp->fdt = fd_table_alloc(DEFAULT_NODE_SHIFT);
+ if (!rtp->fdt) {
+ goto fdt_alloc_fail;
+ }
+
+ p = rpal_get_shared_page(rtm->epcpage_order);
+
+ if (!p)
+ goto page_alloc_fail;
+ rtm->rc = p;
+
+ for (i = 0; i < nr_threads; i++) {
+ if (rpal_receiver_info_create(rtp, i)) {
+ // unwind the receivers created so far
+ for (j = 0; j < i; j++) {
+ rpal_receiver_info_destroy(&rtp->rris[j]);
+ }
+ goto rri_create_fail;
+ }
+ }
+ return rtp;
+
+rri_create_fail:
+ rpal_free_shared_page(rtm->rc, rtm->epcpage_order);
+page_alloc_fail:
+ fd_table_free(rtp->fdt);
+fdt_alloc_fail:
+ free(rtp->rris);
+rri_alloc_fail:
+ free(threads_md.eventfds);
+eventfds_alloc_fail:
+ free(rtp);
+out:
+ return NULL;
+}
+
+/* Tear down everything rpal_thread_pool_create() built (plus the
+ * release lock).  ("destory" [sic] -- renaming would break callers.) */
+static void rpal_thread_pool_destory(rpal_thread_metadata_t *rtm)
+{
+ int i;
+ struct rpal_thread_pool *rtp;
+
+ if (!rpal_inited()) {
+ errprint("thread pool is not created.\n");
+ return;
+ }
+ pthread_mutex_destroy(&release_lock);
+ rtp = threads_md.rtp;
+ fd_table_free(rtp->fdt);
+ for (i = 0; i < rtp->nr_threads; ++i) {
+ rpal_receiver_info_destroy(&rtp->rris[i]);
+ }
+ rpal_free_shared_page(threads_md.rc, threads_md.epcpage_order);
+ free(rtp->rris);
+ free(threads_md.eventfds);
+ free(rtp);
+}
+
+/* Nonzero when @rri has completed rpal_receiver_init().  NULL-safe. */
+static inline int rpal_receiver_inited(rpal_receiver_info_t *rri)
+{
+ if (!rri)
+ return 0;
+ return (rri->status != RPAL_RECEIVER_UNINITIALIZED);
+}
+
+/* Nonzero when @rri is ready to accept rcalls. */
+static inline int rpal_receiver_available(rpal_receiver_info_t *rri)
+{
+ return (rri->status == RPAL_RECEIVER_AVAILABLE);
+}
+
+/* Atomically hand out the next receiver-pool slot index. */
+static int rpal_receiver_idx_get(void)
+{
+ return __atomic_fetch_add(&threads_md.rpal_receiver_idx, 1,
+ __ATOMIC_RELAXED);
+}
+
/*
 * Register the calling thread as an RPAL receiver.
 *
 * Claims the next free slot in the receiver pool, records the thread's
 * tid and TLS base, initializes its user-event queue and shared
 * receiver context, registers it with the kernel and creates a
 * non-blocking, close-on-exec eventfd for the slot.
 *
 * Returns the eventfd descriptor on success, RPAL_FAILURE otherwise.
 */
int rpal_receiver_init(void)
{
	int ret = 0;
	int receiver_idx;
	rpal_receiver_info_t *rri;

	if (!rpal_inited()) {
		errprint("thread pool is not created.\n");
		goto error_out;
	}

	/* Slots are handed out monotonically and never recycled. */
	receiver_idx = rpal_receiver_idx_get();
	if (receiver_idx >= threads_md.rtp->nr_threads) {
		errprint(
			"rpal thread pool size exceeded. thread_idx: %d, thread pool capacity: %d\n",
			receiver_idx, threads_md.rtp->nr_threads);
		goto error_out;
	}

	rri = threads_md.rtp->rris + receiver_idx;
	rri->status = RPAL_RECEIVER_UNINITIALIZED;
	rri->tid = syscall(SYS_gettid);
	rri->tls_base = read_tls_base();

	rpal_uevent_queue_init(&rri->ueventq, &rri->uqlock);

	/* Reset the context shared with senders and the kernel. */
	rri->rc->rpal_ep_poll_magic = 0;
	rri->rc->receiver_state = RPAL_RECEIVER_STATE_RUNNING;
	rri->rc->ep_pending = 0;
	__atomic_store_n(&rri->rc->sender_state, RPAL_SENDER_STATE_RUNNING,
			 __ATOMIC_RELAXED);
	ret = rpal_register_receiver(rri);
	if (ret < 0) {
		errprint("rpal thread %ld register failed %d\n", rri->tid, ret);
		goto error_out;
	}
	ret = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
	if (ret < 0) {
		errprint("rpal thread %ld eventfd failed %d\n", rri->tid,
			 errno);
		goto eventfd_failed;
	}
	threads_md.eventfds[receiver_idx] = ret;
	rri->status = RPAL_RECEIVER_INITIALIZED;
	/* Success: the eventfd doubles as the return value. */
	return ret;

eventfd_failed:
	rpal_unregister_receiver();
error_out:
	return RPAL_FAILURE;
}
+
/*
 * Unregister the calling receiver thread: close its eventfd, mark the
 * slot uninitialized and tell the kernel. No-op if the thread never
 * completed rpal_receiver_init().
 *
 * NOTE(review): the eventfd slot is reset to 0, which is itself a
 * valid fd number -- a -1 sentinel would be safer; confirm no reader
 * treats 0 as "closed".
 */
void rpal_receiver_exit(void)
{
	rpal_receiver_info_t *rri = current_rpal_thread();
	int id, fd;

	if (!rpal_receiver_inited(rri))
		return;
	rri->status = RPAL_RECEIVER_UNINITIALIZED;
	id = rri->rc->receiver_id;
	fd = threads_md.eventfds[id];
	close(fd);
	threads_md.eventfds[id] = 0;
	rpal_unregister_receiver();
	return;
}
+
/*
 * Copy the callee-saved register snapshot from a suspended fcontext
 * (laid out as fiber_stack_t at @src) into the shared task_context so
 * another party can resume this execution context.
 *
 * NOTE(review): @tc is volatile, so these stores must stay in source
 * order. The "+ 0x40" assumes the saved-register area is 64 bytes and
 * rsp should point just past it -- confirm against the fcontext
 * assembly's stack layout.
 */
static inline void set_task_context(volatile task_context_t *tc, void *src)
{
	fiber_stack_t *fstack = src;
	tc->r15 = fstack->r15;
	tc->r14 = fstack->r14;
	tc->r13 = fstack->r13;
	tc->r12 = fstack->r12;
	tc->rbx = fstack->rbx;
	tc->rbp = fstack->rbp;
	tc->rip = fstack->rip;
	tc->rsp = (unsigned long)(src + 0x40);
}
+
/*
 * Runs on the dedicated epoll stack: set the magic so the kernel
 * recognizes an RPAL-aware epoll_wait, perform the blocking wait with
 * the parameters staged in the shared context, then jump back to the
 * main context carrying the syscall result in t.ud.
 */
static transfer_t _syscall_epoll_wait(transfer_t t)
{
	rpal_receiver_info_t *rri = t.ud;
	volatile receiver_context_t *rc = rri->rc;
	long ret;

	rc->rpal_ep_poll_magic = RPAL_EP_POLL_MAGIC;
	ret = epoll_wait(rc->epfd, rc->ep_events, rc->maxevents,
			 rc->timeout);
	t = jump_fcontext(rri->main_ctx, (void *)ret);
	return t;
}
+
+extern void rpal_ret_critical(volatile receiver_context_t *rc,
+ rpal_call_info_t *rci);
+
/*
 * Trampoline entry run via ontop_fcontext: publish the caller's
 * suspended context in the shared task_context (so a sender can resume
 * it), remember it as main_ctx, leave the RPAL critical section, then
 * run the real epoll_wait on its own dedicated stack.
 */
static transfer_t syscall_epoll_wait(transfer_t t)
{
	rpal_receiver_info_t *rri = t.ud;
	volatile receiver_context_t *rc = rri->rc;
	rpal_call_info_t *rci = &rri->rci;
	task_t *estk = rri->ep_stack;

	set_task_context(&rri->rc->task_context, t.fctx);
	rri->main_ctx = t.fctx;

	rpal_ret_critical(rc, rci);

	/* Fresh context on the epoll stack for the blocking wait. */
	estk->fctx = make_fcontext(estk->sp, 0, NULL);
	t = ontop_fcontext(rri->ep_stack->fctx, rri, _syscall_epoll_wait);
	return t;
}
+
+static inline int ep_kernel_events_available(volatile int *ep_pending)
+{
+ return (RPAL_KERNEL_PENDING &
+ __atomic_load_n(ep_pending, __ATOMIC_ACQUIRE));
+}
+
+static inline int ep_user_events_available(volatile int *ep_pending)
+{
+ return (RPAL_USER_PENDING &
+ __atomic_load_n(ep_pending, __ATOMIC_ACQUIRE));
+}
+
/*
 * Drain sender-posted (user-space) events from @uq into the caller's
 * epoll event array, up to @maxevents entries. Returns the number of
 * events written. Re-arms RPAL_USER_PENDING when events remain
 * undelivered.
 */
static inline int rpal_ep_send_events(epoll_uevent_queue_t *uq, fd_table_t *fdt,
				      volatile receiver_context_t *rc,
				      struct epoll_event *events, int maxevents)
{
	int fd = -1;
	int ret = 0;
	int res = 0;
	fd_event_t *fde = NULL;

	/* Clear the pending flag before draining; re-set below if we
	 * could not consume everything. */
	__atomic_and_fetch(&rc->ep_pending, ~RPAL_USER_PENDING,
			   __ATOMIC_ACQUIRE);
	while (uevent_queue_len(uq) && ret < maxevents) {
		fd = uevent_queue_del(uq);
		if (fd == -1) {
			errprint("uevent get failed\n");
			continue;
		}
		fde = fd_event_get(fdt, fd);
		if (!fde)
			continue;
		/* Consume the raised events, masked by what the caller
		 * subscribed to via epoll_ctl. */
		res = __atomic_exchange_n(&fde->events, 0, __ATOMIC_RELAXED);
		res &= fde->epev.events;
		if (res) {
			events[ret].data = fde->epev.data;
			events[ret].events = res;
			ret++;
		}
		fd_event_put(fdt, fde);
	}
	if (uevent_queue_len(uq) || ret == maxevents) {
		dbprint(RPAL_DEBUG_RECVER,
			"uevent queue still have events, len: %d, ret: %d, maxevents: %d\n",
			uevent_queue_len(uq), ret, maxevents);
		__atomic_fetch_or(&rc->ep_pending, RPAL_USER_PENDING,
				  __ATOMIC_RELAXED);
	}
	return ret;
}
+
+extern void rpal_call_critical(volatile receiver_context_t *rc,
+ rpal_receiver_info_t *rri);
+
/*
 * RPAL-aware replacement for epoll_wait().
 *
 * Falls back to the plain syscall for non-receiver threads. Otherwise
 * stages the arguments in the shared receiver context and suspends via
 * a trampoline fcontext so a sender can jump directly into this
 * thread. Kernel- and user-pending fast paths skip the suspension.
 * User-space (sender-posted) events are appended to whatever the
 * kernel returned.
 *
 * Returns an event count like epoll_wait(), or a negative value on
 * error.
 */
int rpal_epoll_wait(int epfd, struct epoll_event *events, int maxevents,
		    int timeout)
{
	transfer_t t;
	rpal_call_info_t *rci;
	task_t *estk, *trampoline;
	volatile receiver_context_t *rc;
	epoll_uevent_queue_t *ueventq;
	rpal_receiver_info_t *rri = current_rpal_thread();
	long ret = 0;
	unsigned int mxcsr = 0, fpucw = 0;

	if (!rpal_receiver_inited(rri))
		return epoll_wait(epfd, events, maxevents, timeout);

	rc = rri->rc;
	estk = rri->ep_stack;
	trampoline = rri->trampoline;
	rci = &rri->rci;
	ueventq = &rri->ueventq;

	/* Stage the arguments where _syscall_epoll_wait() reads them. */
	rc->epfd = epfd;
	rc->ep_events = events;
	rc->maxevents = maxevents;
	rc->timeout = timeout;

	if (!rpal_receiver_available(rri)) {
		/* First wait on this thread: publish availability and
		 * build the epoll-stack context. */
		rri->status = RPAL_RECEIVER_AVAILABLE;
		estk->fctx = make_fcontext(estk->sp, 0, NULL);
		SAVE_FPU(mxcsr, fpucw);
		trampoline->fctx = make_fcontext(trampoline->sp, 0, NULL);
		t = ontop_fcontext(trampoline->fctx, rri, syscall_epoll_wait);
	} else {
		// kernel pending events
		if (ep_kernel_events_available(&rc->ep_pending)) {
			rc->rpal_ep_poll_magic =
				RPAL_EP_POLL_MAGIC; // clear KERNEL_PENDING
			ret = epoll_wait(epfd, events, maxevents, 0);
			rc->rpal_ep_poll_magic = 0;
			goto send_user_events;
		}
		// user pending events
		if (ep_user_events_available(&rc->ep_pending)) {
			goto send_user_events;
		}
		SAVE_FPU(mxcsr, fpucw);
		trampoline->fctx = make_fcontext(trampoline->sp, 0, NULL);
		t = ontop_fcontext(trampoline->fctx, rri, syscall_epoll_wait);
	}
	rc->rpal_ep_poll_magic = 0;

	/*
	 * Here is where sender starts after user context switch.
	 * The TLS may still be sender's. We should not do anything
	 * that may use TLS, otherwise the result cannot be controlled.
	 */

	switch (rc->receiver_state & RPAL_RECEIVER_STATE_MASK) {
	case RPAL_RECEIVER_STATE_RUNNING: // syscall kernel ret
		ret = (long)t.ud;
		break;
	case RPAL_RECEIVER_STATE_KERNEL_RET: // receiver kernel ret
		RESTORE_FPU(mxcsr, fpucw);
		ret = (long)t.fctx;
		break;
	case RPAL_RECEIVER_STATE_CALL: // rpalcall user jmp
		/* A sender jumped in: adopt our own TLS and pkey and
		 * remember the sender's state so we can jump back. */
		rci->sender_tls_base = read_tls_base();
		rci->pkru = rdpkru();
		write_tls_base(rri->tls_base);
		wrpkru(rpal_pkey_to_pkru(rri->rtp->pkey));
		rci->sender_fctx = t.fctx;
		break;
	default:
		errprint("Error ep_status: %ld\n",
			 rc->receiver_state & RPAL_RECEIVER_STATE_MASK);
		return -1;
	}

send_user_events:
	/* Top up with sender-posted events if there is room. */
	if (ret < maxevents && ret >= 0)
		ret += rpal_ep_send_events(ueventq, rri->rtp->fdt, rc,
					   events + ret, maxevents - ret);
	return ret;
}
+
+int rpal_epoll_wait_user(int epfd, struct epoll_event *events, int maxevents,
+ int timeout)
+{
+ volatile receiver_context_t *rc;
+ epoll_uevent_queue_t *ueventq;
+ rpal_receiver_info_t *rri = current_rpal_thread();
+
+ if (!rpal_receiver_inited(rri))
+ return 0;
+
+ if (!rpal_receiver_available(rri))
+ return 0;
+
+ rc = rri->rc;
+ ueventq = &rri->ueventq;
+ if (ep_user_events_available(&rc->ep_pending)) {
+ return rpal_ep_send_events(ueventq, rri->rtp->fdt, rc, events,
+ maxevents);
+ }
+ return 0;
+}
+
+int rpal_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event)
+{
+ fd_table_t *fdt;
+ int ret;
+
+ ret = epoll_ctl(epfd, op, fd, event);
+ if (ret || !rpal_inited()) {
+ return ret;
+ }
+ fdt = threads_md.rtp->fdt;
+ switch (op) {
+ case EPOLL_CTL_ADD:
+ if (event->events & EPOLLRPALINOUT_BITS) {
+ ret = fd_event_install(fdt, fd, epfd, event);
+ if (ret == RPAL_FAILURE)
+ goto install_error;
+ }
+ break;
+ case EPOLL_CTL_MOD:
+ fd_event_modify(fdt, fd, event);
+ break;
+ case EPOLL_CTL_DEL:
+ fd_event_uninstall(fdt, fd);
+ break;
+ }
+ return ret;
+install_error:
+ epoll_ctl(epfd, EPOLL_CTL_DEL, fd, event);
+ return RPAL_FAILURE;
+}
+
/*
 * ontop_fcontext hook run on the receiver side of a jump: record the
 * sender's just-suspended register context into its shared
 * sender_context so the kernel or receiver can resume it later.
 */
static transfer_t set_fcontext(transfer_t t)
{
	sender_context_t *sc = t.ud;

	set_task_context(&sc->task_context, t.fctx);
	return t;
}
+
+static void uq_lock(volatile uint64_t *uqlock, uint64_t key)
+{
+ uint64_t init = 0;
+
+ while (1) {
+ if (__atomic_compare_exchange_n(
+ uqlock, &init, (1UL << 63 | key), 1,
+ __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
+ return;
+ asm volatile("rep; nop");
+ init = 0;
+ }
+}
+
/* Release the user-event queue lock: 0 means unlocked. */
static void uq_unlock(volatile uint64_t *uqlock)
{
	uint64_t unlocked = 0;

	__atomic_store_n(uqlock, unlocked, __ATOMIC_RELAXED);
}
+
+static status_t do_rpal_call_jump(rpal_sender_info_t *rsi,
+ rpal_receiver_info_t *rri,
+ volatile receiver_context_t *rc)
+{
+ int desired, expected;
+ int64_t diff;
+
+WAKE_AGAIN:
+ desired = RPAL_BUILD_CALL_STATE(rsi->sc.sender_id,
+ threads_md.service_id);
+ expected = RPAL_RECEIVER_STATE_WAIT;
+ if (__atomic_compare_exchange_n(&rc->receiver_state, &expected, desired, 1,
+ __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) {
+ __atomic_store_n(&rc->sender_state, RPAL_SENDER_STATE_CALL,
+ __ATOMIC_RELAXED);
+ rsi->sc.start_time = _rdtsc();
+ ontop_fcontext(rri->main_ctx, &rsi->sc, set_fcontext);
+
+ if (__atomic_load_n(&rc->sender_state, __ATOMIC_RELAXED) ==
+ RPAL_SENDER_STATE_RUNNING) {
+ if (rc->receiver_state == RPAL_RECEIVER_STATE_LAZY_SWITCH)
+ read(-1, NULL, 0);
+ diff = _rdtsc() - rsi->sc.start_time;
+ rsi->sc.total_time += diff;
+ rri->rc->total_time += diff;
+ expected = desired;
+ desired = RPAL_RECEIVER_STATE_WAIT;
+ __atomic_compare_exchange_n(&rc->receiver_state, &expected,
+ desired, 1,
+ __ATOMIC_SEQ_CST,
+ __ATOMIC_SEQ_CST);
+
+ if (ep_user_events_available(&rc->ep_pending)) {
+ goto WAKE_AGAIN;
+ }
+ }
+ dbprint(RPAL_DEBUG_SENDER, "app return: 0x%x, %d, %d\n",
+ rc->receiver_state, rc->sender_state, sfd);
+ }
+ return RPAL_SUCCESS;
+}
+
+static inline void set_fde_trigger(fd_event_t *fde)
+{
+ __atomic_store_n(&fde->wait, FDE_TRIGGER_OUT, __ATOMIC_RELEASE);
+ return;
+}
+
+static inline int clear_fde_trigger(fd_event_t *fde)
+{
+ int expected = FDE_TRIGGER_OUT;
+
+ return __atomic_compare_exchange_n(&fde->wait, &expected,
+ FDE_NO_TRIGGER, 1, __ATOMIC_SEQ_CST,
+ __ATOMIC_SEQ_CST);
+}
+
/*
 * Core sender path, run under rpal_write_access_safety().
 *
 * va args: (int service_id, uint64_t rpalfd, int64_t *ptrs, int len,
 * int flags). Publishes @len pointers (RCALL_IN) or a writable
 * notification (RCALL_OUT) to the receiver identified by @rpalfd in
 * @service_id, queues the fd on the receiver's user-event list and
 * attempts a direct user-level jump into the receiver.
 *
 * Returns the number of pointers transferred, or a negative
 * rpal_error_code_t value (the caller negates again).
 */
static int do_rpal_call(va_list va)
{
	rpal_sender_info_t *rsi;
	rpal_receiver_info_t *rri;
	fd_event_t *fde;
	volatile receiver_context_t *rc;
	struct rpal_thread_pool *srtp;
	uint16_t stamp;
	uint8_t rid;
	int sfd;
	int ret = 0;
	int fall = 0;
	int pkey;

	int service_id = va_arg(va, int);
	uint64_t rpalfd = va_arg(va, uint64_t);
	int64_t *ptrs = va_arg(va, int64_t *);
	int len = va_arg(va, int);
	int flags = va_arg(va, int);

	rsi = current_rpal_sender();
	if (!rsi) {
		ret = RPAL_INVAL_THREAD;
		goto ERROR;
	}
	srtp = get_service_from_id(service_id);
	if (!srtp) {
		ret = RPAL_INVAL_SERVICE;
		goto ERROR;
	}
	pkey = get_service_pkey_from_id(service_id);

	/* rpalfd encodes receiver id, sender-side fd and a timestamp. */
	rid = get_rid(rpalfd);
	sfd = get_sfd(rpalfd);
	/* Grant ourselves access to the peer's pkey-protected memory. */
	wrpkru(rpal_pkru_union(rdpkru(), rpal_pkey_to_pkru(pkey)));
	rri = srtp->rris + rid;
	if (!rri) {
		errprint("INVALID rid: %u, rri is NULL\n", rid);
		ret = RPAL_INVALID_ARG;
		goto ERROR;
	}
	rc = rri->rc;
	rsi->sc.ec.tls_base = rri->tls_base;

	fde = fd_event_get(srtp->fdt, sfd);
	if (!fde) {
		ret = RPAL_INVALID_ARG;
		goto ERROR;
	}
	/* Reject rpalfds that outlived a reinstall of the fd entry. */
	stamp = get_fdtimestamp(rpalfd);
	if (fde->timestamp != stamp) {
		ret = RPAL_FDE_OUTDATED;
		goto FDE_PUT;
	}

	uq_lock(&rri->uqlock, threads_md.service_key);
	if (uevent_queue_len(&rri->ueventq) == MAX_RDY) {
		errprint("rdylist is full: [%u, %u]\n", rri->ueventq.l_beg,
			 rri->ueventq.l_end);
		ret = RPAL_CACHE_FULL;
		goto UNLOCK;
	}
	if (likely(flags & RCALL_IN)) {
		if (unlikely(rpal_queue_unused(&fde->q) < (uint32_t)len)) {
			/* Queue looks full: arm the trigger so the
			 * reader notifies us when space frees up. */
			set_fde_trigger(fde);
			fall = 1;
			/* fall through: try to put data to queue */
		}
		ret = rpal_queue_put(&fde->q, ptrs, len);
		if (ret != len) {
			errprint("fde queue put error: %d, data: %lx\n", ret,
				 (unsigned long)fde->q.data);
			ret = RPAL_QUEUE_PUT_FAILED;
			goto UNLOCK;
		}
		if (unlikely(fall)) {
			/* The put succeeded after all; disarm. */
			clear_fde_trigger(fde);
		}
		fde->events |= EPOLLRPALIN;
	} else if (unlikely(flags & RCALL_OUT)) {
		ret = 0;
		fde->events |= EPOLLRPALOUT;
	} else {
		errprint("rpal call failed, ptrs: %lx, len: %d",
			 (unsigned long)ptrs, len);
		ret = RPAL_INVALID_ARG;
		goto UNLOCK;
	}

	uevent_queue_add(&rri->ueventq, sfd);
	uq_unlock(&rri->uqlock);
	fd_event_put(srtp->fdt, fde);

	/* Publish before the jump so the receiver sees the event. */
	__atomic_fetch_or(&rc->ep_pending, RPAL_USER_PENDING,
			  __ATOMIC_RELEASE);
	do_rpal_call_jump(rsi, rri, rc);
	return ret;

UNLOCK:
	uq_unlock(&rri->uqlock);
FDE_PUT:
	fd_event_put(srtp->fdt, fde);
ERROR:
	return -ret;
}
+
+static int __rpal_write_ptrs_common(int service_id, uint64_t rpalfd,
+ int64_t *ptrs, int len, int flags)
+{
+ int ret = RPAL_FAILURE;
+ status_t access = RPAL_FAILURE;
+
+ if (unlikely(NULL == ptrs)) {
+ dbprint(RPAL_DEBUG_SENDER, "%s: ptrs is NULL\n", __FUNCTION__);
+ return -RPAL_INVALID_ARG;
+ }
+ if (unlikely(len <= 0 || ((uint32_t)len) > DEFAULT_QUEUE_SIZE)) {
+ dbprint(RPAL_DEBUG_SENDER,
+ "%s: data len less than or equal to zero\n",
+ __FUNCTION__);
+ return -RPAL_INVALID_ARG;
+ }
+
+ access = rpal_write_access_safety(do_rpal_call, &ret, service_id,
+ rpalfd, ptrs, len, flags);
+ if (access == RPAL_FAILURE) {
+ return -RPAL_ERR_PEER_MEM;
+ }
+ return ret;
+}
+
+int rpal_write_ptrs(int service_id, uint64_t rpalfd, int64_t *ptrs, int len)
+{
+ return __rpal_write_ptrs_common(service_id, rpalfd, ptrs, len,
+ RCALL_IN);
+}
+
+int rpal_read_ptrs(int fd, int64_t *dptrs, int len)
+{
+ fd_event_t *fde;
+ fd_table_t *fdt = threads_md.rtp->fdt;
+ int ret;
+
+ if (!rpal_inited())
+ return -1;
+
+ fde = fd_event_get(fdt, fd);
+ if (!fde)
+ return -1;
+
+ ret = rpal_queue_get(&fde->q, dptrs, len);
+ fd_event_put(fdt, fde);
+ return ret;
+}
+
+int rpal_read_ptrs_trigger_out(int fd, int64_t *dptrs, int len, int service_id,
+ uint64_t rpalfd)
+{
+ fd_event_t *fde;
+ fd_table_t *fdt = threads_md.rtp->fdt;
+ int access, ret = -1;
+ int nread;
+
+ if (!rpal_inited())
+ return -1;
+
+ fde = fd_event_get(fdt, fd);
+ if (!fde)
+ return -1;
+
+ nread = rpal_queue_get(&fde->q, dptrs, len);
+ if (nread > 0 && clear_fde_trigger(fde)) {
+ access =
+ rpal_write_access_safety(do_rpal_call, &ret, service_id,
+ rpalfd, NULL, 0, RCALL_OUT);
+ if (access == RPAL_FAILURE || ret < 0) {
+ set_fde_trigger(fde);
+ errprint(
+ "trigger out failed! access: %d, ret: %d, id: %d, rpalfd: %lx\n",
+ access, ret, service_id, rpalfd);
+ }
+ }
+ fd_event_put(fdt, fde);
+
+ return nread;
+}
+
/* x86 protection keys occupy the range [0, 15]. */
static inline int pkey_is_invalid(const int pkey)
{
	return !(pkey >= 0 && pkey <= 15);
}
+
+static status_t rpal_thread_metadata_init(int nr_rpalthread,
+ rpal_error_code_t *error)
+{
+ uint64_t key;
+ struct rpal_thread_pool *rtp;
+ key = __rpal_get_service_key();
+ if (key >= 1UL << 63) {
+ ERRREPORT(
+ error, RPAL_ERR_SERVICE_KEY,
+ "rpal service key error. Service key: 0x%lx, oeverflow, should less than 2^63\n",
+ key);
+ goto error_out;
+ }
+ threads_md.service_key = key;
+ threads_md.service_id = __rpal_get_service_id();
+ pthread_mutex_init(&release_lock, NULL);
+ rpal_get_critical_addr(&rcs);
+ rtp = rpal_thread_pool_create(nr_rpalthread, &threads_md);
+ if (rtp == NULL) {
+ goto error_out;
+ }
+ rtp->service_key = threads_md.service_key;
+ rtp->service_id = threads_md.service_id;
+ threads_md.rtp = rtp;
+ if (rpal_enable_service(error) == RPAL_FAILURE)
+ goto destroy_thread_pool;
+ threads_md.pid = getpid();
+ return RPAL_SUCCESS;
+
+destroy_thread_pool:
+ rpal_thread_pool_destory(&threads_md);
+error_out:
+ return RPAL_FAILURE;
+}
+
/* Reverse of rpal_thread_metadata_init(): disable the service in the
 * kernel first, then free the receiver thread pool. */
static void rpal_thread_metadata_exit(void)
{
	rpal_disable_service();
	rpal_thread_pool_destory(&threads_md);
}
+
+static status_t rpal_senders_metadata_init(rpal_error_code_t *error)
+{
+ if (senders_md) {
+ ERRREPORT(error, RPAL_ERR_SENDERS_METADATA,
+ "senders metadata is already initialized.\n");
+ return RPAL_FAILURE;
+ }
+
+ senders_md = malloc(sizeof(struct rpal_senders_metadata));
+ if (!senders_md) {
+ ERRREPORT(error, RPAL_ERR_NOMEM,
+ "senders metadata alloc failed.\n");
+ goto sendes_alloc_failed;
+ }
+ senders_md->sdpage_order = SENDERS_PAGE_ORDER;
+ memset(senders_md->bitmap, 0xFF,
+ sizeof(unsigned long) * BITS_TO_LONGS(MAX_SENDERS));
+ pthread_mutex_init(&senders_md->lock, NULL);
+ senders_md->senders = rpal_get_shared_page(senders_md->sdpage_order);
+ if (!senders_md->senders) {
+ ERRREPORT(error, RPAL_ERR_SENDER_PAGES,
+ "get senders share page error.\n");
+ goto pages_alloc_failed;
+ }
+ dbprint(RPAL_DEBUG_MANAGEMENT, "senders pages addr: 0x%016lx\n",
+ (unsigned long)senders_md->senders);
+ return RPAL_SUCCESS;
+
+pages_alloc_failed:
+ free(senders_md);
+sendes_alloc_failed:
+ return RPAL_FAILURE;
+}
+
+static void rpal_senders_metadata_exit(void)
+{
+ if (!senders_md)
+ return;
+
+ rpal_free_shared_page((void *)senders_md->senders,
+ senders_md->sdpage_order);
+ pthread_mutex_destroy(&senders_md->lock);
+ free(senders_md);
+}
+
+static int rpal_get_version_cap(rpal_capability_t *version)
+{
+ return rpal_ioctl(RPAL_IOCTL_GET_API_VERSION_AND_CAP,
+ (unsigned long)version);
+}
+
+static status_t rpal_version_check(rpal_capability_t *ver)
+{
+ if (ver->compat_version != MIN_RPAL_KERNEL_API_VERSION)
+ return RPAL_FAILURE;
+ if (ver->api_version < TARGET_RPAL_KERNEL_API_VERSION)
+ return RPAL_FAILURE;
+ return RPAL_SUCCESS;
+}
+
+static status_t rpal_capability_check(rpal_capability_t *ver)
+{
+ unsigned long cap = ver->cap;
+
+ if (!(cap & (1 << RPAL_CAP_PKU))) {
+ return RPAL_FAILURE;
+ }
+ return RPAL_SUCCESS;
+}
+
/*
 * Fetch the kernel's RPAL version/capabilities into the global
 * `version` and verify both API compatibility and required hardware
 * capabilities. Returns RPAL_SUCCESS or RPAL_FAILURE (status values
 * are carried in the int ret).
 */
static status_t rpal_check_version_cap(rpal_error_code_t *error)
{
	int ret;

	ret = rpal_get_version_cap(&version);
	if (ret < 0) {
		ERRREPORT(error, RPAL_ERR_GET_CAP_VERSION,
			  "rpal get version failed: %d\n", ret);
		ret = RPAL_FAILURE;
		goto out;
	}
	ret = rpal_version_check(&version);
	if (ret == RPAL_FAILURE) {
		ERRREPORT(
			error, RPAL_KERNEL_API_NOTSUPPORT,
			"kernel rpal(version: %d-%d) API is not compatible with librpal(version: %d-%d)\n",
			version.compat_version, version.api_version,
			MIN_RPAL_KERNEL_API_VERSION,
			TARGET_RPAL_KERNEL_API_VERSION);
		goto out;
	}
	ret = rpal_capability_check(&version);
	if (ret == RPAL_FAILURE) {
		ERRREPORT(error, RPAL_HARDWARE_NOTSUPPORT,
			  "hardware do not support RPAL\n");
		goto out;
	}
out:
	return ret;
}
+
/*
 * Open the RPAL management file and stash the descriptor in the
 * global rpal_mgtfd. Distinguishes the common failure modes (binary
 * not RPAL-enabled vs. kernel without RPAL) for a friendlier message.
 */
static status_t rpal_mgtfd_init(rpal_error_code_t *error)
{
	int err, n;
	int mgtfd;
	char name[1024];

	mgtfd = open(RPAL_MGT_FILE, O_RDWR);
	if (mgtfd == -1) {
		/* Save errno before any call that might clobber it. */
		err = errno;
		switch (err) {
		case EPERM:
			n = readlink("/proc/self/exe", name, sizeof(name) - 1);
			if (n < 0) {
				n = 0;
			}
			/* readlink does not NUL-terminate. */
			name[n] = 0;
			errprint("%s is not a RPAL binary\n", name);
			break;
		case ENOENT:
			errprint("Not in RPAL Environment\n");
			break;
		default:
			errprint("open %s fail, %d, %s\n", RPAL_MGT_FILE, err,
				 strerror(err));
		}
		if (error) {
			*error = RPAL_ERR_RPALFILE_OPS;
		}
		return RPAL_FAILURE;
	}
	rpal_mgtfd = mgtfd;
	return RPAL_SUCCESS;
}
+
+static void rpal_mgtfd_destroy(void)
+{
+ if (rpal_mgtfd != -1) {
+ close(rpal_mgtfd);
+ }
+ return;
+}
+
+#define RPAL_SECTION_SIZE (512 * 1024 * 1024 * 1024UL)
+
+static inline status_t rpal_check_address(uint64_t start, uint64_t end,
+ uint64_t check)
+{
+ if (check >= start && check < end) {
+ return RPAL_SUCCESS;
+ }
+ return RPAL_FAILURE;
+}
+
/*
 * First stage of rpal_init(): open the management fd, create the
 * per-thread key, reset the requested-service table and verify the
 * kernel's version and capabilities.
 *
 * NOTE(review): "managment" spelling is kept for link compatibility
 * with existing callers.
 */
static status_t rpal_managment_init(rpal_error_code_t *error)
{
	int i = 0;

	if (rpal_mgtfd_init(error) == RPAL_FAILURE) {
		goto mgtfd_init_failed;
	}
	if (pthread_key_create(&rpal_key, NULL))
		goto rpal_key_failed;

	/* No services requested yet; -1 marks "no pkey assigned". */
	for (i = 0; i < MAX_SERVICEID; i++) {
		requested_services[i].key = 0;
		requested_services[i].service = NULL;
		requested_services[i].pkey = -1;
	}
	if (rpal_check_version_cap(error) == RPAL_FAILURE) {
		goto rpal_check_failed;
	}
	return RPAL_SUCCESS;

rpal_check_failed:
	pthread_key_delete(rpal_key);
rpal_key_failed:
	rpal_mgtfd_destroy();
mgtfd_init_failed:
	return RPAL_FAILURE;
}
+
+static void rpal_managment_exit(void)
+{
+ pthread_key_delete(rpal_key);
+ rpal_mgtfd_destroy();
+ return;
+}
+
/*
 * Initialize librpal for this process.
 *
 * @nr_rpalthread: capacity of the receiver thread pool (> 0).
 * @flags: currently unused by the visible code.
 * @error: optional out-parameter for a detailed error code.
 *
 * Stages: management (fd, TLS key, version check), thread metadata
 * (service key/id, receiver pool, enable service), sender metadata.
 * Returns the management fd on success, RPAL_FAILURE on error.
 */
int rpal_init(int nr_rpalthread, int flags, rpal_error_code_t *error)
{
	if (nr_rpalthread <= 0) {
		dbprint(RPAL_DEBUG_MANAGEMENT,
			"%s: nr_rpalthread(%d) less than or equal to 0\n",
			__FUNCTION__, nr_rpalthread);
		return RPAL_FAILURE;
	}
	if (rpal_managment_init(error) == RPAL_FAILURE) {
		goto error_out;
	}
	if (rpal_thread_metadata_init(nr_rpalthread, error) == RPAL_FAILURE)
		goto managment_exit;

	if (rpal_senders_metadata_init(error) == RPAL_FAILURE)
		goto thread_md_exit;

	/* Only now do rpal_inited() checks start succeeding. */
	inited = 1;
	dbprint(RPAL_DEBUG_MANAGEMENT,
		"rpal init success, service key: 0x%lx, service id: %d, "
		"critical_start: 0x%016lx, critical_end: 0x%016lx\n",
		threads_md.service_key, threads_md.service_id, rcs.ret_begin,
		rcs.ret_end);
	return rpal_mgtfd;

thread_md_exit:
	rpal_thread_metadata_exit();
managment_exit:
	rpal_managment_exit();
error_out:
	return RPAL_FAILURE;
}
+
+void rpal_exit(void)
+{
+ if (rpal_inited()) {
+ dbprint(RPAL_DEBUG_MANAGEMENT,
+ "rpal exit, service key: 0x%lx, service id: %d\n",
+ threads_md.service_key, threads_md.service_id);
+ rpal_senders_metadata_exit();
+ rpal_thread_metadata_exit();
+ rpal_managment_exit();
+ }
+}
diff --git a/samples/rpal/librpal/rpal.h b/samples/rpal/librpal/rpal.h
new file mode 100644
index 000000000000..e91a206b8370
--- /dev/null
+++ b/samples/rpal/librpal/rpal.h
@@ -0,0 +1,149 @@
+#ifndef RPAL_H_INCLUDED
+#define RPAL_H_INCLUDED
+
+#ifdef __cplusplus
+#if __cplusplus
+extern "C" {
+#endif
+#endif /* __cplusplus */
+
+#include <stdint.h>
+#include <stdarg.h>
+#include <sys/epoll.h>
+
/*
 * Detailed error codes reported through rpal_error_code_t
 * out-parameters. Values below RPAL_ERR_KERNEL_MAX_CODE mirror
 * kernel-side errors; the remainder are librpal-internal.
 */
typedef enum rpal_error_code {
	RPAL_ERR_NONE = 0,
	RPAL_ERR_BAD_ARG = 1,
	RPAL_ERR_NO_SERVICE = 2,
	RPAL_ERR_MAPPED = 3,
	RPAL_ERR_RETRY = 4,
	RPAL_ERR_BAD_SERVICE_STATUS = 5,
	RPAL_ERR_BAD_THREAD_STATUS = 6,
	RPAL_ERR_REACH_LIMIT = 7,
	RPAL_ERR_NOMEM = 8,
	RPAL_ERR_NOMAPPING = 9,
	RPAL_ERR_INVAL = 10,

	RPAL_ERR_KERNEL_MAX_CODE = 100,

	RPAL_ERR_RPALFILE_OPS, /**< Failed to open /proc/self/rpal */
	RPAL_ERR_RPAL_DISABLED,
	RPAL_ERR_GET_CAP_VERSION,
	RPAL_KERNEL_API_NOTSUPPORT,
	RPAL_HARDWARE_NOTSUPPORT,
	RPAL_ERR_SERVICE_KEY, /**< Failed to get service key */
	RPAL_ERR_SENDERS_METADATA,
	RPAL_ERR_ENABLE_SERVICE,
	RPAL_ERR_SENDER_PAGES,
	RPAL_DONT_INITED,
	RPAL_ERR_SENDER_INIT,
	RPAL_ERR_SENDER_REG,
	RPAL_INVALID_ARG,
	RPAL_CACHE_FULL, /**< Receiver's user-event ring is full */
	RPAL_FDE_OUTDATED, /**< rpalfd timestamp no longer matches */
	RPAL_QUEUE_PUT_FAILED,
	RPAL_ERR_PEER_MEM, /**< Fault while accessing peer memory */
	RPAL_ERR_NOTIFY_RECVER,
	RPAL_INVAL_THREAD,
	RPAL_INVAL_SERVICE,
} rpal_error_code_t;
+
+#define EPOLLRPALIN 0x00020000
+#define EPOLLRPALOUT 0x00040000
+
/* Feature flags for rpal_init()'s flags argument. */
typedef enum rpal_features {
	RPAL_SENDER_RECEIVER = 0x1 << 0,
} rpal_features_t;
+
/* Generic success/failure status used across the librpal API. */
typedef enum status {
	RPAL_FAILURE = -1, /**< return value indicating failure */
	RPAL_SUCCESS /**< return value indicating success */
} status_t;
+
+#define RPAL_PUBLIC __attribute__((visibility("default")))
+
+RPAL_PUBLIC
+int rpal_init(int nr_rpalthread, int flags, rpal_error_code_t *error);
+
+RPAL_PUBLIC
+void rpal_exit(void);
+
+RPAL_PUBLIC
+int rpal_receiver_init(void);
+
+RPAL_PUBLIC
+void rpal_receiver_exit(void);
+
+RPAL_PUBLIC
+int rpal_request_service(uint64_t key);
+
+RPAL_PUBLIC
+status_t rpal_release_service(uint64_t key);
+
+RPAL_PUBLIC
+status_t rpal_clean_service_start(int64_t *ptr);
+
+RPAL_PUBLIC
+void rpal_clean_service_end(int64_t *ptr);
+
+RPAL_PUBLIC
+int rpal_get_service_id(void);
+
+RPAL_PUBLIC
+status_t rpal_get_service_key(uint64_t *service_key);
+
+RPAL_PUBLIC
+int rpal_get_request_service_id(uint64_t key);
+
+RPAL_PUBLIC
+status_t rpal_uds_fdmap(uint64_t sid_fd, uint64_t *rpalfd);
+
+RPAL_PUBLIC
+int rpal_get_peer_rid(uint64_t sid_fd);
+
+RPAL_PUBLIC
+status_t rpal_sender_init(rpal_error_code_t *error);
+
+RPAL_PUBLIC
+status_t rpal_sender_exit(void);
+
+/* Hook epoll syscall */
+RPAL_PUBLIC
+int rpal_epoll_wait(int epfd, struct epoll_event *events, int maxevents,
+ int timeout);
+
+RPAL_PUBLIC
+int rpal_epoll_wait_user(int epfd, struct epoll_event *events, int maxevents,
+ int timeout);
+
+RPAL_PUBLIC
+int rpal_epoll_ctl(int epfd, int op, int fd, struct epoll_event *event);
+
+RPAL_PUBLIC
+status_t rpal_copy_prepare(int service_id);
+
+RPAL_PUBLIC
+status_t rpal_copy_finish(void);
+
+RPAL_PUBLIC
+int rpal_write_ptrs(int service_id, uint64_t rpalfd, int64_t *ptrs, int len);
+
+RPAL_PUBLIC
+int rpal_read_ptrs(int fd, int64_t *ptrs, int len);
+
+typedef int (*access_fn)(va_list args);
+RPAL_PUBLIC
+status_t rpal_read_access_safety(access_fn do_access, int *do_access_ret, ...);
+
+RPAL_PUBLIC
+void rpal_recver_count_print(void);
+
+RPAL_PUBLIC
+void rpal_sender_count_print(void);
+
+#ifdef __cplusplus
+#if __cplusplus
+}
+#endif
+#endif
+#endif /* RPAL_H_INCLUDED */
diff --git a/samples/rpal/librpal/rpal_pkru.h b/samples/rpal/librpal/rpal_pkru.h
new file mode 100644
index 000000000000..9590aa7203bb
--- /dev/null
+++ b/samples/rpal/librpal/rpal_pkru.h
@@ -0,0 +1,78 @@
+#include <x86intrin.h>
+#include "private.h"
+
+#define RPAL_PKRU_BASE_CODE_READ 0xAAAAAAAA
+#define RPAL_PKRU_BASE_CODE 0xFFFFFFFF
+#define RPAL_NO_PKEY -1
+
+typedef uint32_t u32;
+/*
+ * extern __inline unsigned int
+ * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ * _rdpkru_u32 (void)
+ * {
+ * return __builtin_ia32_rdpkru ();
+ * }
+ *
+ * extern __inline void
+ * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+ * _wrpkru (unsigned int __key)
+ * {
+ * __builtin_ia32_wrpkru (__key);
+ * }
+ */
+// #define rdpkru _rdpkru_u32
+// #define wrpkru _wrpkru
/* Read the current PKRU register. The raw .byte encoding avoids a
 * dependency on assembler support for the rdpkru mnemonic. */
static inline uint32_t rdpkru(void)
{
	uint32_t ecx = 0;
	uint32_t edx, pkru;

	/*
	 * "rdpkru" instruction. Places PKRU contents in to EAX,
	 * clears EDX and requires that ecx=0.
	 */
	asm volatile(".byte 0x0f,0x01,0xee\n\t"
		     : "=a"(pkru), "=d"(edx)
		     : "c"(ecx));
	return pkru;
}
+
/* Write @pkru into the PKRU register. Raw .byte encoding, see rdpkru(). */
static inline void wrpkru(uint32_t pkru)
{
	uint32_t ecx = 0, edx = 0;

	/*
	 * "wrpkru" instruction. Loads contents in EAX to PKRU,
	 * requires that ecx = edx = 0.
	 */
	asm volatile(".byte 0x0f,0x01,0xef\n\t"
		     :
		     : "a"(pkru), "c"(ecx), "d"(edx));
}
+
+static inline u32 rpal_pkey_to_pkru(int pkey)
+{
+ int offset = pkey * 2;
+ u32 mask = 0x3 << offset;
+
+ return RPAL_PKRU_BASE_CODE & ~mask;
+}
+
+static inline u32 rpal_pkey_to_pkru_read(int pkey)
+{
+ int offset = pkey * 2;
+ u32 mask = 0x3 << offset;
+
+ return RPAL_PKRU_BASE_CODE_READ & ~mask;
+}
+
+static inline u32 rpal_pkru_union(u32 pkru0, u32 pkru1)
+{
+ return pkru0 & pkru1;
+}
+
+static inline u32 rpal_pkru_intersect(u32 pkru0, u32 pkru1)
+{
+ return pkru0 | pkru1;
+}
diff --git a/samples/rpal/librpal/rpal_queue.c b/samples/rpal/librpal/rpal_queue.c
new file mode 100644
index 000000000000..07a90122aa16
--- /dev/null
+++ b/samples/rpal/librpal/rpal_queue.c
@@ -0,0 +1,239 @@
+#include "rpal_queue.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#define min(X, Y) ({ ((X) > (Y)) ? (Y) : (X); })
+
/*
 * Round @data up to the next power of two. Returns 1 for inputs <= 1
 * and @data itself when it is already a power of two. Inputs above
 * 2^31 have no 32-bit power of two and trip the assert.
 *
 * Fix: shift an unsigned constant -- "1 << 31" on a signed int is
 * undefined behavior (here reachable in spirit via msb_position + 1).
 */
static unsigned int roundup_pow_of_two(unsigned int data)
{
	unsigned int msb_position;

	if (data <= 1)
		return 1;
	if (!(data & (data - 1)))
		return data;

	msb_position = 31 - __builtin_clz(data);
	assert(msb_position < 31);
	return 1U << (msb_position + 1);
}
+
+QUEUE_UINT rpal_queue_unused(rpal_queue_t *q)
+{
+ return (q->mask + 1) - (q->tail - q->head);
+}
+
+QUEUE_UINT rpal_queue_len(rpal_queue_t *q)
+{
+ return (q->tail - q->head);
+}
+
+int rpal_queue_init(rpal_queue_t *q, void *data, QUEUE_UINT_INC usize)
+{
+ QUEUE_UINT_INC size;
+ if (usize > QUEUE_UINT_MAX || !data) {
+ return -1;
+ }
+ size = roundup_pow_of_two(usize);
+ if (usize != size) {
+ return -1;
+ }
+ q->data = data;
+ memset(q->data, 0, size * sizeof(int64_t));
+ q->head = 0;
+ q->tail = 0;
+ q->mask = size - 1;
+ return 0;
+}
+
+void *rpal_queue_destroy(rpal_queue_t *q)
+{
+ void *data = q->data;
+ if (q->data) {
+ q->data = NULL;
+ }
+ q->mask = 0;
+ q->head = 0;
+ q->tail = 0;
+ return data;
+}
+
+int rpal_queue_alloc(rpal_queue_t *q, QUEUE_UINT_INC size)
+{
+ assert(q && size);
+ if (size > QUEUE_UINT_MAX) {
+ return -1;
+ }
+ size = roundup_pow_of_two(size);
+ q->data = malloc(size * sizeof(int64_t));
+ if (!q->data)
+ return -1;
+ memset(q->data, 0, size * sizeof(int64_t));
+ q->head = 0;
+ q->tail = 0;
+ q->mask = size - 1;
+ return 0;
+}
+
+void rpal_queue_free(rpal_queue_t *q)
+{
+ if (q->data) {
+ free(q->data);
+ q->data = NULL;
+ }
+ q->mask = 0;
+ q->head = 0;
+ q->tail = 0;
+}
+
/*
 * Copy @len 64-bit values from @buf into the ring at logical offset
 * @off, splitting into two memcpy calls at the wrap point
 * ("<< 3" == "* sizeof(int64_t)"). The trailing compiler barrier
 * keeps the data stores ordered before the caller's tail update.
 */
static void rpal_queue_copy_in(rpal_queue_t *q, const int64_t *buf,
			       QUEUE_UINT_INC len, QUEUE_UINT off)
{
	QUEUE_UINT_INC l;
	QUEUE_UINT_INC size = q->mask + 1;

	off &= q->mask;
	l = min(len, size - off);

	memcpy(q->data + off, buf, l << 3);
	memcpy(q->data, buf + l, (len - l) << 3);
	asm volatile("" : : : "memory");
}
+
/*
 * Append @len values; all-or-nothing. Returns @len on success, 0 when
 * the queue has no buffer or not enough free space.
 *
 * NOTE(review): tail is updated without atomics, so this side appears
 * single-producer -- callers in this file serialize via uq_lock;
 * confirm all producers do.
 */
QUEUE_UINT_INC rpal_queue_put(rpal_queue_t *q, const int64_t *buf,
			      QUEUE_UINT_INC len)
{
	QUEUE_UINT_INC l;

	if (!q->data) {
		return 0;
	}
	l = rpal_queue_unused(q);
	if (len > l) {
		return 0;
	}
	l = len;
	rpal_queue_copy_in(q, buf, l, q->tail);
	q->tail += l;
	return l;
}
+
/*
 * Copy up to @len values starting at snapshot position @head into
 * @buf without consuming them. Bounds the copy by a fresh snapshot of
 * tail; returns the number of values copied (0 when empty).
 */
static QUEUE_UINT_INC rpal_queue_copy_out(rpal_queue_t *q, int64_t *buf,
					  QUEUE_UINT_INC len, QUEUE_UINT head)
{
	unsigned int l;
	QUEUE_UINT tail;
	QUEUE_UINT off;
	QUEUE_UINT_INC size = q->mask + 1;

	tail = __atomic_load_n(&q->tail, __ATOMIC_RELAXED);
	len = min((QUEUE_UINT)(tail - head), len);
	if (head == tail)
		return 0;
	off = head & q->mask;
	l = min(len, size - off);

	/* Two memcpy calls handle wrap-around; << 3 == * sizeof(int64_t). */
	memcpy(buf, q->data + off, l << 3);
	memcpy(buf + l, q->data, (len - l) << 3);

	return len;
}
+
+QUEUE_UINT_INC rpal_queue_peek(rpal_queue_t *q, int64_t *buf,
+ QUEUE_UINT_INC len, QUEUE_UINT *phead)
+{
+ QUEUE_UINT_INC copied;
+ QUEUE_UINT head;
+
+ head = __atomic_load_n(&q->head, __ATOMIC_RELAXED);
+ copied = rpal_queue_copy_out(q, buf, len, head);
+ if (phead) {
+ *phead = head;
+ }
+ return copied;
+}
+
/*
 * Commit a peeked read: advance head from the snapshot @head by @skip
 * via CAS. Returns @skip on success, 0 if the queue shrank or another
 * consumer moved head first.
 *
 * NOTE(review): the length check and the CAS are not atomic together;
 * the CAS on the exact snapshot is what guarantees correctness --
 * confirm callers always pass a head obtained from rpal_queue_peek().
 */
QUEUE_UINT_INC rpal_queue_skip(rpal_queue_t *q, QUEUE_UINT head,
			       QUEUE_UINT_INC skip)
{
	if (skip > rpal_queue_len(q)) {
		return 0;
	}
	if (__atomic_compare_exchange_n(&q->head, &head, head + skip, 1,
					__ATOMIC_RELAXED, __ATOMIC_RELAXED)) {
		return skip;
	}
	return 0;
}
+
/*
 * Consume up to @len values into @buf. The CAS retry loop re-reads
 * head and re-copies when another consumer raced us, so competing
 * consumers each obtain a disjoint range. Returns the number of
 * values consumed.
 */
QUEUE_UINT_INC rpal_queue_get(rpal_queue_t *q, int64_t *buf, QUEUE_UINT_INC len)
{
	QUEUE_UINT_INC copied;
	QUEUE_UINT head;

	while (1) {
		head = __atomic_load_n(&q->head, __ATOMIC_RELAXED);
		copied = rpal_queue_copy_out(q, buf, len, head);
		if (__atomic_compare_exchange_n(&q->head, &head, head + copied,
						1, __ATOMIC_RELAXED,
						__ATOMIC_RELAXED)) {
			return copied;
		}
	}
}
+
+void rpal_uevent_queue_init(epoll_uevent_queue_t *ueventq,
+ volatile uint64_t *uqlock)
+{
+ int i;
+ __atomic_store_n(uqlock, (uint64_t)0, __ATOMIC_RELAXED);
+ ueventq->l_beg = 0;
+ ueventq->l_end = 0;
+ ueventq->l_end_cache = 0;
+ for (i = 0; i < MAX_RDY; ++i) {
+ ueventq->fds[i] = -1;
+ }
+ return;
+}
+
+QUEUE_UINT uevent_queue_len(epoll_uevent_queue_t *ueventq)
+{
+ return (ueventq->l_end - ueventq->l_beg);
+}
+
/*
 * Publish @fd on the ring: reserve a slot by atomically bumping
 * l_end_cache, write the fd, then commit by bumping l_end (the barrier
 * orders the write before the commit). Returns the ring position used,
 * or MAX_RDY when the queue was full.
 *
 * NOTE(review): the fullness check and the reservation are not one
 * atomic step, so concurrent adders could overshoot a nearly-full
 * ring -- callers in this file also check capacity under uq_lock;
 * confirm that covers all producers.
 */
QUEUE_UINT uevent_queue_add(epoll_uevent_queue_t *ueventq, int fd)
{
	unsigned int pos;
	if (uevent_queue_len(ueventq) == MAX_RDY)
		return MAX_RDY;
	pos = __sync_fetch_and_add(&ueventq->l_end_cache, 1);
	pos %= MAX_RDY;
	ueventq->fds[pos] = fd;
	asm volatile("" : : : "memory");
	__sync_fetch_and_add(&ueventq->l_end, 1);
	return (pos);
}
+
/*
 * Pop the oldest fd, or -1 when the ring is empty. The fd is read
 * before l_beg is advanced (barrier-ordered) so producers never see a
 * half-consumed slot.
 *
 * NOTE(review): the emptiness check and the l_beg advance are not one
 * atomic step -- this looks single-consumer (the receiver thread);
 * confirm.
 */
int uevent_queue_del(epoll_uevent_queue_t *ueventq)
{
	int fd = -1;
	int pos;
	if (uevent_queue_len(ueventq) == 0) {
		return -1;
	}
	pos = ueventq->l_beg % MAX_RDY;
	fd = ueventq->fds[pos];
	asm volatile("" : : : "memory");
	__sync_fetch_and_add(&ueventq->l_beg, 1);
	return fd;
}
+
+int uevent_queue_fix(epoll_uevent_queue_t *ueventq)
+{
+ __atomic_store_n(&ueventq->l_end_cache, ueventq->l_end,
+ __ATOMIC_SEQ_CST);
+ return 0;
+}
diff --git a/samples/rpal/librpal/rpal_queue.h b/samples/rpal/librpal/rpal_queue.h
new file mode 100644
index 000000000000..224e7b449d50
--- /dev/null
+++ b/samples/rpal/librpal/rpal_queue.h
@@ -0,0 +1,55 @@
+#ifndef RPAL_QUEUE_H
+#define RPAL_QUEUE_H
+
+#include <stdint.h>
+
+/*
+ * Index-type configuration: QUEUE_UINT is the ring index type and
+ * QUEUE_UINT_INC is a wider type for lengths/increments so intermediate
+ * sums cannot wrap.  Narrower alternatives are kept below for reference.
+ */
+// typedef uint8_t QUEUE_UINT;
+// typedef uint16_t QUEUE_UINT_INC;
+// #define QUEUE_UINT_MAX UINT8_MAX
+
+// typedef uint16_t QUEUE_UINT;
+// typedef uint32_t QUEUE_UINT_INC;
+// #define QUEUE_UINT_MAX UINT16_MAX
+
+typedef uint32_t QUEUE_UINT;
+typedef uint64_t QUEUE_UINT_INC;
+#define QUEUE_UINT_MAX UINT32_MAX
+
+/* Ring buffer of 64-bit values; head/tail are free-running indices. */
+typedef struct rpal_queue {
+	QUEUE_UINT head;	/* consumer index */
+	QUEUE_UINT tail;	/* producer index */
+	QUEUE_UINT mask;	/* capacity - 1; capacity is a power of two */
+	uint64_t *data;		/* backing storage, (mask + 1) entries */
+} rpal_queue_t;
+
+QUEUE_UINT rpal_queue_len(rpal_queue_t *q);
+QUEUE_UINT rpal_queue_unused(rpal_queue_t *q);
+/* Initialize @q over caller-provided storage of @usize entries. */
+int rpal_queue_init(rpal_queue_t *q, void *data, QUEUE_UINT_INC usize);
+void *rpal_queue_destroy(rpal_queue_t *q);
+/* Allocate backing storage for @size entries. */
+int rpal_queue_alloc(rpal_queue_t *q, QUEUE_UINT_INC size);
+void rpal_queue_free(rpal_queue_t *q);
+/* Put/get/peek return the number of entries actually transferred. */
+QUEUE_UINT_INC rpal_queue_put(rpal_queue_t *q, const int64_t *buf,
+			      QUEUE_UINT_INC len);
+QUEUE_UINT_INC rpal_queue_get(rpal_queue_t *q, int64_t *buf,
+			      QUEUE_UINT_INC len);
+QUEUE_UINT_INC rpal_queue_peek(rpal_queue_t *q, int64_t *buf,
+			       QUEUE_UINT_INC len, QUEUE_UINT *phead);
+QUEUE_UINT_INC rpal_queue_skip(rpal_queue_t *q, QUEUE_UINT head,
+			       QUEUE_UINT_INC skip);
+
+#define MAX_RDY 4096
+/* Fixed-size ring of ready file descriptors for user-space epoll events. */
+typedef struct epoll_uevent_queue {
+	int fds[MAX_RDY];		/* -1 marks an empty slot */
+	volatile QUEUE_UINT l_beg;	/* consumer index */
+	volatile QUEUE_UINT l_end;	/* committed producer index */
+	volatile QUEUE_UINT l_end_cache;	/* producers' reservation cursor */
+} epoll_uevent_queue_t;
+
+void rpal_uevent_queue_init(epoll_uevent_queue_t *ueventq,
+			    volatile uint64_t *uqlock);
+QUEUE_UINT uevent_queue_len(epoll_uevent_queue_t *ueventq);
+QUEUE_UINT uevent_queue_add(epoll_uevent_queue_t *ueventq, int fd);
+int uevent_queue_del(epoll_uevent_queue_t *ueventq);
+int uevent_queue_fix(epoll_uevent_queue_t *ueventq);
+
+#endif
diff --git a/samples/rpal/librpal/rpal_x86_64_call_ret.S b/samples/rpal/librpal/rpal_x86_64_call_ret.S
new file mode 100644
index 000000000000..a7c09a1b033d
--- /dev/null
+++ b/samples/rpal/librpal/rpal_x86_64_call_ret.S
@@ -0,0 +1,45 @@
+#ifdef __x86_64__
+#define __ASSEMBLY__
+#include "asm_define.h"
+#define RPAL_SENDER_STATE_RUNNING $0x0
+#define RPAL_SENDER_STATE_CALL $0x1
+
+.text
+.globl rpal_ret_critical
+.type rpal_ret_critical,@function
+.align 16
+
+/*
+ * void rpal_ret_critical(receiver_context_t *rc, rpal_call_info_t *rci)
+ *
+ * Return path of an RPAL call: atomically flip the sender state from
+ * CALL back to RUNNING, restore the sender's PKRU and FS base, then
+ * jump to the sender's saved fcontext.  ret_begin/ret_end bracket the
+ * critical window and are exported by rpal_get_critical_addr() below.
+ */
+rpal_ret_critical:
+	mov RPAL_SENDER_STATE_CALL, %eax
+	mov RPAL_SENDER_STATE_RUNNING, %ecx
+	lock cmpxchg %ecx, RC_SENDER_STATE(%rdi)
+ret_begin:
+	/* ZF from cmpxchg: fall through only if the state was CALL. */
+	jne 2f
+	movq RCI_PKRU(%rsi), %rax
+	xor %edx, %edx
+	/* WRPKRU (0f 01 ef) emitted as raw bytes for old assemblers;
+	 * requires eax = pkru, ecx = 0, edx = 0 -- ecx already holds 0
+	 * (RPAL_SENDER_STATE_RUNNING) from the cmpxchg setup above. */
+	.byte 0x0f,0x01,0xef
+	movq RCI_SENDER_TLS_BASE(%rsi), %rax
+	wrfsbase %rax
+ret_end:
+	movq RCI_SENDER_FCTX(%rsi), %rdi
+	call jump_fcontext@plt
+2:
+	ret
+
+.globl rpal_get_critical_addr
+.type rpal_get_critical_addr,@function
+.align 16
+/* Store the ret_begin/ret_end addresses into the struct at %rdi. */
+rpal_get_critical_addr:
+	leaq ret_begin(%rip), %rax
+	movq %rax, RET_BEGIN(%rdi)
+	leaq ret_end(%rip), %rax
+	movq %rax, RET_END(%rdi)
+	ret
+
+.size rpal_ret_critical,.-rpal_ret_critical
+
+/* Mark that we don't need executable stack. */
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/samples/rpal/offset.sh b/samples/rpal/offset.sh
new file mode 100755
index 000000000000..f5ae77b893e8
--- /dev/null
+++ b/samples/rpal/offset.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# Generate librpal/asm_define.h: compile asm_define.c to assembly and
+# turn each "-> NAME VALUE" marker line into a #define.
+
+set -e
+# Quote all expansions so paths containing spaces do not word-split.
+CUR_DIR=$(dirname "$(realpath -s "$0")")
+gcc -masm=intel -S "$CUR_DIR/asm_define.c" -o - | awk '($1 == "->") { print "#define " $2 " " $3 }' > "$CUR_DIR/librpal/asm_define.h"
\ No newline at end of file
diff --git a/samples/rpal/server.c b/samples/rpal/server.c
new file mode 100644
index 000000000000..82c5c9dec922
--- /dev/null
+++ b/samples/rpal/server.c
@@ -0,0 +1,249 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/epoll.h>
+#include <x86intrin.h>
+#include "librpal/rpal.h"
+
+#define SOCKET_PATH "/tmp/rpal_socket"	/* rendezvous UNIX-domain socket */
+#define MAX_EVENTS 10			/* epoll_wait batch size */
+#define BUFFER_SIZE 1025		/* I/O scratch buffer size */
+#define MSG_LEN 32			/* bytes per benchmark message */
+
+/* Handshake messages exchanged with the client. */
+#define INIT_MSG "INIT"
+#define SUCC_MSG "SUCC"
+#define FAIL_MSG "FAIL"
+
+/* Print the errno description for s and terminate the process. */
+#define handle_error(s) \
+	do { \
+		perror(s); \
+		exit(EXIT_FAILURE); \
+	} while (0)
+
+/* This server's RPAL service key, shared with the client at handshake. */
+uint64_t service_key;
+int server_fd;	/* listening socket */
+int epoll_fd;	/* epoll instance shared by both benchmark loops */
+
+int rpal_epoll_add(int epfd, int fd)
+{
+	/* Watch @fd for RPAL and regular input plus peer hangup, edge-triggered. */
+	struct epoll_event ev = {
+		.events = EPOLLRPALIN | EPOLLIN | EPOLLRDHUP | EPOLLET,
+		.data.fd = fd,
+	};
+
+	return rpal_epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev);
+}
+
+/*
+ * Perform the RPAL handshake with a freshly-accepted client on @fd.
+ *
+ * Protocol: client sends "INIT" + its 8-byte service key; we request the
+ * client's service, reply with our own service key (or 0 on failure),
+ * and wait for "SUCC" before initializing the receiver side.
+ * Any failure terminates the process via handle_error().
+ *
+ * Fixes vs. the previous version: the global service_key was shadowed by
+ * a local (so the global was never set), the remote key was read through
+ * an unaligned uint64_t* cast (UB), the read() length was never checked
+ * before parsing, and dead `return;` statements followed handle_error()
+ * (which exits).
+ */
+void rpal_server_init(int fd, int epoll_fd)
+{
+	char buffer[BUFFER_SIZE];
+	rpal_error_code_t err;
+	uint64_t remote_key;
+	int remote_id;
+	int proc_fd;
+	ssize_t ret;
+
+	/* Register this process with RPAL before any handshake traffic. */
+	proc_fd = rpal_init(1, 0, &err);
+	if (proc_fd < 0)
+		handle_error("rpal init fail");
+	/* Fill the global so the reply below sends our real key. */
+	rpal_get_service_key(&service_key);
+
+	rpal_epoll_add(epoll_fd, fd);
+
+	/* Expect "INIT" immediately followed by the client's 8-byte key. */
+	ret = read(fd, buffer, BUFFER_SIZE);
+	if (ret < 0)
+		handle_error("rpal init: read");
+	if ((size_t)ret < strlen(INIT_MSG) + sizeof(uint64_t) ||
+	    strncmp(buffer, INIT_MSG, strlen(INIT_MSG)) != 0)
+		handle_error("Invalid msg");
+
+	/* memcpy avoids an unaligned uint64_t load from buffer + 4. */
+	memcpy(&remote_key, buffer + strlen(INIT_MSG), sizeof(remote_key));
+	if (rpal_request_service(remote_key)) {
+		/* Notify the client with key 0, then abort. */
+		uint64_t fail_key = 0;
+
+		(void)write(fd, &fail_key, sizeof(fail_key));
+		handle_error("request service fail");
+	}
+	ret = write(fd, &service_key, sizeof(uint64_t));
+	if (ret < 0)
+		handle_error("write error");
+
+	ret = read(fd, buffer, BUFFER_SIZE);
+	if (ret < 0)
+		handle_error("handshake read");
+	if ((size_t)ret < strlen(SUCC_MSG) ||
+	    strncmp(SUCC_MSG, buffer, strlen(SUCC_MSG)) != 0)
+		handle_error("handshake");
+
+	/* Validate that the client's service is now visible to us. */
+	remote_id = rpal_get_request_service_id(remote_key);
+	if (remote_id < 0)
+		handle_error("remote id get fail");
+	rpal_receiver_init();
+}
+
+/*
+ * Benchmark loop using RPAL epoll + rpal_read_ptrs for the fast path.
+ *
+ * Echoes each @msg_len-byte message back to the client while summing the
+ * TSC round-trip latency embedded at the start of every payload.  Exits
+ * on peer hangup or read failure and prints aggregate statistics.
+ *
+ * Fixes vs. the previous version: removed the unused epoll_event `ev`,
+ * word count now uses sizeof(int64_t) instead of sizeof(int64_t *)
+ * (same value only on LP64), the report uses the msg_len parameter, and
+ * the average is guarded against count == 0.
+ */
+void run_rpal_server(int msg_len)
+{
+	struct epoll_event events[MAX_EVENTS];
+	int new_socket;
+	int nfds;
+	uint64_t tsc, total_tsc = 0;
+	int count = 0;
+
+	while (1) {
+		nfds = rpal_epoll_wait(epoll_fd, events, MAX_EVENTS, -1);
+		if (nfds == -1) {
+			perror("epoll_wait");
+			exit(EXIT_FAILURE);
+		}
+
+		for (int n = 0; n < nfds; ++n) {
+			if (events[n].data.fd == server_fd) {
+				new_socket = accept(server_fd, NULL, NULL);
+				if (new_socket == -1) {
+					perror("accept");
+					continue;
+				}
+
+				rpal_server_init(new_socket, epoll_fd);
+			} else if (events[n].events & EPOLLRDHUP) {
+				close(events[n].data.fd);
+				goto finish;
+			} else if (events[n].events & EPOLLRPALIN) {
+				char buffer[BUFFER_SIZE] = { 0 };
+
+				/* The payload is transferred as 64-bit words. */
+				ssize_t valread = rpal_read_ptrs(
+					events[n].data.fd, (int64_t *)buffer,
+					MSG_LEN / sizeof(int64_t));
+				if (valread <= 0) {
+					close(events[n].data.fd);
+					epoll_ctl(epoll_fd, EPOLL_CTL_DEL,
+						  events[n].data.fd, NULL);
+					goto finish;
+				} else {
+					count++;
+					/* Client embeds its send-side TSC. */
+					sscanf(buffer, "0x%016lx", &tsc);
+					total_tsc += __rdtsc() - tsc;
+					send(events[n].data.fd, buffer,
+					     msg_len, 0);
+				}
+			} else {
+				perror("bad request\n");
+			}
+		}
+	}
+finish:
+	/* Guard against division by zero when no message arrived. */
+	if (count == 0) {
+		printf("RPAL: no messages received\n");
+		return;
+	}
+	printf("RPAL: Message length: %d bytes, Total TSC cycles: %lu, "
+	       "Message count: %d, Average latency: %lu cycles\n",
+	       msg_len, total_tsc, count, total_tsc / count);
+}
+
+/*
+ * Baseline benchmark loop using plain epoll + read/send.
+ *
+ * Echoes each @msg_len-byte message back to the client while summing the
+ * TSC round-trip latency embedded at the start of every payload.  Exits
+ * on peer hangup or read failure and prints aggregate statistics.
+ *
+ * Fixes vs. the previous version: the report uses the msg_len parameter
+ * instead of MSG_LEN, and the average is guarded against count == 0.
+ */
+void run_server(int msg_len)
+{
+	struct epoll_event ev, events[MAX_EVENTS];
+	int new_socket;
+	int nfds;
+	uint64_t tsc, total_tsc = 0;
+	int count = 0;
+
+	while (1) {
+		nfds = epoll_wait(epoll_fd, events, MAX_EVENTS, -1);
+		if (nfds == -1) {
+			perror("epoll_wait");
+			exit(EXIT_FAILURE);
+		}
+
+		for (int n = 0; n < nfds; ++n) {
+			if (events[n].data.fd == server_fd) {
+				new_socket = accept(server_fd, NULL, NULL);
+				if (new_socket == -1) {
+					perror("accept");
+					continue;
+				}
+
+				ev.events = EPOLLIN;
+				ev.data.fd = new_socket;
+				if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD,
+					      new_socket, &ev) == -1) {
+					close(new_socket);
+					perror("epoll_ctl: add new socket");
+				}
+			} else if (events[n].events & EPOLLRDHUP) {
+				close(events[n].data.fd);
+				goto finish;
+			} else {
+				char buffer[BUFFER_SIZE] = { 0 };
+
+				ssize_t valread = read(events[n].data.fd,
+						       buffer, BUFFER_SIZE);
+				if (valread <= 0) {
+					close(events[n].data.fd);
+					epoll_ctl(epoll_fd, EPOLL_CTL_DEL,
+						  events[n].data.fd, NULL);
+					goto finish;
+				} else {
+					count++;
+					/* Client embeds its send-side TSC. */
+					sscanf(buffer, "0x%016lx", &tsc);
+					total_tsc += __rdtsc() - tsc;
+					send(events[n].data.fd, buffer,
+					     msg_len, 0);
+				}
+			}
+		}
+	}
+finish:
+	/* Guard against division by zero when no message arrived. */
+	if (count == 0) {
+		printf("EPOLL: no messages received\n");
+		return;
+	}
+	printf("EPOLL: Message length: %d bytes, Total TSC cycles: %lu, "
+	       "Message count: %d, Average latency: %lu cycles\n",
+	       msg_len, total_tsc, count, total_tsc / count);
+}
+
+/*
+ * Set up the listening UNIX socket and epoll instance, then run the
+ * plain-epoll benchmark followed by the RPAL benchmark.
+ *
+ * Fixes vs. the previous version: socket() failure was tested with
+ * `== 0` (it returns -1 on error), the strncpy bound was taken from the
+ * source string instead of the destination buffer, and a stale socket
+ * file from a previous run was not removed before bind().
+ */
+int main(void)
+{
+	struct sockaddr_un address;
+	struct epoll_event ev;
+
+	/* socket() returns -1 on failure, not 0. */
+	if ((server_fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
+		perror("socket failed");
+		exit(EXIT_FAILURE);
+	}
+
+	memset(&address, 0, sizeof(address));
+	address.sun_family = AF_UNIX;
+	/* Bound by the destination; memset above guarantees termination. */
+	strncpy(address.sun_path, SOCKET_PATH, sizeof(address.sun_path) - 1);
+
+	/* Remove a stale socket file so bind() does not fail on rerun. */
+	unlink(SOCKET_PATH);
+	if (bind(server_fd, (struct sockaddr *)&address, sizeof(address)) < 0) {
+		perror("bind failed");
+		exit(EXIT_FAILURE);
+	}
+
+	if (listen(server_fd, 3) < 0) {
+		perror("listen");
+		exit(EXIT_FAILURE);
+	}
+
+	epoll_fd = epoll_create(1024);
+	if (epoll_fd == -1) {
+		perror("epoll_create");
+		exit(EXIT_FAILURE);
+	}
+
+	ev.events = EPOLLIN;
+	ev.data.fd = server_fd;
+	if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, server_fd, &ev) == -1) {
+		perror("epoll_ctl: listen_sock");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Run the baseline first, then the RPAL fast path. */
+	run_server(MSG_LEN);
+	run_rpal_server(MSG_LEN);
+
+	close(server_fd);
+	unlink(SOCKET_PATH);
+	return 0;
+}
--
2.20.1
Powered by blists - more mailing lists