[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <ZxEr1TOg4ddoAm7y@mini-arch>
Date: Thu, 17 Oct 2024 08:23:01 -0700
From: Stanislav Fomichev <stfomichev@...il.com>
To: Anjali Kulkarni <anjali.k.kulkarni@...cle.com>
Cc: davem@...emloft.net, Liam.Howlett@...cle.com, edumazet@...gle.com,
kuba@...nel.org, pabeni@...hat.com, mingo@...hat.com,
peterz@...radead.org, juri.lelli@...hat.com,
vincent.guittot@...aro.org, dietmar.eggemann@....com,
rostedt@...dmis.org, bsegall@...gle.com, mgorman@...e.de,
vschneid@...hat.com, jiri@...nulli.us, linux-kernel@...r.kernel.org,
netdev@...r.kernel.org, akpm@...ux-foundation.org, shuah@...nel.org,
linux-kselftest@...r.kernel.org, peili.io@...cle.com
Subject: Re: [PATCH net-next v3 1/3] connector/cn_proc: Add hash table for
threads
On 10/16, Anjali Kulkarni wrote:
> Add a new type PROC_CN_MCAST_NOTIFY to proc connector API, which allows a
> thread to notify the kernel that it is going to exit with a non-zero exit
> code and specify the exit code in it. When thread exits in the kernel,
> it will send this exit code as a proc filter notification to any
> listening process.
> Exiting thread can call this either when it wants to call pthread_exit()
> with non-zero value or from signal handler.
>
> Add a new file cn_hash.c which implements a hash table storing the exit
> codes of abnormally exiting threads, received by the system call above.
> The key used for the hash table is the pid of the thread, so when the
> thread actually exits, we look up its pid in the hash table and retrieve
> the exit code sent by user. If the exit code in struct task is 0, we
> then replace it with the user supplied non-zero exit code.
>
> cn_hash.c implements the hash table add, delete, lookup operations.
> mutex_lock() and mutex_unlock() operations are used to safeguard the
> integrity of the hash table while adding or deleting elements.
> connector.c has the API calls, called from cn_proc.c, as well as calls
> to allocate, initialize and free the hash table.
>
> Add a new flag in PF_* flags of task_struct - EXIT_NOTIFY. This flag is
> set when user sends the exit code via PROC_CN_MCAST_NOTIFY. While
> exiting, this flag is checked and the hash table add or delete calls
> are only made if this flag is set.
>
> A refcount field hrefcnt is added in struct cn_hash_dev, to keep track
> of the number of threads which have added an entry in the hash table. Before
> freeing the struct cn_hash_dev, this value must be 0.
> This refcnt check is added in case CONFIG_CONNECTOR is compiled as a
> module. In that case, when unloading the module, we need to make sure
> no hash entries are still present in the hdev table.
>
> Copy the task's name (task->comm) into the exit event notification.
> This will allow applications to filter on the name further using
> userspace filtering like ebpf.
>
> Signed-off-by: Anjali Kulkarni <anjali.k.kulkarni@...cle.com>
> ---
> drivers/connector/Makefile | 2 +-
> drivers/connector/cn_hash.c | 181 ++++++++++++++++++++++++++++++++++
> drivers/connector/cn_proc.c | 62 +++++++++++-
> drivers/connector/connector.c | 63 +++++++++++-
> include/linux/connector.h | 31 ++++++
> include/linux/sched.h | 2 +-
> include/uapi/linux/cn_proc.h | 5 +-
> 7 files changed, 338 insertions(+), 8 deletions(-)
> create mode 100644 drivers/connector/cn_hash.c
>
> diff --git a/drivers/connector/Makefile b/drivers/connector/Makefile
> index 1bf67d3df97d..cb1dcdf067ad 100644
> --- a/drivers/connector/Makefile
> +++ b/drivers/connector/Makefile
> @@ -2,4 +2,4 @@
> obj-$(CONFIG_CONNECTOR) += cn.o
> obj-$(CONFIG_PROC_EVENTS) += cn_proc.o
>
> -cn-y += cn_queue.o connector.o
> +cn-y += cn_hash.o cn_queue.o connector.o
> diff --git a/drivers/connector/cn_hash.c b/drivers/connector/cn_hash.c
> new file mode 100644
> index 000000000000..a079e9bcea6d
> --- /dev/null
> +++ b/drivers/connector/cn_hash.c
> @@ -0,0 +1,181 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Author: Anjali Kulkarni <anjali.k.kulkarni@...cle.com>
> + *
> + * Copyright (c) 2024 Oracle and/or its affiliates.
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/connector.h>
> +#include <linux/mutex.h>
> +#include <linux/pid_namespace.h>
> +
> +#include <linux/cn_proc.h>
> +
> +struct cn_hash_dev *cn_hash_alloc_dev(const char *name)
> +{
> + struct cn_hash_dev *hdev;
> +
> + hdev = kzalloc(sizeof(*hdev), GFP_KERNEL);
> + if (!hdev)
> + return NULL;
> +
> + snprintf(hdev->name, sizeof(hdev->name), "%s", name);
> + atomic_set(&hdev->hrefcnt, 0);
> + mutex_init(&hdev->uexit_hash_lock);
> + hash_init(hdev->uexit_pid_htable);
> + return hdev;
> +}
> +
> +void cn_hash_free_dev(struct cn_hash_dev *hdev)
> +{
> + struct uexit_pid_hnode *hnode;
> + struct hlist_node *tmp;
> + int bucket;
> +
> + pr_debug("%s: Freeing entire hdev %p\n", __func__, hdev);
> +
> + mutex_lock(&hdev->uexit_hash_lock);
> + hash_for_each_safe(hdev->uexit_pid_htable, bucket, tmp,
> + hnode, uexit_pid_hlist) {
> + hash_del(&hnode->uexit_pid_hlist);
> + pr_debug("%s: Freeing node for pid %d\n",
> + __func__, hnode->pid);
> + kfree(hnode);
> + }
> +
> + mutex_unlock(&hdev->uexit_hash_lock);
> + mutex_destroy(&hdev->uexit_hash_lock);
> +
> + /*
> + * This refcnt check is added in case CONFIG_CONNECTOR is
> + * compiled with =m as a module. In that case, when unloading
> + * the module, we need to make sure no hash entries are still
> + * present in the hdev table.
> + */
> + while (atomic_read(&hdev->hrefcnt)) {
> + pr_info("Waiting for %s to become free: refcnt=%d\n",
> + hdev->name, atomic_read(&hdev->hrefcnt));
> + msleep(1000);
> + }
> +
> + kfree(hdev);
> + hdev = NULL;
> +}
> +
> +static struct uexit_pid_hnode *cn_hash_alloc_elem(__u32 uexit_code, pid_t pid)
> +{
> + struct uexit_pid_hnode *elem;
> +
> + elem = kzalloc(sizeof(*elem), GFP_KERNEL);
> + if (!elem)
> + return NULL;
> +
> + INIT_HLIST_NODE(&elem->uexit_pid_hlist);
> + elem->uexit_code = uexit_code;
> + elem->pid = pid;
> + return elem;
> +}
> +
> +static inline void cn_hash_free_elem(struct uexit_pid_hnode *elem)
> +{
> + kfree(elem);
> +}
> +
> +int cn_hash_add_elem(struct cn_hash_dev *hdev, __u32 uexit_code, pid_t pid)
> +{
> + struct uexit_pid_hnode *elem, *hnode;
> +
> + elem = cn_hash_alloc_elem(uexit_code, pid);
> + if (!elem) {
> + pr_err("%s: cn_hash_alloc_elem() returned NULL pid %d\n",
> + __func__, pid);
> + return -ENOMEM;
> + }
> +
> + mutex_lock(&hdev->uexit_hash_lock);
> + /*
> + * Check if an entry for the same pid already exists
> + */
> + hash_for_each_possible(hdev->uexit_pid_htable,
> + hnode, uexit_pid_hlist, pid) {
> + if (hnode->pid == pid) {
> + mutex_unlock(&hdev->uexit_hash_lock);
> + cn_hash_free_elem(elem);
> + pr_debug("%s: pid %d already exists in hash table\n",
> + __func__, pid);
> + return -EEXIST;
> + }
> + }
> +
> + hash_add(hdev->uexit_pid_htable, &elem->uexit_pid_hlist, pid);
> + mutex_unlock(&hdev->uexit_hash_lock);
> +
> + atomic_inc(&hdev->hrefcnt);
> +
> + pr_debug("%s: After hash_add of pid %d elem %p hrefcnt %d\n",
> + __func__, pid, elem, atomic_read(&hdev->hrefcnt));
> + return 0;
> +}
> +
> +int cn_hash_del_get_exval(struct cn_hash_dev *hdev, pid_t pid)
> +{
> + struct uexit_pid_hnode *hnode;
> + struct hlist_node *tmp;
> + int excde;
> +
> + mutex_lock(&hdev->uexit_hash_lock);
> + hash_for_each_possible_safe(hdev->uexit_pid_htable,
> + hnode, tmp, uexit_pid_hlist, pid) {
> + if (hnode->pid == pid) {
> + excde = hnode->uexit_code;
> + hash_del(&hnode->uexit_pid_hlist);
> + mutex_unlock(&hdev->uexit_hash_lock);
> + kfree(hnode);
> + atomic_dec(&hdev->hrefcnt);
> + pr_debug("%s: After hash_del of pid %d, found exit code %u hrefcnt %d\n",
> + __func__, pid, excde,
> + atomic_read(&hdev->hrefcnt));
> + return excde;
> + }
> + }
> +
> + mutex_unlock(&hdev->uexit_hash_lock);
> + pr_err("%s: pid %d not found in hash table\n",
> + __func__, pid);
> + return -EINVAL;
> +}
> +
> +int cn_hash_get_exval(struct cn_hash_dev *hdev, pid_t pid)
> +{
> + struct uexit_pid_hnode *hnode;
> + __u32 excde;
> +
> + mutex_lock(&hdev->uexit_hash_lock);
> + hash_for_each_possible(hdev->uexit_pid_htable,
> + hnode, uexit_pid_hlist, pid) {
> + if (hnode->pid == pid) {
> + excde = hnode->uexit_code;
> + mutex_unlock(&hdev->uexit_hash_lock);
> + pr_debug("%s: Found exit code %u for pid %d\n",
> + __func__, excde, pid);
> + return excde;
> + }
> + }
> +
> + mutex_unlock(&hdev->uexit_hash_lock);
> + pr_debug("%s: pid %d not found in hash table\n",
> + __func__, pid);
> + return -EINVAL;
> +}
> +
> +bool cn_hash_table_empty(struct cn_hash_dev *hdev)
> +{
> + bool is_empty;
> +
> + is_empty = hash_empty(hdev->uexit_pid_htable);
> + pr_debug("Hash table is %s\n", (is_empty ? "empty" : "not empty"));
> +
> + return is_empty;
> +}
> diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
> index 44b19e696176..0632a70a89a0 100644
> --- a/drivers/connector/cn_proc.c
> +++ b/drivers/connector/cn_proc.c
> @@ -69,6 +69,8 @@ static int cn_filter(struct sock *dsk, struct sk_buff *skb, void *data)
> if ((__u32)val == PROC_EVENT_ALL)
> return 0;
>
> + pr_debug("%s: val %lx, what %x\n", __func__, val, what);
> +
> /*
> * Drop packet if we have to report only non-zero exit status
> * (PROC_EVENT_NONZERO_EXIT) and exit status is 0
> @@ -326,9 +328,15 @@ void proc_exit_connector(struct task_struct *task)
> struct proc_event *ev;
> struct task_struct *parent;
> __u8 buffer[CN_PROC_MSG_SIZE] __aligned(8);
> + int uexit_code;
>
> - if (atomic_read(&proc_event_num_listeners) < 1)
> + if (atomic_read(&proc_event_num_listeners) < 1) {
> + if (likely(!(task->flags & PF_EXIT_NOTIFY)))
> + return;
> +
> + cn_del_get_exval(task->pid);
> return;
> + }
>
> msg = buffer_to_cn_msg(buffer);
> ev = (struct proc_event *)msg->data;
> @@ -337,7 +345,26 @@ void proc_exit_connector(struct task_struct *task)
> ev->what = PROC_EVENT_EXIT;
> ev->event_data.exit.process_pid = task->pid;
> ev->event_data.exit.process_tgid = task->tgid;
> - ev->event_data.exit.exit_code = task->exit_code;
> + if (unlikely(task->flags & PF_EXIT_NOTIFY)) {
> + task->flags &= ~PF_EXIT_NOTIFY;
> +
> + uexit_code = cn_del_get_exval(task->pid);
> + if (uexit_code <= 0) {
> + pr_debug("%s: err %d returning task's exit code %u\n",
> + uexit_code, __func__,
> + task->exit_code);
The compiler complains about the format string (the uexit_code and __func__ arguments are passed in the wrong order for "%s: err %d"):
In file included from ./include/linux/kernel.h:31,
from drivers/connector/cn_proc.c:11:
drivers/connector/cn_proc.c: In function ‘proc_exit_connector’:
drivers/connector/cn_proc.c:353:34: error: format ‘%s’ expects argument of type ‘char *’, but argument 3 has type ‘int’ [-Werror=format=]
353 | pr_debug("%s: err %d returning task's exit code %u\n",
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
---
pw-bot: cr
Powered by blists - more mailing lists