[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <20220201205534.1962784-2-haoluo@google.com>
Date: Tue, 1 Feb 2022 12:55:30 -0800
From: Hao Luo <haoluo@...gle.com>
To: Alexei Starovoitov <ast@...nel.org>,
Andrii Nakryiko <andrii@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>
Cc: Martin KaFai Lau <kafai@...com>, Song Liu <songliubraving@...com>,
Yonghong Song <yhs@...com>, KP Singh <kpsingh@...nel.org>,
Shakeel Butt <shakeelb@...gle.com>,
Joe Burton <jevburton.kernel@...il.com>,
Stanislav Fomichev <sdf@...gle.com>, bpf@...r.kernel.org,
linux-kernel@...r.kernel.org, Hao Luo <haoluo@...gle.com>
Subject: [PATCH RFC bpf-next v2 1/5] bpf: Bpffs directory tag
Introduce a tag structure for directories in bpffs. A tag carries
special information about a directory. For example, a BPF_DIR_KERNFS_REP
tag denotes that a directory is a replica of a kernfs hierarchy.
At mkdir, if the parent directory has a tag, the child directory also
gets a tag. For KERNFS_REP directories, the tag references a kernfs node.
The KERNFS_REP hierarchy mirrors the hierarchy in kernfs. Userspace is
responsible for keeping the two hierarchies in sync.
The initial tag can be created by pinning certain types of bpf objects.
The following patches will introduce such objects, and the tagged
directory will mirror the cgroup hierarchy.
Tags are destroyed at rmdir.
Signed-off-by: Hao Luo <haoluo@...gle.com>
---
kernel/bpf/inode.c | 80 +++++++++++++++++++++++++++++++++++++++++++++-
kernel/bpf/inode.h | 22 +++++++++++++
2 files changed, 101 insertions(+), 1 deletion(-)
create mode 100644 kernel/bpf/inode.h
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 5a8d9f7467bf..ecc357009df5 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -16,11 +16,13 @@
#include <linux/fs.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
+#include <linux/kernfs.h>
#include <linux/kdev_t.h>
#include <linux/filter.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include "preload/bpf_preload.h"
+#include "inode.h"
enum bpf_type {
BPF_TYPE_UNSPEC = 0,
@@ -142,6 +144,52 @@ static int bpf_inode_type(const struct inode *inode, enum bpf_type *type)
return 0;
}
+/* inode_tag - fetch the directory tag stored on an inode.
+ * @inode: inode to inspect
+ *
+ * Returns the inode's i_private for directories and NULL for anything
+ * else.
+ */
+static struct bpf_dir_tag *inode_tag(const struct inode *inode)
+{
+	struct bpf_dir_tag *tag = NULL;
+
+	if (likely(S_ISDIR(inode->i_mode)))
+		tag = inode->i_private;
+
+	return tag;
+}
+
+/* tag_dir_inode - tag a newly created directory.
+ * @tag: tag of parent directory
+ * @dentry: dentry of the new directory
+ * @inode: inode of the new directory
+ *
+ * Looks up the kernfs directory named after @dentry under the kernfs node
+ * referenced by the parent's tag, and stores a new tag referencing it in
+ * @inode->i_private. The new tag holds a reference on the kernfs node,
+ * which is put at tag deallocation.
+ *
+ * Returns 0 on success, -EINVAL if the parent tag is not a
+ * BPF_DIR_KERNFS_REP tag, -ENOENT if no kernfs node of that name exists,
+ * -EPERM if the node is not a directory, or -ENOMEM if the tag cannot be
+ * allocated.
+ *
+ * Called from bpf_mkdir.
+ */
+static int tag_dir_inode(const struct bpf_dir_tag *tag,
+			 const struct dentry *dentry, struct inode *inode)
+{
+	struct bpf_dir_tag *t;
+	struct kernfs_node *kn;
+	int err;
+
+	/* ->private is only known to be a kernfs node for KERNFS_REP tags. */
+	if (WARN_ON(tag->type != BPF_DIR_KERNFS_REP))
+		return -EINVAL;
+
+	/* On success the reference on kn is owned by the new tag. */
+	kn = kernfs_find_and_get_ns(tag->private, dentry->d_name.name, NULL);
+	if (unlikely(!kn))
+		return -ENOENT;
+
+	if (unlikely(kernfs_type(kn) != KERNFS_DIR)) {
+		err = -EPERM;
+		goto out_put;
+	}
+
+	t = kzalloc(sizeof(*t), GFP_KERNEL | __GFP_NOWARN);
+	if (unlikely(!t)) {
+		err = -ENOMEM;
+		goto out_put;
+	}
+
+	t->type = tag->type;
+	t->private = kn;
+
+	inode->i_private = t;
+	return 0;
+
+out_put:
+	kernfs_put(kn);
+	return err;
+}
+
static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode,
struct inode *dir)
{
@@ -156,6 +204,8 @@ static int bpf_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
struct dentry *dentry, umode_t mode)
{
struct inode *inode;
+ struct bpf_dir_tag *tag;
+ int err;
inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR);
if (IS_ERR(inode))
@@ -164,6 +214,15 @@ static int bpf_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
inode->i_op = &bpf_dir_iops;
inode->i_fop = &simple_dir_operations;
+ tag = inode_tag(dir);
+ if (tag) {
+ err = tag_dir_inode(tag, dentry, inode);
+ if (err) {
+ iput(inode);
+ return err;
+ }
+ }
+
inc_nlink(inode);
inc_nlink(dir);
@@ -404,11 +463,30 @@ static int bpf_symlink(struct user_namespace *mnt_userns, struct inode *dir,
return 0;
}
+/* untag_dir_inode - remove and free the tag of a directory inode.
+ * @dir: inode whose tag is destroyed
+ *
+ * Clears @dir->i_private, drops the kernfs node reference held by the tag
+ * and frees the tag. Does nothing if @dir carries no tag.
+ */
+static void untag_dir_inode(struct inode *dir)
+{
+	struct bpf_dir_tag *tag = inode_tag(dir);
+
+	if (!tag)
+		return;
+
+	dir->i_private = NULL;
+
+	/* ->private is only known to be a kernfs node for KERNFS_REP tags. */
+	if (!WARN_ON(tag->type != BPF_DIR_KERNFS_REP))
+		kernfs_put(tag->private);
+
+	kfree(tag);
+}
+
+/* bpf_rmdir - remove a bpffs directory and destroy its tag, if any.
+ * @dir: inode of the parent directory
+ * @dentry: dentry of the directory being removed
+ *
+ * The tag belongs to the directory being removed, not to its parent, and
+ * is only destroyed once simple_rmdir() has succeeded, so a failed rmdir
+ * (e.g. on a non-empty directory) leaves the tag intact.
+ *
+ * Returns 0 on success or the error from simple_rmdir().
+ */
+static int bpf_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+	int err;
+
+	err = simple_rmdir(dir, dentry);
+	if (err)
+		return err;
+
+	if (inode_tag(inode))
+		untag_dir_inode(inode);
+
+	return 0;
+}
+
static const struct inode_operations bpf_dir_iops = {
.lookup = bpf_lookup,
.mkdir = bpf_mkdir,
.symlink = bpf_symlink,
- .rmdir = simple_rmdir,
+ .rmdir = bpf_rmdir,
.rename = simple_rename,
.link = simple_link,
.unlink = simple_unlink,
diff --git a/kernel/bpf/inode.h b/kernel/bpf/inode.h
new file mode 100644
index 000000000000..2cfeef39e861
--- /dev/null
+++ b/kernel/bpf/inode.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2022 Google
+ */
+#ifndef __BPF_INODE_H_
+#define __BPF_INODE_H_
+
+enum tag_type {
+ /* The directory is a replica of a kernfs directory hierarchy;
+ * the tag's private pointer references a kernfs node.
+ */
+ BPF_DIR_KERNFS_REP = 0,
+};
+
+/* A tag for bpffs directories. It carries special information about a
+ * directory. For example, BPF_DIR_KERNFS_REP denotes that the directory is
+ * a replica of a kernfs hierarchy. Pinning certain types of objects tags
+ * a directory, and the tag is removed at rmdir.
+ */
+struct bpf_dir_tag {
+ enum tag_type type; /* kind of tag; selects how private is interpreted */
+ void *private; /* tag private data; a kernfs node for BPF_DIR_KERNFS_REP */
+};
+
+#endif
--
2.35.0.rc2.247.g8bbb082509-goog
Powered by blists - more mailing lists