lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-ID: <47B56BF2.4020600@openvz.org>
Date:	Fri, 15 Feb 2008 13:39:46 +0300
From:	Pavel Emelyanov <xemul@...nvz.org>
To:	Andrew Morton <akpm@...ux-foundation.org>
CC:	Al Viro <viro@...iv.linux.org.uk>,
	Linux Kernel Mailing List <linux-kernel@...r.kernel.org>,
	Alexey Dobriyan <adobriyan@...nvz.org>
Subject: [PATCH] Make sysctl a separate filesystem

Sysctl files/inodes now have their own readdir and lookup
methods, so only one step remains to turn sysctl into a
separate filesystem.

The benefits of this are:

1. this will allow removing the fancy revalidation rules from
   sysctl dentries (to be done in a separate patch);
2. the same approach will make the /proc/net implementation MUCH
   cleaner with respect to net namespace interaction, i.e.
   no racy shadows and no revalidation for proc entries in
   this subdir;
3. sysctl inodes are now smaller than the procfs ones.

Note: update your initscripts (and optionally fstab) to mount the
sysctl filesystem right after proc is mounted, so that your
/etc/sysctl.conf configuration is not lost.

Signed-off-by: Pavel Emelyanov <xemul@...nvz.org>
Signed-off-by: Alexey Dobriyan <adobriyan@...nvz.org>

---

diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 1c81c8f..47dec4b 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -11,11 +11,6 @@
 
 #include <linux/proc_fs.h>
 
-#ifdef CONFIG_PROC_SYSCTL
-extern int proc_sys_init(void);
-#else
-static inline void proc_sys_init(void) { }
-#endif
 #ifdef CONFIG_NET
 extern int proc_net_init(void);
 #else
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 614c34b..1b52f43 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -1,11 +1,9 @@
 /*
  * /proc/sys support
  */
-
+#include <linux/magic.h>
 #include <linux/sysctl.h>
-#include <linux/proc_fs.h>
 #include <linux/security.h>
-#include "internal.h"
 
 static struct dentry_operations proc_sys_dentry_operations;
 static const struct file_operations proc_sys_file_operations;
@@ -28,22 +26,26 @@ static void proc_sys_refresh_inode(struct inode *inode, struct ctl_table *table)
 	}
 }
 
+static inline long inode_depth(struct inode *ino)
+{
+	return (long)ino->i_private;
+}
+
+static inline void set_inode_depth(struct inode *ino, long depth)
+{
+	ino->i_private = (void *)depth;
+}
+
 static struct inode *proc_sys_make_inode(struct inode *dir, struct ctl_table *table)
 {
 	struct inode *inode;
-	struct proc_inode *dir_ei, *ei;
-	int depth;
 
 	inode = new_inode(dir->i_sb);
 	if (!inode)
 		goto out;
 
 	/* A directory is always one deeper than it's parent */
-	dir_ei = PROC_I(dir);
-	depth = dir_ei->fd + 1;
-
-	ei = PROC_I(inode);
-	ei->fd = depth;
+	set_inode_depth(inode, inode_depth(dir) + 1);
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	inode->i_op = &proc_sys_inode_operations;
 	inode->i_fop = &proc_sys_file_operations;
@@ -56,10 +58,7 @@ out:
 static struct dentry *proc_sys_ancestor(struct dentry *dentry, int depth)
 {
 	for (;;) {
-		struct proc_inode *ei;
-
-		ei = PROC_I(dentry->d_inode);
-		if (ei->fd == depth)
+		if (inode_depth(dentry->d_inode) == depth)
 			break; /* found */
 
 		dentry = dentry->d_parent;
@@ -93,12 +92,9 @@ static struct ctl_table *proc_sys_lookup_table(struct dentry *dentry,
 						struct ctl_table *table)
 {
 	struct dentry *ancestor;
-	struct proc_inode *ei;
 	int depth, i;
 
-	ei = PROC_I(dentry->d_inode);
-	depth = ei->fd;
-
+	depth = inode_depth(dentry->d_inode);
 	if (depth == 0)
 		return table;
 
@@ -385,7 +381,7 @@ static int proc_sys_permission(struct inode *inode, int mask, struct nameidata *
 	int error;
 
 	head = NULL;
-	depth = PROC_I(inode)->fd;
+	depth = inode_depth(inode);
 
 	/* First check the cached permissions, in case we don't have
 	 * enough information to lookup the sysctl table entry.
@@ -466,13 +462,56 @@ static struct dentry_operations proc_sys_dentry_operations = {
 	.d_revalidate	= proc_sys_revalidate,
 };
 
-static struct proc_dir_entry *proc_sys_root;
+static const struct super_operations sysctl_ops = {
+	.statfs		= simple_statfs,
+	.drop_inode	= generic_delete_inode,
+};
 
-int proc_sys_init(void)
+static int sysctl_fill_super(struct super_block *sb, void *data, int flags)
 {
-	proc_sys_root = proc_mkdir("sys", NULL);
-	proc_sys_root->proc_iops = &proc_sys_inode_operations;
-	proc_sys_root->proc_fops = &proc_sys_file_operations;
-	proc_sys_root->nlink = 0;
+	struct inode *ino;
+
+	sb->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC | MS_NODEV;
+	sb->s_blocksize = 1024;
+	sb->s_blocksize_bits = 10;
+	sb->s_magic = PROC_SUPER_MAGIC;
+	sb->s_op = &sysctl_ops;
+	sb->s_time_gran = 1;
+
+	ino = new_inode(sb);
+	if (ino == NULL)
+		return -ENOMEM;
+
+	ino->i_op = &proc_sys_inode_operations;
+	ino->i_fop = &proc_sys_file_operations;
+	set_inode_depth(ino, 0);
+	ino->i_uid = 0;
+	ino->i_gid = 0;
+	ino->i_mode = 0555 | S_IFDIR;
+
+	sb->s_root = d_alloc_root(ino);
+	if (sb->s_root == NULL) {
+		iput(ino);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
+
+static int sysctl_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+{
+	return get_sb_single(fs_type, flags, data, sysctl_fill_super, mnt);
+}
+
+static struct file_system_type sysctl_fs = {
+	.name = "sysctl",
+	.get_sb = sysctl_get_sb,
+	.kill_sb = kill_anon_super,
+};
+
+static int __init proc_sys_init(void)
+{
+	return register_filesystem(&sysctl_fs);
+}
+module_init(proc_sys_init);
diff --git a/fs/proc/root.c b/fs/proc/root.c
index ef0fb57..9035938 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -129,6 +129,7 @@ void __init proc_root_init(void)
 	proc_root_fs = proc_mkdir("fs", NULL);
 	proc_root_driver = proc_mkdir("driver", NULL);
 	proc_mkdir("fs/nfsd", NULL); /* somewhere for the nfsd filesystem to be mounted */
+	proc_mkdir("sys", NULL);
 #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
 	/* just give it a mountpoint */
 	proc_mkdir("openprom", NULL);
@@ -138,7 +139,6 @@ void __init proc_root_init(void)
 	proc_device_tree_init();
 #endif
 	proc_bus = proc_mkdir("bus", NULL);
-	proc_sys_init();
 }
 
 static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ