[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20090806190557.GA5244@redhat.com>
Date: Thu, 6 Aug 2009 21:05:57 +0200
From: Oleg Nesterov <oleg@...hat.com>
To: Peter Zijlstra <a.p.zijlstra@...llo.nl>
Cc: Andrew Morton <akpm@...ux-foundation.org>, eranian@...il.com,
mingo@...e.hu, linux-kernel@...r.kernel.org, tglx@...utronix.de,
robert.richter@....com, paulus@...ba.org, andi@...stfloor.org,
mpjohn@...ibm.com, cel@...ibm.com, cjashfor@...ibm.com,
mucci@...s.utk.edu, terpstra@...s.utk.edu,
perfmon2-devel@...ts.sourceforge.net, mtk.manpages@...glemail.com,
roland@...hat.com
Subject: Re: [PATCH 3/2 -v4] fcntl: F_[SG]ETOWN_EX
On 08/06, Peter Zijlstra wrote:
>
> Subject: fcntl: F_[SG]ETOWN_EX
> From: Peter Zijlstra <a.p.zijlstra@...llo.nl>
> Date: Fri, 31 Jul 2009 10:35:30 +0200
>
> In order to direct the SIGIO signal to a particular thread of a
> multi-threaded application we cannot, as suggested by the manpage, put
> a TID into the regular fcntl(F_SETOWN) call. It will still be sent to
> the whole process of which that thread is part.
>
> Since people do want to properly direct SIGIO we introduce F_SETOWN_EX.
>
> The need to direct SIGIO comes from self-monitoring profiling such as
> with perf-counters. Perf-counters uses SIGIO to notify that new sample
> data is available. If the signal is delivered to the same task that
> generated the new sample it can augment that data by inspecting the
> task's user-space state right after it returns from the kernel. This
> is especially convenient for interpreted or virtual machine driven
> environments.
>
> Both F_SETOWN_EX and F_GETOWN_EX take a pointer to a struct f_owner_ex
> as argument:
>
> struct f_owner_ex {
> int type;
> pid_t pid;
> };
>
> Where type is one of F_OWNER_TID, F_OWNER_PID or F_OWNER_GID.
I think the patch is right.
Reviewed-by: Oleg Nesterov <oleg@...hat.com>
> Signed-off-by: Peter Zijlstra <a.p.zijlstra@...llo.nl>
> ---
> arch/alpha/include/asm/fcntl.h | 2
> arch/parisc/include/asm/fcntl.h | 2
> fs/fcntl.c | 108 +++++++++++++++++++++++++++++++++++++---
> include/asm-generic/fcntl.h | 13 ++++
> 4 files changed, 117 insertions(+), 8 deletions(-)
>
> Index: linux-2.6/arch/parisc/include/asm/fcntl.h
> ===================================================================
> --- linux-2.6.orig/arch/parisc/include/asm/fcntl.h
> +++ linux-2.6/arch/parisc/include/asm/fcntl.h
> @@ -28,6 +28,8 @@
> #define F_SETOWN 12 /* for sockets. */
> #define F_SETSIG 13 /* for sockets. */
> #define F_GETSIG 14 /* for sockets. */
> +#define F_GETOWN_EX 15
> +#define F_SETOWN_EX 16
>
> /* for posix fcntl() and lockf() */
> #define F_RDLCK 01
> Index: linux-2.6/fs/fcntl.c
> ===================================================================
> --- linux-2.6.orig/fs/fcntl.c
> +++ linux-2.6/fs/fcntl.c
> @@ -263,6 +263,79 @@ pid_t f_getown(struct file *filp)
> return pid;
> }
>
> +static int f_setown_ex(struct file *filp, unsigned long arg)
> +{
> + struct f_owner_ex * __user owner_p = (void * __user)arg;
> + struct f_owner_ex owner;
> + struct pid *pid;
> + int type;
> + int ret;
> +
> + ret = copy_from_user(&owner, owner_p, sizeof(owner));
> + if (ret)
> + return ret;
> +
> + switch (owner.type) {
> + case F_OWNER_TID:
> + type = PIDTYPE_MAX;
> + break;
> +
> + case F_OWNER_PID:
> + type = PIDTYPE_PID;
> + break;
> +
> + case F_OWNER_GID:
> + type = PIDTYPE_PGID;
> + break;
> +
> + default:
> + return -EINVAL;
> + }
> +
> + rcu_read_lock();
> + pid = find_vpid(owner.pid);
> + if (owner.pid && !pid)
> + ret = -ESRCH;
> + else
> + ret = __f_setown(filp, pid, type, 1);
> + rcu_read_unlock();
> +
> + return ret;
> +}
> +
> +static int f_getown_ex(struct file *filp, unsigned long arg)
> +{
> + struct f_owner_ex * __user owner_p = (void * __user)arg;
> + struct f_owner_ex owner;
> + int ret = 0;
> +
> + read_lock(&filp->f_owner.lock);
> + owner.pid = pid_vnr(filp->f_owner.pid);
> + switch (filp->f_owner.pid_type) {
> + case PIDTYPE_MAX:
> + owner.type = F_OWNER_TID;
> + break;
> +
> + case PIDTYPE_PID:
> + owner.type = F_OWNER_PID;
> + break;
> +
> + case PIDTYPE_PGID:
> + owner.type = F_OWNER_GID;
> + break;
> +
> + default:
> + WARN_ON(1);
> + ret = -EINVAL;
> + break;
> + }
> + read_unlock(&filp->f_owner.lock);
> +
> + if (!ret)
> + ret = copy_to_user(owner_p, &owner, sizeof(owner));
> + return ret;
> +}
> +
> static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
> struct file *filp)
> {
> @@ -313,6 +386,12 @@ static long do_fcntl(int fd, unsigned in
> case F_SETOWN:
> err = f_setown(filp, arg, 1);
> break;
> + case F_GETOWN_EX:
> + err = f_getown_ex(filp, arg);
> + break;
> + case F_SETOWN_EX:
> + err = f_setown_ex(filp, arg);
> + break;
> case F_GETSIG:
> err = filp->f_owner.signum;
> break;
> @@ -428,8 +507,7 @@ static inline int sigio_perm(struct task
>
> static void send_sigio_to_task(struct task_struct *p,
> struct fown_struct *fown,
> - int fd,
> - int reason)
> + int fd, int reason, int group)
> {
> /*
> * F_SETSIG can change ->signum lockless in parallel, make
> @@ -461,11 +539,11 @@ static void send_sigio_to_task(struct ta
> else
> si.si_band = band_table[reason - POLL_IN];
> si.si_fd = fd;
> - if (!do_send_sig_info(signum, &si, p, true))
> + if (!do_send_sig_info(signum, &si, p, group))
> break;
> /* fall-through: fall back on the old plain SIGIO signal */
> case 0:
> - do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, true);
> + do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group);
> }
> }
>
> @@ -474,16 +552,23 @@ void send_sigio(struct fown_struct *fown
> struct task_struct *p;
> enum pid_type type;
> struct pid *pid;
> + int group = 1;
>
> read_lock(&fown->lock);
> +
> type = fown->pid_type;
> + if (type == PIDTYPE_MAX) {
> + group = 0;
> + type = PIDTYPE_PID;
> + }
> +
> pid = fown->pid;
> if (!pid)
> goto out_unlock_fown;
>
> read_lock(&tasklist_lock);
> do_each_pid_task(pid, type, p) {
> - send_sigio_to_task(p, fown, fd, band);
> + send_sigio_to_task(p, fown, fd, band, group);
> } while_each_pid_task(pid, type, p);
> read_unlock(&tasklist_lock);
> out_unlock_fown:
> @@ -491,10 +576,10 @@ void send_sigio(struct fown_struct *fown
> }
>
> static void send_sigurg_to_task(struct task_struct *p,
> - struct fown_struct *fown)
> + struct fown_struct *fown, int group)
> {
> if (sigio_perm(p, fown, SIGURG))
> - group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
> + do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group);
> }
>
> int send_sigurg(struct fown_struct *fown)
> @@ -502,10 +587,17 @@ int send_sigurg(struct fown_struct *fown
> struct task_struct *p;
> enum pid_type type;
> struct pid *pid;
> + int group = 1;
> int ret = 0;
>
> read_lock(&fown->lock);
> +
> type = fown->pid_type;
> + if (type == PIDTYPE_MAX) {
> + group = 0;
> + type = PIDTYPE_PID;
> + }
> +
> pid = fown->pid;
> if (!pid)
> goto out_unlock_fown;
> @@ -514,7 +606,7 @@ int send_sigurg(struct fown_struct *fown
>
> read_lock(&tasklist_lock);
> do_each_pid_task(pid, type, p) {
> - send_sigurg_to_task(p, fown);
> + send_sigurg_to_task(p, fown, group);
> } while_each_pid_task(pid, type, p);
> read_unlock(&tasklist_lock);
> out_unlock_fown:
> Index: linux-2.6/include/asm-generic/fcntl.h
> ===================================================================
> --- linux-2.6.orig/include/asm-generic/fcntl.h
> +++ linux-2.6/include/asm-generic/fcntl.h
> @@ -73,6 +73,19 @@
> #define F_SETSIG 10 /* for sockets. */
> #define F_GETSIG 11 /* for sockets. */
> #endif
> +#ifndef F_SETOWN_EX
> +#define F_SETOWN_EX 12
> +#define F_GETOWN_EX 13
> +#endif
> +
> +#define F_OWNER_TID 0
> +#define F_OWNER_PID 1
> +#define F_OWNER_GID 2
> +
> +struct f_owner_ex {
> + int type;
> + pid_t pid;
> +};
>
> /* for F_[GET|SET]FL */
> #define FD_CLOEXEC 1 /* actually anything with low bit set goes */
> Index: linux-2.6/arch/alpha/include/asm/fcntl.h
> ===================================================================
> --- linux-2.6.orig/arch/alpha/include/asm/fcntl.h
> +++ linux-2.6/arch/alpha/include/asm/fcntl.h
> @@ -26,6 +26,8 @@
> #define F_GETOWN 6 /* for sockets. */
> #define F_SETSIG 10 /* for sockets. */
> #define F_GETSIG 11 /* for sockets. */
> +#define F_SETOWN_EX 12
> +#define F_GETOWN_EX 13
>
> /* for posix fcntl() and lockf() */
> #define F_RDLCK 1
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists