[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <7c86c4470908070510recfc3f5u2a4a07ee843978ac@mail.gmail.com>
Date: Fri, 7 Aug 2009 14:10:49 +0200
From: stephane eranian <eranian@...glemail.com>
To: Oleg Nesterov <oleg@...hat.com>
Cc: Peter Zijlstra <a.p.zijlstra@...llo.nl>,
Andrew Morton <akpm@...ux-foundation.org>, mingo@...e.hu,
linux-kernel@...r.kernel.org, tglx@...utronix.de,
robert.richter@....com, paulus@...ba.org, andi@...stfloor.org,
mpjohn@...ibm.com, cel@...ibm.com, cjashfor@...ibm.com,
mucci@...s.utk.edu, terpstra@...s.utk.edu,
perfmon2-devel@...ts.sourceforge.net, mtk.manpages@...glemail.com,
roland@...hat.com
Subject: Re: [PATCH 3/2 -v4] fcntl: F_[SG]ETOWN_EX
Hi,
On Thu, Aug 6, 2009 at 9:05 PM, Oleg Nesterov<oleg@...hat.com> wrote:
> On 08/06, Peter Zijlstra wrote:
>>
>> Subject: fcntl: F_[SG]ETOWN_EX
>> From: Peter Zijlstra <a.p.zijlstra@...llo.nl>
>> Date: Fri, 31 Jul 2009 10:35:30 +0200
>>
>> In order to direct the SIGIO signal to a particular thread of a
>> multi-threaded application we cannot, as suggested by the manpage, put
>> a TID into the regular fcntl(F_SETOWN) call. It will still be sent to
>> the whole process of which that thread is part.
>>
>> Since people do want to properly direct SIGIO we introduce F_SETOWN_EX.
>>
>> The need to direct SIGIO comes from self-monitoring profiling such as
>> with perf-counters. Perf-counters uses SIGIO to notify that new sample
>> data is available. If the signal is delivered to the same task that
>> generated the new sample it can augment that data by inspecting the
>> task's user-space state right after it returns from the kernel. This
>> is esp. convenient for interpreted or virtual machine driven
>> environments.
>>
>> Both F_SETOWN_EX and F_GETOWN_EX take a pointer to a struct f_owner_ex
>> as argument:
>>
>> struct f_owner_ex {
>> int type;
>> pid_t pid;
>> };
>>
>> Where type is one of F_OWNER_TID, F_OWNER_PID or F_OWNER_GID.
>
> I think the patch is right.
>
> Reviewed-by: Oleg Nesterov <oleg@...hat.com>
>
I have tested the patch on 2.6.30 (backport) + perfmon and it seems to
work in my test case.
I have not tried it with perfcounters + 2.6.31.
I am glad there is finally a solution to this problem.
Thanks.
>> Signed-off-by: Peter Zijlstra <a.p.zijlstra@...llo.nl>
>> ---
>> arch/alpha/include/asm/fcntl.h | 2
>> arch/parisc/include/asm/fcntl.h | 2
>> fs/fcntl.c | 108 +++++++++++++++++++++++++++++++++++++---
>> include/asm-generic/fcntl.h | 13 ++++
>> 4 files changed, 117 insertions(+), 8 deletions(-)
>>
>> Index: linux-2.6/arch/parisc/include/asm/fcntl.h
>> ===================================================================
>> --- linux-2.6.orig/arch/parisc/include/asm/fcntl.h
>> +++ linux-2.6/arch/parisc/include/asm/fcntl.h
>> @@ -28,6 +28,8 @@
>> #define F_SETOWN 12 /* for sockets. */
>> #define F_SETSIG 13 /* for sockets. */
>> #define F_GETSIG 14 /* for sockets. */
>> +#define F_GETOWN_EX 15
>> +#define F_SETOWN_EX 16
>>
>> /* for posix fcntl() and lockf() */
>> #define F_RDLCK 01
>> Index: linux-2.6/fs/fcntl.c
>> ===================================================================
>> --- linux-2.6.orig/fs/fcntl.c
>> +++ linux-2.6/fs/fcntl.c
>> @@ -263,6 +263,79 @@ pid_t f_getown(struct file *filp)
>> return pid;
>> }
>>
>> +static int f_setown_ex(struct file *filp, unsigned long arg)
>> +{
>> + struct f_owner_ex * __user owner_p = (void * __user)arg;
>> + struct f_owner_ex owner;
>> + struct pid *pid;
>> + int type;
>> + int ret;
>> +
>> + ret = copy_from_user(&owner, owner_p, sizeof(owner));
>> + if (ret)
>> + return ret;
>> +
>> + switch (owner.type) {
>> + case F_OWNER_TID:
>> + type = PIDTYPE_MAX;
>> + break;
>> +
>> + case F_OWNER_PID:
>> + type = PIDTYPE_PID;
>> + break;
>> +
>> + case F_OWNER_GID:
>> + type = PIDTYPE_PGID;
>> + break;
>> +
>> + default:
>> + return -EINVAL;
>> + }
>> +
>> + rcu_read_lock();
>> + pid = find_vpid(owner.pid);
>> + if (owner.pid && !pid)
>> + ret = -ESRCH;
>> + else
>> + ret = __f_setown(filp, pid, type, 1);
>> + rcu_read_unlock();
>> +
>> + return ret;
>> +}
>> +
>> +static int f_getown_ex(struct file *filp, unsigned long arg)
>> +{
>> + struct f_owner_ex * __user owner_p = (void * __user)arg;
>> + struct f_owner_ex owner;
>> + int ret = 0;
>> +
>> + read_lock(&filp->f_owner.lock);
>> + owner.pid = pid_vnr(filp->f_owner.pid);
>> + switch (filp->f_owner.pid_type) {
>> + case PIDTYPE_MAX:
>> + owner.type = F_OWNER_TID;
>> + break;
>> +
>> + case PIDTYPE_PID:
>> + owner.type = F_OWNER_PID;
>> + break;
>> +
>> + case PIDTYPE_PGID:
>> + owner.type = F_OWNER_GID;
>> + break;
>> +
>> + default:
>> + WARN_ON(1);
>> + ret = -EINVAL;
>> + break;
>> + }
>> + read_unlock(&filp->f_owner.lock);
>> +
>> + if (!ret)
>> + ret = copy_to_user(owner_p, &owner, sizeof(owner));
>> + return ret;
>> +}
>> +
>> static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
>> struct file *filp)
>> {
>> @@ -313,6 +386,12 @@ static long do_fcntl(int fd, unsigned in
>> case F_SETOWN:
>> err = f_setown(filp, arg, 1);
>> break;
>> + case F_GETOWN_EX:
>> + err = f_getown_ex(filp, arg);
>> + break;
>> + case F_SETOWN_EX:
>> + err = f_setown_ex(filp, arg);
>> + break;
>> case F_GETSIG:
>> err = filp->f_owner.signum;
>> break;
>> @@ -428,8 +507,7 @@ static inline int sigio_perm(struct task
>>
>> static void send_sigio_to_task(struct task_struct *p,
>> struct fown_struct *fown,
>> - int fd,
>> - int reason)
>> + int fd, int reason, int group)
>> {
>> /*
>> * F_SETSIG can change ->signum lockless in parallel, make
>> @@ -461,11 +539,11 @@ static void send_sigio_to_task(struct ta
>> else
>> si.si_band = band_table[reason - POLL_IN];
>> si.si_fd = fd;
>> - if (!do_send_sig_info(signum, &si, p, true))
>> + if (!do_send_sig_info(signum, &si, p, group))
>> break;
>> /* fall-through: fall back on the old plain SIGIO signal */
>> case 0:
>> - do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, true);
>> + do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, group);
>> }
>> }
>>
>> @@ -474,16 +552,23 @@ void send_sigio(struct fown_struct *fown
>> struct task_struct *p;
>> enum pid_type type;
>> struct pid *pid;
>> + int group = 1;
>>
>> read_lock(&fown->lock);
>> +
>> type = fown->pid_type;
>> + if (type == PIDTYPE_MAX) {
>> + group = 0;
>> + type = PIDTYPE_PID;
>> + }
>> +
>> pid = fown->pid;
>> if (!pid)
>> goto out_unlock_fown;
>>
>> read_lock(&tasklist_lock);
>> do_each_pid_task(pid, type, p) {
>> - send_sigio_to_task(p, fown, fd, band);
>> + send_sigio_to_task(p, fown, fd, band, group);
>> } while_each_pid_task(pid, type, p);
>> read_unlock(&tasklist_lock);
>> out_unlock_fown:
>> @@ -491,10 +576,10 @@ void send_sigio(struct fown_struct *fown
>> }
>>
>> static void send_sigurg_to_task(struct task_struct *p,
>> - struct fown_struct *fown)
>> + struct fown_struct *fown, int group)
>> {
>> if (sigio_perm(p, fown, SIGURG))
>> - group_send_sig_info(SIGURG, SEND_SIG_PRIV, p);
>> + do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, group);
>> }
>>
>> int send_sigurg(struct fown_struct *fown)
>> @@ -502,10 +587,17 @@ int send_sigurg(struct fown_struct *fown
>> struct task_struct *p;
>> enum pid_type type;
>> struct pid *pid;
>> + int group = 1;
>> int ret = 0;
>>
>> read_lock(&fown->lock);
>> +
>> type = fown->pid_type;
>> + if (type == PIDTYPE_MAX) {
>> + group = 0;
>> + type = PIDTYPE_PID;
>> + }
>> +
>> pid = fown->pid;
>> if (!pid)
>> goto out_unlock_fown;
>> @@ -514,7 +606,7 @@ int send_sigurg(struct fown_struct *fown
>>
>> read_lock(&tasklist_lock);
>> do_each_pid_task(pid, type, p) {
>> - send_sigurg_to_task(p, fown);
>> + send_sigurg_to_task(p, fown, group);
>> } while_each_pid_task(pid, type, p);
>> read_unlock(&tasklist_lock);
>> out_unlock_fown:
>> Index: linux-2.6/include/asm-generic/fcntl.h
>> ===================================================================
>> --- linux-2.6.orig/include/asm-generic/fcntl.h
>> +++ linux-2.6/include/asm-generic/fcntl.h
>> @@ -73,6 +73,19 @@
>> #define F_SETSIG 10 /* for sockets. */
>> #define F_GETSIG 11 /* for sockets. */
>> #endif
>> +#ifndef F_SETOWN_EX
>> +#define F_SETOWN_EX 12
>> +#define F_GETOWN_EX 13
>> +#endif
>> +
>> +#define F_OWNER_TID 0
>> +#define F_OWNER_PID 1
>> +#define F_OWNER_GID 2
>> +
>> +struct f_owner_ex {
>> + int type;
>> + pid_t pid;
>> +};
>>
>> /* for F_[GET|SET]FL */
>> #define FD_CLOEXEC 1 /* actually anything with low bit set goes */
>> Index: linux-2.6/arch/alpha/include/asm/fcntl.h
>> ===================================================================
>> --- linux-2.6.orig/arch/alpha/include/asm/fcntl.h
>> +++ linux-2.6/arch/alpha/include/asm/fcntl.h
>> @@ -26,6 +26,8 @@
>> #define F_GETOWN 6 /* for sockets. */
>> #define F_SETSIG 10 /* for sockets. */
>> #define F_GETSIG 11 /* for sockets. */
>> +#define F_SETOWN_EX 12
>> +#define F_GETOWN_EX 13
>>
>> /* for posix fcntl() and lockf() */
>> #define F_RDLCK 1
>>
>>
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists