[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAKywueTEXOH3s0z2X7e=QugJaQAZ6JPoaYmYPnFLHMOTC7_y4w@mail.gmail.com>
Date: Mon, 11 Mar 2013 22:57:27 +0400
From: Pavel Shilovsky <piastry@...rsoft.ru>
To: Jeff Layton <jlayton@...hat.com>
Cc: linux-kernel@...r.kernel.org, linux-cifs@...r.kernel.org,
linux-fsdevel@...r.kernel.org, linux-nfs@...r.kernel.org,
wine-devel@...ehq.org
Subject: Re: [PATCH v3 2/7] vfs: Add O_DENYREAD/WRITE flags support for open syscall
2013/3/11 Jeff Layton <jlayton@...hat.com>:
> On Thu, 28 Feb 2013 19:25:28 +0400
> Pavel Shilovsky <piastry@...rsoft.ru> wrote:
>
>> If O_DENYMAND flag is specified, O_DENYREAD/WRITE/MAND flags are
>> translated to flock's flags:
>>
>> !O_DENYREAD -> LOCK_READ
>> !O_DENYWRITE -> LOCK_WRITE
>> O_DENYMAND -> LOCK_MAND
>>
>> and set through flock_lock_file on a file.
>>
>> This change affects opens that use O_DENYMAND flag - all other
>> native Linux opens don't care about these flags. It allow us to
>> enable this feature for applications that need it (e.g. NFS and
>> Samba servers that export the same directory for Windows clients,
>> or Wine applications that access the same files simultaneously).
>>
>> Create codepath is slightly changed to prevent data races on
>> newely created files: when open with O_CREAT can return with -ETXTBSY
>> error for successfully created files due to a deny lock set by
>> another task.
>>
>> Signed-off-by: Pavel Shilovsky <piastry@...rsoft.ru>
>> ---
>> fs/locks.c | 116 +++++++++++++++++++++++++++++++++++++++++++++++------
>> fs/namei.c | 44 ++++++++++++++++++--
>> include/linux/fs.h | 6 +++
>> 3 files changed, 151 insertions(+), 15 deletions(-)
>>
>> diff --git a/fs/locks.c b/fs/locks.c
>> index a94e331..0cc7d1b 100644
>> --- a/fs/locks.c
>> +++ b/fs/locks.c
>> @@ -605,20 +605,81 @@ static int posix_locks_conflict(struct file_lock *caller_fl, struct file_lock *s
>> return (locks_conflict(caller_fl, sys_fl));
>> }
>>
>> -/* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
>> - * checking before calling the locks_conflict().
>> +static unsigned int
>> +deny_flags_to_cmd(unsigned int flags)
>> +{
>> + unsigned int cmd = LOCK_MAND;
>> +
>> + if (!(flags & O_DENYREAD))
>> + cmd |= LOCK_READ;
>> + if (!(flags & O_DENYWRITE))
>> + cmd |= LOCK_WRITE;
>> +
>> + return cmd;
>> +}
>> +
>> +/*
>> + * locks_mand_conflict - Determine if there's a share reservation conflict
>> + * @caller_fl: lock we're attempting to acquire
>> + * @sys_fl: lock already present on system that we're checking against
>> + *
>> + * Check to see if there's a share_reservation conflict. LOCK_READ/LOCK_WRITE
>> + * tell us whether the reservation allows other readers and writers.
>> + *
>> + * We only check against other LOCK_MAND locks, so applications that want to
>> + * use share mode locking will only conflict against one another. "normal"
>> + * applications that open files won't be affected by and won't themselves
>> + * affect the share reservations.
>> */
>> -static int flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
>> +static int
>> +locks_mand_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl)
>> {
>> - /* FLOCK locks referring to the same filp do not conflict with
>> + unsigned char caller_type = caller_fl->fl_type;
>> + unsigned char sys_type = sys_fl->fl_type;
>> + fmode_t caller_fmode = caller_fl->fl_file->f_mode;
>> + fmode_t sys_fmode = sys_fl->fl_file->f_mode;
>> +
>> + /* they can only conflict if they're both LOCK_MAND */
>> + if (!(caller_type & LOCK_MAND) || !(sys_type & LOCK_MAND))
>> + return 0;
>> +
>> + if (!(caller_type & LOCK_READ) && (sys_fmode & FMODE_READ))
>> + return 1;
>> + if (!(caller_type & LOCK_WRITE) && (sys_fmode & FMODE_WRITE))
>> + return 1;
>> + if (!(sys_type & LOCK_READ) && (caller_fmode & FMODE_READ))
>> + return 1;
>> + if (!(sys_type & LOCK_WRITE) && (caller_fmode & FMODE_WRITE))
>> + return 1;
>> +
>> + return 0;
>> +}
>> +
>> +/*
>> + * Determine if lock sys_fl blocks lock caller_fl. FLOCK specific checking
>> + * before calling the locks_conflict(). Boolean is_mand indicates whether
>> + * we should use a share reservation scheme or not.
>> + */
>> +static int
>> +flock_locks_conflict(struct file_lock *caller_fl, struct file_lock *sys_fl,
>> + bool is_mand)
>
> I'm not sure you really need to add this new is_mand bool. Won't that
> be equivalent to (caller_fl->fl_type & LOCK_MAND)?
This function is already used by flock system call that can pass
LOCK_MAND flag to caller_fl->fl_type. I don't want to affect existing
flock behavior by introduing new denylocking strategy - so, we need to
let flock_locks_conflict function know if we are in flock or open
codepath - in open codepath it will call locks_mand_conflict to check
if there is any other open that prevents us.
>
>> +{
>> + /*
>> + * FLOCK locks referring to the same filp do not conflict with
>> * each other.
>> */
>> - if (!IS_FLOCK(sys_fl) || (caller_fl->fl_file == sys_fl->fl_file))
>> - return (0);
>> - if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND))
>> + if (!IS_FLOCK(sys_fl))
>> + return 0;
>> + if ((caller_fl->fl_type & LOCK_MAND) || (sys_fl->fl_type & LOCK_MAND)) {
>> + if (is_mand)
>> + return locks_mand_conflict(caller_fl, sys_fl);
>> + else
>> + return 0;
>> + }
>> + if (caller_fl->fl_file == sys_fl->fl_file)
>> return 0;
>>
>> - return (locks_conflict(caller_fl, sys_fl));
>> + return locks_conflict(caller_fl, sys_fl);
>> }
>>
>> void
>> @@ -697,14 +758,19 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
>> return 0;
>> }
>>
>> -/* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
>> +/*
>> + * Try to create a FLOCK lock on filp. We always insert new FLOCK locks
>> * after any leases, but before any posix locks.
>> *
>> * Note that if called with an FL_EXISTS argument, the caller may determine
>> * whether or not a lock was successfully freed by testing the return
>> * value for -ENOENT.
>> + *
>> + * Take @is_conflict callback that determines how to check if locks have
>> + * conflicts or not.
>> */
>> -static int flock_lock_file(struct file *filp, struct file_lock *request)
>> +static int
>> +flock_lock_file(struct file *filp, struct file_lock *request, bool is_mand)
>
> Ditto here on the is_mand bool. I think you can determine that from
> request->fl_type. Right?
The same suggestions are applied to this place too.
>
>> {
>> struct file_lock *new_fl = NULL;
>> struct file_lock **before;
>> @@ -760,7 +826,7 @@ find_conflict:
>> break;
>> if (IS_LEASE(fl))
>> continue;
>> - if (!flock_locks_conflict(request, fl))
>> + if (!flock_locks_conflict(request, fl, is_mand))
>> continue;
>> error = -EAGAIN;
>> if (!(request->fl_flags & FL_SLEEP))
>> @@ -783,6 +849,32 @@ out:
>> return error;
>> }
>>
>> +/*
>> + * Determine if a file is allowed to be opened with specified access and deny
>> + * modes. Lock the file and return 0 if checks passed, otherwise return a error
>> + * code.
>> + */
>> +int
>> +deny_lock_file(struct file *filp)
>> +{
>> + struct file_lock *lock;
>> + int error = 0;
>> +
>> + if (!(filp->f_flags & O_DENYMAND))
>> + return error;
>> +
>> + error = flock_make_lock(filp, &lock, deny_flags_to_cmd(filp->f_flags));
>> + if (error)
>> + return error;
>> +
>> + error = flock_lock_file(filp, lock, true);
>> + if (error == -EAGAIN)
>> + error = -ETXTBSY;
>> +
>
> I think EBUSY would be a better return code here. ETXTBSY is returned
> in more specific circumstances -- mostly when you're opening a file for
> write that is being executed.
Yes, I agree. This work was done before the discussion in linux-cifs@
about a error code for STATUS_SHARING_VIOLATION.
>
>> + locks_free_lock(lock);
>> + return error;
>> +}
>> +
>> static int __posix_lock_file(struct inode *inode, struct file_lock *request, struct file_lock *conflock)
>> {
>> struct file_lock *fl;
>> @@ -1589,7 +1681,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
>> int error;
>> might_sleep();
>> for (;;) {
>> - error = flock_lock_file(filp, fl);
>> + error = flock_lock_file(filp, fl, false);
>> if (error != FILE_LOCK_DEFERRED)
>> break;
>> error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
>> diff --git a/fs/namei.c b/fs/namei.c
>> index 43a97ee..c1f7e08 100644
>> --- a/fs/namei.c
>> +++ b/fs/namei.c
>> @@ -2559,9 +2559,14 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
>> * here.
>> */
>> error = may_open(&file->f_path, acc_mode, open_flag);
>> - if (error)
>> + if (error) {
>> fput(file);
>> + goto out;
>> + }
>>
>> + error = deny_lock_file(file);
>> + if (error)
>> + fput(file);
>> out:
>> dput(dentry);
>> return error;
>> @@ -2771,9 +2776,9 @@ retry_lookup:
>> }
>> mutex_lock(&dir->d_inode->i_mutex);
>> error = lookup_open(nd, path, file, op, got_write, opened);
>> - mutex_unlock(&dir->d_inode->i_mutex);
>>
>> if (error <= 0) {
>> + mutex_unlock(&dir->d_inode->i_mutex);
>> if (error)
>> goto out;
>>
>> @@ -2791,8 +2796,32 @@ retry_lookup:
>> will_truncate = false;
>> acc_mode = MAY_OPEN;
>> path_to_nameidata(path, nd);
>> - goto finish_open_created;
>> +
>> + /*
>> + * Unlock parent i_mutex later when the open finishes - prevent
>> + * races when a file can be locked with a deny lock by another
>> + * task that opens the file.
>> + */
>> + error = may_open(&nd->path, acc_mode, open_flag);
>> + if (error) {
>> + mutex_unlock(&dir->d_inode->i_mutex);
>> + goto out;
>> + }
>> + file->f_path.mnt = nd->path.mnt;
>> + error = finish_open(file, nd->path.dentry, NULL, opened);
>> + if (error) {
>> + mutex_unlock(&dir->d_inode->i_mutex);
>> + if (error == -EOPENSTALE)
>> + goto stale_open;
>> + goto out;
>> + }
>> + error = deny_lock_file(file);
>> + mutex_unlock(&dir->d_inode->i_mutex);
>> + if (error)
>> + goto exit_fput;
>> + goto opened;
>> }
>> + mutex_unlock(&dir->d_inode->i_mutex);
>>
>> /*
>> * create/update audit record if it already exists.
>> @@ -2885,6 +2914,15 @@ finish_open_created:
>> goto stale_open;
>> goto out;
>> }
>> + /*
>> + * Lock parent i_mutex to prevent races with deny locks on newely
>> + * created files.
>> + */
>> + mutex_lock(&dir->d_inode->i_mutex);
>> + error = deny_lock_file(file);
>> + mutex_unlock(&dir->d_inode->i_mutex);
>> + if (error)
>> + goto exit_fput;
>> opened:
>> error = open_check_o_direct(file);
>> if (error)
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index 7617ee0..347e1de 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -1005,6 +1005,7 @@ extern int lease_modify(struct file_lock **, int);
>> extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
>> extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
>> extern void locks_delete_block(struct file_lock *waiter);
>> +extern int deny_lock_file(struct file *);
>> extern void lock_flocks(void);
>> extern void unlock_flocks(void);
>> #else /* !CONFIG_FILE_LOCKING */
>> @@ -1153,6 +1154,11 @@ static inline void locks_delete_block(struct file_lock *waiter)
>> {
>> }
>>
>> +static inline int deny_lock_file(struct file *filp)
>> +{
>> + return 0;
>> +}
>> +
>> static inline void lock_flocks(void)
>> {
>> }
>
>
> --
> Jeff Layton <jlayton@...hat.com>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-cifs" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
Best regards,
Pavel Shilovsky.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists