[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <87y3ufvxaz.fsf@gmail.com>
Date: Tue, 02 May 2017 22:05:56 +0200
From: Nicolai Stange <nicstange@...il.com>
To: Johannes Berg <johannes@...solutions.net>
Cc: Nicolai Stange <nicstange@...il.com>,
Greg Kroah-Hartman <gregkh@...uxfoundation.org>,
"Paul E.McKenney" <paulmck@...ux.vnet.ibm.com>,
linux-kernel@...r.kernel.org
Subject: Re: [RFC PATCH 8/9] debugfs: defer debugfs_fsdata allocation to first usage
On Tue, Apr 18 2017, Johannes Berg wrote:
> On Sun, 2017-04-16 at 11:51 +0200, Nicolai Stange wrote:
>>
>> +++ b/fs/debugfs/file.c
>> @@ -53,6 +53,7 @@ const struct file_operations
>> *debugfs_real_fops(const struct file *filp)
>> {
>> struct debugfs_fsdata *fsd = F_DENTRY(filp)->d_fsdata;
>>
>> + WARN_ON((unsigned long)fsd &
>> DEBUGFS_FSDATA_IS_REAL_FOPS_BIT);
>> return fsd->real_fops;
>
> I'm not a fan of BUG_ON(), but in this case, if you have a completely
> bogus pointer here, and then you return fsd->real_fops which will be
> even more bogus, and *then* you call a function from within it... that
> seems like a recipe for disaster.
Hmm. Note that debugfs_real_fops() is provided for users outside of the
debugfs core which encapsulate their file_operations in a larger struct
and access this one through a container_of(debugfs_real_fops(dentry), ...).
Now, what the above WARN_ON() really checks for is whether the
debugfs_real_fops() has been called under a debugfs_file_get()/-_put()
pair. The point of requiring this isn't protection ([1]) or anything, but
merely to be able to streamline the implementation of
debugfs_real_fops(): knowing that a debugfs_file_get() is active allows
for the omission of the WARNed_ON case, namely that the fops are
encoded in ->d_fsdata itself.
> So either you could return some valid ops (perhaps
> debugfs_noop_file_operations although those don't have .name or .poll,
> so it doesn't cover everything), or you can just BUG_ON() here
> directly, saving the incomprehensible crash later.
The purpose of that WARN_ON() there was to turn a potentially
incomprehensible crash into a comprehensible one ;)
In order to avoid a new BUG_ON(), what about keeping the WARN_ON() as is
and returning NULL instead of the garbage? That would crash current on
first access and the earlier warning would hopefully give some clue?
Thanks,
Nicolai
[1] "protection" in the sense of "protection against file removal". With
the proposed [9/9] ("debugfs: free debugfs_fsdata instances"), the
patch introducing occasional freeing of ->d_fsdata, an additional
RCU read side section would be needed if debugfs_real_fops() were
allowed to be called outside of a debugfs_file_get()/-_put() pair.
>> EXPORT_SYMBOL_GPL(debugfs_real_fops);
>> @@ -74,9 +75,35 @@ EXPORT_SYMBOL_GPL(debugfs_real_fops);
>> */
>> int debugfs_file_get(struct dentry *dentry)
>> {
>> - struct debugfs_fsdata *fsd = dentry->d_fsdata;
>> + struct debugfs_fsdata *fsd;
>> + void *d_fsd;
>> +
>> + d_fsd = READ_ONCE(dentry->d_fsdata);
>> + if (!((unsigned long)d_fsd &
>> DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)) {
>> + fsd = d_fsd;
>> + } else {
>> + fsd = kmalloc(sizeof(*fsd), GFP_KERNEL);
>> + if (!fsd)
>> + return -ENOMEM;
>> +
>> + fsd->real_fops = (void *)((unsigned long)d_fsd &
>> + ~DEBUGFS_FSDATA_IS_REAL_FOPS
>> _BIT);
>> + refcount_set(&fsd->active_users, 1);
>> + init_completion(&fsd->active_users_drained);
>> + if (cmpxchg(&dentry->d_fsdata, d_fsd, fsd) != d_fsd)
>> {
>> + kfree(fsd);
>> + fsd = READ_ONCE(dentry->d_fsdata);
>> + }
>> + }
>>
>> - /* Avoid starvation of removers. */
>> + /*
>> + * In case of a successful cmpxchg() above, this check is
>> + * strictly necessary and must follow it, see the comment in
>> + * __debugfs_remove_file().
>> + * OTOH, if the cmpxchg() hasn't been executed or wasn't
>> + * successful, this serves the purpose of not starving
>> + * removers.
>> + */
>> if (d_unlinked(dentry))
>> return -EIO;
>>
>> @@ -98,7 +125,7 @@ EXPORT_SYMBOL_GPL(debugfs_file_get);
>> */
>> void debugfs_file_put(struct dentry *dentry)
>> {
>> - struct debugfs_fsdata *fsd = dentry->d_fsdata;
>> + struct debugfs_fsdata *fsd = READ_ONCE(dentry->d_fsdata);
>>
>> if (refcount_dec_and_test(&fsd->active_users))
>> complete(&fsd->active_users_drained);
>> @@ -109,10 +136,11 @@ static int open_proxy_open(struct inode *inode,
>> struct file *filp)
>> {
>> struct dentry *dentry = F_DENTRY(filp);
>> const struct file_operations *real_fops = NULL;
>> - int r = 0;
>> + int r;
>>
>> - if (debugfs_file_get(dentry))
>> - return -ENOENT;
>> + r = debugfs_file_get(dentry);
>> + if (r)
>> + return r == -EIO ? -ENOENT : r;
>>
>> real_fops = debugfs_real_fops(filp);
>> real_fops = fops_get(real_fops);
>> @@ -233,10 +261,11 @@ static int full_proxy_open(struct inode *inode,
>> struct file *filp)
>> struct dentry *dentry = F_DENTRY(filp);
>> const struct file_operations *real_fops = NULL;
>> struct file_operations *proxy_fops = NULL;
>> - int r = 0;
>> + int r;
>>
>> - if (debugfs_file_get(dentry))
>> - return -ENOENT;
>> + r = debugfs_file_get(dentry);
>> + if (r)
>> + return r == -EIO ? -ENOENT : r;
>>
>> real_fops = debugfs_real_fops(filp);
>> real_fops = fops_get(real_fops);
>> diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
>> index 5550f11d60bd..2360c17ec00a 100644
>> --- a/fs/debugfs/inode.c
>> +++ b/fs/debugfs/inode.c
>> @@ -184,7 +184,10 @@ static const struct super_operations
>> debugfs_super_operations = {
>>
>> static void debugfs_release_dentry(struct dentry *dentry)
>> {
>> - kfree(dentry->d_fsdata);
>> + void *fsd = dentry->d_fsdata;
>> +
>> + if (!((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT))
>> + kfree(dentry->d_fsdata);
>> }
>>
>> static struct vfsmount *debugfs_automount(struct path *path)
>> @@ -346,35 +349,25 @@ static struct dentry
>> *__debugfs_create_file(const char *name, umode_t mode,
>> {
>> struct dentry *dentry;
>> struct inode *inode;
>> - struct debugfs_fsdata *fsd;
>> -
>> - fsd = kmalloc(sizeof(*fsd), GFP_KERNEL);
>> - if (!fsd)
>> - return NULL;
>>
>> if (!(mode & S_IFMT))
>> mode |= S_IFREG;
>> BUG_ON(!S_ISREG(mode));
>> dentry = start_creating(name, parent);
>>
>> - if (IS_ERR(dentry)) {
>> - kfree(fsd);
>> + if (IS_ERR(dentry))
>> return NULL;
>> - }
>>
>> inode = debugfs_get_inode(dentry->d_sb);
>> - if (unlikely(!inode)) {
>> - kfree(fsd);
>> + if (unlikely(!inode))
>> return failed_creating(dentry);
>> - }
>>
>> inode->i_mode = mode;
>> inode->i_private = data;
>>
>> inode->i_fop = proxy_fops;
>> - fsd->real_fops = real_fops;
>> - refcount_set(&fsd->active_users, 1);
>> - dentry->d_fsdata = fsd;
>> + dentry->d_fsdata = (void *)((unsigned long)real_fops |
>> + DEBUGFS_FSDATA_IS_REAL_FOPS_BIT);
>>
>> d_instantiate(dentry, inode);
>> fsnotify_create(d_inode(dentry->d_parent), dentry);
>> @@ -637,8 +630,17 @@ static void __debugfs_remove_file(struct dentry
>> *dentry, struct dentry *parent)
>>
>> simple_unlink(d_inode(parent), dentry);
>> d_delete(dentry);
>> - fsd = dentry->d_fsdata;
>> - init_completion(&fsd->active_users_drained);
>> +
>> + /*
>> + * Paired with the closing smp_mb() implied by a successful
>> + * cmpxchg() in debugfs_file_get(): either
>> + * debugfs_file_get() must see a dead dentry or we must see
>> a
>> + * debugfs_fsdata instance at ->d_fsdata here (or both).
>> + */
>> + smp_mb();
>> + fsd = READ_ONCE(dentry->d_fsdata);
>> + if ((unsigned long)fsd & DEBUGFS_FSDATA_IS_REAL_FOPS_BIT)
>> + return;
>> if (!refcount_dec_and_test(&fsd->active_users))
>> wait_for_completion(&fsd->active_users_drained);
>> }
>> diff --git a/fs/debugfs/internal.h b/fs/debugfs/internal.h
>> index 0eea99432840..cb1e8139c398 100644
>> --- a/fs/debugfs/internal.h
>> +++ b/fs/debugfs/internal.h
>> @@ -25,4 +25,12 @@ struct debugfs_fsdata {
>> struct completion active_users_drained;
>> };
>>
>> +/*
>> + * A dentry's ->d_fsdata either points to the real fops or to a
>> + * dynamically allocated debugfs_fsdata instance.
>> + * In order to distinguish between these two cases, a real fops
>> + * pointer gets its lowest bit set.
>> + */
>> +#define DEBUGFS_FSDATA_IS_REAL_FOPS_BIT BIT(0)
>> +
>> #endif /* _DEBUGFS_INTERNAL_H_ */
Powered by blists - more mailing lists