[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID: <672939d2-e556-f9a5-d0f6-3f0b73edf0c9@bytedance.com>
Date: Sun, 9 Oct 2022 20:49:49 +0800
From: Jia Zhu <zhujia.zj@...edance.com>
To: JeffleXu <jefflexu@...ux.alibaba.com>, dhowells@...hat.com,
xiang@...nel.org
Cc: linux-cachefs@...hat.com, linux-erofs@...ts.ozlabs.org,
linux-fsdevel@...r.kernel.org, linux-kernel@...r.kernel.org,
yinxin.x@...edance.com
Subject: Re: [External] Re: [RFC PATCH 3/5] cachefiles: resend an open request
if the read request's object is closed
在 2022/10/8 17:05, JeffleXu 写道:
>
>
> On 8/18/22 9:52 PM, Jia Zhu wrote:
>> When an anonymous fd is closed by the user daemon, if a new read
>> request for this file comes up, the anonymous fd should be re-opened
>> to handle that read request rather than fail it directly.
>>
>> 1. Introduce reopening state for objects that are closed but have
>> inflight/subsequent read requests.
>> 2. No longer flush READ requests but only CLOSE requests when anonymous
>> fd is closed.
>> 3. Enqueue the reopen work to workqueue, thus user daemon could get rid
>> of daemon_read context and handle that request smoothly. Otherwise,
>> the user daemon will send a reopen request and wait for itself to
>> process the request.
>>
>> Signed-off-by: Jia Zhu <zhujia.zj@...edance.com>
>> Reviewed-by: Xin Yin <yinxin.x@...edance.com>
>> ---
>> fs/cachefiles/internal.h | 3 ++
>> fs/cachefiles/ondemand.c | 79 +++++++++++++++++++++++++++-------------
>> 2 files changed, 56 insertions(+), 26 deletions(-)
>>
>> diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
>> index cdf4ec781933..66bbd4f1d22a 100644
>> --- a/fs/cachefiles/internal.h
>> +++ b/fs/cachefiles/internal.h
>> @@ -48,9 +48,11 @@ struct cachefiles_volume {
>> enum cachefiles_object_state {
>> CACHEFILES_ONDEMAND_OBJSTATE_close, /* Anonymous fd closed by daemon or initial state */
>> CACHEFILES_ONDEMAND_OBJSTATE_open, /* Anonymous fd associated with object is available */
>> + CACHEFILES_ONDEMAND_OBJSTATE_reopening, /* Object that was closed and is being reopened. */
>> };
>>
>> struct cachefiles_ondemand_info {
>> + struct work_struct work;
>> int ondemand_id;
>> enum cachefiles_object_state state;
>> struct cachefiles_object *object;
>> @@ -341,6 +343,7 @@ cachefiles_ondemand_set_object_##_state(struct cachefiles_object *object) \
>>
>> CACHEFILES_OBJECT_STATE_FUNCS(open);
>> CACHEFILES_OBJECT_STATE_FUNCS(close);
>> +CACHEFILES_OBJECT_STATE_FUNCS(reopening);
>> #else
>> #define CACHEFILES_ONDEMAND_OBJINFO(object) NULL
>>
>> diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c
>> index f51266554e4d..79ffb19380cd 100644
>> --- a/fs/cachefiles/ondemand.c
>> +++ b/fs/cachefiles/ondemand.c
>> @@ -18,14 +18,10 @@ static int cachefiles_ondemand_fd_release(struct inode *inode,
>> info->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED;
>> cachefiles_ondemand_set_object_close(object);
>>
>> - /*
>> - * Flush all pending READ requests since their completion depends on
>> - * anon_fd.
>> - */
>> - xas_for_each(&xas, req, ULONG_MAX) {
>> + /* Only flush CACHEFILES_REQ_NEW marked req to avoid race with daemon_read */
>> + xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) {
>
> Could you please add a more detailed comment here, explaining why flushing
> CLOSE requests when the anonymous fd gets closed is needed, and why the original
> xas_for_each() would race with daemon_read()? There are some refs at [1]
> and [2].
>
> [1]
> https://hackmd.io/YNsTQqLcQYOZ4gAlFWrNcA#flush-CLOSE-requests-when-anon-fd-is-closed
> [2]
> https://hackmd.io/YNsTQqLcQYOZ4gAlFWrNcA#race-between-readingflush-requests
>
> A sequence chart would be welcome in the comment to explain
> the race; otherwise the code will be difficult to understand, given the
> subtlety of the race.
OK, I'd like to do that.
>
>
>> if (req->msg.object_id == object_id &&
>> - req->msg.opcode == CACHEFILES_OP_READ) {
>> - req->error = -EIO;
>> + req->msg.opcode == CACHEFILES_OP_CLOSE) {
>> complete(&req->done);
>> xas_store(&xas, NULL);
>> }
>> @@ -175,6 +171,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args)
>> trace_cachefiles_ondemand_copen(req->object, id, size);
>>
>> cachefiles_ondemand_set_object_open(req->object);
>> + wake_up_all(&cache->daemon_pollwq);
>>
>> out:
>> complete(&req->done);
>> @@ -234,6 +231,14 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req)
>> return ret;
>> }
>>
>> +static void ondemand_object_worker(struct work_struct *work)
>> +{
>> + struct cachefiles_object *object =
>> + ((struct cachefiles_ondemand_info *)work)->object;
>> +
>> + cachefiles_ondemand_init_object(object);
>> +}
>> +
>> ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache,
>> char __user *_buffer, size_t buflen)
>> {
>> @@ -249,7 +254,27 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache,
>> * requests from being processed repeatedly.
>> */
>> xa_lock(&cache->reqs);
>> - req = xas_find_marked(&xas, UINT_MAX, CACHEFILES_REQ_NEW);
>> + xas_for_each_marked(&xas, req, UINT_MAX, CACHEFILES_REQ_NEW) {
>> + /*
>> + * Reopen the closed object with associated read request.
>> + * Skip read requests whose related object are reopening.
>> + */
>> + if (req->msg.opcode == CACHEFILES_OP_READ) {
>> + ret = cmpxchg(&CACHEFILES_ONDEMAND_OBJINFO(req->object)->state,
>> + CACHEFILES_ONDEMAND_OBJSTATE_close,
>> + CACHEFILES_ONDEMAND_OBJSTATE_reopening);
>> + if (ret == CACHEFILES_ONDEMAND_OBJSTATE_close) {
>> + INIT_WORK(&CACHEFILES_ONDEMAND_OBJINFO(req->object)->work,
>> + ondemand_object_worker);
>
> How about initializing @work in cachefiles_ondemand_init_obj_info(), so
> that the work_struct of each object only needs to be initialized once?
>
SGTM.
>
>> + queue_work(fscache_wq,
>> + &CACHEFILES_ONDEMAND_OBJINFO(req->object)->work);
>> + continue;
>> + } else if (ret == CACHEFILES_ONDEMAND_OBJSTATE_reopening) {
>> + continue;
>> + }
>> + }
>> + break;
>> + }
>> if (!req) {
>> xa_unlock(&cache->reqs);
>> return 0;
>> @@ -267,14 +292,18 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache,
>> xa_unlock(&cache->reqs);
>>
>> id = xas.xa_index;
>> - msg->msg_id = id;
>>
>> if (msg->opcode == CACHEFILES_OP_OPEN) {
>> ret = cachefiles_ondemand_get_fd(req);
>> - if (ret)
>> + if (ret) {
>> + cachefiles_ondemand_set_object_close(req->object);
>> goto error;
>> + }
>> }
>>
>> + msg->msg_id = id;
>> + msg->object_id = CACHEFILES_ONDEMAND_OBJINFO(req->object)->ondemand_id;
>> +
>> if (copy_to_user(_buffer, msg, n) != 0) {
>> ret = -EFAULT;
>> goto err_put_fd;
>> @@ -307,19 +336,23 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
>> void *private)
>> {
>> struct cachefiles_cache *cache = object->volume->cache;
>> - struct cachefiles_req *req;
>> + struct cachefiles_req *req = NULL;
>> XA_STATE(xas, &cache->reqs, 0);
>> int ret;
>>
>> if (!test_bit(CACHEFILES_ONDEMAND_MODE, &cache->flags))
>> return 0;
>>
>> - if (test_bit(CACHEFILES_DEAD, &cache->flags))
>> - return -EIO;
>> + if (test_bit(CACHEFILES_DEAD, &cache->flags)) {
>> + ret = -EIO;
>> + goto out;
>> + }
>>
>> req = kzalloc(sizeof(*req) + data_len, GFP_KERNEL);
>> - if (!req)
>> - return -ENOMEM;
>> + if (!req) {
>> + ret = -ENOMEM;
>> + goto out;
>> + }
>>
>> req->object = object;
>> init_completion(&req->done);
>> @@ -357,7 +390,7 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
>> /* coupled with the barrier in cachefiles_flush_reqs() */
>> smp_mb();
>>
>> - if (opcode != CACHEFILES_OP_OPEN &&
>> + if (opcode == CACHEFILES_OP_CLOSE &&
>> !cachefiles_ondemand_object_is_open(object)) {
>> WARN_ON_ONCE(CACHEFILES_ONDEMAND_OBJINFO(object)->ondemand_id == 0);
>> xas_unlock(&xas);
>> @@ -382,8 +415,12 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object,
>> wake_up_all(&cache->daemon_pollwq);
>> wait_for_completion(&req->done);
>> ret = req->error;
>> + kfree(req);
>> + return ret;
>> out:
>> kfree(req);
>> + if (opcode == CACHEFILES_OP_OPEN)
>> + cachefiles_ondemand_set_object_close(req->object);
>
> Could you please add a comment here explaining why we need to set the
> object state back to CLOSE state for OPEN (especially reopening)
> requests when an error occurred, and why we only set it back to CLOSE state
> when the error occurred before the anonymous fd was initialized? (That's because
> when the error occurs after the anonymous fd has been initialized, the
> object will be reset to CLOSE state through
> cachefiles_ondemand_fd_release() triggered by close_fd().) Otherwise the code
> is quite difficult to comprehend.
>
Thanks for the suggestion. I'll do it.
>
>> return ret;
>> }
>>
>> @@ -435,7 +472,6 @@ static int cachefiles_ondemand_init_close_req(struct cachefiles_req *req,
>> if (!cachefiles_ondemand_object_is_open(object))
>> return -ENOENT;
>>
>> - req->msg.object_id = CACHEFILES_ONDEMAND_OBJINFO(object)->ondemand_id;
>> trace_cachefiles_ondemand_close(object, &req->msg);
>> return 0;
>> }
>> @@ -451,16 +487,7 @@ static int cachefiles_ondemand_init_read_req(struct cachefiles_req *req,
>> struct cachefiles_object *object = req->object;
>> struct cachefiles_read *load = (void *)req->msg.data;
>> struct cachefiles_read_ctx *read_ctx = private;
>> - int object_id = CACHEFILES_ONDEMAND_OBJINFO(object)->ondemand_id;
>>
>> - /* Stop enqueuing requests when daemon has closed anon_fd. */
>> - if (!cachefiles_ondemand_object_is_open(object)) {
>> - WARN_ON_ONCE(object_id == 0);
>> - pr_info_once("READ: anonymous fd closed prematurely.\n");
>> - return -EIO;
>> - }
>> -
>> - req->msg.object_id = object_id;
>> load->off = read_ctx->off;
>> load->len = read_ctx->len;
>> trace_cachefiles_ondemand_read(object, &req->msg, load);
>
Powered by blists - more mailing lists