 fs/aio.c | 53 +++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 10 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 062a5f6a1448..758430665b3a 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -243,6 +243,9 @@ static void aio_free_ring(struct kioctx *ctx)
 {
 	int i;
 
+	/* This makes the ctx unreachable */
+	put_aio_ring_file(ctx);
+
 	for (i = 0; i < ctx->nr_pages; i++) {
 		struct page *page;
 		pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
@@ -254,8 +257,6 @@ static void aio_free_ring(struct kioctx *ctx)
 		put_page(page);
 	}
 
-	put_aio_ring_file(ctx);
-
 	if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages) {
 		kfree(ctx->ring_pages);
 		ctx->ring_pages = NULL;
@@ -287,9 +288,29 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 
 	rc = 0;
 
-	/* Make sure the old page hasn't already been changed */
+	/* Get a reference on the ioctx so we can take the ring_lock mutex. */
 	spin_lock(&mapping->private_lock);
 	ctx = mapping->private_data;
+	if (ctx)
+		percpu_ref_get(&ctx->users);
+	spin_unlock(&mapping->private_lock);
+
+	if (!ctx)
+		return -EINVAL;
+
+	/* We use mutex_trylock() here as the callers of migratepage may
+	 * already be holding current->mm->mmap_sem, and ->ring_lock must be
+	 * outside of mmap_sem due to its usage in aio_read_events_ring().
+	 * Since page migration is not an absolutely critical operation, the
+	 * occasional failure here is acceptable.
+	 */
+	if (!mutex_trylock(&ctx->ring_lock)) {
+		percpu_ref_put(&ctx->users);
+		return -EAGAIN;
+	}
+
+	/* Make sure the old page hasn't already been changed */
+	spin_lock(&mapping->private_lock);
 	if (ctx) {
 		pgoff_t idx;
 		spin_lock_irqsave(&ctx->completion_lock, flags);
@@ -305,7 +326,7 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 	spin_unlock(&mapping->private_lock);
 
 	if (rc != 0)
-		return rc;
+		goto out_unlock;
 
 	/* Writeback must be complete */
 	BUG_ON(PageWriteback(old));
@@ -314,7 +335,7 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 	rc = migrate_page_move_mapping(mapping, new, old, NULL, mode, 1);
 	if (rc != MIGRATEPAGE_SUCCESS) {
 		put_page(new);
-		return rc;
+		goto out_unlock;
 	}
 
 	/* We can potentially race against kioctx teardown here. Use the
@@ -346,6 +367,9 @@ static int aio_migratepage(struct address_space *mapping, struct page *new,
 	else
 		put_page(new);
 
+out_unlock:
+	mutex_unlock(&ctx->ring_lock);
+	percpu_ref_put(&ctx->users);
 	return rc;
 }
 #endif
@@ -380,7 +404,7 @@ static int aio_setup_ring(struct kioctx *ctx)
 	file = aio_private_file(ctx, nr_pages);
 	if (IS_ERR(file)) {
 		ctx->aio_ring_file = NULL;
-		return -EAGAIN;
+		return -ENOMEM;
 	}
 
 	ctx->aio_ring_file = file;
@@ -415,7 +439,7 @@ static int aio_setup_ring(struct kioctx *ctx)
 
 	if (unlikely(i != nr_pages)) {
 		aio_free_ring(ctx);
-		return -EAGAIN;
+		return -ENOMEM;
 	}
 
 	ctx->mmap_size = nr_pages * PAGE_SIZE;
@@ -429,7 +453,7 @@ static int aio_setup_ring(struct kioctx *ctx)
 	if (IS_ERR((void *)ctx->mmap_base)) {
 		ctx->mmap_size = 0;
 		aio_free_ring(ctx);
-		return -EAGAIN;
+		return -ENOMEM;
 	}
 
 	pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
@@ -657,8 +681,13 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	if (!ctx->cpu)
 		goto err;
 
-	if (aio_setup_ring(ctx) < 0)
-		goto err;
+	/* Prevent races with page migration during setup by holding
+	 * the ring_lock mutex.
+	 */
+	mutex_lock(&ctx->ring_lock);
+	err = aio_setup_ring(ctx);
+	if (err < 0)
+		goto err_unlock;
 
 	atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
 	ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
@@ -683,6 +712,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 	if (err)
 		goto err_cleanup;
 
+	mutex_unlock(&ctx->ring_lock);
 	pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
 		 ctx, ctx->user_id, mm, ctx->nr_events);
 	return ctx;
@@ -691,6 +721,8 @@ err_cleanup:
 	aio_nr_sub(ctx->max_reqs);
 err_ctx:
 	aio_free_ring(ctx);
+err_unlock:
+	mutex_unlock(&ctx->ring_lock);
 err:
 	free_percpu(ctx->cpu);
 	free_percpu(ctx->reqs.pcpu_count);
@@ -1024,6 +1056,7 @@ static long aio_read_events_ring(struct kioctx *ctx,
 
 	mutex_lock(&ctx->ring_lock);
 
+	/* Access to ->ring_pages here is protected by ctx->ring_lock. */
 	ring = kmap_atomic(ctx->ring_pages[0]);
 	head = ring->head;
 	tail = ring->tail;
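
For reference, the aio_migratepage() hunk above follows a common pattern: pin the object under a short inner lock, then try (rather than block) to take the outer-ranked mutex, bailing out with -EAGAIN on contention because the caller may already hold mmap_sem. The sketch below is a userspace analogy of that pattern, not kernel code; the names (registry_lock, registered_ctx, migrate_one_page) and the plain atomic counter standing in for the percpu_ref are illustrative assumptions only.

/*
 * Userspace sketch of the "pin, then trylock to avoid lock inversion"
 * pattern used by aio_migratepage() in the patch above.
 */
#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct ctx {
	atomic_int users;		/* stands in for the percpu_ref */
	pthread_mutex_t ring_lock;	/* ranks outside the caller's lock */
};

static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER; /* ~ mapping->private_lock */
static struct ctx *registered_ctx;				  /* ~ mapping->private_data */

/* Analogue of the migratepage path; the caller may already hold a lock
 * that ranks inside ring_lock, so blocking on ring_lock could deadlock. */
static int migrate_one_page(void)
{
	struct ctx *ctx;

	/* Pin the ctx under the short inner lock, then drop it. */
	pthread_mutex_lock(&registry_lock);
	ctx = registered_ctx;
	if (ctx)
		atomic_fetch_add(&ctx->users, 1);
	pthread_mutex_unlock(&registry_lock);

	if (!ctx)
		return -EINVAL;

	/* Trylock and give up with -EAGAIN instead of risking inversion,
	 * mirroring mutex_trylock(&ctx->ring_lock) in the patch. */
	if (pthread_mutex_trylock(&ctx->ring_lock) != 0) {
		atomic_fetch_sub(&ctx->users, 1);
		return -EAGAIN;
	}

	/* ... the page copy and ring_pages[] update would happen here ... */

	pthread_mutex_unlock(&ctx->ring_lock);
	atomic_fetch_sub(&ctx->users, 1);
	return 0;
}

int main(void)
{
	struct ctx ctx;

	atomic_init(&ctx.users, 0);
	pthread_mutex_init(&ctx.ring_lock, NULL);

	registered_ctx = &ctx;
	printf("migrate_one_page() -> %d\n", migrate_one_page());

	pthread_mutex_destroy(&ctx.ring_lock);
	return 0;
}

Because migration is best-effort, failing the trylock occasionally only delays page migration; correctness does not depend on it succeeding, which is what makes the -EAGAIN return acceptable.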