lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180709152508.smwg252x57pnfkoq@kshutemo-mobl1>
Date:   Mon, 9 Jul 2018 18:25:08 +0300
From:   "Kirill A. Shutemov" <kirill@...temov.name>
To:     Dmitry Vyukov <dvyukov@...gle.com>
Cc:     syzbot <syzbot+3f84280d52be9b7083cc@...kaller.appspotmail.com>,
        Andrew Morton <akpm@...ux-foundation.org>,
        Dan Williams <dan.j.williams@...el.com>,
        Jerome Glisse <jglisse@...hat.com>,
        "Kirill A. Shutemov" <kirill.shutemov@...ux.intel.com>,
        ldufour@...ux.vnet.ibm.com, LKML <linux-kernel@...r.kernel.org>,
        Linux-MM <linux-mm@...ck.org>, Michal Hocko <mhocko@...e.com>,
        Minchan Kim <minchan@...nel.org>,
        Ross Zwisler <ross.zwisler@...ux.intel.com>,
        syzkaller-bugs <syzkaller-bugs@...glegroups.com>,
        Matthew Wilcox <willy@...radead.org>, ying.huang@...el.com
Subject: Re: kernel BUG at mm/memory.c:LINE!

On Mon, Jul 09, 2018 at 05:21:55PM +0300, Kirill A. Shutemov wrote:
> > This also happened only once so far:
> > https://syzkaller.appspot.com/bug?extid=3f84280d52be9b7083cc
> > and I can't reproduce it rerunning this program. So it's either a very
> > subtle race, or fd in the middle of netlink address magically matched
> > some fd once, or something else...
> 
> Okay, I've got it reproduced. See below.
> 
> The problem is that kcov doesn't set vm_ops for the VMA and it makes
> kernel think that the VMA is anonymous.
> 
> It's not necessary the way it was triggered by syzkaller. I just found
> that kcov's ->mmap doesn't set vm_ops. There can more such cases.
> vma_is_anonymous() is what we need to fix.
> 
> ( Although, I found logic around mmaping the file second time questinable
>   at best. It seems broken to me. )
> 
> It is known that vma_is_anonymous() can produce false-positives. It tried
> to fix it once[1], but it back-fired[2].
> 
> I'll look at this again.

Below is a patch that seems work. But it definately requires more testing.

Dmitry, could you give it a try in syzkaller?

diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index ffeb60d3434c..f0a8b0b1768b 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -708,6 +708,7 @@ static int mmap_zero(struct file *file, struct vm_area_struct *vma)
 #endif
 	if (vma->vm_flags & VM_SHARED)
 		return shmem_zero_setup(vma);
+	vma->vm_ops = &anon_vm_ops;
 	return 0;
 }
 
diff --git a/fs/exec.c b/fs/exec.c
index 2d4e0075bd24..a1a246062561 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -307,6 +307,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	 * configured yet.
 	 */
 	BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
+	vma->vm_ops = &anon_vm_ops;
 	vma->vm_end = STACK_TOP_MAX;
 	vma->vm_start = vma->vm_end - PAGE_SIZE;
 	vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 2015d8c45e4a..945c3d306d8f 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -336,9 +336,6 @@ static void kernfs_vma_open(struct vm_area_struct *vma)
 	struct file *file = vma->vm_file;
 	struct kernfs_open_file *of = kernfs_of(file);
 
-	if (!of->vm_ops)
-		return;
-
 	if (!kernfs_get_active(of->kn))
 		return;
 
@@ -354,9 +351,6 @@ static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
 	struct kernfs_open_file *of = kernfs_of(file);
 	vm_fault_t ret;
 
-	if (!of->vm_ops)
-		return VM_FAULT_SIGBUS;
-
 	if (!kernfs_get_active(of->kn))
 		return VM_FAULT_SIGBUS;
 
@@ -374,9 +368,6 @@ static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
 	struct kernfs_open_file *of = kernfs_of(file);
 	vm_fault_t ret;
 
-	if (!of->vm_ops)
-		return VM_FAULT_SIGBUS;
-
 	if (!kernfs_get_active(of->kn))
 		return VM_FAULT_SIGBUS;
 
@@ -397,9 +388,6 @@ static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
 	struct kernfs_open_file *of = kernfs_of(file);
 	int ret;
 
-	if (!of->vm_ops)
-		return -EINVAL;
-
 	if (!kernfs_get_active(of->kn))
 		return -EINVAL;
 
@@ -419,9 +407,6 @@ static int kernfs_vma_set_policy(struct vm_area_struct *vma,
 	struct kernfs_open_file *of = kernfs_of(file);
 	int ret;
 
-	if (!of->vm_ops)
-		return 0;
-
 	if (!kernfs_get_active(of->kn))
 		return -EINVAL;
 
@@ -440,9 +425,6 @@ static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
 	struct kernfs_open_file *of = kernfs_of(file);
 	struct mempolicy *pol;
 
-	if (!of->vm_ops)
-		return vma->vm_policy;
-
 	if (!kernfs_get_active(of->kn))
 		return vma->vm_policy;
 
@@ -511,7 +493,7 @@ static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
 	 * So error if someone is trying to use close.
 	 */
 	rc = -EINVAL;
-	if (vma->vm_ops && vma->vm_ops->close)
+	if (vma->vm_ops->close)
 		goto out_put;
 
 	rc = 0;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index e9679016271f..e959623123e4 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -326,7 +326,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
 		goto done;
 	}
 
-	if (vma->vm_ops && vma->vm_ops->name) {
+	if (vma->vm_ops->name) {
 		name = vma->vm_ops->name(vma);
 		if (name)
 			goto done;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a0fbb9ffe380..f1db03c919c3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1536,9 +1536,11 @@ int clear_page_dirty_for_io(struct page *page);
 
 int get_cmdline(struct task_struct *task, char *buffer, int buflen);
 
+extern const struct vm_operations_struct anon_vm_ops;
+
 static inline bool vma_is_anonymous(struct vm_area_struct *vma)
 {
-	return !vma->vm_ops;
+	return vma->vm_ops == &anon_vm_ops;
 }
 
 #ifdef CONFIG_SHMEM
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 8f0434a9951a..2e35401a5c68 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7269,7 +7269,7 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 
 		goto got_name;
 	} else {
-		if (vma->vm_ops && vma->vm_ops->name) {
+		if (vma->vm_ops->name) {
 			name = (char *) vma->vm_ops->name(vma);
 			if (name)
 				goto cpy_name;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 039ddbc574e9..2065acc5a6aa 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -637,7 +637,7 @@ EXPORT_SYMBOL_GPL(linear_hugepage_index);
  */
 unsigned long vma_kernel_pagesize(struct vm_area_struct *vma)
 {
-	if (vma->vm_ops && vma->vm_ops->pagesize)
+	if (vma->vm_ops->pagesize)
 		return vma->vm_ops->pagesize(vma);
 	return PAGE_SIZE;
 }
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index d7b2a4bf8671..5ae34097aed1 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -440,7 +440,7 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
 		 * page fault if needed.
 		 */
 		return 0;
-	if (vma->vm_ops || (vm_flags & VM_NO_KHUGEPAGED))
+	if (!vma_is_anonymous(vma) || (vm_flags & VM_NO_KHUGEPAGED))
 		/* khugepaged not yet working on file or special mappings */
 		return 0;
 	hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
@@ -831,7 +831,7 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
 		return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
 				HPAGE_PMD_NR);
 	}
-	if (!vma->anon_vma || vma->vm_ops)
+	if (!vma->anon_vma || !vma_is_anonymous(vma))
 		return false;
 	if (is_vma_temporary_stack(vma))
 		return false;
diff --git a/mm/memory.c b/mm/memory.c
index 7206a634270b..02fbef2bd024 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -768,7 +768,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
 		 (void *)addr, vma->vm_flags, vma->anon_vma, mapping, index);
 	pr_alert("file:%pD fault:%pf mmap:%pf readpage:%pf\n",
 		 vma->vm_file,
-		 vma->vm_ops ? vma->vm_ops->fault : NULL,
+		 vma->vm_ops->fault,
 		 vma->vm_file ? vma->vm_file->f_op->mmap : NULL,
 		 mapping ? mapping->a_ops->readpage : NULL);
 	dump_stack();
@@ -825,7 +825,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
 	if (IS_ENABLED(CONFIG_ARCH_HAS_PTE_SPECIAL)) {
 		if (likely(!pte_special(pte)))
 			goto check_pfn;
-		if (vma->vm_ops && vma->vm_ops->find_special_page)
+		if (vma->vm_ops->find_special_page)
 			return vma->vm_ops->find_special_page(vma, addr);
 		if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
 			return NULL;
@@ -2404,7 +2404,7 @@ static void fault_dirty_shared_page(struct vm_area_struct *vma,
 {
 	struct address_space *mapping;
 	bool dirtied;
-	bool page_mkwrite = vma->vm_ops && vma->vm_ops->page_mkwrite;
+	bool page_mkwrite = vma->vm_ops->page_mkwrite;
 
 	dirtied = set_page_dirty(page);
 	VM_BUG_ON_PAGE(PageAnon(page), page);
@@ -2648,7 +2648,7 @@ static int wp_pfn_shared(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
 
-	if (vma->vm_ops && vma->vm_ops->pfn_mkwrite) {
+	if (vma->vm_ops->pfn_mkwrite) {
 		int ret;
 
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2669,7 +2669,7 @@ static int wp_page_shared(struct vm_fault *vmf)
 
 	get_page(vmf->page);
 
-	if (vma->vm_ops && vma->vm_ops->page_mkwrite) {
+	if (vma->vm_ops->page_mkwrite) {
 		int tmp;
 
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -4439,7 +4439,7 @@ int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
 			vma = find_vma(mm, addr);
 			if (!vma || vma->vm_start > addr)
 				break;
-			if (vma->vm_ops && vma->vm_ops->access)
+			if (vma->vm_ops->access)
 				ret = vma->vm_ops->access(vma, addr, buf,
 							  len, write);
 			if (ret <= 0)
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9ac49ef17b4e..f0fcf70bcec7 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -651,13 +651,13 @@ static int vma_replace_policy(struct vm_area_struct *vma,
 	pr_debug("vma %lx-%lx/%lx vm_ops %p vm_file %p set_policy %p\n",
 		 vma->vm_start, vma->vm_end, vma->vm_pgoff,
 		 vma->vm_ops, vma->vm_file,
-		 vma->vm_ops ? vma->vm_ops->set_policy : NULL);
+		 vma->vm_ops->set_policy);
 
 	new = mpol_dup(pol);
 	if (IS_ERR(new))
 		return PTR_ERR(new);
 
-	if (vma->vm_ops && vma->vm_ops->set_policy) {
+	if (vma->vm_ops->set_policy) {
 		err = vma->vm_ops->set_policy(vma, new);
 		if (err)
 			goto err_out;
@@ -845,7 +845,7 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 			up_read(&mm->mmap_sem);
 			return -EFAULT;
 		}
-		if (vma->vm_ops && vma->vm_ops->get_policy)
+		if (vma->vm_ops->get_policy)
 			pol = vma->vm_ops->get_policy(vma, addr);
 		else
 			pol = vma->vm_policy;
@@ -1617,7 +1617,7 @@ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
 	struct mempolicy *pol = NULL;
 
 	if (vma) {
-		if (vma->vm_ops && vma->vm_ops->get_policy) {
+		if (vma->vm_ops->get_policy) {
 			pol = vma->vm_ops->get_policy(vma, addr);
 		} else if (vma->vm_policy) {
 			pol = vma->vm_policy;
@@ -1663,7 +1663,7 @@ bool vma_policy_mof(struct vm_area_struct *vma)
 {
 	struct mempolicy *pol;
 
-	if (vma->vm_ops && vma->vm_ops->get_policy) {
+	if (vma->vm_ops->get_policy) {
 		bool ret = false;
 
 		pol = vma->vm_ops->get_policy(vma, vma->vm_start);
diff --git a/mm/mmap.c b/mm/mmap.c
index d1eb87ef4b1a..516fb5c5bfe5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -71,6 +71,8 @@ int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
 static bool ignore_rlimit_data;
 core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
 
+const struct vm_operations_struct anon_vm_ops = {};
+
 static void unmap_region(struct mm_struct *mm,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
 		unsigned long start, unsigned long end);
@@ -177,7 +179,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
 	struct vm_area_struct *next = vma->vm_next;
 
 	might_sleep();
-	if (vma->vm_ops && vma->vm_ops->close)
+	if (vma->vm_ops->close)
 		vma->vm_ops->close(vma);
 	if (vma->vm_file)
 		fput(vma->vm_file);
@@ -561,6 +563,8 @@ static unsigned long count_vma_pages_range(struct mm_struct *mm,
 void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
 		struct rb_node **rb_link, struct rb_node *rb_parent)
 {
+	WARN_ONCE(!vma->vm_ops, "missing vma->vm_ops");
+
 	/* Update tracking information for the gap following the new vma. */
 	if (vma->vm_next)
 		vma_gap_update(vma->vm_next);
@@ -996,7 +1000,7 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
 		return 0;
 	if (vma->vm_file != file)
 		return 0;
-	if (vma->vm_ops && vma->vm_ops->close)
+	if (vma->vm_ops->close)
 		return 0;
 	if (!is_mergeable_vm_userfaultfd_ctx(vma, vm_userfaultfd_ctx))
 		return 0;
@@ -1636,7 +1640,7 @@ int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot)
 		return 0;
 
 	/* The backer wishes to know when pages are first written to? */
-	if (vm_ops && (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite))
+	if (vm_ops->page_mkwrite || vm_ops->pfn_mkwrite)
 		return 1;
 
 	/* The open routine did something to the protections that pgprot_modify
@@ -1774,12 +1778,20 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 		 */
 		WARN_ON_ONCE(addr != vma->vm_start);
 
+		/* All mappings must have ->vm_ops set */
+		if (!vma->vm_ops) {
+			static const struct vm_operations_struct dummy_ops = {};
+			vma->vm_ops = &dummy_ops;
+		}
+
 		addr = vma->vm_start;
 		vm_flags = vma->vm_flags;
 	} else if (vm_flags & VM_SHARED) {
 		error = shmem_zero_setup(vma);
 		if (error)
 			goto free_vma;
+	} else {
+		vma->vm_ops = &anon_vm_ops;
 	}
 
 	vma_link(mm, vma, prev, rb_link, rb_parent);
@@ -2614,7 +2626,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	struct vm_area_struct *new;
 	int err;
 
-	if (vma->vm_ops && vma->vm_ops->split) {
+	if (vma->vm_ops->split) {
 		err = vma->vm_ops->split(vma, addr);
 		if (err)
 			return err;
@@ -2647,7 +2659,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 	if (new->vm_file)
 		get_file(new->vm_file);
 
-	if (new->vm_ops && new->vm_ops->open)
+	if (new->vm_ops->open)
 		new->vm_ops->open(new);
 
 	if (new_below)
@@ -2661,7 +2673,7 @@ int __split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 		return 0;
 
 	/* Clean everything up if vma_adjust failed. */
-	if (new->vm_ops && new->vm_ops->close)
+	if (new->vm_ops->close)
 		new->vm_ops->close(new);
 	if (new->vm_file)
 		fput(new->vm_file);
@@ -2999,6 +3011,7 @@ static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long
 
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_mm = mm;
+	vma->vm_ops = &anon_vm_ops;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
 	vma->vm_pgoff = pgoff;
@@ -3221,7 +3234,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 			goto out_free_mempol;
 		if (new_vma->vm_file)
 			get_file(new_vma->vm_file);
-		if (new_vma->vm_ops && new_vma->vm_ops->open)
+		if (new_vma->vm_ops->open)
 			new_vma->vm_ops->open(new_vma);
 		vma_link(mm, new_vma, prev, rb_link, rb_parent);
 		*need_rmap_locks = false;
diff --git a/mm/mremap.c b/mm/mremap.c
index 5c2e18505f75..7ab222c283de 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -302,7 +302,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 				     need_rmap_locks);
 	if (moved_len < old_len) {
 		err = -ENOMEM;
-	} else if (vma->vm_ops && vma->vm_ops->mremap) {
+	} else if (vma->vm_ops->mremap) {
 		err = vma->vm_ops->mremap(new_vma);
 	}
 
diff --git a/mm/nommu.c b/mm/nommu.c
index 4452d8bd9ae4..e7f447bfd704 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -764,7 +764,7 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
  */
 static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
 {
-	if (vma->vm_ops && vma->vm_ops->close)
+	if (vma->vm_ops->close)
 		vma->vm_ops->close(vma);
 	if (vma->vm_file)
 		fput(vma->vm_file);
@@ -1489,7 +1489,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
 		region->vm_pgoff = new->vm_pgoff += npages;
 	}
 
-	if (new->vm_ops && new->vm_ops->open)
+	if (new->vm_ops->open)
 		new->vm_ops->open(new);
 
 	delete_vma_from_mm(vma);
diff --git a/mm/shmem.c b/mm/shmem.c
index 2cab84403055..bf991c9230b3 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1424,6 +1424,7 @@ static void shmem_pseudo_vma_init(struct vm_area_struct *vma,
 	/* Bias interleave by inode number to distribute better across nodes */
 	vma->vm_pgoff = index + info->vfs_inode.i_ino;
 	vma->vm_policy = mpol_shared_policy_lookup(&info->policy, index);
+	vma->vm_ops = &anon_vm_ops;
 }
 
 static void shmem_pseudo_vma_destroy(struct vm_area_struct *vma)
-- 
 Kirill A. Shutemov

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ