lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1200654050.5920.14.camel@twins>
Date:	Fri, 18 Jan 2008 12:00:50 +0100
From:	Peter Zijlstra <a.p.zijlstra@...llo.nl>
To:	Miklos Szeredi <miklos@...redi.hu>
Cc:	salikhmetov@...il.com, linux-mm@...ck.org, jakob@...hought.net,
	linux-kernel@...r.kernel.org, valdis.kletnieks@...edu,
	riel@...hat.com, ksm@...dk, staubach@...hat.com,
	jesper.juhl@...il.com, torvalds@...ux-foundation.org,
	akpm@...ux-foundation.org, protasnb@...il.com,
	r.e.wolff@...wizard.nl, hidave.darkstar@...il.com,
	hch@...radead.org
Subject: Re: [PATCH -v6 2/2] Updating ctime and mtime for memory-mapped
	files


On Fri, 2008-01-18 at 11:38 +0100, Miklos Szeredi wrote:
> > On Fri, 2008-01-18 at 10:51 +0100, Miklos Szeredi wrote:
> > 
> > > > diff --git a/mm/msync.c b/mm/msync.c
> > > > index a4de868..a49af28 100644
> > > > --- a/mm/msync.c
> > > > +++ b/mm/msync.c
> > > > @@ -13,11 +13,33 @@
> > > >  #include <linux/syscalls.h>
> > > >  
> > > >  /*
> > > > + * Scan the PTEs for pages belonging to the VMA and mark them read-only.
> > > > + * It will force a pagefault on the next write access.
> > > > + */
> > > > +static void vma_wrprotect(struct vm_area_struct *vma)
> > > > +{
> > > > +	unsigned long addr;
> > > > +
> > > > +	for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) {
> > > > +		spinlock_t *ptl;
> > > > +		pgd_t *pgd = pgd_offset(vma->vm_mm, addr);
> > > > +		pud_t *pud = pud_offset(pgd, addr);
> > > > +		pmd_t *pmd = pmd_offset(pud, addr);
> > > > +		pte_t *pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
> > > > +
> > > > +		if (pte_dirty(*pte) && pte_write(*pte))
> > > > +			*pte = pte_wrprotect(*pte);
> > > > +		pte_unmap_unlock(pte, ptl);
> > > > +	}
> > > > +}
> > > 
> > > What about ram based filesystems?  They don't start out with read-only
> > > pte's, so I think they don't want them read-protected now either.
> > > Unless this is essential for correct mtime/ctime accounting on these
> > > filesystems (I don't think it really is).  But then the mapping should
> > > start out read-only as well, otherwise the time update will only work
> > > after an msync(MS_ASYNC).
> > 
> > page_mkclean() has all the needed logic for this, it also walks the rmap
> > and cleans out all other users, which I think is needed too for
> > consistencies sake:
> > 
> > Process A			Process B
> > 
> > mmap(foo.txt)			mmap(foo.txt)
> > 
> > dirty page
> > 				dirty page
> > 
> > msync(MS_ASYNC)
> > 
> > 				dirty page
> > 
> > msync(MS_ASYNC) <--- now what?!

how about:

diff --git a/mm/msync.c b/mm/msync.c
index 144a757..a1b3fc6 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -14,6 +14,122 @@
 #include <linux/syscalls.h>
 #include <linux/sched.h>
 
+unsigned long masync_pte_range(struct vm_area_struct *vma, pmd_t *pdm,
+		unsigned long addr, unsigned long end)
+{
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
+	arch_enter_lazy_mmu_mode();
+	do {
+		pte_t ptent = *pte;
+
+		if (pte_none(ptent))
+			continue;
+
+		if (!pte_present(ptent))
+			continue;
+
+		if (pte_dirty(ptent) && pte_write(ptent)) {
+			flush_cache_page(vma, addr, pte_pfn(ptent));
+			ptent = ptep_clear_flush(vma, addr, pte);
+			ptent = pte_wrprotect(ptent);
+			set_pte_at(vma->vm_mnm, addr, pte, ptent);
+		}
+	} while (pte++, addr += PAGE_SIZE, addr != end);
+	arch_leave_lazy_mmu_mode();
+	pte_unmap_unlock(pte - 1, ptl);
+
+	return addr;
+}
+
+unsigned long masync_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+		unsigned long addr, unsigned long end)
+{
+	pmd_t *pmd;
+	unsigned long next;
+
+	pmd = pmd_offset(pud, addr);
+	do {
+		next = pmd_addr_end(addr, end);
+		if (pmd_none_or_clear_bad(pmd))
+			continue;
+		next = masync_pte_range(vma, pmd, addr, next);
+	} while (pmd++, addr = next, addr != end);
+
+	return addr;
+}
+
+unsigned long masync_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+	       	unsigned long addr, unsigned long end)
+{
+	pud_t *pud;
+	unsigned long next;
+
+	pud = pud_offset(pgd, addr);
+	do {
+		next = pud_addr_end(addr, end);
+		if (pud_none_or_clear_bad(pud))
+			continue;
+		next = masync_pmd_range(vma, pud, addr, next);
+	} while (pud++, addr = next, addr != end);
+
+	return addr;
+}
+
+unsigned long masync_pgd_range()
+{
+	pgd_t *pgd;
+	unsigned long next;
+
+	pgd = pgd_offset(vma->vm_mm, addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_none_of_clear_bad(pgd))
+			continue;
+		next = masync_pud_range(vma, pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
+
+	return addr;
+}
+
+int masync_vma_one(struct vm_area_struct *vma,
+		unsigned long start, unsigned long end)
+{
+	if (start < vma->vm_start)
+		start = vma->vm_start;
+
+	if (end > vma->vm_end)
+		end = vma->vm_end;
+
+	masync_pgd_range(vma, start, end);
+
+	return 0;
+}
+
+int masync_vma(struct vm_area_struct *vma, 
+		unsigned long start, unsigned long end)
+{
+	struct address_space *mapping;
+	struct vm_area_struct *vma_iter;
+
+	if (!(vma->vm_flags & VM_SHARED))
+		return 0;
+
+	mapping = vma->vm_file->f_mapping;
+
+	if (!mapping_cap_account_dirty(mapping))
+		return 0;
+
+	spin_lock(&mapping->i_mmap_lock);
+	vma_prio_tree_foreach(vma_iter, &iter, &mapping->i_mmap, start, end)
+		masync_vma_one(vma_iter, start, end);
+	spin_unlock(&mapping->i_mmap_lock);
+
+	return 0;
+}
+
 /*
  * MS_SYNC syncs the entire file - including mappings.
  *



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ