[2/4] add page_wrprotect(): write protecting page.

Message ID	1238457560-7613-3-git-send-email-ieidus@redhat.com (mailing list archive)
State	Accepted
Headers	show Received: from vger.kernel.org (vger.kernel.org [209.132.176.167]) by demeter.kernel.org (8.14.2/8.14.2) with ESMTP id n2V00xsr026854 for <patchwork-kvm@patchwork.kernel.org>; Tue, 31 Mar 2009 00:01:02 GMT From: Izik Eidus <ieidus@redhat.com> Cc: linux-kernel@vger.kernel.org, kvm@vger.kernel.org, linux-mm@kvack.org, avi@redhat.com, aarcange@redhat.com, chrisw@redhat.com, riel@redhat.com, jeremy@goop.org, mtosatti@redhat.com, hugh@veritas.com, corbet@lwn.net, yaniv@redhat.com, dmonakhov@openvz.org, Izik Eidus <ieidus@redhat.com> Subject: [PATCH 2/4] add page_wrprotect(): write protecting page. Date: Tue, 31 Mar 2009 02:59:18 +0300 Message-Id: <1238457560-7613-3-git-send-email-ieidus@redhat.com> In-Reply-To: <1238457560-7613-2-git-send-email-ieidus@redhat.com> References: <1238457560-7613-1-git-send-email-ieidus@redhat.com> <1238457560-7613-2-git-send-email-ieidus@redhat.com> To: unlisted-recipients:; (no To-header on input) Sender: kvm-owner@vger.kernel.org Precedence: bulk

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b35bc0e..469376d 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -118,6 +118,10 @@ static inline int try_to_munlock(struct page *page)
 }
 #endif
 
+#if defined(CONFIG_KSM) || defined(CONFIG_KSM_MODULE)
+int page_wrprotect(struct page *page, int *odirect_sync, int count_offset);
+#endif
+
 #else	/* !CONFIG_MMU */
 
 #define anon_vma_init()		do {} while (0)
@@ -132,6 +136,15 @@ static inline int page_mkclean(struct page *page)
 	return 0;
 }
 
+#if defined(CONFIG_KSM) || defined(CONFIG_KSM_MODULE)
+/* !CONFIG_MMU stub: protects no ptes, but still initializes *odirect_sync. */
+static inline int page_wrprotect(struct page *page, int *odirect_sync,
+				 int count_offset)
+{
+	*odirect_sync = 1;
+	return 0;
+}
+#endif
 
 #endif	/* CONFIG_MMU */
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 1652166..95c55ea 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -585,6 +585,162 @@ int page_mkclean(struct page *page)
 }
 EXPORT_SYMBOL_GPL(page_mkclean);
 
+#if defined(CONFIG_KSM) || defined(CONFIG_KSM_MODULE)
+
+/*
+ * Write protect the pte mapping @page in @vma, if there is one.
+ *
+ * Returns 1 if the pte is readonly on return (already readonly ptes count
+ * too).  Returns 0 if @page is not mapped in @vma, or if the mapcount versus
+ * page_count check fails; in that case *@odirect_sync is set to 0 and the
+ * original pte is put back unchanged.
+ */
+static int page_wrprotect_one(struct page *page, struct vm_area_struct *vma,
+			      int *odirect_sync, int count_offset)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long address;
+	pte_t *pte;
+	spinlock_t *ptl;
+	int ret = 0;
+
+	address = vma_address(page, vma);
+	if (address == -EFAULT)
+		goto out;
+
+	pte = page_check_address(page, mm, address, &ptl, 0);
+	if (!pte)
+		goto out;
+
+	if (pte_write(*pte)) {
+		pte_t entry;
+
+		flush_cache_page(vma, address, pte_pfn(*pte));
+		/*
+		 * Ok this is tricky: when get_user_pages_fast() runs it does
+		 * not take any lock, therefore the check that we are going to
+		 * make with the pagecount against the mapcount is racy and
+		 * O_DIRECT can happen right after the check.
+		 * So we clear the pte and flush the tlb before the check;
+		 * this assures us that no O_DIRECT can happen after the check
+		 * or in the middle of the check.
+		 */
+		entry = ptep_clear_flush(vma, address, pte);
+		/*
+		 * Check that no O_DIRECT or similar I/O is in progress on the
+		 * page; if there is, restore the pte we just cleared.
+		 */
+		if ((page_mapcount(page) + count_offset) != page_count(page)) {
+			*odirect_sync = 0;
+			set_pte_at_notify(mm, address, pte, entry);
+			goto out_unlock;
+		}
+		entry = pte_wrprotect(entry);
+		set_pte_at_notify(mm, address, pte, entry);
+	}
+	ret = 1;
+
+out_unlock:
+	pte_unmap_unlock(pte, ptl);
+out:
+	return ret;
+}
+
+/*
+ * Write protect @page in every vma of its address_space that covers the
+ * page's offset.  Returns the sum of the page_wrprotect_one() results, or 0
+ * if the page has no mapping.
+ */
+static int page_wrprotect_file(struct page *page, int *odirect_sync,
+			       int count_offset)
+{
+	struct address_space *mapping;
+	struct prio_tree_iter iter;
+	struct vm_area_struct *vma;
+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	int ret = 0;
+
+	mapping = page_mapping(page);
+	if (!mapping)
+		return ret;
+
+	spin_lock(&mapping->i_mmap_lock);
+
+	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff)
+		ret += page_wrprotect_one(page, vma, odirect_sync,
+					  count_offset);
+
+	spin_unlock(&mapping->i_mmap_lock);
+
+	return ret;
+}
+
+/*
+ * Write protect @page in every vma on its anon_vma list.  Returns the sum of
+ * the page_wrprotect_one() results, or 0 if page_lock_anon_vma() fails.
+ */
+static int page_wrprotect_anon(struct page *page, int *odirect_sync,
+			       int count_offset)
+{
+	struct vm_area_struct *vma;
+	struct anon_vma *anon_vma;
+	int ret = 0;
+
+	anon_vma = page_lock_anon_vma(page);
+	if (!anon_vma)
+		return ret;
+
+	/*
+	 * If the page is inside the swap cache, its _count number was
+	 * increased by one, therefore we have to increase count_offset by one.
+	 */
+	if (PageSwapCache(page))
+		count_offset++;
+
+	list_for_each_entry(vma, &anon_vma->head, anon_vma_node)
+		ret += page_wrprotect_one(page, vma, odirect_sync,
+					  count_offset);
+
+	page_unlock_anon_vma(anon_vma);
+
+	return ret;
+}
+
+/**
+ * page_wrprotect - set all ptes pointing to a page as readonly
+ * @page:         the page to set as readonly (must be locked)
+ * @odirect_sync: boolean value that is set to 0 when some of the ptes were not
+ *                marked as readonly because page_wrprotect_one() was not able
+ *                to mark these ptes as readonly without opening a window to a
+ *                race with O_DIRECT; set to 1 otherwise
+ * @count_offset: number of times page_wrprotect() caller had called get_page()
+ *                on the page
+ *
+ * returns the number of ptes which were marked as readonly.
+ * (ptes that were readonly before this function was called are counted as well)
+ */
+int page_wrprotect(struct page *page, int *odirect_sync, int count_offset)
+{
+	int ret = 0;
+
+	/*
+	 * Page lock is needed for anon pages for the PageSwapCache check,
+	 * and for page_mapping for filebacked pages
+	 */
+	BUG_ON(!PageLocked(page));
+
+	*odirect_sync = 1;
+	if (PageAnon(page))
+		ret = page_wrprotect_anon(page, odirect_sync, count_offset);
+	else
+		ret = page_wrprotect_file(page, odirect_sync, count_offset);
+
+	return ret;
+}
+EXPORT_SYMBOL(page_wrprotect);
+
+#endif
+
 /**
  * __page_set_anon_rmap - setup new anonymous rmap
  * @page:	the page to add the mapping to

[2/4] add page_wrprotect(): write protecting page.

Commit Message

Patch