ACK: [azure][PATCH 2/2] mm, thp: Do not make page table dirty unconditionally in touch_p[mu]d()

Kleber Souza kleber.souza at canonical.com
Tue Dec 5 14:31:04 UTC 2017


On 12/05/17 15:03, Marcelo Henrique Cerri wrote:
> From: Kirill A. Shutemov <kirill.shutemov at linux.intel.com>
> 
> Currently, we unconditionally make the page table entry dirty in
> touch_pmd(). This may result in a false positive from
> can_follow_write_pmd().
> 
> We can avoid this situation by only making the page table entry
> dirty when the caller asks for write access -- FOLL_WRITE.
> 
> The patch also changes touch_pud() in the same way.
> 
> Signed-off-by: Kirill A. Shutemov <kirill.shutemov at linux.intel.com>
> Cc: Michal Hocko <mhocko at suse.com>
> Cc: Hugh Dickins <hughd at google.com>
> Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
> 
> CVE-2017-1000405
> (cherry picked from commit a8f97366452ed491d13cf1e44241bc0b5740b1f0)
> Signed-off-by: Marcelo Henrique Cerri <marcelo.cerri at canonical.com>

Clean cherry-pick. The fix has also been tested on other series and
verified to resolve the issue.
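
For context, the false positive mentioned in the commit message comes
from the FOLL_FORCE/FOLL_COW check in mm/huge_memory.c. In kernels of
this era the check looks roughly like the sketch below (quoted from
memory, so treat it as illustrative rather than the exact hunk):

  static inline bool can_follow_write_pmd(pmd_t pmd, unsigned int flags)
  {
  	/*
  	 * FOLL_FORCE may write through an unwritable pmd, but only
  	 * after a COW cycle has happened and left the pmd dirty.
  	 */
  	return pmd_write(pmd) ||
  	       ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pmd_dirty(pmd));
  }

Because the old touch_pmd() set the dirty bit even for read-only
FOLL_TOUCH lookups, pmd_dirty() could be true without any COW cycle
having taken place, which is what made this check exploitable
(CVE-2017-1000405).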

Acked-by: Kleber Sacilotto de Souza <kleber.souza at canonical.com>

> ---
>  mm/huge_memory.c | 36 +++++++++++++-----------------------
>  1 file changed, 13 insertions(+), 23 deletions(-)
> 
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 6560174edf2a..4c32bb392a5e 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -814,20 +814,15 @@ EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud);
>  #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
>  
>  static void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
> -		pmd_t *pmd)
> +		pmd_t *pmd, int flags)
>  {
>  	pmd_t _pmd;
>  
> -	/*
> -	 * We should set the dirty bit only for FOLL_WRITE but for now
> -	 * the dirty bit in the pmd is meaningless.  And if the dirty
> -	 * bit will become meaningful and we'll only set it with
> -	 * FOLL_WRITE, an atomic set_bit will be required on the pmd to
> -	 * set the young bit, instead of the current set_pmd_at.
> -	 */
> -	_pmd = pmd_mkyoung(pmd_mkdirty(*pmd));
> +	_pmd = pmd_mkyoung(*pmd);
> +	if (flags & FOLL_WRITE)
> +		_pmd = pmd_mkdirty(_pmd);
>  	if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK,
> -				pmd, _pmd,  1))
> +				pmd, _pmd, flags & FOLL_WRITE))
>  		update_mmu_cache_pmd(vma, addr, pmd);
>  }
>  
> @@ -856,7 +851,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
>  		return NULL;
>  
>  	if (flags & FOLL_TOUCH)
> -		touch_pmd(vma, addr, pmd);
> +		touch_pmd(vma, addr, pmd, flags);
>  
>  	/*
>  	 * device mapped pages can only be returned if the
> @@ -945,20 +940,15 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
>  
>  #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
>  static void touch_pud(struct vm_area_struct *vma, unsigned long addr,
> -		pud_t *pud)
> +		pud_t *pud, int flags)
>  {
>  	pud_t _pud;
>  
> -	/*
> -	 * We should set the dirty bit only for FOLL_WRITE but for now
> -	 * the dirty bit in the pud is meaningless.  And if the dirty
> -	 * bit will become meaningful and we'll only set it with
> -	 * FOLL_WRITE, an atomic set_bit will be required on the pud to
> -	 * set the young bit, instead of the current set_pud_at.
> -	 */
> -	_pud = pud_mkyoung(pud_mkdirty(*pud));
> +	_pud = pud_mkyoung(*pud);
> +	if (flags & FOLL_WRITE)
> +		_pud = pud_mkdirty(_pud);
>  	if (pudp_set_access_flags(vma, addr & HPAGE_PUD_MASK,
> -				pud, _pud,  1))
> +				pud, _pud, flags & FOLL_WRITE))
>  		update_mmu_cache_pud(vma, addr, pud);
>  }
>  
> @@ -981,7 +971,7 @@ struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
>  		return NULL;
>  
>  	if (flags & FOLL_TOUCH)
> -		touch_pud(vma, addr, pud);
> +		touch_pud(vma, addr, pud, flags);
>  
>  	/*
>  	 * device mapped pages can only be returned if the
> @@ -1343,7 +1333,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
>  	page = pmd_page(*pmd);
>  	VM_BUG_ON_PAGE(!PageHead(page) && !is_zone_device_page(page), page);
>  	if (flags & FOLL_TOUCH)
> -		touch_pmd(vma, addr, pmd);
> +		touch_pmd(vma, addr, pmd, flags);
>  	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
>  		/*
>  		 * We don't mlock() pte-mapped THPs. This way we can avoid
> 