On 3/11/20 11:35 AM, Jason Gunthorpe wrote:
From: Jason Gunthorpe <jgg@mellanox.com>
hmm_range_fault() should never return 0 if the caller requested a valid page, but the pfns output for that page would be HMM_PFN_ERROR.
hmm_pte_need_fault() must always be called before setting HMM_PFN_ERROR to detect if the page is in faulting mode or not.
Fix two cases in hmm_vma_walk_pmd() and reorganize some of the duplicated code.
Fixes: d08faca018c4 ("mm/hmm: properly handle migration pmd")
Fixes: da4c3c735ea4 ("mm/hmm/mirror: helper to snapshot CPU page table")
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
 mm/hmm.c | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)
diff --git a/mm/hmm.c b/mm/hmm.c
index bf676cfef3e8ee..f61fddf2ef6505 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -363,8 +363,10 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 {
     struct hmm_vma_walk *hmm_vma_walk = walk->private;
     struct hmm_range *range = hmm_vma_walk->range;
-    uint64_t *pfns = range->pfns;
-    unsigned long addr = start, i;
+    uint64_t *pfns = &range->pfns[(start - range->start) >> PAGE_SHIFT];
+    unsigned long npages = (end - start) >> PAGE_SHIFT;
+    unsigned long addr = start;
+    bool fault, write_fault;
     pte_t *ptep;
     pmd_t pmd;
@@ -374,14 +376,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, return hmm_vma_walk_hole(start, end, -1, walk);
if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
-        bool fault, write_fault;
-        unsigned long npages;
-        uint64_t *pfns;
-
-        i = (addr - range->start) >> PAGE_SHIFT;
-        npages = (end - addr) >> PAGE_SHIFT;
-        pfns = &range->pfns[i];
-
         hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault, &write_fault);
         if (fault || write_fault) {
@@ -390,8 +384,15 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, return -EBUSY; } return hmm_pfns_fill(start, end, range, HMM_PFN_NONE);
-    } else if (!pmd_present(pmd))
+    }
+
+    if (!pmd_present(pmd)) {
+        hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
+                             &write_fault);
+        if (fault || write_fault)
+            return -EFAULT;
         return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
Shouldn't this fill with HMM_PFN_NONE instead of HMM_PFN_ERROR? Otherwise, when a THP is swapped out, you will get a different value than if a PTE is swapped out and you are prefetching/snapshotting.
}
if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) { /*
@@ -408,8 +409,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd)) goto again;
-        i = (addr - range->start) >> PAGE_SHIFT;
-        return hmm_vma_handle_pmd(walk, addr, end, &pfns[i], pmd);
+        return hmm_vma_handle_pmd(walk, addr, end, pfns, pmd);
}
/*
@@ -418,15 +418,19 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, * entry pointing to pte directory or it is a bad pmd that will not * recover. */
- if (pmd_bad(pmd))
+    if (pmd_bad(pmd)) {
+        hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
+                             &write_fault);
+        if (fault || write_fault)
+            return -EFAULT;
         return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
+    }
ptep = pte_offset_map(pmdp, addr);
- i = (addr - range->start) >> PAGE_SHIFT;
- for (; addr < end; addr += PAGE_SIZE, ptep++, i++) {
+    for (; addr < end; addr += PAGE_SIZE, ptep++, pfns++) {
         int r;

-        r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, &pfns[i]);
+        r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, pfns);
         if (r) {
             /* hmm_vma_handle_pte() did pte_unmap() */
             hmm_vma_walk->last = addr;