From: Jason Gunthorpe <jgg@mellanox.com>
I've had these in my work queue for a bit, nothing profound here, just some small edits for clarity.
Ralph's hmm tester will need a small diff to work after this - which illustrates how setting default_flags == 0 is the same as what was called SNAPSHOT:
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 6ca953926dc13f..5f31f5b3e64cb9 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -300,7 +300,7 @@ static int dmirror_range_fault(struct dmirror *dmirror,
 	range->notifier_seq = mmu_interval_read_begin(range->notifier);
 	down_read(&mm->mmap_sem);
-	count = hmm_range_fault(range, 0);
+	count = hmm_range_fault(range);
 	up_read(&mm->mmap_sem);
 	if (count <= 0) {
 		if (count == 0 || count == -EBUSY)
@@ -337,8 +337,7 @@ static int dmirror_fault(struct dmirror *dmirror, unsigned long start,
 		.flags = dmirror_hmm_flags,
 		.values = dmirror_hmm_values,
 		.pfn_shift = DPT_SHIFT,
-		.pfn_flags_mask = ~(dmirror_hmm_flags[HMM_PFN_VALID] |
-				    dmirror_hmm_flags[HMM_PFN_WRITE]),
+		.pfn_flags_mask = 0,
 		.default_flags = dmirror_hmm_flags[HMM_PFN_VALID] |
				(write ? dmirror_hmm_flags[HMM_PFN_WRITE] : 0),
 		.dev_private_owner = dmirror->mdevice,
@@ -872,7 +871,7 @@ static int dmirror_range_snapshot(struct dmirror *dmirror,
 	range->notifier_seq = mmu_interval_read_begin(range->notifier);
 	down_read(&mm->mmap_sem);
-	count = hmm_range_fault(range, HMM_FAULT_SNAPSHOT);
+	count = hmm_range_fault(range);
 	up_read(&mm->mmap_sem);
 	if (count <= 0) {
 		if (count == 0 || count == -EBUSY)
@@ -916,7 +915,7 @@ static int dmirror_snapshot(struct dmirror *dmirror,
 		.flags = dmirror_hmm_flags,
 		.values = dmirror_hmm_values,
 		.pfn_shift = DPT_SHIFT,
-		.pfn_flags_mask = ~0ULL,
+		.pfn_flags_mask = 0,
 		.dev_private_owner = dmirror->mdevice,
 	};
 	int ret = 0;
Jason Gunthorpe (6):
  mm/hmm: remove pgmap checking for devmap pages
  mm/hmm: return the fault type from hmm_pte_need_fault()
  mm/hmm: remove unused code and tidy comments
  mm/hmm: remove HMM_FAULT_SNAPSHOT
  mm/hmm: remove the CONFIG_TRANSPARENT_HUGEPAGE #ifdef
  mm/hmm: use device_private_entry_to_pfn()
 Documentation/vm/hmm.rst                |  12 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |   2 +-
 drivers/gpu/drm/nouveau/nouveau_svm.c   |   2 +-
 include/linux/hmm.h                     |  55 +-----
 mm/hmm.c                                | 238 +++++++++---------------
 5 files changed, 98 insertions(+), 211 deletions(-)
From: Jason Gunthorpe <jgg@mellanox.com>
The checking boils down to a racy test of whether the pagemap is still available. Instead of checking this, rely entirely on the notifiers: if a pagemap is destroyed, then all pages that belong to it must be removed from the tables and the notifiers triggered.
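To see why no pgmap reference is needed, recall the collection loop from Documentation/vm/hmm.rst (a condensed sketch, shown with the post-series hmm_range_fault() signature; take_lock()/release_lock() are the documentation's illustrative driver locking):

again:
	range.notifier_seq = mmu_interval_read_begin(&interval_sub);
	down_read(&mm->mmap_sem);
	ret = hmm_range_fault(&range);
	up_read(&mm->mmap_sem);
	if (ret == -EBUSY)
		goto again;

	take_lock(driver->update);
	if (mmu_interval_read_retry(&interval_sub, range.notifier_seq)) {
		/* Any invalidation - including a pagemap teardown - raced
		 * with the walk, so the collected pfns are stale. */
		release_lock(driver->update);
		goto again;
	}
	/* Safe to program the device from range.pfns[] here. */
	release_lock(driver->update);

A destroyed pagemap must invalidate its pages through this same notifier path, so a racing walk always restarts and never hands out stale device-private pfns.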
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 mm/hmm.c | 50 ++------------------------------------------------
 1 file changed, 2 insertions(+), 48 deletions(-)
diff --git a/mm/hmm.c b/mm/hmm.c
index a491d9aaafe45d..3a2610e0713329 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -28,7 +28,6 @@
 struct hmm_vma_walk {
 	struct hmm_range	*range;
-	struct dev_pagemap	*pgmap;
 	unsigned long		last;
 	unsigned int		flags;
 };
@@ -196,19 +195,8 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
 		return hmm_vma_fault(addr, end, fault, write_fault, walk);
 
 	pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
-	for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++) {
-		if (pmd_devmap(pmd)) {
-			hmm_vma_walk->pgmap = get_dev_pagemap(pfn,
-					      hmm_vma_walk->pgmap);
-			if (unlikely(!hmm_vma_walk->pgmap))
-				return -EBUSY;
-		}
+	for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
 		pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags;
-	}
-	if (hmm_vma_walk->pgmap) {
-		put_dev_pagemap(hmm_vma_walk->pgmap);
-		hmm_vma_walk->pgmap = NULL;
-	}
 	hmm_vma_walk->last = end;
 	return 0;
 }
@@ -300,15 +288,6 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 	if (fault || write_fault)
 		goto fault;
 
-	if (pte_devmap(pte)) {
-		hmm_vma_walk->pgmap = get_dev_pagemap(pte_pfn(pte),
-					      hmm_vma_walk->pgmap);
-		if (unlikely(!hmm_vma_walk->pgmap)) {
-			pte_unmap(ptep);
-			return -EBUSY;
-		}
-	}
-
 	/*
 	 * Since each architecture defines a struct page for the zero page, just
 	 * fall through and treat it like a normal page.
@@ -328,10 +307,6 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 	return 0;
 
 fault:
-	if (hmm_vma_walk->pgmap) {
-		put_dev_pagemap(hmm_vma_walk->pgmap);
-		hmm_vma_walk->pgmap = NULL;
-	}
 	pte_unmap(ptep);
 	/* Fault any virtual address we were asked to fault */
 	return hmm_vma_fault(addr, end, fault, write_fault, walk);
@@ -418,16 +393,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 			return r;
 		}
 	}
-	if (hmm_vma_walk->pgmap) {
-		/*
-		 * We do put_dev_pagemap() here and not in hmm_vma_handle_pte()
-		 * so that we can leverage get_dev_pagemap() optimization which
-		 * will not re-take a reference on a pgmap if we already have
-		 * one.
-		 */
-		put_dev_pagemap(hmm_vma_walk->pgmap);
-		hmm_vma_walk->pgmap = NULL;
-	}
 	pte_unmap(ptep - 1);
 
 	hmm_vma_walk->last = addr;
@@ -491,20 +456,9 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
 	}
 
 	pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
-	for (i = 0; i < npages; ++i, ++pfn) {
-		hmm_vma_walk->pgmap = get_dev_pagemap(pfn,
-					      hmm_vma_walk->pgmap);
-		if (unlikely(!hmm_vma_walk->pgmap)) {
-			ret = -EBUSY;
-			goto out_unlock;
-		}
+	for (i = 0; i < npages; ++i, ++pfn)
 		pfns[i] = hmm_device_entry_from_pfn(range, pfn) | cpu_flags;
-	}
-	if (hmm_vma_walk->pgmap) {
-		put_dev_pagemap(hmm_vma_walk->pgmap);
-		hmm_vma_walk->pgmap = NULL;
-	}
 	hmm_vma_walk->last = end;
 	goto out_unlock;
 }
From: Jason Gunthorpe <jgg@mellanox.com>
Using two bool out-parameters instead of a flags return value is unnecessary and leads to bugs. Returning a value is easier for the compiler to check and easier to pass around the code flow.
Convert the two bools into flags and push the change to all callers.
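Condensed from the hunks below, the range-level helper then just ORs the per-page results together, with an early exit once both flags are set:

	unsigned int required_fault = 0;
	unsigned long i;

	for (i = 0; i < npages; ++i) {
		required_fault |=
			hmm_pte_need_fault(hmm_vma_walk, pfns[i], cpu_flags);
		/* Both flags set: no later page can add anything, stop. */
		if (required_fault == (NEED_FAULT | NEED_WRITE_FAULT))
			return required_fault;
	}
	return required_fault;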
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 mm/hmm.c | 153 ++++++++++++++++++++++++-------------------------------
 1 file changed, 67 insertions(+), 86 deletions(-)
diff --git a/mm/hmm.c b/mm/hmm.c
index 3a2610e0713329..b4f662eadb7a7c 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -32,6 +32,11 @@ struct hmm_vma_walk {
 	unsigned int		flags;
 };
 
+enum {
+	NEED_FAULT = 1 << 0,
+	NEED_WRITE_FAULT = 1 << 1,
+};
+
 static int hmm_pfns_fill(unsigned long addr, unsigned long end,
 		struct hmm_range *range, enum hmm_pfn_value_e value)
 {
@@ -49,8 +54,7 @@ static int hmm_pfns_fill(unsigned long addr, unsigned long end,
 * hmm_vma_fault() - fault in a range lacking valid pmd or pte(s)
 * @addr: range virtual start address (inclusive)
 * @end: range virtual end address (exclusive)
- * @fault: should we fault or not ?
- * @write_fault: write fault ?
+ * @required_fault: NEED_FAULT_* flags
 * @walk: mm_walk structure
 * Return: -EBUSY after page fault, or page fault error
 *
@@ -58,8 +62,7 @@ static int hmm_pfns_fill(unsigned long addr, unsigned long end,
 * or whenever there is no page directory covering the virtual address range.
 */
 static int hmm_vma_fault(unsigned long addr, unsigned long end,
-			 bool fault, bool write_fault,
-			 struct mm_walk *walk)
+			 unsigned int required_fault, struct mm_walk *walk)
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
@@ -68,13 +71,13 @@ static int hmm_vma_fault(unsigned long addr, unsigned long end,
 	unsigned long i = (addr - range->start) >> PAGE_SHIFT;
 	unsigned int fault_flags = FAULT_FLAG_REMOTE;
 
-	WARN_ON_ONCE(!fault && !write_fault);
+	WARN_ON_ONCE(!required_fault);
 	hmm_vma_walk->last = addr;
 
 	if (!vma)
 		goto out_error;
 
-	if (write_fault) {
+	if (required_fault & NEED_WRITE_FAULT) {
 		if (!(vma->vm_flags & VM_WRITE))
 			return -EPERM;
 		fault_flags |= FAULT_FLAG_WRITE;
@@ -91,14 +94,13 @@ static int hmm_vma_fault(unsigned long addr, unsigned long end,
 	return -EFAULT;
 }
 
-static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
-				      uint64_t pfns, uint64_t cpu_flags,
-				      bool *fault, bool *write_fault)
+static unsigned int hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
+				       uint64_t pfns, uint64_t cpu_flags)
 {
 	struct hmm_range *range = hmm_vma_walk->range;
 
 	if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT)
-		return;
+		return 0;
 
 	/*
 	 * So we not only consider the individual per page request we also
@@ -114,37 +116,37 @@ static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
 
 	/* We aren't ask to do anything ... */
 	if (!(pfns & range->flags[HMM_PFN_VALID]))
-		return;
+		return 0;
 
-	/* If CPU page table is not valid then we need to fault */
-	*fault = !(cpu_flags & range->flags[HMM_PFN_VALID]);
 	/* Need to write fault ? */
 	if ((pfns & range->flags[HMM_PFN_WRITE]) &&
-	    !(cpu_flags & range->flags[HMM_PFN_WRITE])) {
-		*write_fault = true;
-		*fault = true;
-	}
+	    !(cpu_flags & range->flags[HMM_PFN_WRITE]))
+		return NEED_FAULT | NEED_WRITE_FAULT;
+
+	/* If CPU page table is not valid then we need to fault */
+	if (!(cpu_flags & range->flags[HMM_PFN_VALID]))
+		return NEED_FAULT;
+	return 0;
 }
 
-static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
-				 const uint64_t *pfns, unsigned long npages,
-				 uint64_t cpu_flags, bool *fault,
-				 bool *write_fault)
+static unsigned int
+hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
+		     const uint64_t *pfns, unsigned long npages,
+		     uint64_t cpu_flags)
 {
+	unsigned int required_fault = 0;
 	unsigned long i;
 
-	if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT) {
-		*fault = *write_fault = false;
-		return;
-	}
+	if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT)
+		return 0;
 
-	*fault = *write_fault = false;
 	for (i = 0; i < npages; ++i) {
-		hmm_pte_need_fault(hmm_vma_walk, pfns[i], cpu_flags,
-				   fault, write_fault);
-		if ((*write_fault))
-			return;
+		required_fault |=
+			hmm_pte_need_fault(hmm_vma_walk, pfns[i], cpu_flags);
+		if (required_fault == (NEED_FAULT | NEED_WRITE_FAULT))
+			return required_fault;
 	}
+	return required_fault;
 }
 
 static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
@@ -152,17 +154,16 @@ static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
-	bool fault, write_fault;
+	unsigned int required_fault;
 	unsigned long i, npages;
 	uint64_t *pfns;
 
 	i = (addr - range->start) >> PAGE_SHIFT;
 	npages = (end - addr) >> PAGE_SHIFT;
 	pfns = &range->pfns[i];
-	hmm_range_need_fault(hmm_vma_walk, pfns, npages,
-			     0, &fault, &write_fault);
-	if (fault || write_fault)
-		return hmm_vma_fault(addr, end, fault, write_fault, walk);
+	required_fault = hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0);
+	if (required_fault)
+		return hmm_vma_fault(addr, end, required_fault, walk);
 	hmm_vma_walk->last = addr;
 	return hmm_pfns_fill(addr, end, range, HMM_PFN_NONE);
 }
@@ -183,16 +184,15 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
 	unsigned long pfn, npages, i;
-	bool fault, write_fault;
+	unsigned int required_fault;
 	uint64_t cpu_flags;
 
 	npages = (end - addr) >> PAGE_SHIFT;
 	cpu_flags = pmd_to_hmm_pfn_flags(range, pmd);
-	hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags,
-			     &fault, &write_fault);
-
-	if (fault || write_fault)
-		return hmm_vma_fault(addr, end, fault, write_fault, walk);
+	required_fault =
+		hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags);
+	if (required_fault)
+		return hmm_vma_fault(addr, end, required_fault, walk);
 
 	pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
 	for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
@@ -229,18 +229,15 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
-	bool fault, write_fault;
+	unsigned int required_fault;
 	uint64_t cpu_flags;
 	pte_t pte = *ptep;
 	uint64_t orig_pfn = *pfn;
 
 	*pfn = range->values[HMM_PFN_NONE];
-	fault = write_fault = false;
-
 	if (pte_none(pte)) {
-		hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0,
-				   &fault, &write_fault);
-		if (fault || write_fault)
+		required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0);
+		if (required_fault)
 			goto fault;
 		return 0;
 	}
@@ -261,9 +258,8 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 			return 0;
 		}
 
-		hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0, &fault,
-				   &write_fault);
-		if (!fault && !write_fault)
+		required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0);
+		if (!required_fault)
 			return 0;
 
 		if (!non_swap_entry(entry))
@@ -283,9 +279,8 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 	}
 
 	cpu_flags = pte_to_hmm_pfn_flags(range, pte);
-	hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags, &fault,
-			   &write_fault);
-	if (fault || write_fault)
+	required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags);
+	if (required_fault)
 		goto fault;
 
 	/*
@@ -293,9 +288,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 	 * fall through and treat it like a normal page.
 	 */
 	if (pte_special(pte) && !is_zero_pfn(pte_pfn(pte))) {
-		hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0, &fault,
-				   &write_fault);
-		if (fault || write_fault) {
+		if (hmm_pte_need_fault(hmm_vma_walk, orig_pfn, 0)) {
 			pte_unmap(ptep);
 			return -EFAULT;
 		}
@@ -309,7 +302,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 fault:
 	pte_unmap(ptep);
 	/* Fault any virtual address we were asked to fault */
-	return hmm_vma_fault(addr, end, fault, write_fault, walk);
+	return hmm_vma_fault(addr, end, required_fault, walk);
 }
 
 static int hmm_vma_walk_pmd(pmd_t *pmdp,
@@ -322,7 +315,6 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 	uint64_t *pfns = &range->pfns[(start - range->start) >> PAGE_SHIFT];
 	unsigned long npages = (end - start) >> PAGE_SHIFT;
 	unsigned long addr = start;
-	bool fault, write_fault;
 	pte_t *ptep;
 	pmd_t pmd;
 
@@ -332,9 +324,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 		return hmm_vma_walk_hole(start, end, -1, walk);
 
 	if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
-		hmm_range_need_fault(hmm_vma_walk, pfns, npages,
-				     0, &fault, &write_fault);
-		if (fault || write_fault) {
+		if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0)) {
 			hmm_vma_walk->last = addr;
 			pmd_migration_entry_wait(walk->mm, pmdp);
 			return -EBUSY;
@@ -343,9 +333,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 	}
 
 	if (!pmd_present(pmd)) {
-		hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
-				     &write_fault);
-		if (fault || write_fault)
+		if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0))
 			return -EFAULT;
 		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
 	}
@@ -375,9 +363,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp,
 	 * recover.
 	 */
 	if (pmd_bad(pmd)) {
-		hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0, &fault,
-				     &write_fault);
-		if (fault || write_fault)
+		if (hmm_range_need_fault(hmm_vma_walk, pfns, npages, 0))
 			return -EFAULT;
 		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
 	}
@@ -434,8 +420,8 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
 
 	if (pud_huge(pud) && pud_devmap(pud)) {
 		unsigned long i, npages, pfn;
+		unsigned int required_fault;
 		uint64_t *pfns, cpu_flags;
-		bool fault, write_fault;
 
 		if (!pud_present(pud)) {
 			spin_unlock(ptl);
@@ -447,12 +433,11 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
 		pfns = &range->pfns[i];
 
 		cpu_flags = pud_to_hmm_pfn_flags(range, pud);
-		hmm_range_need_fault(hmm_vma_walk, pfns, npages,
-				     cpu_flags, &fault, &write_fault);
-		if (fault || write_fault) {
+		required_fault = hmm_range_need_fault(hmm_vma_walk, pfns,
+						      npages, cpu_flags);
+		if (required_fault) {
 			spin_unlock(ptl);
-			return hmm_vma_fault(addr, end, fault, write_fault,
-					     walk);
+			return hmm_vma_fault(addr, end, required_fault, walk);
 		}
 
 		pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
@@ -484,7 +469,7 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
 	struct hmm_range *range = hmm_vma_walk->range;
 	struct vm_area_struct *vma = walk->vma;
 	uint64_t orig_pfn, cpu_flags;
-	bool fault, write_fault;
+	unsigned int required_fault;
 	spinlock_t *ptl;
 	pte_t entry;
 
@@ -495,12 +480,10 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
 	orig_pfn = range->pfns[i];
 	range->pfns[i] = range->values[HMM_PFN_NONE];
 	cpu_flags = pte_to_hmm_pfn_flags(range, entry);
-	fault = write_fault = false;
-	hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
-			   &fault, &write_fault);
-	if (fault || write_fault) {
+	required_fault = hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags);
+	if (required_fault) {
 		spin_unlock(ptl);
-		return hmm_vma_fault(addr, end, fault, write_fault, walk);
+		return hmm_vma_fault(addr, end, required_fault, walk);
 	}
 
 	pfn = pte_pfn(entry) + ((start & ~hmask) >> PAGE_SHIFT);
@@ -532,17 +515,15 @@ static int hmm_vma_walk_test(unsigned long start, unsigned long end,
 	 */
 	if ((vma->vm_flags & (VM_IO | VM_PFNMAP | VM_MIXEDMAP)) ||
 	    !(vma->vm_flags & VM_READ)) {
-		bool fault, write_fault;
-
 		/*
 		 * Check to see if a fault is requested for any page in the
 		 * range.
 		 */
-		hmm_range_need_fault(hmm_vma_walk, range->pfns +
-					((start - range->start) >> PAGE_SHIFT),
-					(end - start) >> PAGE_SHIFT,
-					0, &fault, &write_fault);
-		if (fault || write_fault)
+		if (hmm_range_need_fault(hmm_vma_walk,
+					 range->pfns +
+					 ((start - range->start) >>
+					  PAGE_SHIFT),
+					 (end - start) >> PAGE_SHIFT, 0))
 			return -EFAULT;
 
 		hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
From: Jason Gunthorpe <jgg@mellanox.com>
Delete several functions that are never called, fix some desync between comments and structure content, remove an unused ret, and move the one function only used by hmm.c into hmm.c.
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 include/linux/hmm.h | 50 ---------------------------------------------
 mm/hmm.c            | 12 ++++++++++++
 2 files changed, 12 insertions(+), 50 deletions(-)
diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index bb6be4428633a8..184a8633260f9d 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -120,9 +120,6 @@ enum hmm_pfn_value_e {
 *
 * @notifier: a mmu_interval_notifier that includes the start/end
 * @notifier_seq: result of mmu_interval_read_begin()
- * @hmm: the core HMM structure this range is active against
- * @vma: the vm area struct for the range
- * @list: all range lock are on a list
 * @start: range virtual start address (inclusive)
 * @end: range virtual end address (exclusive)
 * @pfns: array of pfns (big enough for the range)
@@ -131,7 +128,6 @@ enum hmm_pfn_value_e {
 * @default_flags: default flags for the range (write, read, ... see hmm doc)
 * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter
 * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT)
- * @valid: pfns array did not change since it has been fill by an HMM function
 * @dev_private_owner: owner of device private pages
 */
 struct hmm_range {
@@ -171,52 +167,6 @@ static inline struct page *hmm_device_entry_to_page(const struct hmm_range *rang
 	return pfn_to_page(entry >> range->pfn_shift);
 }
 
-/*
- * hmm_device_entry_to_pfn() - return pfn value store in a device entry
- * @range: range use to decode device entry value
- * @entry: device entry to extract pfn from
- * Return: pfn value if device entry is valid, -1UL otherwise
- */
-static inline unsigned long
-hmm_device_entry_to_pfn(const struct hmm_range *range, uint64_t pfn)
-{
-	if (pfn == range->values[HMM_PFN_NONE])
-		return -1UL;
-	if (pfn == range->values[HMM_PFN_ERROR])
-		return -1UL;
-	if (pfn == range->values[HMM_PFN_SPECIAL])
-		return -1UL;
-	if (!(pfn & range->flags[HMM_PFN_VALID]))
-		return -1UL;
-	return (pfn >> range->pfn_shift);
-}
-
-/*
- * hmm_device_entry_from_page() - create a valid device entry for a page
- * @range: range use to encode HMM pfn value
- * @page: page for which to create the device entry
- * Return: valid device entry for the page
- */
-static inline uint64_t hmm_device_entry_from_page(const struct hmm_range *range,
-						  struct page *page)
-{
-	return (page_to_pfn(page) << range->pfn_shift) |
-		range->flags[HMM_PFN_VALID];
-}
-
-/*
- * hmm_device_entry_from_pfn() - create a valid device entry value from pfn
- * @range: range use to encode HMM pfn value
- * @pfn: pfn value for which to create the device entry
- * Return: valid device entry for the pfn
- */
-static inline uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range,
-						 unsigned long pfn)
-{
-	return (pfn << range->pfn_shift) |
-		range->flags[HMM_PFN_VALID];
-}
-
 /* Don't fault in missing PTEs, just snapshot the current state. */
 #define HMM_FAULT_SNAPSHOT		(1 << 1)

diff --git a/mm/hmm.c b/mm/hmm.c
index b4f662eadb7a7c..687d21c675ee60 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -37,6 +37,18 @@ enum {
 	NEED_WRITE_FAULT = 1 << 1,
 };
 
+/*
+ * hmm_device_entry_from_pfn() - create a valid device entry value from pfn
+ * @range: range use to encode HMM pfn value
+ * @pfn: pfn value for which to create the device entry
+ * Return: valid device entry for the pfn
+ */
+static uint64_t hmm_device_entry_from_pfn(const struct hmm_range *range,
+					  unsigned long pfn)
+{
+	return (pfn << range->pfn_shift) | range->flags[HMM_PFN_VALID];
+}
+
 static int hmm_pfns_fill(unsigned long addr, unsigned long end,
 		struct hmm_range *range, enum hmm_pfn_value_e value)
 {
On 3/20/20 9:49 AM, Jason Gunthorpe wrote:
From: Jason Gunthorpe <jgg@mellanox.com>

Delete several functions that are never called, fix some desync between comments and structure content, remove an unused ret, and move the one function only used by hmm.c into hmm.c.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>

Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
 * @default_flags: default flags for the range (write, read, ... see hmm doc)
 * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter
 * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT)
s/pfn_shifts/pfn_shift
On Fri, Mar 20, 2020 at 02:46:09PM -0700, Ralph Campbell wrote:
On 3/20/20 9:49 AM, Jason Gunthorpe wrote:
From: Jason Gunthorpe <jgg@mellanox.com>

Delete several functions that are never called, fix some desync between comments and structure content, remove an unused ret, and move the one function only used by hmm.c into hmm.c.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>

Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
 * @default_flags: default flags for the range (write, read, ... see hmm doc)
 * @pfn_flags_mask: allows to mask pfn flags so that only default_flags matter
 * @pfn_shifts: pfn shift value (should be <= PAGE_SHIFT)
s/pfn_shifts/pfn_shift
Got it in v2, thanks
Jason
From: Jason Gunthorpe <jgg@mellanox.com>
Now that flags are handled on a fine-grained, per-page basis, this global flag is redundant and has a confusing overlap with the pfn_flags_mask and default_flags.

Normalize the HMM_FAULT_SNAPSHOT behavior into one place. Callers needing the SNAPSHOT behavior should set a pfn_flags_mask and default_flags that always result in a cleared HMM_PFN_VALID. Then no pages will be faulted, and HMM_FAULT_SNAPSHOT is not a special flow that overrides the masking mechanism.
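For example, a snapshot-only caller is now configured purely through the range fields (values taken from the lib/test_hmm.c diff in the cover letter; the dmirror_* names belong to that tester):

	struct hmm_range range = {
		.flags = dmirror_hmm_flags,
		.values = dmirror_hmm_values,
		.pfn_shift = DPT_SHIFT,
		.default_flags = 0,
		.pfn_flags_mask = 0,
		.dev_private_owner = dmirror->mdevice,
	};

	/* Neither the mask nor the defaults can ever request HMM_PFN_VALID,
	 * so hmm_range_fault() faults nothing and only snapshots the
	 * current CPU page table state. */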
As this is the last flag, also remove the flags argument. If future flags are needed they can be part of the struct hmm_range function arguments.
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 Documentation/vm/hmm.rst                | 12 +++++-------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c |  2 +-
 drivers/gpu/drm/nouveau/nouveau_svm.c   |  2 +-
 include/linux/hmm.h                     |  5 +----
 mm/hmm.c                                | 17 +++++++++--------
 5 files changed, 17 insertions(+), 21 deletions(-)
diff --git a/Documentation/vm/hmm.rst b/Documentation/vm/hmm.rst
index 95fec596836262..4e3e9362afeb10 100644
--- a/Documentation/vm/hmm.rst
+++ b/Documentation/vm/hmm.rst
@@ -161,13 +161,11 @@ device must complete the update before the driver callback returns.
 When the device driver wants to populate a range of virtual addresses, it can
 use::
 
-  long hmm_range_fault(struct hmm_range *range, unsigned int flags);
+  long hmm_range_fault(struct hmm_range *range);
 
-With the HMM_RANGE_SNAPSHOT flag, it will only fetch present CPU page table
-entries and will not trigger a page fault on missing or non-present entries.
-Without that flag, it does trigger a page fault on missing or read-only entries
-if write access is requested (see below). Page faults use the generic mm page
-fault code path just like a CPU page fault.
+It will trigger a page fault on missing or read-only entries if write access is
+requested (see below). Page faults use the generic mm page fault code path just
+like a CPU page fault.
 
 Both functions copy CPU page table entries into their pfns array argument. Each
 entry in that array corresponds to an address in the virtual range. HMM
@@ -197,7 +195,7 @@ The usage pattern is::
  again:
        range.notifier_seq = mmu_interval_read_begin(&interval_sub);
        down_read(&mm->mmap_sem);
-       ret = hmm_range_fault(&range, HMM_RANGE_SNAPSHOT);
+       ret = hmm_range_fault(&range);
        if (ret) {
            up_read(&mm->mmap_sem);
            if (ret == -EBUSY)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 90821ce5e6cad0..c520290709371b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -856,7 +856,7 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 	range->notifier_seq = mmu_interval_read_begin(&bo->notifier);
 
 	down_read(&mm->mmap_sem);
-	r = hmm_range_fault(range, 0);
+	r = hmm_range_fault(range);
 	up_read(&mm->mmap_sem);
 	if (unlikely(r <= 0)) {
 		/*

diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 39c731a99937c6..e3797b2d4d1759 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -540,7 +540,7 @@ static int nouveau_range_fault(struct nouveau_svmm *svmm,
 	range.default_flags = 0;
 	range.pfn_flags_mask = -1UL;
 	down_read(&mm->mmap_sem);
-	ret = hmm_range_fault(&range, 0);
+	ret = hmm_range_fault(&range);
 	up_read(&mm->mmap_sem);
 	if (ret <= 0) {
 		if (ret == 0 || ret == -EBUSY)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 184a8633260f9d..6b4004905aac89 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -167,13 +167,10 @@ static inline struct page *hmm_device_entry_to_page(const struct hmm_range *rang
 	return pfn_to_page(entry >> range->pfn_shift);
 }
 
-/* Don't fault in missing PTEs, just snapshot the current state. */
-#define HMM_FAULT_SNAPSHOT		(1 << 1)
-
 /*
 * Please see Documentation/vm/hmm.rst for how to use the range API.
 */
-long hmm_range_fault(struct hmm_range *range, unsigned int flags);
+long hmm_range_fault(struct hmm_range *range);
 
 /*
 * HMM_RANGE_DEFAULT_TIMEOUT - default timeout (ms) when waiting for a range

diff --git a/mm/hmm.c b/mm/hmm.c
index 687d21c675ee60..7f77fb6e35cf78 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -29,7 +29,6 @@
 struct hmm_vma_walk {
 	struct hmm_range	*range;
 	unsigned long		last;
-	unsigned int		flags;
 };
 
 enum {
@@ -111,9 +110,6 @@ static unsigned int hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
 {
 	struct hmm_range *range = hmm_vma_walk->range;
 
-	if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT)
-		return 0;
-
 	/*
 	 * So we not only consider the individual per page request we also
 	 * consider the default flags requested for the range. The API can
@@ -146,10 +142,17 @@ hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
 		     const uint64_t *pfns, unsigned long npages,
 		     uint64_t cpu_flags)
 {
+	struct hmm_range *range = hmm_vma_walk->range;
 	unsigned int required_fault = 0;
 	unsigned long i;
 
-	if (hmm_vma_walk->flags & HMM_FAULT_SNAPSHOT)
+	/*
+	 * If there is no way for valid to be set in hmm_pte_need_fault()
+	 * then don't bother to call it.
+	 */
+	if (!(((range->flags[HMM_PFN_VALID] & range->pfn_flags_mask) |
+	       range->default_flags) &
+	      range->flags[HMM_PFN_VALID]))
 		return 0;
 
 	for (i = 0; i < npages; ++i) {
@@ -559,7 +562,6 @@ static const struct mm_walk_ops hmm_walk_ops = {
/**
 * hmm_range_fault - try to fault some address in a virtual address range
 * @range: range being faulted
- * @flags: HMM_FAULT_* flags
 *
 * Return: the number of valid pages in range->pfns[] (from range start
 * address), which may be zero. On error one of the following status codes
@@ -583,12 +585,11 @@ static const struct mm_walk_ops hmm_walk_ops = {
 * On error, for one virtual address in the range, the function will mark the
 * corresponding HMM pfn entry with an error flag.
 */
-long hmm_range_fault(struct hmm_range *range, unsigned int flags)
+long hmm_range_fault(struct hmm_range *range)
 {
 	struct hmm_vma_walk hmm_vma_walk = {
 		.range = range,
 		.last = range->start,
-		.flags = flags,
 	};
 	struct mm_struct *mm = range->notifier->mm;
 	int ret;
From: Jason Gunthorpe <jgg@mellanox.com>
This code can be compiled when CONFIG_TRANSPARENT_HUGEPAGE is off, so remove the ifdef.
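The idiom this relies on (a sketch of the shape of the call site, not a quote of mm/hmm.c): when CONFIG_TRANSPARENT_HUGEPAGE=n, predicates such as pmd_trans_huge() and pmd_devmap() are compile-time constant 0, so the branch is provably dead and the compiler discards the call, while the now-unconditional hmm_vma_handle_pmd() still compiles in both configurations:

	/* Constant-false when THP is off; the call below is eliminated as
	 * dead code, so no #ifdef stub declaration is needed. */
	if (pmd_devmap(pmd) || pmd_trans_huge(pmd))
		return hmm_vma_handle_pmd(walk, addr, end, pfns, pmd);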
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 mm/hmm.c | 6 ------
 1 file changed, 6 deletions(-)
diff --git a/mm/hmm.c b/mm/hmm.c
index 7f77fb6e35cf78..a09b4908e9c81a 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -192,7 +192,6 @@ static inline uint64_t pmd_to_hmm_pfn_flags(struct hmm_range *range, pmd_t pmd)
 		range->flags[HMM_PFN_VALID];
 }
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
 		unsigned long end, uint64_t *pfns, pmd_t pmd)
 {
@@ -215,11 +214,6 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
 	hmm_vma_walk->last = end;
 	return 0;
 }
-#else /* CONFIG_TRANSPARENT_HUGEPAGE */
-/* stub to allow the code below to compile */
-int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
-		unsigned long end, uint64_t *pfns, pmd_t pmd);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 static inline bool hmm_is_device_private_entry(struct hmm_range *range,
 		swp_entry_t entry)
From: Jason Gunthorpe <jgg@mellanox.com>
swp_offset() should not be called directly; the wrappers are supposed to abstract away the encoding of the device_private-specific information in the swap entry.
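For reference, at the time of this series the wrapper is a trivial inline in include/linux/swapops.h:

static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry)
{
	return swp_offset(entry);
}

So there is no behavior change today; the point is that any future change to the device-private swap entry encoding only has to touch the wrapper.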
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
---
 mm/hmm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/mm/hmm.c b/mm/hmm.c
index a09b4908e9c81a..fd9ee2b5fd9989 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -259,8 +259,8 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 		 * the PFN even if not present.
 		 */
 		if (hmm_is_device_private_entry(range, entry)) {
-			*pfn = hmm_device_entry_from_pfn(range,
-					swp_offset(entry));
+			*pfn = hmm_device_entry_from_pfn(
+				range, device_private_entry_to_pfn(entry));
 			*pfn |= range->flags[HMM_PFN_VALID];
 			if (is_write_device_private_entry(entry))
 				*pfn |= range->flags[HMM_PFN_WRITE];
On 3/20/20 9:48 AM, Jason Gunthorpe wrote:
From: Jason Gunthorpe <jgg@mellanox.com>
I've had these in my work queue for a bit, nothing profound here, just some small edits for clarity.
The hmm tester changes are clear enough, but I'm having a bit of trouble figuring out what this series applies cleanly to, since I'm trying to apply it on top of the other patches you and Christoph have sent out. Is there a private git tree/branch where everything is applied?
On Fri, Mar 20, 2020 at 11:51:47AM -0700, Ralph Campbell wrote:
On 3/20/20 9:48 AM, Jason Gunthorpe wrote:
From: Jason Gunthorpe <jgg@mellanox.com>
I've had these in my work queue for a bit, nothing profound here, just some small edits for clarity.
The hmm tester changes are clear enough, but I'm having a bit of trouble figuring out what this series applies cleanly to, since I'm trying to apply it on top of the other patches you and Christoph have sent out. Is there a private git tree/branch where everything is applied?
I accumulate everything here:
https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/log/?h=hmm
The patches should apply on top of that
Jason
On 3/20/20 9:48 AM, Jason Gunthorpe wrote:
From: Jason Gunthorpe <jgg@mellanox.com>
I've had these in my work queue for a bit, nothing profound here, just some small edits for clarity.
The series looks good to me, so:

Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>