On 2/19/19 12:04 PM, jglisse@redhat.com wrote:
From: Jérôme Glisse <jglisse@redhat.com>
When notifying a change for a range, use the MMU_NOTIFIER_USE_CHANGE_PTE flag for page table updates that use set_pte_at_notify() and where we are going either from read and write to read only with the same pfn, or from read only to read and write with a new pfn.

Note that set_pte_at_notify() itself should only be used in rare cases, i.e. we do not want to use it when we are updating a significant range of virtual addresses and thus a significant number of ptes. Instead, for those cases, the event provided to the mmu notifier invalidate_range_start() callback should be used for optimization.
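To make the intended pattern concrete, here is a minimal caller-side sketch (an illustration only, not part of the patch; update_one_pte() is a hypothetical helper) modeled on the mm/ksm.c and mm/memory.c call sites further below: a single-pte update done through set_pte_at_notify() advertises the flag so listeners know a change_pte() callback will follow.

#include <linux/mm.h>
#include <linux/mmu_notifier.h>

/* Hypothetical example: update a single pte following one of the
 * allowed transitions (e.g. read/write -> read only, same pfn). */
static void update_one_pte(struct vm_area_struct *vma, unsigned long addr,
			   pte_t *ptep, pte_t newpte)
{
	struct mm_struct *mm = vma->vm_mm;
	struct mmu_notifier_range range;

	/* Single pte and a valid pattern: advertise USE_CHANGE_PTE. */
	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR,
				MMU_NOTIFIER_USE_CHANGE_PTE, vma, mm,
				addr, addr + PAGE_SIZE);
	mmu_notifier_invalidate_range_start(&range);

	/* set_pte_at_notify() also fires the change_pte() callback. */
	set_pte_at_notify(mm, addr, ptep, newpte);

	mmu_notifier_invalidate_range_end(&range);
}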
Changes since v1:
    - Use the new unsigned flags field in struct mmu_notifier_range
    - Use the new flags parameter to mmu_notifier_range_init()
    - Explicitly list all the patterns where we can use change_pte()
Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
Cc: Christian König <christian.koenig@amd.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Felix Kuehling <Felix.Kuehling@amd.com>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Ross Zwisler <zwisler@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Christian Koenig <christian.koenig@amd.com>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: kvm@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Cc: linux-rdma@vger.kernel.org
Cc: Arnd Bergmann <arnd@arndb.de>
 include/linux/mmu_notifier.h | 34 ++++++++++++++++++++++++++++++++--
 mm/ksm.c                     | 11 ++++++-----
 mm/memory.c                  |  5 +++--
 3 files changed, 41 insertions(+), 9 deletions(-)
diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h
index b6c004bd9f6a..0230a4b06b46 100644
--- a/include/linux/mmu_notifier.h
+++ b/include/linux/mmu_notifier.h
@@ -40,6 +40,26 @@ enum mmu_notifier_event {
 	MMU_NOTIFY_SOFT_DIRTY,
 };
 
+/*
+ * @MMU_NOTIFIER_RANGE_BLOCKABLE: can the mmu notifier range_start/range_end
+ * callback block or not ? If set then the callback can block.
+ *
+ * @MMU_NOTIFIER_USE_CHANGE_PTE: only set when the page table it updated with
+ * the set_pte_at_notify() the valid patterns for this are:
+ *    - pte read and write to read only same pfn
+ *    - pte read only to read and write (pfn can change or stay the same)
+ *    - pte read only to read only with different pfn
+ * It is illegal to set in any other circumstances.
+ *
+ * Note that set_pte_at_notify() should not be use outside of the above cases.
+ * When updating a range in batch (like write protecting a range) it is better
+ * to rely on invalidate_range_start() and struct mmu_notifier_range to infer
+ * the kind of update that is happening (as an example you can look at the
+ * mmu_notifier_range_update_to_read_only() function).
+ */
+#define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0)
+#define MMU_NOTIFIER_USE_CHANGE_PTE (1 << 1)
+
 #ifdef CONFIG_MMU_NOTIFIER
 
 /*
@@ -55,8 +75,6 @@ struct mmu_notifier_mm {
 	spinlock_t lock;
 };
 
-#define MMU_NOTIFIER_RANGE_BLOCKABLE (1 << 0)
-
 struct mmu_notifier_range {
 	struct vm_area_struct *vma;
 	struct mm_struct *mm;
@@ -268,6 +286,12 @@ mmu_notifier_range_blockable(const struct mmu_notifier_range *range)
 	return (range->flags & MMU_NOTIFIER_RANGE_BLOCKABLE);
 }
 
+static inline bool
+mmu_notifier_range_use_change_pte(const struct mmu_notifier_range *range)
+{
+	return (range->flags & MMU_NOTIFIER_USE_CHANGE_PTE);
+}
+
 static inline void mmu_notifier_release(struct mm_struct *mm)
 {
 	if (mm_has_notifiers(mm))
@@ -509,6 +533,12 @@ mmu_notifier_range_blockable(const struct mmu_notifier_range *range)
 	return true;
 }
 
+static inline bool
+mmu_notifier_range_use_change_pte(const struct mmu_notifier_range *range)
+{
+	return false;
+}
+
 static inline int mm_has_notifiers(struct mm_struct *mm)
 {
 	return 0;
diff --git a/mm/ksm.c b/mm/ksm.c
index b782fadade8f..41e51882f999 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1066,9 +1066,9 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
 
 	BUG_ON(PageTransCompound(page));
 
-	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
-				pvmw.address,
-				pvmw.address + PAGE_SIZE);
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR,
+				MMU_NOTIFIER_USE_CHANGE_PTE, vma, mm,
+				pvmw.address, pvmw.address + PAGE_SIZE);
 	mmu_notifier_invalidate_range_start(&range);
 
 	if (!page_vma_mapped_walk(&pvmw))
@@ -1155,8 +1155,9 @@ static int replace_page(struct vm_area_struct *vma, struct page *page,
 	if (!pmd)
 		goto out;
 
-	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm, addr,
-				addr + PAGE_SIZE);
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR,
+				MMU_NOTIFIER_USE_CHANGE_PTE,
+				vma, mm, addr, addr + PAGE_SIZE);
 	mmu_notifier_invalidate_range_start(&range);
 
 	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
diff --git a/mm/memory.c b/mm/memory.c
index 45dbc174a88c..cb71d3ff1b97 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2282,8 +2282,9 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 
 	__SetPageUptodate(new_page);
 
-	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
-				vmf->address & PAGE_MASK,
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR,
+				MMU_NOTIFIER_USE_CHANGE_PTE,
+				vma, mm, vmf->address & PAGE_MASK,
 				(vmf->address & PAGE_MASK) + PAGE_SIZE);
 	mmu_notifier_invalidate_range_start(&range);
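On the listener side, the new mmu_notifier_range_use_change_pte() helper introduced above is meant to be checked from invalidate_range_start(). A rough sketch (hypothetical example only, not code from this patch) of a notifier that also implements change_pte():

#include <linux/mmu_notifier.h>

static int example_invalidate_range_start(struct mmu_notifier *mn,
					  const struct mmu_notifier_range *range)
{
	if (mmu_notifier_range_use_change_pte(range)) {
		/*
		 * The pte is being updated with set_pte_at_notify(), so a
		 * change_pte() callback carrying the new pte will follow;
		 * the expensive range invalidation can be deferred to it.
		 */
		return 0;
	}

	/* Otherwise tear down secondary mappings for [start, end) here. */
	return 0;
}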
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>