On Tue, Mar 17, 2020 at 04:14:31PM -0700, Ralph Campbell wrote:
On 3/17/20 5:59 AM, Christoph Hellwig wrote:
On Tue, Mar 17, 2020 at 09:47:55AM -0300, Jason Gunthorpe wrote:
I've been using v7 of Ralph's tester and it is working well - it has DEVICE_PRIVATE support so I think it can test this flow too. Ralph are you able?
This hunk seems trivial enough to me, can we include it now?
I can send a separate patch for it once the tester covers it. I don't want to add it to the original patch as it is a significant behavior change compared to the existing code.
Attached is an updated version of my HMM tests based on linux-5.6.0-rc6. I ran this OK with Jason's 8+1 HMM patches, Christoph's 1-5 misc HMM clean ups, and Christoph's 1-4 device private page changes applied.
I'd like to get this to mergable, it looks pretty good now, but I have no idea about selftests - and I'm struggling to even compile the tools dir
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 69def4a9df00..4d22ce7879a7 100644 +++ b/lib/Kconfig.debug @@ -2162,6 +2162,18 @@ config TEST_MEMINIT
If unsure, say N.
+config TEST_HMM
- tristate "Test HMM (Heterogeneous Memory Management)"
- depends on DEVICE_PRIVATE
- select HMM_MIRROR
select MMU_NOTIFIER
extra spaces
In general I wonder if it even makes sense that DEVICE_PRIVATE is user selectable?
+static int dmirror_fops_open(struct inode *inode, struct file *filp) +{
- struct cdev *cdev = inode->i_cdev;
- struct dmirror *dmirror;
- int ret;
- /* Mirror this process address space */
- dmirror = kzalloc(sizeof(*dmirror), GFP_KERNEL);
- if (dmirror == NULL)
return -ENOMEM;
- dmirror->mdevice = container_of(cdev, struct dmirror_device, cdevice);
- mutex_init(&dmirror->mutex);
- xa_init(&dmirror->pt);
- ret = mmu_interval_notifier_insert(&dmirror->notifier, current->mm,
0, ULONG_MAX & PAGE_MASK, &dmirror_min_ops);
- if (ret) {
kfree(dmirror);
return ret;
- }
- /* Pairs with the mmdrop() in dmirror_fops_release(). */
- mmgrab(current->mm);
- dmirror->mm = current->mm;
The notifier holds a mmgrab, no need for another one
- /* Only the first open registers the address space. */
- filp->private_data = dmirror;
Not sure what this comment means
+static inline struct dmirror_device *dmirror_page_to_device(struct page *page)
+{
- struct dmirror_chunk *devmem;
- devmem = container_of(page->pgmap, struct dmirror_chunk, pagemap);
- return devmem->mdevice;
+}
extra devmem var is not really needed
+static bool dmirror_device_is_mine(struct dmirror_device *mdevice,
struct page *page)
+{
- if (!is_zone_device_page(page))
return false;
- return page->pgmap->ops == &dmirror_devmem_ops &&
dmirror_page_to_device(page) == mdevice;
+}
Use new owner stuff, right? Actually this is redunant now, the check should be just WARN_ON pageowner != self owner
+static int dmirror_do_fault(struct dmirror *dmirror, struct hmm_range *range) +{
- uint64_t *pfns = range->pfns;
- unsigned long pfn;
- for (pfn = (range->start >> PAGE_SHIFT);
pfn < (range->end >> PAGE_SHIFT);
pfn++, pfns++) {
struct page *page;
void *entry;
/*
* HMM_PFN_ERROR is returned if it is accessing invalid memory
* either because of memory error (hardware detected memory
* corruption) or more likely because of truncate on mmap
* file.
*/
if (*pfns == range->values[HMM_PFN_ERROR])
return -EFAULT;
Unless that snapshot is use hmm_range_fault() never returns success and sets PFN_ERROR, so this should be a WARN_ON
if (!(*pfns & range->flags[HMM_PFN_VALID]))
return -EFAULT;
Same with valid.
page = hmm_device_entry_to_page(range, *pfns);
/* We asked for pages to be populated but check anyway. */
if (!page)
return -EFAULT;
WARN_ON
if (is_zone_device_page(page)) {
/*
* TODO: need a way to ask HMM to fault foreign zone
* device private pages.
*/
if (!dmirror_device_is_mine(dmirror->mdevice, page))
continue;
Actually re
+static bool dmirror_interval_invalidate(struct mmu_interval_notifier *mni,
const struct mmu_notifier_range *range,
unsigned long cur_seq)
+{
- struct dmirror *dmirror = container_of(mni, struct dmirror, notifier);
- struct mm_struct *mm = dmirror->mm;
- /*
* If the process doesn't exist, we don't need to invalidate the
* device page table since the address space will be torn down.
*/
- if (!mmget_not_zero(mm))
return true;
Why? Don't the notifiers provide for this already.
mmget_not_zero() is required before calling hmm_range_fault() though
+static int dmirror_fault(struct dmirror *dmirror, unsigned long start,
unsigned long end, bool write)
+{
- struct mm_struct *mm = dmirror->mm;
- unsigned long addr;
- uint64_t pfns[64];
- struct hmm_range range = {
.notifier = &dmirror->notifier,
.pfns = pfns,
.flags = dmirror_hmm_flags,
.values = dmirror_hmm_values,
.pfn_shift = DPT_SHIFT,
.pfn_flags_mask = ~(dmirror_hmm_flags[HMM_PFN_VALID] |
dmirror_hmm_flags[HMM_PFN_WRITE]),
.default_flags = dmirror_hmm_flags[HMM_PFN_VALID] |
(write ? dmirror_hmm_flags[HMM_PFN_WRITE] : 0),
.dev_private_owner = dmirror->mdevice,
- };
- int ret = 0;
- /* Since the mm is for the mirrored process, get a reference first. */
- if (!mmget_not_zero(mm))
return 0;
Right
- for (addr = start; addr < end; addr = range.end) {
range.start = addr;
range.end = min(addr + (ARRAY_SIZE(pfns) << PAGE_SHIFT), end);
ret = dmirror_range_fault(dmirror, &range);
if (ret)
break;
- }
- mmput(mm);
- return ret;
+}
+static int dmirror_do_read(struct dmirror *dmirror, unsigned long start,
unsigned long end, struct dmirror_bounce *bounce)
+{
- unsigned long pfn;
- void *ptr;
- ptr = bounce->ptr + ((start - bounce->addr) & PAGE_MASK);
- for (pfn = start >> PAGE_SHIFT; pfn < (end >> PAGE_SHIFT); pfn++) {
void *entry;
struct page *page;
void *tmp;
entry = xa_load(&dmirror->pt, pfn);
page = xa_untag_pointer(entry);
if (!page)
return -ENOENT;
tmp = kmap(page);
memcpy(ptr, tmp, PAGE_SIZE);
kunmap(page);
ptr += PAGE_SIZE;
bounce->cpages++;
- }
- return 0;
+}
+static int dmirror_read(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd) +{
- struct dmirror_bounce bounce;
- unsigned long start, end;
- unsigned long size = cmd->npages << PAGE_SHIFT;
- int ret;
- start = cmd->addr;
- end = start + size;
- if (end < start)
return -EINVAL;
- ret = dmirror_bounce_init(&bounce, start, size);
- if (ret)
return ret;
+again:
- mutex_lock(&dmirror->mutex);
- ret = dmirror_do_read(dmirror, start, end, &bounce);
- mutex_unlock(&dmirror->mutex);
- if (ret == 0)
ret = copy_to_user((void __user *)cmd->ptr, bounce.ptr,
bounce.size);
Use u64_to_user_ptr() instead of the cast
+static int dmirror_write(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd) +{
- struct dmirror_bounce bounce;
- unsigned long start, end;
- unsigned long size = cmd->npages << PAGE_SHIFT;
- int ret;
- start = cmd->addr;
- end = start + size;
- if (end < start)
return -EINVAL;
- ret = dmirror_bounce_init(&bounce, start, size);
- if (ret)
return ret;
- ret = copy_from_user(bounce.ptr, (void __user *)cmd->ptr,
bounce.size);
ditto
- if (ret)
return ret;
+again:
- mutex_lock(&dmirror->mutex);
- ret = dmirror_do_write(dmirror, start, end, &bounce);
- mutex_unlock(&dmirror->mutex);
- if (ret == -ENOENT) {
start = cmd->addr + (bounce.cpages << PAGE_SHIFT);
ret = dmirror_fault(dmirror, start, end, true);
if (ret == 0) {
cmd->faults++;
goto again;
Use a loop instead of goto?
Also I get this:
lib/test_hmm.c: In function ‘dmirror_devmem_fault_alloc_and_copy’: lib/test_hmm.c:1041:25: warning: unused variable ‘vma’ [-Wunused-variable] 1041 | struct vm_area_struct *vma = args->vma;
But this is a kernel bug, due to alloc_page_vma being a #define not a static inline and me having CONFIG_NUMA off in this .config
Jason