On Tuesday, 14 September 2021 2:16:02 AM AEST Alex Sierra wrote:
Device Public type uses device memory that is coherently accesible by the CPU. This could be shown as SP (special purpose) memory range at the BIOS-e820 memory enumeration. If no SP memory is supported in system, this could be faked by setting CONFIG_EFI_FAKE_MEMMAP.
Currently, test_hmm only supports two different SP ranges of at least 256MB size. This could be specified in the kernel parameter variable efi_fake_mem. Ex. Two SP ranges of 1GB starting at 0x100000000 & 0x140000000 physical address. Ex. efi_fake_mem=1G@0x100000000:0x40000,1G@0x140000000:0x40000
Signed-off-by: Alex Sierra alex.sierra@amd.com
lib/test_hmm.c | 166 +++++++++++++++++++++++++++----------------- lib/test_hmm_uapi.h | 10 ++- 2 files changed, 113 insertions(+), 63 deletions(-)
diff --git a/lib/test_hmm.c b/lib/test_hmm.c index ef27e355738a..e346a48e2509 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -469,6 +469,7 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice, unsigned long pfn_first; unsigned long pfn_last; void *ptr;
int ret = -ENOMEM;
devmem = kzalloc(sizeof(*devmem), GFP_KERNEL); if (!devmem)
@@ -551,7 +552,7 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice, } spin_unlock(&mdevice->lock);
- return true;
- return 0;
err_release: mutex_unlock(&mdevice->devmem_lock); @@ -560,7 +561,7 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice, err_devmem: kfree(devmem);
- return false;
- return ret;
}
static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice) @@ -569,8 +570,10 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice) struct page *rpage;
/*
* This is a fake device so we alloc real system memory to store
* our device memory.
* For ZONE_DEVICE private type, this is a fake device so we alloc real
* system memory to store our device memory.
* For ZONE_DEVICE public type we use the actual dpage to store the data
*/ rpage = alloc_page(GFP_HIGHUSER); if (!rpage)* and ignore rpage.
@@ -603,7 +606,7 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args, struct dmirror *dmirror) { struct dmirror_device *mdevice = dmirror->mdevice;
- const unsigned long *src = args->src;
- unsigned long *src = args->src; unsigned long *dst = args->dst; unsigned long addr;
@@ -621,12 +624,18 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args, * unallocated pte_none() or read-only zero page. */ spage = migrate_pfn_to_page(*src);
if (spage && is_zone_device_page(spage)) {
pr_debug("page already in device spage pfn: 0x%lx\n",
page_to_pfn(spage));
*src &= ~MIGRATE_PFN_MIGRATE;
I don't think this is quite correct, callers shouldn't modify the src array. To mark a page as not migrating callers need to set *dst = 0.
However I think this should be considered a test failure anyway. If we are migrating from system to device memory we would have set MIGRATE_VMA_SELECT_SYSTEM meaning no device private pages should be returned. Therefore I don't think we can reach this unless there is a bug right?
continue;
dpage = dmirror_devmem_alloc_page(mdevice); if (!dpage) continue;}
rpage = dpage->zone_device_data;
rpage = is_device_private_page(dpage) ? dpage->zone_device_data :
if (spage) copy_highpage(rpage, spage); elsedpage;
@@ -638,8 +647,10 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args, * the simulated device memory and that page holds the pointer * to the mirror. */
rpage->zone_device_data = dmirror;rpage = dpage->zone_device_data;
pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
*dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED; if ((*src & MIGRATE_PFN_WRITE) ||page_to_pfn(spage), page_to_pfn(dpage));
@@ -673,10 +684,13 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args, continue;
/*
* Store the page that holds the data so the page table
* doesn't have to deal with ZONE_DEVICE private pages.
* For ZONE_DEVICE private pages we store the page that
* holds the data so the page table doesn't have to deal it.
* For ZONE_DEVICE public pages we store the actual page, since
*/* the CPU has coherent access to the page.
entry = dpage->zone_device_data;
entry = is_device_private_page(dpage) ? dpage->zone_device_data :
if (*dst & MIGRATE_PFN_WRITE) entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE); entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);dpage;
@@ -690,6 +704,47 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args, return 0; }
+static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
struct dmirror *dmirror)
+{
- unsigned long *src = args->src;
- unsigned long *dst = args->dst;
- unsigned long start = args->start;
- unsigned long end = args->end;
- unsigned long addr;
- for (addr = start; addr < end; addr += PAGE_SIZE,
src++, dst++) {
struct page *dpage, *spage;
spage = migrate_pfn_to_page(*src);
if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
continue;
if (is_device_private_page(spage)) {
spage = spage->zone_device_data;
} else {
pr_debug("page already in system or SPM spage pfn: 0x%lx\n",
page_to_pfn(spage));
*src &= ~MIGRATE_PFN_MIGRATE;
Same comment as above - shouldn't touch *src and also shouldn't be able to get here anyway.
continue;
}
dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
if (!dpage)
continue;
pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n",
page_to_pfn(spage), page_to_pfn(dpage));
lock_page(dpage);
xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
copy_highpage(dpage, spage);
*dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
if (*src & MIGRATE_PFN_WRITE)
*dst |= MIGRATE_PFN_WRITE;
- }
- return 0;
+}
static int dmirror_migrate(struct dmirror *dmirror, struct hmm_dmirror_cmd *cmd) { @@ -731,33 +786,46 @@ static int dmirror_migrate(struct dmirror *dmirror, args.start = addr; args.end = next; args.pgmap_owner = dmirror->mdevice;
args.flags = MIGRATE_VMA_SELECT_SYSTEM;
args.flags = (!cmd->alloc_to_devmem &&
dmirror->mdevice->zone_device_type ==
HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ?
MIGRATE_VMA_SELECT_DEVICE_PRIVATE :
ret = migrate_vma_setup(&args); if (ret) goto out;MIGRATE_VMA_SELECT_SYSTEM;
dmirror_migrate_alloc_and_copy(&args, dmirror);
if (cmd->alloc_to_devmem) {
pr_debug("Migrating from sys mem to device mem\n");
dmirror_migrate_alloc_and_copy(&args, dmirror);
} else {
pr_debug("Migrating from device mem to sys mem\n");
dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
migrate_vma_pages(&args);}
dmirror_migrate_finalize_and_map(&args, dmirror);
if (cmd->alloc_to_devmem)
migrate_vma_finalize(&args); } mmap_read_unlock(mm); mmput(mm);dmirror_migrate_finalize_and_map(&args, dmirror);
- /* Return the migrated data for verification. */
- ret = dmirror_bounce_init(&bounce, start, size);
- if (ret)
return ret;
- mutex_lock(&dmirror->mutex);
- ret = dmirror_do_read(dmirror, start, end, &bounce);
- mutex_unlock(&dmirror->mutex);
- if (ret == 0) {
if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
bounce.size))
ret = -EFAULT;
- /* Return the migrated data for verification. only for pages in device zone */
- if (cmd->alloc_to_devmem) {
ret = dmirror_bounce_init(&bounce, start, size);
if (ret)
return ret;
mutex_lock(&dmirror->mutex);
ret = dmirror_do_read(dmirror, start, end, &bounce);
mutex_unlock(&dmirror->mutex);
if (ret == 0) {
if (copy_to_user(u64_to_user_ptr(cmd->ptr), bounce.ptr,
bounce.size))
ret = -EFAULT;
}
cmd->cpages = bounce.cpages;
}dmirror_bounce_fini(&bounce);
- cmd->cpages = bounce.cpages;
- dmirror_bounce_fini(&bounce); return ret;
Rather than passing a flag (alloc_to_devmem) can you split this into two functions - one to migrate to device memory and one to migrate to system memory?
out: @@ -781,9 +849,15 @@ static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range, }
page = hmm_pfn_to_page(entry);
- if (is_device_private_page(page)) {
/* Is the page migrated to this device or some other? */
if (dmirror->mdevice == dmirror_page_to_device(page))
- if (is_device_page(page)) {
/* Is page ZONE_DEVICE public? */
if (!is_device_private_page(page))
*perm = HMM_DMIRROR_PROT_DEV_PUBLIC;
/*
* Is page ZONE_DEVICE private migrated to
* this device or some other?
*/
else *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;else if (dmirror->mdevice == dmirror_page_to_device(page)) *perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
@@ -1030,38 +1104,6 @@ static void dmirror_devmem_free(struct page *page) spin_unlock(&mdevice->lock); }
-static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
struct dmirror *dmirror)
-{
- const unsigned long *src = args->src;
- unsigned long *dst = args->dst;
- unsigned long start = args->start;
- unsigned long end = args->end;
- unsigned long addr;
- for (addr = start; addr < end; addr += PAGE_SIZE,
src++, dst++) {
struct page *dpage, *spage;
spage = migrate_pfn_to_page(*src);
if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
continue;
spage = spage->zone_device_data;
dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
if (!dpage)
continue;
lock_page(dpage);
xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
copy_highpage(dpage, spage);
*dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
if (*src & MIGRATE_PFN_WRITE)
*dst |= MIGRATE_PFN_WRITE;
- }
- return 0;
-}
static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf) { struct migrate_vma args; diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h index 00259d994410..b6cb8a7d2470 100644 --- a/lib/test_hmm_uapi.h +++ b/lib/test_hmm_uapi.h @@ -17,8 +17,12 @@
- @addr: (in) user address the device will read/write
- @ptr: (in) user address where device data is copied to/from
- @npages: (in) number of pages to read/write
- @alloc_to_devmem: (in) desired allocation destination during migration.
- True if allocation is to device memory.
- False if allocation is to system memory.
- @cpages: (out) number of pages copied
- @faults: (out) number of device page faults seen
*/
- @zone_device_type: (out) zone device memory type
struct hmm_dmirror_cmd { __u64 addr; @@ -26,7 +30,8 @@ struct hmm_dmirror_cmd { __u64 npages; __u64 cpages; __u64 faults;
- __u64 zone_device_type;
- __u32 zone_device_type;
- __u32 alloc_to_devmem;
Similar comment here. Rather than add a boolean flag to every command could you rename the existing command to HMM_DMIRROR_MIGRATE_TO_DEV and add another (HMM_DMIRROR_MIGRATE_TO_SYS) for this new operation? I think that would end up being a bit cleaner and matches how this actually gets used in hmm-test.c where you end up defining hmm_migrate_sys_to_dev() and hmm_migrate_to_dev_sys() anyway.
- Alistair
};
/* Expose the address space of the calling process through hmm device file */ @@ -49,6 +54,8 @@ struct hmm_dmirror_cmd {
device the ioctl() is made
- HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE: Migrated device private page on some
other device
- HMM_DMIRROR_PROT_DEV_PUBLIC: Migrate device public page on the device
*/
the ioctl() is made
enum { HMM_DMIRROR_PROT_ERROR = 0xFF, @@ -60,6 +67,7 @@ enum { HMM_DMIRROR_PROT_ZERO = 0x10, HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL = 0x20, HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE = 0x30,
- HMM_DMIRROR_PROT_DEV_PUBLIC = 0x40,
};
enum {