On 2021-11-15 2:30 p.m., Alex Sierra wrote:
> Device Coherent type uses device memory that is coherently accessible by
> the CPU. This could be shown as SP (special purpose) memory range at the
> BIOS-e820 memory enumeration. If no SP memory is supported in the system,
> this could be faked by setting CONFIG_EFI_FAKE_MEMMAP.
>
> Currently, test_hmm only supports two different SP ranges of at least
> 256MB size. This could be specified in the kernel parameter variable
> efi_fake_mem. Ex. Two SP ranges of 1GB starting at 0x100000000 &
> 0x140000000 physical address. Ex.
> efi_fake_mem=1G@0x100000000:0x40000,1G@0x140000000:0x40000
>
> Signed-off-by: Alex Sierra <alex.sierra@amd.com>
> ---
>  lib/test_hmm.c      | 191 +++++++++++++++++++++++++++++++++-----------
>  lib/test_hmm_uapi.h |  15 ++--
>  2 files changed, 153 insertions(+), 53 deletions(-)
>
> diff --git a/lib/test_hmm.c b/lib/test_hmm.c
> index 45334df28d7b..9e2cc0cd4412 100644
> --- a/lib/test_hmm.c
> +++ b/lib/test_hmm.c
> @@ -471,6 +471,7 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
>  	unsigned long pfn_first;
>  	unsigned long pfn_last;
>  	void *ptr;
> +	int ret = -ENOMEM;
>
>  	devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
>  	if (!devmem)
> @@ -553,7 +554,7 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
>  	}
>  	spin_unlock(&mdevice->lock);
>
> -	return true;
> +	return 0;
You're changing the meaning of the return value, but you're not updating the callers. I think at least dmirror_devmem_alloc_page will be broken by this change.
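For reference, dmirror_devmem_alloc_page currently treats a non-zero (true)
return as success, so the check would need to be inverted to match the new
0-on-success convention. A rough sketch, assuming the call site still looks
like it does today:

	} else {
		spin_unlock(&mdevice->lock);
		/* was: if (!dmirror_allocate_chunk(mdevice, &dpage)) */
		if (dmirror_allocate_chunk(mdevice, &dpage))
			goto error;
	}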
>
>  err_release:
>  	mutex_unlock(&mdevice->devmem_lock);
> @@ -562,7 +563,7 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
>  err_devmem:
>  	kfree(devmem);
>
> -	return false;
> +	return ret;
>  }
>
>  static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
> @@ -571,8 +572,10 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
>  	struct page *rpage;
>
>  	/*
> -	 * This is a fake device so we alloc real system memory to store
> -	 * our device memory.
> +	 * For ZONE_DEVICE private type, this is a fake device so we alloc real
> +	 * system memory to store our device memory.
> +	 * For ZONE_DEVICE coherent type we use the actual dpage to store the data
> +	 * and ignore rpage.
>  	 */
>  	rpage = alloc_page(GFP_HIGHUSER);
>  	if (!rpage)
> @@ -623,12 +626,17 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
>  		 * unallocated pte_none() or read-only zero page.
>  		 */
>  		spage = migrate_pfn_to_page(*src);
> +		if (spage && is_zone_device_page(spage))
> +			pr_err("page already in device spage pfn: 0x%lx\n",
> +			       page_to_pfn(spage));
> +		WARN_ON(spage && is_zone_device_page(spage));
You can write WARN_ON inside the if-condition:

	if (WARN_ON(spage && is_zone_device_page(spage))) ...

But I don't see why you need both pr_err and WARN_ON. You can add a custom
message by using WARN instead of WARN_ON:

	WARN(spage && is_zone_device_page(spage),
	     "page already in device spage pfn: 0x%lx\n",
	     page_to_pfn(spage));
>
>  		dpage = dmirror_devmem_alloc_page(mdevice);
>  		if (!dpage)
>  			continue;
>
> -		rpage = dpage->zone_device_data;
> +		rpage = is_device_private_page(dpage) ? dpage->zone_device_data :
> +							dpage;
>  		if (spage)
>  			copy_highpage(rpage, spage);
>  		else
> @@ -640,8 +648,11 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
>  		 * the simulated device memory and that page holds the pointer
>  		 * to the mirror.
>  		 */
> -		rpage = dpage->zone_device_data;
>  		rpage->zone_device_data = dmirror;
> +		pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
> +			 page_to_pfn(spage), page_to_pfn(dpage));
>
>  		*dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
>  		if ((*src & MIGRATE_PFN_WRITE) ||
> @@ -721,10 +732,13 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
>  			continue;
>
>  		/*
> -		 * Store the page that holds the data so the page table
> -		 * doesn't have to deal with ZONE_DEVICE private pages.
> +		 * For ZONE_DEVICE private pages we store the page that
> +		 * holds the data so the page table doesn't have to deal it.
> +		 * For ZONE_DEVICE coherent pages we store the actual page, since
> +		 * the CPU has coherent access to the page.
>  		 */
I find this explanation confusing. The comment in dmirror_devmem_alloc_page is much clearer. I think all we need here is that dpage->zone_device_data points to the backing page for device_private pages. Device_coherent struct pages don't have a backing page because they represent a real CPU-accessible page already.
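Something along these lines, perhaps (wording sketch only):

	/*
	 * For ZONE_DEVICE private pages, zone_device_data points to the
	 * backing system memory page that holds the data. Device coherent
	 * pages have no backing page; they are real CPU-accessible pages
	 * already, so store dpage itself.
	 */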
> -		entry = dpage->zone_device_data;
> +		entry = is_device_private_page(dpage) ? dpage->zone_device_data :
> +					dpage;
I see this in a few places. Maybe better to wrap that in a helper function
or macro. Something like this:

	#define BACKING_PAGE(page) (is_device_private_page((page)) ? \
				    (page)->zone_device_data : (page))
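The assignments above would then collapse to, e.g.:

	rpage = BACKING_PAGE(dpage);
	...
	entry = BACKING_PAGE(dpage);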
>  		if (*dst & MIGRATE_PFN_WRITE)
>  			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
>  		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
> @@ -804,8 +818,110 @@ static int dmirror_exclusive(struct dmirror *dmirror,
>  	return ret;
>  }
>
> -static int dmirror_migrate(struct dmirror *dmirror,
> -			   struct hmm_dmirror_cmd *cmd)
> +static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
> +						      struct dmirror *dmirror)
> +{
> +	const unsigned long *src = args->src;
> +	unsigned long *dst = args->dst;
> +	unsigned long start = args->start;
> +	unsigned long end = args->end;
> +	unsigned long addr;
> +
> +	for (addr = start; addr < end; addr += PAGE_SIZE,
> +				       src++, dst++) {
> +		struct page *dpage, *spage;
> +
> +		spage = migrate_pfn_to_page(*src);
> +		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
> +			continue;
> +
> +		WARN_ON(!is_device_page(spage));
> +		spage = is_device_private_page(spage) ? spage->zone_device_data :
> +							spage;
> +		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
> +		if (!dpage)
> +			continue;
> +		pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n",
> +			 page_to_pfn(spage), page_to_pfn(dpage));
> +
> +		lock_page(dpage);
> +		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
> +		copy_highpage(dpage, spage);
> +		*dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
> +		if (*src & MIGRATE_PFN_WRITE)
> +			*dst |= MIGRATE_PFN_WRITE;
> +	}
> +	return 0;
> +}
> +
> +static int dmirror_migrate_to_system(struct dmirror *dmirror,
> +				     struct hmm_dmirror_cmd *cmd)
> +{
> +	unsigned long start, end, addr;
> +	unsigned long size = cmd->npages << PAGE_SHIFT;
> +	struct mm_struct *mm = dmirror->notifier.mm;
> +	struct vm_area_struct *vma;
> +	unsigned long src_pfns[64];
> +	unsigned long dst_pfns[64];
> +	struct migrate_vma args;
> +	unsigned long next;
> +	int ret;
> +
> +	start = cmd->addr;
> +	end = start + size;
> +	if (end < start)
> +		return -EINVAL;
> +
> +	/* Since the mm is for the mirrored process, get a reference first. */
> +	if (!mmget_not_zero(mm))
> +		return -EINVAL;
> +
> +	mmap_read_lock(mm);
> +	for (addr = start; addr < end; addr = next) {
> +		vma = find_vma(mm, addr);
> +		if (!vma || addr < vma->vm_start ||
> +		    !(vma->vm_flags & VM_READ)) {
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +		next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
> +		if (next > vma->vm_end)
> +			next = vma->vm_end;
> +
> +		args.vma = vma;
> +		args.src = src_pfns;
> +		args.dst = dst_pfns;
> +		args.start = addr;
> +		args.end = next;
> +		args.pgmap_owner = dmirror->mdevice;
> +		args.flags = (dmirror->mdevice->zone_device_type ==
> +			      HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ?
> +			      MIGRATE_VMA_SELECT_DEVICE_PRIVATE :
> +			      MIGRATE_VMA_SELECT_DEVICE_COHERENT;
> +
> +		ret = migrate_vma_setup(&args);
> +		if (ret)
> +			goto out;
> +
> +		pr_debug("Migrating from device mem to sys mem\n");
> +		dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
> +
> +		migrate_vma_pages(&args);
> +		migrate_vma_finalize(&args);
> +	}
> +	mmap_read_unlock(mm);
> +	mmput(mm);
> +	return ret;
> +
> +out:
> +	mmap_read_unlock(mm);
> +	mmput(mm);
> +	return ret;
> +}
> +
> +static int dmirror_migrate_to_device(struct dmirror *dmirror,
> +				     struct hmm_dmirror_cmd *cmd)
>  {
>  	unsigned long start, end, addr;
>  	unsigned long size = cmd->npages << PAGE_SHIFT;
> @@ -849,6 +965,7 @@ static int dmirror_migrate(struct dmirror *dmirror,
>  		if (ret)
>  			goto out;
>
> +		pr_debug("Migrating from sys mem to device mem\n");
>  		dmirror_migrate_alloc_and_copy(&args, dmirror);
>  		migrate_vma_pages(&args);
>  		dmirror_migrate_finalize_and_map(&args, dmirror);
> @@ -857,7 +974,7 @@ static int dmirror_migrate(struct dmirror *dmirror,
>  	mmap_read_unlock(mm);
>  	mmput(mm);
>
> -	/* Return the migrated data for verification. */
> +	/* Return the migrated data for verification. only for pages in device zone */
>  	ret = dmirror_bounce_init(&bounce, start, size);
>  	if (ret)
>  		return ret;
> @@ -894,9 +1011,15 @@ static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,
>  	}
>
>  	page = hmm_pfn_to_page(entry);
> -	if (is_device_private_page(page)) {
> -		/* Is the page migrated to this device or some other? */
> -		if (dmirror->mdevice == dmirror_page_to_device(page))
> +	if (is_device_page(page)) {
> +		/* Is page ZONE_DEVICE coherent? */
> +		if (!is_device_private_page(page))
> +			*perm = HMM_DMIRROR_PROT_DEV_COHERENT;
Does it make sense to distinguish between DEV_COHERENT_LOCAL and DEV_COHERENT_REMOTE as well?
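If so, this branch could mirror the private-page logic. A rough sketch,
using hypothetical HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL/REMOTE values that
this patch does not define:

	/* Is page ZONE_DEVICE coherent? */
	if (!is_device_private_page(page)) {
		if (dmirror->mdevice == dmirror_page_to_device(page))
			*perm = HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL;
		else
			*perm = HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE;
	}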
> +		/*
> +		 * Is page ZONE_DEVICE private migrated to
> +		 * this device or some other?
> +		 */
> +		else if (dmirror->mdevice == dmirror_page_to_device(page))
>  			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
>  		else
>  			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
> @@ -1096,8 +1219,12 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp,
>  		ret = dmirror_write(dmirror, &cmd);
>  		break;
>
> -	case HMM_DMIRROR_MIGRATE:
> -		ret = dmirror_migrate(dmirror, &cmd);
> +	case HMM_DMIRROR_MIGRATE_TO_DEV:
> +		ret = dmirror_migrate_to_device(dmirror, &cmd);
> +		break;
> +
> +	case HMM_DMIRROR_MIGRATE_TO_SYS:
> +		ret = dmirror_migrate_to_system(dmirror, &cmd);
>  		break;
>
>  	case HMM_DMIRROR_EXCLUSIVE:
> @@ -1152,38 +1279,6 @@ static void dmirror_devmem_free(struct page *page)
>  	spin_unlock(&mdevice->lock);
>  }
>
> -static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
> -						      struct dmirror *dmirror)
> -{
> -	const unsigned long *src = args->src;
> -	unsigned long *dst = args->dst;
> -	unsigned long start = args->start;
> -	unsigned long end = args->end;
> -	unsigned long addr;
> -
> -	for (addr = start; addr < end; addr += PAGE_SIZE,
> -				       src++, dst++) {
> -		struct page *dpage, *spage;
> -
> -		spage = migrate_pfn_to_page(*src);
> -		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
> -			continue;
> -		spage = spage->zone_device_data;
> -
> -		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
> -		if (!dpage)
> -			continue;
> -
> -		lock_page(dpage);
> -		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
> -		copy_highpage(dpage, spage);
> -		*dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
> -		if (*src & MIGRATE_PFN_WRITE)
> -			*dst |= MIGRATE_PFN_WRITE;
> -	}
> -	return 0;
> -}
> -
>  static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
>  {
>  	struct migrate_vma args;
> diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h
> index 77f81e6314eb..af15c35a90f4 100644
> --- a/lib/test_hmm_uapi.h
> +++ b/lib/test_hmm_uapi.h
> @@ -19,6 +19,7 @@
>   * @npages: (in) number of pages to read/write
>   * @cpages: (out) number of pages copied
>   * @faults: (out) number of device page faults seen
> + * @zone_device_type: (out) zone device memory type
>   */
>  struct hmm_dmirror_cmd {
>  	__u64		addr;
> @@ -32,11 +33,12 @@ struct hmm_dmirror_cmd {
>  /* Expose the address space of the calling process through hmm device file */
>  #define HMM_DMIRROR_READ		_IOWR('H', 0x00, struct hmm_dmirror_cmd)
>  #define HMM_DMIRROR_WRITE		_IOWR('H', 0x01, struct hmm_dmirror_cmd)
> -#define HMM_DMIRROR_MIGRATE		_IOWR('H', 0x02, struct hmm_dmirror_cmd)
> -#define HMM_DMIRROR_SNAPSHOT		_IOWR('H', 0x03, struct hmm_dmirror_cmd)
> -#define HMM_DMIRROR_EXCLUSIVE		_IOWR('H', 0x04, struct hmm_dmirror_cmd)
> -#define HMM_DMIRROR_CHECK_EXCLUSIVE	_IOWR('H', 0x05, struct hmm_dmirror_cmd)
> -#define HMM_DMIRROR_GET_MEM_DEV_TYPE	_IOWR('H', 0x06, struct hmm_dmirror_cmd)
> +#define HMM_DMIRROR_MIGRATE_TO_DEV	_IOWR('H', 0x02, struct hmm_dmirror_cmd)
> +#define HMM_DMIRROR_MIGRATE_TO_SYS	_IOWR('H', 0x03, struct hmm_dmirror_cmd)
> +#define HMM_DMIRROR_SNAPSHOT		_IOWR('H', 0x04, struct hmm_dmirror_cmd)
> +#define HMM_DMIRROR_EXCLUSIVE		_IOWR('H', 0x05, struct hmm_dmirror_cmd)
> +#define HMM_DMIRROR_CHECK_EXCLUSIVE	_IOWR('H', 0x06, struct hmm_dmirror_cmd)
> +#define HMM_DMIRROR_GET_MEM_DEV_TYPE	_IOWR('H', 0x07, struct hmm_dmirror_cmd)
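As an aside, a userspace caller driving the renumbered ioctls would look
roughly like this. This is only a sketch; it assumes the /dev/hmm_dmirror0
node name used by the hmm selftests and a page-aligned buffer buf mapped in
the caller:

	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include "test_hmm_uapi.h"	/* HMM_DMIRROR_* ioctls */

	static int migrate_round_trip(void *buf, unsigned long npages)
	{
		struct hmm_dmirror_cmd cmd = { 0 };
		/* device node name assumed from the hmm selftests */
		int fd = open("/dev/hmm_dmirror0", O_RDWR);

		if (fd == -1)
			return -1;
		cmd.addr = (uint64_t)(uintptr_t)buf;
		cmd.npages = npages;
		/* migrate to device memory, then back to system memory */
		if (ioctl(fd, HMM_DMIRROR_MIGRATE_TO_DEV, &cmd) == -1)
			perror("HMM_DMIRROR_MIGRATE_TO_DEV");
		if (ioctl(fd, HMM_DMIRROR_MIGRATE_TO_SYS, &cmd) == -1)
			perror("HMM_DMIRROR_MIGRATE_TO_SYS");
		close(fd);
		return 0;
	}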
>
>  /*
>   * Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT.
> @@ -51,6 +53,8 @@ struct hmm_dmirror_cmd {
>   *	device the ioctl() is made
>   * HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE: Migrated device private page on some
>   *	other device
> + * HMM_DMIRROR_PROT_DEV_COHERENT: Migrate device coherent page on the device
> + *	the ioctl() is made
>   */
>  enum {
>  	HMM_DMIRROR_PROT_ERROR			= 0xFF,
> @@ -62,6 +66,7 @@ enum {
>  	HMM_DMIRROR_PROT_ZERO			= 0x10,
>  	HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL	= 0x20,
>  	HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE	= 0x30,
> +	HMM_DMIRROR_PROT_DEV_COHERENT		= 0x40,
Does it make sense to distinguish between DEV_COHERENT_LOCAL and DEV_COHERENT_REMOTE as well?
Regards,
  Felix
>  };
>
>  enum {