I haven't tested the change that checks pages are migrated back to sysmem, but it looks ok, so:
Reviewed-by: Alistair Popple <apopple@nvidia.com>
On Thursday, 27 January 2022 2:09:47 PM AEDT Alex Sierra wrote:
Device Coherent type uses device memory that is coherently accessible by the CPU. Such memory may be shown as an SP (special purpose) memory range in the BIOS-e820 memory enumeration. If the system has no supported SP memory, it can be faked by setting CONFIG_EFI_FAKE_MEMMAP.
Currently, test_hmm only supports two different SP ranges of at least 256MB each. These can be specified with the efi_fake_mem kernel parameter. For example, two 1GB SP ranges starting at physical addresses 0x100000000 and 0x140000000: efi_fake_mem=1G@0x100000000:0x40000,1G@0x140000000:0x40000
Private and coherent device mirror instances can be created in the same probe by passing the module parameters spm_addr_dev0 and spm_addr_dev1. In that case, four device_mirror instances are created: the first two are private type and the last two are coherent type. They can then be accessed from user space through /dev/hmm_mirror<num_device>, where num_device 0 and 1 are the private instances and 2 and 3 the coherent ones. If no module parameters are passed, only two private-type device_mirror instances are created.
Signed-off-by: Alex Sierra <alex.sierra@amd.com>
v4: Return the number of coherent device pages successfully migrated back to system memory. This is reported in cmd->cpages.
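For reference, a minimal userspace sketch (not part of this patch) of how the reworked ioctls could be exercised once the module is loaded with spm_addr_dev0/spm_addr_dev1 set. The device node path /dev/hmm_dmirror2 and the verification buffer passed in cmd.ptr follow the style of the existing hmm selftests and are assumptions here, not something this patch defines:

/* Hypothetical example: migrate an anonymous range to a coherent device
 * mirror instance and back, checking cmd.cpages after each step.
 * Assumes the third device node (index 2) is the first coherent instance
 * and that test_hmm_uapi.h is on the include path.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include "test_hmm_uapi.h"

int main(void)
{
        unsigned long npages = 16;
        size_t size = npages * getpagesize();
        struct hmm_dmirror_cmd cmd = { 0 };
        void *buf, *chk;
        int fd;

        fd = open("/dev/hmm_dmirror2", O_RDWR);  /* assumed coherent instance */
        if (fd < 0)
                return 1;

        buf = mmap(NULL, size, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        chk = mmap(NULL, size, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        memset(buf, 0xaa, size);

        cmd.addr = (unsigned long)buf;
        cmd.ptr = (unsigned long)chk;   /* receives a copy for verification */
        cmd.npages = npages;

        /* Migrate the range into (coherent) device memory... */
        if (ioctl(fd, HMM_DMIRROR_MIGRATE_TO_DEV, &cmd) == 0)
                printf("to dev: %llu pages\n", (unsigned long long)cmd.cpages);

        /* ...and back; cmd.cpages reports the pages successfully moved. */
        if (ioctl(fd, HMM_DMIRROR_MIGRATE_TO_SYS, &cmd) == 0)
                printf("to sys: %llu pages\n", (unsigned long long)cmd.cpages);

        munmap(buf, size);
        munmap(chk, size);
        close(fd);
        return 0;
}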
 lib/test_hmm.c      | 260 +++++++++++++++++++++++++++++++++-----------
 lib/test_hmm_uapi.h |  15 ++-
 2 files changed, 205 insertions(+), 70 deletions(-)
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index 6f068f7c4ee3..850d5331e370 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -29,11 +29,22 @@
#include "test_hmm_uapi.h"
-#define DMIRROR_NDEVICES		2
+#define DMIRROR_NDEVICES		4
 #define DMIRROR_RANGE_FAULT_TIMEOUT	1000
 #define DEVMEM_CHUNK_SIZE		(256 * 1024 * 1024U)
 #define DEVMEM_CHUNKS_RESERVE		16
+/*
+ * For device_private pages, dpage is just a dummy struct page
+ * representing a piece of device memory. dmirror_devmem_alloc_page
+ * allocates a real system memory page as backing storage to fake a
+ * real device. zone_device_data points to that backing page. But
+ * for device_coherent memory, the struct page represents real
+ * physical CPU-accessible memory that we can use directly.
+ */
+#define BACKING_PAGE(page) (is_device_private_page((page)) ? \
+			   (page)->zone_device_data : (page))
+
 static unsigned long spm_addr_dev0;
 module_param(spm_addr_dev0, long, 0644);
 MODULE_PARM_DESC(spm_addr_dev0,
@@ -122,6 +133,21 @@ static int dmirror_bounce_init(struct dmirror_bounce *bounce,
 	return 0;
 }
 
+static bool dmirror_is_private_zone(struct dmirror_device *mdevice)
+{
+	return (mdevice->zone_device_type ==
+		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ? true : false;
+}
+
+static enum migrate_vma_direction
+	dmirror_select_device(struct dmirror *dmirror)
+{
+	return (dmirror->mdevice->zone_device_type ==
+		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE) ?
+		MIGRATE_VMA_SELECT_DEVICE_PRIVATE :
+		MIGRATE_VMA_SELECT_DEVICE_COHERENT;
+}
+
 static void dmirror_bounce_fini(struct dmirror_bounce *bounce)
 {
 	vfree(bounce->ptr);
@@ -572,16 +598,19 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
 static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
 {
 	struct page *dpage = NULL;
-	struct page *rpage;
+	struct page *rpage = NULL;
 	/*
-	 * This is a fake device so we alloc real system memory to store
-	 * our device memory.
+	 * For ZONE_DEVICE private type, this is a fake device so we alloc real
+	 * system memory to store our device memory.
+	 * For ZONE_DEVICE coherent type we use the actual dpage to store the data
+	 * and ignore rpage.
 	 */
-	rpage = alloc_page(GFP_HIGHUSER);
-	if (!rpage)
-		return NULL;
+	if (dmirror_is_private_zone(mdevice)) {
+		rpage = alloc_page(GFP_HIGHUSER);
+		if (!rpage)
+			return NULL;
+	}
 
 	spin_lock(&mdevice->lock);
 	if (mdevice->free_pages) {
@@ -601,7 +630,8 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
 	return dpage;
 
 error:
-	__free_page(rpage);
+	if (rpage)
+		__free_page(rpage);
 	return NULL;
 }
@@ -627,12 +657,16 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
 		 * unallocated pte_none() or read-only zero page.
 		 */
 		spage = migrate_pfn_to_page(*src);
+		if (WARN(spage && is_zone_device_page(spage),
+			 "page already in device spage pfn: 0x%lx\n",
+			 page_to_pfn(spage)))
+			continue;
 
 		dpage = dmirror_devmem_alloc_page(mdevice);
 		if (!dpage)
 			continue;
 
-		rpage = dpage->zone_device_data;
+		rpage = BACKING_PAGE(dpage);
 		if (spage)
 			copy_highpage(rpage, spage);
 		else
@@ -646,6 +680,8 @@ static void dmirror_migrate_alloc_and_copy(struct migrate_vma *args,
 		 */
 		rpage->zone_device_data = dmirror;
 
+		pr_debug("migrating from sys to dev pfn src: 0x%lx pfn dst: 0x%lx\n",
+			 page_to_pfn(spage), page_to_pfn(dpage));
 		*dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
 		if ((*src & MIGRATE_PFN_WRITE) ||
@@ -724,11 +760,7 @@ static int dmirror_migrate_finalize_and_map(struct migrate_vma *args,
 		if (!dpage)
 			continue;
 
-		/*
-		 * Store the page that holds the data so the page table
-		 * doesn't have to deal with ZONE_DEVICE private pages.
-		 */
-		entry = dpage->zone_device_data;
+		entry = BACKING_PAGE(dpage);
 		if (*dst & MIGRATE_PFN_WRITE)
 			entry = xa_tag_pointer(entry, DPT_XA_TAG_WRITE);
 		entry = xa_store(&dmirror->pt, pfn, entry, GFP_ATOMIC);
@@ -808,15 +840,124 @@ static int dmirror_exclusive(struct dmirror *dmirror,
 	return ret;
 }
 
-static int dmirror_migrate(struct dmirror *dmirror,
-			   struct hmm_dmirror_cmd *cmd)
+static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
+						      struct dmirror *dmirror)
+{
+	const unsigned long *src = args->src;
+	unsigned long *dst = args->dst;
+	unsigned long start = args->start;
+	unsigned long end = args->end;
+	unsigned long addr;
+
+	for (addr = start; addr < end; addr += PAGE_SIZE,
+				       src++, dst++) {
+		struct page *dpage, *spage;
+
+		spage = migrate_pfn_to_page(*src);
+		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
+			continue;
+
+		if (WARN_ON(!is_dev_private_or_coherent_page(spage)))
+			continue;
+		spage = BACKING_PAGE(spage);
+		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
+		if (!dpage)
+			continue;
+		pr_debug("migrating from dev to sys pfn src: 0x%lx pfn dst: 0x%lx\n",
+			 page_to_pfn(spage), page_to_pfn(dpage));
+
+		lock_page(dpage);
+		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
+		copy_highpage(dpage, spage);
+		*dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
+		if (*src & MIGRATE_PFN_WRITE)
+			*dst |= MIGRATE_PFN_WRITE;
+	}
+	return 0;
+}
+
+static unsigned long dmirror_successful_migrated_pages(struct migrate_vma *migrate)
+{
+	unsigned long cpages = 0;
+	unsigned long i;
+
+	for (i = 0; i < migrate->npages; i++) {
+		if (migrate->src[i] & MIGRATE_PFN_VALID &&
+		    migrate->src[i] & MIGRATE_PFN_MIGRATE)
+			cpages++;
+	}
+	return cpages;
+}
+
+static int dmirror_migrate_to_system(struct dmirror *dmirror,
+				     struct hmm_dmirror_cmd *cmd)
+{
+	unsigned long start, end, addr;
+	unsigned long size = cmd->npages << PAGE_SHIFT;
+	struct mm_struct *mm = dmirror->notifier.mm;
+	struct vm_area_struct *vma;
+	unsigned long src_pfns[64] = { 0 };
+	unsigned long dst_pfns[64] = { 0 };
+	struct migrate_vma args;
+	unsigned long next;
+	int ret;
+
+	start = cmd->addr;
+	end = start + size;
+	if (end < start)
+		return -EINVAL;
+
+	/* Since the mm is for the mirrored process, get a reference first. */
+	if (!mmget_not_zero(mm))
+		return -EINVAL;
+
+	cmd->cpages = 0;
+	mmap_read_lock(mm);
+	for (addr = start; addr < end; addr = next) {
+		vma = vma_lookup(mm, addr);
+		if (!vma || !(vma->vm_flags & VM_READ)) {
+			ret = -EINVAL;
+			goto out;
+		}
+		next = min(end, addr + (ARRAY_SIZE(src_pfns) << PAGE_SHIFT));
+		if (next > vma->vm_end)
+			next = vma->vm_end;
+
+		args.vma = vma;
+		args.src = src_pfns;
+		args.dst = dst_pfns;
+		args.start = addr;
+		args.end = next;
+		args.pgmap_owner = dmirror->mdevice;
+		args.flags = dmirror_select_device(dmirror);
+
+		ret = migrate_vma_setup(&args);
+		if (ret)
+			goto out;
+
+		pr_debug("Migrating from device mem to sys mem\n");
+		dmirror_devmem_fault_alloc_and_copy(&args, dmirror);
+
+		migrate_vma_pages(&args);
+		cmd->cpages += dmirror_successful_migrated_pages(&args);
+		migrate_vma_finalize(&args);
+	}
+out:
+	mmap_read_unlock(mm);
+	mmput(mm);
+
+	return ret;
+}
+
+static int dmirror_migrate_to_device(struct dmirror *dmirror,
+				     struct hmm_dmirror_cmd *cmd)
 {
 	unsigned long start, end, addr;
 	unsigned long size = cmd->npages << PAGE_SHIFT;
 	struct mm_struct *mm = dmirror->notifier.mm;
 	struct vm_area_struct *vma;
-	unsigned long src_pfns[64];
-	unsigned long dst_pfns[64];
+	unsigned long src_pfns[64] = { 0 };
+	unsigned long dst_pfns[64] = { 0 };
 	struct dmirror_bounce bounce;
 	struct migrate_vma args;
 	unsigned long next;
@@ -853,6 +994,7 @@ static int dmirror_migrate(struct dmirror *dmirror,
 	if (ret)
 		goto out;
 
+	pr_debug("Migrating from sys mem to device mem\n");
 	dmirror_migrate_alloc_and_copy(&args, dmirror);
 	migrate_vma_pages(&args);
 	dmirror_migrate_finalize_and_map(&args, dmirror);
@@ -861,7 +1003,7 @@ static int dmirror_migrate(struct dmirror *dmirror,
 	mmap_read_unlock(mm);
 	mmput(mm);
 
-	/* Return the migrated data for verification. */
+	/* Return the migrated data for verification. only for pages in device zone */
 	ret = dmirror_bounce_init(&bounce, start, size);
 	if (ret)
 		return ret;
@@ -898,12 +1040,22 @@ static void dmirror_mkentry(struct dmirror *dmirror, struct hmm_range *range,
 	}
 
 	page = hmm_pfn_to_page(entry);
-	if (is_device_private_page(page)) {
-		/* Is the page migrated to this device or some other? */
-		if (dmirror->mdevice == dmirror_page_to_device(page))
+	if (is_dev_private_or_coherent_page(page)) {
+		/* Is page ZONE_DEVICE coherent? */
+		if (is_device_coherent_page(page)) {
+			if (dmirror->mdevice == dmirror_page_to_device(page))
+				*perm = HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL;
+			else
+				*perm = HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE;
+		/*
+		 * Is page ZONE_DEVICE private migrated to
+		 * this device or some other?
+		 */
+		} else if (dmirror->mdevice == dmirror_page_to_device(page)) {
 			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL;
-		else
+		} else {
 			*perm = HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE;
+		}
 	} else if (is_zero_pfn(page_to_pfn(page)))
 		*perm = HMM_DMIRROR_PROT_ZERO;
 	else
@@ -1100,8 +1252,12 @@ static long dmirror_fops_unlocked_ioctl(struct file *filp,
 		ret = dmirror_write(dmirror, &cmd);
 		break;
 
-	case HMM_DMIRROR_MIGRATE:
-		ret = dmirror_migrate(dmirror, &cmd);
+	case HMM_DMIRROR_MIGRATE_TO_DEV:
+		ret = dmirror_migrate_to_device(dmirror, &cmd);
+		break;
+
+	case HMM_DMIRROR_MIGRATE_TO_SYS:
+		ret = dmirror_migrate_to_system(dmirror, &cmd);
 		break;
 
case HMM_DMIRROR_EXCLUSIVE:
@@ -1142,14 +1298,13 @@ static const struct file_operations dmirror_fops = {
 static void dmirror_devmem_free(struct page *page)
 {
-	struct page *rpage = page->zone_device_data;
+	struct page *rpage = BACKING_PAGE(page);
 	struct dmirror_device *mdevice;
 
-	if (rpage)
+	if (rpage != page)
 		__free_page(rpage);
 
 	mdevice = dmirror_page_to_device(page);
-
 	spin_lock(&mdevice->lock);
 	mdevice->cfree++;
 	page->zone_device_data = mdevice->free_pages;
@@ -1157,43 +1312,11 @@ static void dmirror_devmem_free(struct page *page)
 	spin_unlock(&mdevice->lock);
 }
 
-static vm_fault_t dmirror_devmem_fault_alloc_and_copy(struct migrate_vma *args,
-						      struct dmirror *dmirror)
-{
-	const unsigned long *src = args->src;
-	unsigned long *dst = args->dst;
-	unsigned long start = args->start;
-	unsigned long end = args->end;
-	unsigned long addr;
-
-	for (addr = start; addr < end; addr += PAGE_SIZE,
-				       src++, dst++) {
-		struct page *dpage, *spage;
-
-		spage = migrate_pfn_to_page(*src);
-		if (!spage || !(*src & MIGRATE_PFN_MIGRATE))
-			continue;
-		spage = spage->zone_device_data;
-
-		dpage = alloc_page_vma(GFP_HIGHUSER_MOVABLE, args->vma, addr);
-		if (!dpage)
-			continue;
-
-		lock_page(dpage);
-		xa_erase(&dmirror->pt, addr >> PAGE_SHIFT);
-		copy_highpage(dpage, spage);
-		*dst = migrate_pfn(page_to_pfn(dpage)) | MIGRATE_PFN_LOCKED;
-		if (*src & MIGRATE_PFN_WRITE)
-			*dst |= MIGRATE_PFN_WRITE;
-	}
-	return 0;
-}
-
 static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
 {
 	struct migrate_vma args;
-	unsigned long src_pfns;
-	unsigned long dst_pfns;
+	unsigned long src_pfns = 0;
+	unsigned long dst_pfns = 0;
 	struct page *rpage;
 	struct dmirror *dmirror;
 	vm_fault_t ret;
@@ -1213,7 +1336,7 @@ static vm_fault_t dmirror_devmem_fault(struct vm_fault *vmf)
 	args.src = &src_pfns;
 	args.dst = &dst_pfns;
 	args.pgmap_owner = dmirror->mdevice;
-	args.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE;
+	args.flags = dmirror_select_device(dmirror);
 
 	if (migrate_vma_setup(&args))
 		return VM_FAULT_SIGBUS;
@@ -1292,6 +1415,12 @@ static int __init hmm_dmirror_init(void)
 		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
 	dmirror_devices[ndevices++].zone_device_type =
 		HMM_DMIRROR_MEMORY_DEVICE_PRIVATE;
+	if (spm_addr_dev0 && spm_addr_dev1) {
+		dmirror_devices[ndevices++].zone_device_type =
+			HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
+		dmirror_devices[ndevices++].zone_device_type =
+			HMM_DMIRROR_MEMORY_DEVICE_COHERENT;
+	}
 	for (id = 0; id < ndevices; id++) {
 		ret = dmirror_device_init(dmirror_devices + id, id);
 		if (ret)
@@ -1314,7 +1443,8 @@ static void __exit hmm_dmirror_exit(void)
 	int id;
 
 	for (id = 0; id < DMIRROR_NDEVICES; id++)
-		dmirror_device_remove(dmirror_devices + id);
+		if (dmirror_devices[id].zone_device_type)
+			dmirror_device_remove(dmirror_devices + id);
 
 	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
}
diff --git a/lib/test_hmm_uapi.h b/lib/test_hmm_uapi.h
index 625f3690d086..e190b2ab6f19 100644
--- a/lib/test_hmm_uapi.h
+++ b/lib/test_hmm_uapi.h
@@ -33,11 +33,12 @@ struct hmm_dmirror_cmd {
 /* Expose the address space of the calling process through hmm device file */
 #define HMM_DMIRROR_READ		_IOWR('H', 0x00, struct hmm_dmirror_cmd)
 #define HMM_DMIRROR_WRITE		_IOWR('H', 0x01, struct hmm_dmirror_cmd)
-#define HMM_DMIRROR_MIGRATE		_IOWR('H', 0x02, struct hmm_dmirror_cmd)
-#define HMM_DMIRROR_SNAPSHOT		_IOWR('H', 0x03, struct hmm_dmirror_cmd)
-#define HMM_DMIRROR_EXCLUSIVE		_IOWR('H', 0x04, struct hmm_dmirror_cmd)
-#define HMM_DMIRROR_CHECK_EXCLUSIVE	_IOWR('H', 0x05, struct hmm_dmirror_cmd)
-#define HMM_DMIRROR_GET_MEM_DEV_TYPE	_IOWR('H', 0x06, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_MIGRATE_TO_DEV	_IOWR('H', 0x02, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_MIGRATE_TO_SYS	_IOWR('H', 0x03, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_SNAPSHOT		_IOWR('H', 0x04, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_EXCLUSIVE		_IOWR('H', 0x05, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_CHECK_EXCLUSIVE	_IOWR('H', 0x06, struct hmm_dmirror_cmd)
+#define HMM_DMIRROR_GET_MEM_DEV_TYPE	_IOWR('H', 0x07, struct hmm_dmirror_cmd)
 
 /*
  * Values returned in hmm_dmirror_cmd.ptr for HMM_DMIRROR_SNAPSHOT.
@@ -52,6 +53,8 @@ struct hmm_dmirror_cmd {
  *					device the ioctl() is made
  * HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE: Migrated device private page on some
  *					other device
+ * HMM_DMIRROR_PROT_DEV_COHERENT: Migrate device coherent page on the device
+ *				   the ioctl() is made
  */
 enum {
 	HMM_DMIRROR_PROT_ERROR			= 0xFF,
@@ -63,6 +66,8 @@ enum {
 	HMM_DMIRROR_PROT_ZERO			= 0x10,
 	HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL	= 0x20,
 	HMM_DMIRROR_PROT_DEV_PRIVATE_REMOTE	= 0x30,
+	HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL	= 0x40,
+	HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE	= 0x50,
 };
 
 enum {
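To illustrate the two new snapshot values, here is a hypothetical userspace sketch (not part of this patch) that snapshots a range and counts pages reported as device-coherent local vs. remote. The helper name and setup are assumptions; the masking reflects that the snapshot byte also carries the read/write and huge-page bits:

/* Hypothetical helper: count snapshot entries that report device coherent
 * pages. fd is an open hmm_dmirror instance and addr/npages describe a
 * range previously migrated with HMM_DMIRROR_MIGRATE_TO_DEV.
 */
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include "test_hmm_uapi.h"

static void count_coherent(int fd, void *addr, unsigned long npages)
{
        unsigned char *perm = calloc(npages, 1);
        struct hmm_dmirror_cmd cmd = { 0 };
        unsigned long i, local = 0, remote = 0;

        cmd.addr = (unsigned long)addr;
        cmd.ptr = (unsigned long)perm;  /* one snapshot byte per page */
        cmd.npages = npages;

        if (ioctl(fd, HMM_DMIRROR_SNAPSHOT, &cmd) == 0) {
                for (i = 0; i < npages; i++) {
                        /* strip the read/write/huge-page bits OR-ed in */
                        unsigned char val = perm[i] &
                                ~(HMM_DMIRROR_PROT_READ |
                                  HMM_DMIRROR_PROT_WRITE |
                                  HMM_DMIRROR_PROT_PMD |
                                  HMM_DMIRROR_PROT_PUD);

                        if (val == HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL)
                                local++;
                        else if (val == HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE)
                                remote++;
                }
                printf("coherent pages: %lu local, %lu remote of %lu\n",
                       local, remote, npages);
        }
        free(perm);
}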