This is the third version of the patch series. Change log:
v3: * Add parameter 'iova' to the new ibv_reg_dmabuf_mr() API * Change the way of allocating dma-buf object - use /dev/dri/renderD* instead of /dev/dri/card* and use GEM object instead of dumb buffer * Add cmake function to allow building modules with mixed cython and C source files * Add new tests that use dma-buf MRs for send/recv and rdma traffic * Skip dma-buf tests on unsupported systems * Remove some use of random values in the new tests * Add dealloc() and close() methods to the new classes * Replace string.format with f-string in python code * Fix some coding style issues: spacing, indentation, typo, comments
v2: https://www.spinics.net/lists/linux-rdma/msg97936.html * Put the kernel header updates into a separate commit * Add comments for the data structure used in python ioctl calls * Fix issues related to symbol versioning * Fix styling issues: extra spaces, unnecessary variable, typo * Fix an improper error code usage * Put the new op into ibv_context_ops instead of verbs_context
v1: https://www.spinics.net/lists/linux-rdma/msg97865.html * Add user space API for registering dma-buf based memory regions * Update pyverbs with the new API * Add new tests
This is the user space counterpart of the kernel patch set to add dma-buf support to the RDMA subsystem.
This series consists of six patches. The first patch updates the kernel headers for dma-buf support. Patch 2 adds the new API function and updates the man pages. Patch 3 implements the new API in the mlx5 provider. Patch 4 adds new class definitions to pyverbs for the new API. Patch 5 adds a set of new tests for the new API. Patch 6 fixes a bug in the utility code of the tests.
Pull request at github: https://github.com/linux-rdma/rdma-core/pull/895
Jianxin Xiong (6): Update kernel headers verbs: Support dma-buf based memory region mlx5: Support dma-buf based memory region pyverbs: Add dma-buf based MR support tests: Add tests for dma-buf based memory regions tests: Bug fix for get_access_flags()
buildlib/pyverbs_functions.cmake | 52 ++++++ debian/libibverbs1.symbols | 2 + kernel-headers/rdma/ib_user_ioctl_cmds.h | 14 ++ kernel-headers/rdma/ib_user_verbs.h | 14 -- libibverbs/CMakeLists.txt | 2 +- libibverbs/cmd_mr.c | 38 ++++ libibverbs/driver.h | 7 + libibverbs/dummy_ops.c | 11 ++ libibverbs/libibverbs.map.in | 6 + libibverbs/man/ibv_reg_mr.3 | 27 ++- libibverbs/verbs.c | 18 ++ libibverbs/verbs.h | 11 ++ providers/mlx5/mlx5.c | 2 + providers/mlx5/mlx5.h | 3 + providers/mlx5/verbs.c | 22 +++ pyverbs/CMakeLists.txt | 7 + pyverbs/dmabuf.pxd | 15 ++ pyverbs/dmabuf.pyx | 72 ++++++++ pyverbs/dmabuf_alloc.c | 296 +++++++++++++++++++++++++++++++ pyverbs/dmabuf_alloc.h | 19 ++ pyverbs/libibverbs.pxd | 2 + pyverbs/mr.pxd | 6 + pyverbs/mr.pyx | 103 ++++++++++- tests/test_mr.py | 239 ++++++++++++++++++++++++- tests/utils.py | 30 +++- 25 files changed, 996 insertions(+), 22 deletions(-) create mode 100644 pyverbs/dmabuf.pxd create mode 100644 pyverbs/dmabuf.pyx create mode 100644 pyverbs/dmabuf_alloc.c create mode 100644 pyverbs/dmabuf_alloc.h
To commit 2eef437c4669 ("RDMA/uverbs: Add uverbs command for dma-buf based MR registration").
Signed-off-by: Jianxin Xiong jianxin.xiong@intel.com --- kernel-headers/rdma/ib_user_ioctl_cmds.h | 14 ++++++++++++++ kernel-headers/rdma/ib_user_verbs.h | 14 -------------- 2 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/kernel-headers/rdma/ib_user_ioctl_cmds.h b/kernel-headers/rdma/ib_user_ioctl_cmds.h index 7968a18..dafc7eb 100644 --- a/kernel-headers/rdma/ib_user_ioctl_cmds.h +++ b/kernel-headers/rdma/ib_user_ioctl_cmds.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2018, Mellanox Technologies inc. All rights reserved. + * Copyright (c) 2020, Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -251,6 +252,7 @@ enum uverbs_methods_mr { UVERBS_METHOD_MR_DESTROY, UVERBS_METHOD_ADVISE_MR, UVERBS_METHOD_QUERY_MR, + UVERBS_METHOD_REG_DMABUF_MR, };
enum uverbs_attrs_mr_destroy_ids { @@ -272,6 +274,18 @@ enum uverbs_attrs_query_mr_cmd_attr_ids { UVERBS_ATTR_QUERY_MR_RESP_IOVA, };
+enum uverbs_attrs_reg_dmabuf_mr_cmd_attr_ids { + UVERBS_ATTR_REG_DMABUF_MR_HANDLE, + UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE, + UVERBS_ATTR_REG_DMABUF_MR_OFFSET, + UVERBS_ATTR_REG_DMABUF_MR_LENGTH, + UVERBS_ATTR_REG_DMABUF_MR_IOVA, + UVERBS_ATTR_REG_DMABUF_MR_FD, + UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS, + UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY, + UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY, +}; + enum uverbs_attrs_create_counters_cmd_attr_ids { UVERBS_ATTR_CREATE_COUNTERS_HANDLE, }; diff --git a/kernel-headers/rdma/ib_user_verbs.h b/kernel-headers/rdma/ib_user_verbs.h index 456438c..7ee73a0 100644 --- a/kernel-headers/rdma/ib_user_verbs.h +++ b/kernel-headers/rdma/ib_user_verbs.h @@ -596,20 +596,6 @@ enum { IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE, };
-enum { - /* - * This value is equal to IB_QP_DEST_QPN. - */ - IB_USER_LEGACY_LAST_QP_ATTR_MASK = 1ULL << 20, -}; - -enum { - /* - * This value is equal to IB_QP_RATE_LIMIT. - */ - IB_USER_LAST_QP_ATTR_MASK = 1ULL << 25, -}; - struct ib_uverbs_ex_create_qp { __aligned_u64 user_handle; __u32 pd_handle;
Add new API function and new provider method for registering dma-buf based memory region. Update the man page and bump the API version.
Signed-off-by: Jianxin Xiong jianxin.xiong@intel.com --- debian/libibverbs1.symbols | 2 ++ libibverbs/CMakeLists.txt | 2 +- libibverbs/cmd_mr.c | 38 ++++++++++++++++++++++++++++++++++++++ libibverbs/driver.h | 7 +++++++ libibverbs/dummy_ops.c | 11 +++++++++++ libibverbs/libibverbs.map.in | 6 ++++++ libibverbs/man/ibv_reg_mr.3 | 27 +++++++++++++++++++++++++-- libibverbs/verbs.c | 18 ++++++++++++++++++ libibverbs/verbs.h | 11 +++++++++++ 9 files changed, 119 insertions(+), 3 deletions(-)
diff --git a/debian/libibverbs1.symbols b/debian/libibverbs1.symbols index 9130f41..fcf4d87 100644 --- a/debian/libibverbs1.symbols +++ b/debian/libibverbs1.symbols @@ -9,6 +9,7 @@ libibverbs.so.1 libibverbs1 #MINVER# IBVERBS_1.9@IBVERBS_1.9 30 IBVERBS_1.10@IBVERBS_1.10 31 IBVERBS_1.11@IBVERBS_1.11 32 + IBVERBS_1.12@IBVERBS_1.12 33 (symver)IBVERBS_PRIVATE_33 33 _ibv_query_gid_ex@IBVERBS_1.11 32 _ibv_query_gid_table@IBVERBS_1.11 32 @@ -99,6 +100,7 @@ libibverbs.so.1 libibverbs1 #MINVER# ibv_rate_to_mbps@IBVERBS_1.1 1.1.8 ibv_rate_to_mult@IBVERBS_1.0 1.1.6 ibv_read_sysfs_file@IBVERBS_1.0 1.1.6 + ibv_reg_dmabuf_mr@IBVERBS_1.12 33 ibv_reg_mr@IBVERBS_1.0 1.1.6 ibv_reg_mr@IBVERBS_1.1 1.1.6 ibv_reg_mr_iova@IBVERBS_1.7 25 diff --git a/libibverbs/CMakeLists.txt b/libibverbs/CMakeLists.txt index 0fe4256..d075225 100644 --- a/libibverbs/CMakeLists.txt +++ b/libibverbs/CMakeLists.txt @@ -21,7 +21,7 @@ configure_file("libibverbs.map.in"
rdma_library(ibverbs "${CMAKE_CURRENT_BINARY_DIR}/libibverbs.map" # See Documentation/versioning.md - 1 1.11.${PACKAGE_VERSION} + 1 1.12.${PACKAGE_VERSION} all_providers.c cmd.c cmd_ah.c diff --git a/libibverbs/cmd_mr.c b/libibverbs/cmd_mr.c index 42dbe42..95ed2d1 100644 --- a/libibverbs/cmd_mr.c +++ b/libibverbs/cmd_mr.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2018 Mellanox Technologies, Ltd. All rights reserved. + * Copyright (c) 2020 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -116,3 +117,40 @@ int ibv_cmd_query_mr(struct ibv_pd *pd, struct verbs_mr *vmr, return 0; }
+int ibv_cmd_reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, size_t length, + uint64_t iova, int fd, int access, + struct verbs_mr *vmr) +{ + DECLARE_COMMAND_BUFFER(cmdb, UVERBS_OBJECT_MR, + UVERBS_METHOD_REG_DMABUF_MR, + 9); + struct ib_uverbs_attr *handle; + uint32_t lkey, rkey; + int ret; + + handle = fill_attr_out_obj(cmdb, UVERBS_ATTR_REG_DMABUF_MR_HANDLE); + fill_attr_out_ptr(cmdb, UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY, &lkey); + fill_attr_out_ptr(cmdb, UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY, &rkey); + + fill_attr_in_obj(cmdb, UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE, pd->handle); + fill_attr_in_uint64(cmdb, UVERBS_ATTR_REG_DMABUF_MR_OFFSET, offset); + fill_attr_in_uint64(cmdb, UVERBS_ATTR_REG_DMABUF_MR_LENGTH, length); + fill_attr_in_uint64(cmdb, UVERBS_ATTR_REG_DMABUF_MR_IOVA, iova); + fill_attr_in_uint32(cmdb, UVERBS_ATTR_REG_DMABUF_MR_FD, fd); + fill_attr_in_uint32(cmdb, UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS, access); + + ret = execute_ioctl(pd->context, cmdb); + if (ret) + return errno; + + vmr->ibv_mr.handle = read_attr_obj(UVERBS_ATTR_REG_DMABUF_MR_HANDLE, + handle); + vmr->ibv_mr.context = pd->context; + vmr->ibv_mr.lkey = lkey; + vmr->ibv_mr.rkey = rkey; + vmr->ibv_mr.pd = pd; + vmr->ibv_mr.addr = (void *)offset; + vmr->ibv_mr.length = length; + vmr->mr_type = IBV_MR_TYPE_MR; + return 0; +} diff --git a/libibverbs/driver.h b/libibverbs/driver.h index ab80f4b..d6a9d0a 100644 --- a/libibverbs/driver.h +++ b/libibverbs/driver.h @@ -2,6 +2,7 @@ * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005, 2006 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * Copyright (c) 2020 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -373,6 +374,9 @@ struct verbs_context_ops { struct ibv_mr *(*reg_dm_mr)(struct ibv_pd *pd, struct ibv_dm *dm, uint64_t dm_offset, size_t length, unsigned int access); + struct ibv_mr *(*reg_dmabuf_mr)(struct ibv_pd *pd, uint64_t offset, + size_t length, uint64_t iova, + int fd, int access); struct ibv_mr *(*reg_mr)(struct ibv_pd *pd, void *addr, size_t length, uint64_t hca_va, int access); int (*req_notify_cq)(struct ibv_cq *cq, int solicited_only); @@ -498,6 +502,9 @@ int ibv_cmd_advise_mr(struct ibv_pd *pd, uint32_t flags, struct ibv_sge *sg_list, uint32_t num_sge); +int ibv_cmd_reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, size_t length, + uint64_t iova, int fd, int access, + struct verbs_mr *vmr); int ibv_cmd_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type, struct ibv_mw *mw, struct ibv_alloc_mw *cmd, size_t cmd_size, diff --git a/libibverbs/dummy_ops.c b/libibverbs/dummy_ops.c index e5af9e4..64ef279 100644 --- a/libibverbs/dummy_ops.c +++ b/libibverbs/dummy_ops.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2017 Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 2020 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -452,6 +453,14 @@ static struct ibv_mr *reg_mr(struct ibv_pd *pd, void *addr, size_t length, return NULL; }
+static struct ibv_mr *reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, + size_t length, uint64_t iova, + int fd, int access) +{ + errno = EOPNOTSUPP; + return NULL; +} + static int req_notify_cq(struct ibv_cq *cq, int solicited_only) { return EOPNOTSUPP; @@ -560,6 +569,7 @@ const struct verbs_context_ops verbs_dummy_ops = { query_srq, read_counters, reg_dm_mr, + reg_dmabuf_mr, reg_mr, req_notify_cq, rereg_mr, @@ -689,6 +699,7 @@ void verbs_set_ops(struct verbs_context *vctx, SET_PRIV_OP_IC(vctx, set_ece); SET_PRIV_OP_IC(vctx, unimport_mr); SET_PRIV_OP_IC(vctx, unimport_pd); + SET_OP(ctx, reg_dmabuf_mr);
#undef SET_OP #undef SET_OP2 diff --git a/libibverbs/libibverbs.map.in b/libibverbs/libibverbs.map.in index b5ccaca..f67e1ef 100644 --- a/libibverbs/libibverbs.map.in +++ b/libibverbs/libibverbs.map.in @@ -148,6 +148,11 @@ IBVERBS_1.11 { _ibv_query_gid_table; } IBVERBS_1.10;
+IBVERBS_1.12 { + global: + ibv_reg_dmabuf_mr; +} IBVERBS_1.11; + /* If any symbols in this stanza change ABI then the entire staza gets a new symbol version. See the top level CMakeLists.txt for this setting. */
@@ -211,6 +216,7 @@ IBVERBS_PRIVATE_@IBVERBS_PABI_VERSION@ { ibv_cmd_query_srq; ibv_cmd_read_counters; ibv_cmd_reg_dm_mr; + ibv_cmd_reg_dmabuf_mr; ibv_cmd_reg_mr; ibv_cmd_req_notify_cq; ibv_cmd_rereg_mr; diff --git a/libibverbs/man/ibv_reg_mr.3 b/libibverbs/man/ibv_reg_mr.3 index 2bfc955..a522527 100644 --- a/libibverbs/man/ibv_reg_mr.3 +++ b/libibverbs/man/ibv_reg_mr.3 @@ -3,7 +3,7 @@ ." .TH IBV_REG_MR 3 2006-10-31 libibverbs "Libibverbs Programmer's Manual" .SH "NAME" -ibv_reg_mr, ibv_reg_mr_iova, ibv_dereg_mr - register or deregister a memory region (MR) +ibv_reg_mr, ibv_reg_mr_iova, ibv_reg_dmabuf_mr, ibv_dereg_mr - register or deregister a memory region (MR) .SH "SYNOPSIS" .nf .B #include <infiniband/verbs.h> @@ -15,6 +15,10 @@ ibv_reg_mr, ibv_reg_mr_iova, ibv_dereg_mr - register or deregister a memory reg .BI " size_t " "length" ", uint64_t " "hca_va" , .BI " int " "access" ); .sp +.BI "struct ibv_mr *ibv_reg_dmabuf_mr(struct ibv_pd " "*pd" ", uint64_t " "offset" , +.BI " size_t " "length" ", uint64_t " "iova" , +.BI " int " "fd" ", int " "access" ); +.sp .BI "int ibv_dereg_mr(struct ibv_mr " "*mr" ); .fi .SH "DESCRIPTION" @@ -71,11 +75,30 @@ a lkey or rkey. The offset in the memory region is computed as 'addr + (iova - hca_va)'. Specifying 0 for hca_va has the same effect as IBV_ACCESS_ZERO_BASED. .PP +.B ibv_reg_dmabuf_mr() +registers a dma-buf based memory region (MR) associated with the protection domain +.I pd\fR. +The MR starts at +.I offset +of the dma-buf and its size is +.I length\fR. +The dma-buf is identified by the file descriptor +.I fd\fR. +The argument +.I iova +specifies the virtual base address of the MR when accessed through a lkey or rkey. +It must have the same page offset as +.I offset\fR. 
+The argument +.I access +describes the desired memory protection attributes; it is similar to the ibv_reg_mr case except that only the following flags are supported: +.B IBV_ACCESS_LOCAL_WRITE, IBV_ACCESS_REMOTE_WRITE, IBV_ACCESS_REMOTE_READ, IBV_ACCESS_REMOTE_ATOMIC, IBV_ACCESS_RELAXED_ORDERING. +.PP .B ibv_dereg_mr() deregisters the MR .I mr\fR. .SH "RETURN VALUE" -.B ibv_reg_mr() / ibv_reg_mr_iova() +.B ibv_reg_mr() / ibv_reg_mr_iova() / ibv_reg_dmabuf_mr() returns a pointer to the registered MR, or NULL if the request fails. The local key (\fBL_Key\fR) field .B lkey diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c index 2b0ede8..6293462 100644 --- a/libibverbs/verbs.c +++ b/libibverbs/verbs.c @@ -1,6 +1,7 @@ /* * Copyright (c) 2005 Topspin Communications. All rights reserved. * Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved. + * Copyright (c) 2020 Intel Corperation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -367,6 +368,23 @@ void ibv_unimport_mr(struct ibv_mr *mr) get_ops(mr->context)->unimport_mr(mr); }
+struct ibv_mr *ibv_reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, + size_t length, uint64_t iova, int fd, + int access) +{ + struct ibv_mr *mr; + + mr = get_ops(pd->context)->reg_dmabuf_mr(pd, offset, length, iova, + fd, access); + if (mr) { + mr->context = pd->context; + mr->pd = pd; + mr->addr = (void *)offset; + mr->length = length; + } + return mr; +} + LATEST_SYMVER_FUNC(ibv_rereg_mr, 1_1, "IBVERBS_1.1", int, struct ibv_mr *mr, int flags, diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h index ee57e05..1a0af0e 100644 --- a/libibverbs/verbs.h +++ b/libibverbs/verbs.h @@ -3,6 +3,7 @@ * Copyright (c) 2004, 2011-2012 Intel Corporation. All rights reserved. * Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved. * Copyright (c) 2005 PathScale, Inc. All rights reserved. + * Copyright (c) 2020 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -1961,6 +1962,10 @@ struct ibv_context_ops { void *(*_compat_attach_mcast)(void); void *(*_compat_detach_mcast)(void); void *(*_compat_async_event)(void); + struct ibv_mr * (*reg_dmabuf_mr)(struct ibv_pd *pd, + uint64_t offset, size_t length, + uint64_t iova, int fd, + int access); };
struct ibv_context { @@ -2535,6 +2540,12 @@ __ibv_reg_mr_iova(struct ibv_pd *pd, void *addr, size_t length, uint64_t iova, __builtin_constant_p( \ ((access) & IBV_ACCESS_OPTIONAL_RANGE) == 0))
+/** + * ibv_reg_dmabuf_mr - Register a dmabuf-based memory region + */ +struct ibv_mr *ibv_reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, size_t length, + uint64_t iova, int fd, int access); + enum ibv_rereg_mr_err_code { /* Old MR is valid, invalid input */ IBV_REREG_MR_ERR_INPUT = -1,
On 11/27/2020 10:55 PM, Jianxin Xiong wrote:
Add new API function and new provider method for registering dma-buf based memory region. Update the man page and bump the API version.
Signed-off-by: Jianxin Xiong jianxin.xiong@intel.com
debian/libibverbs1.symbols | 2 ++ libibverbs/CMakeLists.txt | 2 +- libibverbs/cmd_mr.c | 38 ++++++++++++++++++++++++++++++++++++++ libibverbs/driver.h | 7 +++++++ libibverbs/dummy_ops.c | 11 +++++++++++ libibverbs/libibverbs.map.in | 6 ++++++ libibverbs/man/ibv_reg_mr.3 | 27 +++++++++++++++++++++++++-- libibverbs/verbs.c | 18 ++++++++++++++++++ libibverbs/verbs.h | 11 +++++++++++ 9 files changed, 119 insertions(+), 3 deletions(-)
This version still doesn't handle the fork() note that was given on the previous one.
Was that missed?
diff --git a/debian/libibverbs1.symbols b/debian/libibverbs1.symbols index 9130f41..fcf4d87 100644 --- a/debian/libibverbs1.symbols +++ b/debian/libibverbs1.symbols @@ -9,6 +9,7 @@ libibverbs.so.1 libibverbs1 #MINVER# IBVERBS_1.9@IBVERBS_1.9 30 IBVERBS_1.10@IBVERBS_1.10 31 IBVERBS_1.11@IBVERBS_1.11 32
- IBVERBS_1.12@IBVERBS_1.12 33 (symver)IBVERBS_PRIVATE_33 33 _ibv_query_gid_ex@IBVERBS_1.11 32 _ibv_query_gid_table@IBVERBS_1.11 32
@@ -99,6 +100,7 @@ libibverbs.so.1 libibverbs1 #MINVER# ibv_rate_to_mbps@IBVERBS_1.1 1.1.8 ibv_rate_to_mult@IBVERBS_1.0 1.1.6 ibv_read_sysfs_file@IBVERBS_1.0 1.1.6
- ibv_reg_dmabuf_mr@IBVERBS_1.12 33 ibv_reg_mr@IBVERBS_1.0 1.1.6 ibv_reg_mr@IBVERBS_1.1 1.1.6 ibv_reg_mr_iova@IBVERBS_1.7 25
diff --git a/libibverbs/CMakeLists.txt b/libibverbs/CMakeLists.txt index 0fe4256..d075225 100644 --- a/libibverbs/CMakeLists.txt +++ b/libibverbs/CMakeLists.txt @@ -21,7 +21,7 @@ configure_file("libibverbs.map.in"
rdma_library(ibverbs "${CMAKE_CURRENT_BINARY_DIR}/libibverbs.map" # See Documentation/versioning.md
- 1 1.11.${PACKAGE_VERSION}
- 1 1.12.${PACKAGE_VERSION} all_providers.c cmd.c cmd_ah.c
diff --git a/libibverbs/cmd_mr.c b/libibverbs/cmd_mr.c index 42dbe42..95ed2d1 100644 --- a/libibverbs/cmd_mr.c +++ b/libibverbs/cmd_mr.c @@ -1,5 +1,6 @@ /*
- Copyright (c) 2018 Mellanox Technologies, Ltd. All rights reserved.
- Copyright (c) 2020 Intel Corporation. All rights reserved.
- This software is available to you under a choice of one of two
- licenses. You may choose to be licensed under the terms of the GNU
@@ -116,3 +117,40 @@ int ibv_cmd_query_mr(struct ibv_pd *pd, struct verbs_mr *vmr, return 0; }
+int ibv_cmd_reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, size_t length,
uint64_t iova, int fd, int access,
struct verbs_mr *vmr)
+{
- DECLARE_COMMAND_BUFFER(cmdb, UVERBS_OBJECT_MR,
UVERBS_METHOD_REG_DMABUF_MR,
9);
- struct ib_uverbs_attr *handle;
- uint32_t lkey, rkey;
- int ret;
- handle = fill_attr_out_obj(cmdb, UVERBS_ATTR_REG_DMABUF_MR_HANDLE);
- fill_attr_out_ptr(cmdb, UVERBS_ATTR_REG_DMABUF_MR_RESP_LKEY, &lkey);
- fill_attr_out_ptr(cmdb, UVERBS_ATTR_REG_DMABUF_MR_RESP_RKEY, &rkey);
- fill_attr_in_obj(cmdb, UVERBS_ATTR_REG_DMABUF_MR_PD_HANDLE, pd->handle);
- fill_attr_in_uint64(cmdb, UVERBS_ATTR_REG_DMABUF_MR_OFFSET, offset);
- fill_attr_in_uint64(cmdb, UVERBS_ATTR_REG_DMABUF_MR_LENGTH, length);
- fill_attr_in_uint64(cmdb, UVERBS_ATTR_REG_DMABUF_MR_IOVA, iova);
- fill_attr_in_uint32(cmdb, UVERBS_ATTR_REG_DMABUF_MR_FD, fd);
- fill_attr_in_uint32(cmdb, UVERBS_ATTR_REG_DMABUF_MR_ACCESS_FLAGS, access);
- ret = execute_ioctl(pd->context, cmdb);
- if (ret)
return errno;
- vmr->ibv_mr.handle = read_attr_obj(UVERBS_ATTR_REG_DMABUF_MR_HANDLE,
handle);
- vmr->ibv_mr.context = pd->context;
- vmr->ibv_mr.lkey = lkey;
- vmr->ibv_mr.rkey = rkey;
- vmr->ibv_mr.pd = pd;
- vmr->ibv_mr.addr = (void *)offset;
- vmr->ibv_mr.length = length;
- vmr->mr_type = IBV_MR_TYPE_MR;
- return 0;
+} diff --git a/libibverbs/driver.h b/libibverbs/driver.h index ab80f4b..d6a9d0a 100644 --- a/libibverbs/driver.h +++ b/libibverbs/driver.h @@ -2,6 +2,7 @@
- Copyright (c) 2004, 2005 Topspin Communications. All rights reserved.
- Copyright (c) 2005, 2006 Cisco Systems, Inc. All rights reserved.
- Copyright (c) 2005 PathScale, Inc. All rights reserved.
- Copyright (c) 2020 Intel Corporation. All rights reserved.
- This software is available to you under a choice of one of two
- licenses. You may choose to be licensed under the terms of the GNU
@@ -373,6 +374,9 @@ struct verbs_context_ops { struct ibv_mr *(*reg_dm_mr)(struct ibv_pd *pd, struct ibv_dm *dm, uint64_t dm_offset, size_t length, unsigned int access);
- struct ibv_mr *(*reg_dmabuf_mr)(struct ibv_pd *pd, uint64_t offset,
size_t length, uint64_t iova,
struct ibv_mr *(*reg_mr)(struct ibv_pd *pd, void *addr, size_t length, uint64_t hca_va, int access); int (*req_notify_cq)(struct ibv_cq *cq, int solicited_only);int fd, int access);
@@ -498,6 +502,9 @@ int ibv_cmd_advise_mr(struct ibv_pd *pd, uint32_t flags, struct ibv_sge *sg_list, uint32_t num_sge); +int ibv_cmd_reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, size_t length,
uint64_t iova, int fd, int access,
int ibv_cmd_alloc_mw(struct ibv_pd *pd, enum ibv_mw_type type, struct ibv_mw *mw, struct ibv_alloc_mw *cmd, size_t cmd_size,struct verbs_mr *vmr);
diff --git a/libibverbs/dummy_ops.c b/libibverbs/dummy_ops.c index e5af9e4..64ef279 100644 --- a/libibverbs/dummy_ops.c +++ b/libibverbs/dummy_ops.c @@ -1,5 +1,6 @@ /*
- Copyright (c) 2017 Mellanox Technologies, Inc. All rights reserved.
- Copyright (c) 2020 Intel Corporation. All rights reserved.
- This software is available to you under a choice of one of two
- licenses. You may choose to be licensed under the terms of the GNU
@@ -452,6 +453,14 @@ static struct ibv_mr *reg_mr(struct ibv_pd *pd, void *addr, size_t length, return NULL; }
+static struct ibv_mr *reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset,
size_t length, uint64_t iova,
int fd, int access)
+{
- errno = EOPNOTSUPP;
- return NULL;
+}
- static int req_notify_cq(struct ibv_cq *cq, int solicited_only) { return EOPNOTSUPP;
@@ -560,6 +569,7 @@ const struct verbs_context_ops verbs_dummy_ops = { query_srq, read_counters, reg_dm_mr,
- reg_dmabuf_mr, reg_mr, req_notify_cq, rereg_mr,
@@ -689,6 +699,7 @@ void verbs_set_ops(struct verbs_context *vctx, SET_PRIV_OP_IC(vctx, set_ece); SET_PRIV_OP_IC(vctx, unimport_mr); SET_PRIV_OP_IC(vctx, unimport_pd);
SET_OP(ctx, reg_dmabuf_mr);
#undef SET_OP #undef SET_OP2
diff --git a/libibverbs/libibverbs.map.in b/libibverbs/libibverbs.map.in index b5ccaca..f67e1ef 100644 --- a/libibverbs/libibverbs.map.in +++ b/libibverbs/libibverbs.map.in @@ -148,6 +148,11 @@ IBVERBS_1.11 { _ibv_query_gid_table; } IBVERBS_1.10;
+IBVERBS_1.12 {
- global:
ibv_reg_dmabuf_mr;
+} IBVERBS_1.11;
- /* If any symbols in this stanza change ABI then the entire staza gets a new symbol version. See the top level CMakeLists.txt for this setting. */
@@ -211,6 +216,7 @@ IBVERBS_PRIVATE_@IBVERBS_PABI_VERSION@ { ibv_cmd_query_srq; ibv_cmd_read_counters; ibv_cmd_reg_dm_mr;
ibv_cmd_reg_mr; ibv_cmd_req_notify_cq; ibv_cmd_rereg_mr;ibv_cmd_reg_dmabuf_mr;
diff --git a/libibverbs/man/ibv_reg_mr.3 b/libibverbs/man/ibv_reg_mr.3 index 2bfc955..a522527 100644 --- a/libibverbs/man/ibv_reg_mr.3 +++ b/libibverbs/man/ibv_reg_mr.3 @@ -3,7 +3,7 @@ ." .TH IBV_REG_MR 3 2006-10-31 libibverbs "Libibverbs Programmer's Manual" .SH "NAME" -ibv_reg_mr, ibv_reg_mr_iova, ibv_dereg_mr - register or deregister a memory region (MR) +ibv_reg_mr, ibv_reg_mr_iova, ibv_reg_dmabuf_mr, ibv_dereg_mr - register or deregister a memory region (MR) .SH "SYNOPSIS" .nf .B #include <infiniband/verbs.h> @@ -15,6 +15,10 @@ ibv_reg_mr, ibv_reg_mr_iova, ibv_dereg_mr - register or deregister a memory reg .BI " size_t " "length" ", uint64_t " "hca_va" , .BI " int " "access" ); .sp +.BI "struct ibv_mr *ibv_reg_dmabuf_mr(struct ibv_pd " "*pd" ", uint64_t " "offset" , +.BI " size_t " "length" ", uint64_t " "iova" , +.BI " int " "fd" ", int " "access" ); +.sp .BI "int ibv_dereg_mr(struct ibv_mr " "*mr" ); .fi .SH "DESCRIPTION" @@ -71,11 +75,30 @@ a lkey or rkey. The offset in the memory region is computed as 'addr + (iova - hca_va)'. Specifying 0 for hca_va has the same effect as IBV_ACCESS_ZERO_BASED. .PP +.B ibv_reg_dmabuf_mr() +registers a dma-buf based memory region (MR) associated with the protection domain +.I pd\fR. +The MR starts at +.I offset +of the dma-buf and its size is +.I length\fR. +The dma-buf is identified by the file descriptor +.I fd\fR. +The argument +.I iova +specifies the virtual base address of the MR when accessed through a lkey or rkey. +It must have the same page offset as +.I offset\fR. +The argument +.I access +describes the desired memory protection attributes; it is similar to the ibv_reg_mr case except that only the following flags are supported: +.B IBV_ACCESS_LOCAL_WRITE, IBV_ACCESS_REMOTE_WRITE, IBV_ACCESS_REMOTE_READ, IBV_ACCESS_REMOTE_ATOMIC, IBV_ACCESS_RELAXED_ORDERING. +.PP .B ibv_dereg_mr() deregisters the MR .I mr\fR. 
.SH "RETURN VALUE" -.B ibv_reg_mr() / ibv_reg_mr_iova() +.B ibv_reg_mr() / ibv_reg_mr_iova() / ibv_reg_dmabuf_mr() returns a pointer to the registered MR, or NULL if the request fails. The local key (\fBL_Key\fR) field .B lkey diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c index 2b0ede8..6293462 100644 --- a/libibverbs/verbs.c +++ b/libibverbs/verbs.c @@ -1,6 +1,7 @@ /*
- Copyright (c) 2005 Topspin Communications. All rights reserved.
- Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
- Copyright (c) 2020 Intel Corporation. All rights reserved.
- This software is available to you under a choice of one of two
- licenses. You may choose to be licensed under the terms of the GNU
@@ -367,6 +368,23 @@ void ibv_unimport_mr(struct ibv_mr *mr) get_ops(mr->context)->unimport_mr(mr); }
+struct ibv_mr *ibv_reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset,
size_t length, uint64_t iova, int fd,
int access)
+{
- struct ibv_mr *mr;
- mr = get_ops(pd->context)->reg_dmabuf_mr(pd, offset, length, iova,
fd, access);
- if (mr) {
mr->context = pd->context;
mr->pd = pd;
mr->addr = (void *)offset;
mr->length = length;
- }
- return mr;
+}
- LATEST_SYMVER_FUNC(ibv_rereg_mr, 1_1, "IBVERBS_1.1", int, struct ibv_mr *mr, int flags,
diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h index ee57e05..1a0af0e 100644 --- a/libibverbs/verbs.h +++ b/libibverbs/verbs.h @@ -3,6 +3,7 @@
- Copyright (c) 2004, 2011-2012 Intel Corporation. All rights reserved.
- Copyright (c) 2005, 2006, 2007 Cisco Systems, Inc. All rights reserved.
- Copyright (c) 2005 PathScale, Inc. All rights reserved.
- Copyright (c) 2020 Intel Corporation. All rights reserved.
- This software is available to you under a choice of one of two
- licenses. You may choose to be licensed under the terms of the GNU
@@ -1961,6 +1962,10 @@ struct ibv_context_ops { void *(*_compat_attach_mcast)(void); void *(*_compat_detach_mcast)(void); void *(*_compat_async_event)(void);
struct ibv_mr * (*reg_dmabuf_mr)(struct ibv_pd *pd,
uint64_t offset, size_t length,
uint64_t iova, int fd,
int access);
};
struct ibv_context {
@@ -2535,6 +2540,12 @@ __ibv_reg_mr_iova(struct ibv_pd *pd, void *addr, size_t length, uint64_t iova, __builtin_constant_p( \ ((access) & IBV_ACCESS_OPTIONAL_RANGE) == 0))
+/**
- ibv_reg_dmabuf_mr - Register a dmabuf-based memory region
- */
+struct ibv_mr *ibv_reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, size_t length,
uint64_t iova, int fd, int access);
- enum ibv_rereg_mr_err_code { /* Old MR is valid, invalid input */ IBV_REREG_MR_ERR_INPUT = -1,
Implement the new provider method for registering dma-buf based memory regions.
Signed-off-by: Jianxin Xiong jianxin.xiong@intel.com --- providers/mlx5/mlx5.c | 2 ++ providers/mlx5/mlx5.h | 3 +++ providers/mlx5/verbs.c | 22 ++++++++++++++++++++++ 3 files changed, 27 insertions(+)
diff --git a/providers/mlx5/mlx5.c b/providers/mlx5/mlx5.c index 1378acf..b3e2d57 100644 --- a/providers/mlx5/mlx5.c +++ b/providers/mlx5/mlx5.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 2020 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -96,6 +97,7 @@ static const struct verbs_context_ops mlx5_ctx_common_ops = { .async_event = mlx5_async_event, .dealloc_pd = mlx5_free_pd, .reg_mr = mlx5_reg_mr, + .reg_dmabuf_mr = mlx5_reg_dmabuf_mr, .rereg_mr = mlx5_rereg_mr, .dereg_mr = mlx5_dereg_mr, .alloc_mw = mlx5_alloc_mw, diff --git a/providers/mlx5/mlx5.h b/providers/mlx5/mlx5.h index 8c94f72..17a2470 100644 --- a/providers/mlx5/mlx5.h +++ b/providers/mlx5/mlx5.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 2020 Intel Corporation. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -903,6 +904,8 @@ void mlx5_async_event(struct ibv_context *context, struct ibv_mr *mlx5_alloc_null_mr(struct ibv_pd *pd); struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr, size_t length, uint64_t hca_va, int access); +struct ibv_mr *mlx5_reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, size_t length, + uint64_t iova, int fd, int access); int mlx5_rereg_mr(struct verbs_mr *mr, int flags, struct ibv_pd *pd, void *addr, size_t length, int access); int mlx5_dereg_mr(struct verbs_mr *mr); diff --git a/providers/mlx5/verbs.c b/providers/mlx5/verbs.c index b956156..a7fc3b0 100644 --- a/providers/mlx5/verbs.c +++ b/providers/mlx5/verbs.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2012 Mellanox Technologies, Inc. All rights reserved. + * Copyright (c) 2020 Intel Corporation. All rights reserved. 
* * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -647,6 +648,27 @@ struct ibv_mr *mlx5_reg_mr(struct ibv_pd *pd, void *addr, size_t length, return &mr->vmr.ibv_mr; }
+struct ibv_mr *mlx5_reg_dmabuf_mr(struct ibv_pd *pd, uint64_t offset, size_t length, + uint64_t iova, int fd, int acc) +{ + struct mlx5_mr *mr; + int ret; + + mr = calloc(1, sizeof(*mr)); + if (!mr) + return NULL; + + ret = ibv_cmd_reg_dmabuf_mr(pd, offset, length, iova, fd, acc, + &mr->vmr); + if (ret) { + free(mr); + return NULL; + } + mr->alloc_flags = acc; + + return &mr->vmr.ibv_mr; +} + struct ibv_mr *mlx5_alloc_null_mr(struct ibv_pd *pd) { struct mlx5_mr *mr;
Define a new sub-class of 'MR' that uses a dma-buf object for the memory region. Define a new class 'DmaBuf' as a wrapper for the dma-buf allocation mechanism implemented in C.
Add a method to buildlib for building modules with mixed Cython and C source.
Signed-off-by: Jianxin Xiong jianxin.xiong@intel.com --- buildlib/pyverbs_functions.cmake | 52 +++++++ pyverbs/CMakeLists.txt | 7 + pyverbs/dmabuf.pxd | 15 ++ pyverbs/dmabuf.pyx | 72 ++++++++++ pyverbs/dmabuf_alloc.c | 296 +++++++++++++++++++++++++++++++++++++++ pyverbs/dmabuf_alloc.h | 19 +++ pyverbs/libibverbs.pxd | 2 + pyverbs/mr.pxd | 6 + pyverbs/mr.pyx | 103 +++++++++++++- 9 files changed, 570 insertions(+), 2 deletions(-) create mode 100644 pyverbs/dmabuf.pxd create mode 100644 pyverbs/dmabuf.pyx create mode 100644 pyverbs/dmabuf_alloc.c create mode 100644 pyverbs/dmabuf_alloc.h
diff --git a/buildlib/pyverbs_functions.cmake b/buildlib/pyverbs_functions.cmake index 953cec2..2f6788e 100644 --- a/buildlib/pyverbs_functions.cmake +++ b/buildlib/pyverbs_functions.cmake @@ -1,5 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) # Copyright (c) 2018, Mellanox Technologies. All rights reserved. See COPYING file +# Copyright (c) 2020, Intel Corporation. All rights reserved. See COPYING file
function(rdma_cython_module PY_MODULE LINKER_FLAGS) foreach(PYX_FILE ${ARGN}) @@ -32,6 +33,57 @@ function(rdma_cython_module PY_MODULE LINKER_FLAGS) endforeach() endfunction()
+function(rdma_multifile_module PY_MODULE MODULE_NAME LINKER_FLAGS) + set(ALL_CFILES "") + foreach(SRC_FILE ${ARGN}) + get_filename_component(FILENAME ${SRC_FILE} NAME_WE) + get_filename_component(DIR ${SRC_FILE} DIRECTORY) + get_filename_component(EXT ${SRC_FILE} EXT) + if (DIR) + set(SRC_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${DIR}") + else() + set(SRC_PATH "${CMAKE_CURRENT_SOURCE_DIR}") + endif() + if (${EXT} STREQUAL ".pyx") + set(PYX "${SRC_PATH}/${FILENAME}.pyx") + set(CFILE "${CMAKE_CURRENT_BINARY_DIR}/${FILENAME}.c") + include_directories(${PYTHON_INCLUDE_DIRS}) + add_custom_command( + OUTPUT "${CFILE}" + MAIN_DEPENDENCY "${PYX}" + COMMAND ${CYTHON_EXECUTABLE} "${PYX}" -o "${CFILE}" + "-I${PYTHON_INCLUDE_DIRS}" + COMMENT "Cythonizing ${PYX}" + ) + set(ALL_CFILES "${ALL_CFILES};${CFILE}") + elseif(${EXT} STREQUAL ".c") + set(CFILE_ORIG "${SRC_PATH}/${FILENAME}.c") + set(CFILE "${CMAKE_CURRENT_BINARY_DIR}/${FILENAME}.c") + if (NOT ${CFILE_ORIG} STREQUAL ${CFILE}) + rdma_create_symlink("${CFILE_ORIG}" "${CFILE}") + endif() + set(ALL_CFILES "${ALL_CFILES};${CFILE}") + elseif(${EXT} STREQUAL ".h") + set(HFILE_ORIG "${SRC_PATH}/${FILENAME}.h") + set(HFILE "${CMAKE_CURRENT_BINARY_DIR}/${FILENAME}.h") + if (NOT ${HFILE_ORIG} STREQUAL ${HFILE}) + rdma_create_symlink("${HFILE_ORIG}" "${HFILE}") + endif() + else() + continue() + endif() + endforeach() + string(REGEX REPLACE "\.so$" "" SONAME "${MODULE_NAME}${CMAKE_PYTHON_SO_SUFFIX}") + add_library(${SONAME} SHARED ${ALL_CFILES}) + set_target_properties(${SONAME} PROPERTIES + COMPILE_FLAGS "${CMAKE_C_FLAGS} -fPIC -fno-strict-aliasing -Wno-unused-function -Wno-redundant-decls -Wno-shadow -Wno-cast-function-type -Wno-implicit-fallthrough -Wno-unknown-warning -Wno-unknown-warning-option -Wno-deprecated-declarations ${NO_VAR_TRACKING_FLAGS}" + LIBRARY_OUTPUT_DIRECTORY "${BUILD_PYTHON}/${PY_MODULE}" + PREFIX "") + target_link_libraries(${SONAME} LINK_PRIVATE ${PYTHON_LIBRARIES} ibverbs rdmacm ${LINKER_FLAGS}) + install(TARGETS 
${SONAME} + DESTINATION ${CMAKE_INSTALL_PYTHON_ARCH_LIB}/${PY_MODULE}) +endfunction() + function(rdma_python_module PY_MODULE) foreach(PY_FILE ${ARGN}) get_filename_component(LINK "${CMAKE_CURRENT_SOURCE_DIR}/${PY_FILE}" ABSOLUTE) diff --git a/pyverbs/CMakeLists.txt b/pyverbs/CMakeLists.txt index 9542c4b..1b21e7b 100644 --- a/pyverbs/CMakeLists.txt +++ b/pyverbs/CMakeLists.txt @@ -1,5 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) # Copyright (c) 2019, Mellanox Technologies. All rights reserved. See COPYING file +# Copyright (c) 2020, Intel Corporation. All rights reserved. See COPYING file
rdma_cython_module(pyverbs "" addr.pyx @@ -24,6 +25,12 @@ rdma_python_module(pyverbs utils.py )
+rdma_multifile_module(pyverbs dmabuf "" + dmabuf.pyx + dmabuf_alloc.c + dmabuf_alloc.h + ) + # mlx5 and efa providers are not built without coherent DMA, e.g. ARM32 build. if (HAVE_COHERENT_DMA) add_subdirectory(providers/mlx5) diff --git a/pyverbs/dmabuf.pxd b/pyverbs/dmabuf.pxd new file mode 100644 index 0000000..3ef5dfb --- /dev/null +++ b/pyverbs/dmabuf.pxd @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) +# Copyright (c) 2020, Intel Corporation. All rights reserved. See COPYING file + +#cython: language_level=3 + +cdef class DmaBuf: + cdef int dri_fd + cdef int handle + cdef int fd + cdef unsigned long size + cdef unsigned long map_offset + cdef void *dmabuf + cdef object dmabuf_mrs + cdef add_ref(self, obj) + cpdef close(self) diff --git a/pyverbs/dmabuf.pyx b/pyverbs/dmabuf.pyx new file mode 100644 index 0000000..23d8e2a --- /dev/null +++ b/pyverbs/dmabuf.pyx @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) +# Copyright (c) 2020, Intel Corporation. All rights reserved. See COPYING file + +#cython: language_level=3 + +import weakref + +from pyverbs.base cimport close_weakrefs +from pyverbs.base import PyverbsRDMAErrno +from pyverbs.mr cimport DmaBufMR + +cdef extern from "dmabuf_alloc.h": + cdef struct dmabuf: + pass + dmabuf *dmabuf_alloc(int unit, unsigned long size) + void dmabuf_free(dmabuf *dmabuf) + int dmabuf_get_dri_fd(dmabuf *dmabuf) + int dmabuf_get_fd(dmabuf *dmabuf) + unsigned long dmabuf_get_offset(dmabuf *dmabuf) + + +cdef class DmaBuf: + def __init__(self, size, unit=0): + """ + Allocate DmaBuf object from a GPU device. This is done through the + DRI device interface. Usually this requires the effective user id + being a member of the 'render' group. + :param size: The size (in number of bytes) of the buffer. + :param unit: The unit number of the GPU to allocate the buffer from. + :return: The newly created DmaBuf object on success. 
+ """ + self.dmabuf_mrs = weakref.WeakSet() + self.dmabuf = dmabuf_alloc(size, unit) + if self.dmabuf == NULL: + raise PyverbsRDMAErrno(f'Failed to allocate dmabuf of size {size} on unit {unit}') + self.dri_fd = dmabuf_get_dri_fd(<dmabuf *>self.dmabuf) + self.fd = dmabuf_get_fd(<dmabuf *>self.dmabuf) + self.map_offset = dmabuf_get_offset(<dmabuf *>self.dmabuf) + + def __dealloc__(self): + self.close() + + cpdef close(self): + if self.dmabuf == NULL: + return None + close_weakrefs([self.dmabuf_mrs]) + dmabuf_free(<dmabuf *>self.dmabuf) + self.dmabuf = NULL + + cdef add_ref(self, obj): + if isinstance(obj, DmaBufMR): + self.dmabuf_mrs.add(obj) + + @property + def dri_fd(self): + return self.dri_fd + + @property + def handle(self): + return self.handle + + @property + def fd(self): + return self.fd + + @property + def size(self): + return self.size + + @property + def map_offset(self): + return self.map_offset diff --git a/pyverbs/dmabuf_alloc.c b/pyverbs/dmabuf_alloc.c new file mode 100644 index 0000000..b958a3e --- /dev/null +++ b/pyverbs/dmabuf_alloc.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright 2020 Intel Corporation. All rights reserved. See COPYING file + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <drm/drm.h> +#include <drm/i915_drm.h> +#include <drm/amdgpu_drm.h> +#include <drm/radeon_drm.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include "dmabuf_alloc.h" + +/* + * Abstraction of the buffer allocation mechanism using the DRI interface. + * The interface is accessed by ioctl() calls over the '/dev/dri/renderD*' + * device. Successful access usually requires the effective user id being + * in the 'render' group. 
+ */ + +struct dri { + int fd; + int (*alloc)(struct dri *dri, uint64_t size, uint32_t *handle); + int (*mmap_offset)(struct dri *dri, uint32_t handle, uint64_t *offset); +}; + +static int i915_alloc(struct dri *dri, uint64_t size, uint32_t *handle) +{ + struct drm_i915_gem_create gem_create = {0}; + int err; + + gem_create.size = size; + err = ioctl(dri->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create); + if (err) + return err; + + *handle = gem_create.handle; + return 0; +} + +static int amdgpu_alloc(struct dri *dri, size_t size, uint32_t *handle) +{ + union drm_amdgpu_gem_create gem_create = {{0}}; + int err; + + gem_create.in.bo_size = size; + gem_create.in.domains = AMDGPU_GEM_DOMAIN_VRAM; + err = ioctl(dri->fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &gem_create); + if (err) + return err; + + *handle = gem_create.out.handle; + return 0; +} + +static int radeon_alloc(struct dri *dri, size_t size, uint32_t *handle) +{ + struct drm_radeon_gem_create gem_create = {0}; + int err; + + gem_create.size = size; + gem_create.initial_domain = RADEON_GEM_DOMAIN_VRAM; + err = ioctl(dri->fd, DRM_IOCTL_RADEON_GEM_CREATE, &gem_create); + if (err) + return err; + + *handle = gem_create.handle; + return 0; +} + +static int i915_mmap_offset(struct dri *dri, uint32_t handle, uint64_t *offset) +{ + struct drm_i915_gem_mmap_gtt gem_mmap = {0}; + int err; + + gem_mmap.handle = handle; + err = ioctl(dri->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gem_mmap); + if (err) + return err; + + *offset = gem_mmap.offset; + return 0; +} + +static int amdgpu_mmap_offset(struct dri *dri, uint32_t handle, + uint64_t *offset) +{ + union drm_amdgpu_gem_mmap gem_mmap = {{0}}; + int err; + + gem_mmap.in.handle = handle; + err = ioctl(dri->fd, DRM_IOCTL_AMDGPU_GEM_MMAP, &gem_mmap); + if (err) + return err; + + *offset = gem_mmap.out.addr_ptr; + return 0; +} + +static int radeon_mmap_offset(struct dri *dri, uint32_t handle, + uint64_t *offset) +{ + struct drm_radeon_gem_mmap gem_mmap = {0}; + int err; + + gem_mmap.handle = 
handle; + err = ioctl(dri->fd, DRM_IOCTL_RADEON_GEM_MMAP, &gem_mmap); + if (err) + return err; + + *offset = gem_mmap.addr_ptr; + return 0; +} + +static struct dri *dri_open(int unit) +{ + char path[32]; + struct drm_version version = {0}; + char name[16] = {0}; + int err; + struct dri *dri; + + dri = malloc(sizeof(*dri)); + if (!dri) + return NULL; + + sprintf(path, "/dev/dri/renderD%d", unit + 128); + + dri->fd = open(path, O_RDWR); + if (dri->fd < 0) + goto out_free; + + version.name = name; + version.name_len = 16; + err = ioctl(dri->fd, DRM_IOCTL_VERSION, &version); + if (err) + goto out_close; + + if (!strcmp(name, "amdgpu")) { + dri->alloc = amdgpu_alloc; + dri->mmap_offset = amdgpu_mmap_offset; + } else if (!strcmp(name, "i915")) { + dri->alloc = i915_alloc; + dri->mmap_offset = i915_mmap_offset; + } else if (!strcmp(name, "radeon")) { + dri->alloc = radeon_alloc; + dri->mmap_offset = radeon_mmap_offset; + } else { + goto out_close; + } + return dri; + +out_close: + close(dri->fd); + +out_free: + free(dri); + return NULL; +} + +static void dri_close(struct dri *dri) +{ + if (!dri || dri->fd < 0) + return; + + close(dri->fd); + free(dri); +} + +static void dri_free_buf(struct dri *dri, uint32_t handle) +{ + struct drm_gem_close close = {0}; + + close.handle = handle; + ioctl(dri->fd, DRM_IOCTL_GEM_CLOSE, &close); +} + +static int dri_alloc_buf(struct dri *dri, size_t size, uint32_t *handle, int *fd) +{ + struct drm_prime_handle prime_handle = {0}; + int err; + + if (!dri || dri->fd < 0) + return -EINVAL; + + err = dri->alloc(dri, size, handle); + if (err) + return err; + + prime_handle.handle = *handle; + prime_handle.flags = O_RDWR; + err = ioctl(dri->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &prime_handle); + if (err) { + dri_free_buf(dri, *handle); + return err; + } + + *fd = prime_handle.fd; + return 0; +} + +static int dri_map_buf(struct dri *dri, uint32_t handle, uint64_t *offset) +{ + if (!dri || dri->fd < 0) + return -EINVAL; + + return dri->mmap_offset(dri, 
handle, offset); +} + +/* + * Abstraction of dmabuf object, allocated using the DRI abstraction defined + * above. + */ + +struct dmabuf { + struct dri *dri; + int fd; + uint32_t handle; + uint64_t map_offset; +}; + +struct dmabuf *dmabuf_alloc(uint64_t size, int unit) +{ + struct dmabuf *dmabuf; + int err; + + dmabuf = malloc(sizeof *dmabuf); + if (!dmabuf) + return NULL; + + dmabuf->dri = dri_open(unit); + if (!dmabuf->dri) + goto out_free; + + err = dri_alloc_buf(dmabuf->dri, size, &dmabuf->handle, &dmabuf->fd); + if (err) + goto out_close; + + err = dri_map_buf(dmabuf->dri, dmabuf->handle, &dmabuf->map_offset); + if (err) + goto out_free_buf; + + return dmabuf; + +out_free_buf: + dri_free_buf(dmabuf->dri, dmabuf->handle); + +out_close: + dri_close(dmabuf->dri); + +out_free: + free(dmabuf); + return NULL; +} + +void dmabuf_free(struct dmabuf *dmabuf) +{ + if (!dmabuf) + return; + + close(dmabuf->fd); + dri_free_buf(dmabuf->dri, dmabuf->handle); + dri_close(dmabuf->dri); + free(dmabuf); +} + +int dmabuf_get_dri_fd(struct dmabuf *dmabuf) +{ + if (!dmabuf || !dmabuf->dri) + return -1; + + return dmabuf->dri->fd; +} + +int dmabuf_get_fd(struct dmabuf *dmabuf) +{ + if (!dmabuf) + return -1; + + return dmabuf->fd; +} + +uint64_t dmabuf_get_offset(struct dmabuf *dmabuf) +{ + if (!dmabuf) + return -1; + + return dmabuf->map_offset; +} + diff --git a/pyverbs/dmabuf_alloc.h b/pyverbs/dmabuf_alloc.h new file mode 100644 index 0000000..f36c337 --- /dev/null +++ b/pyverbs/dmabuf_alloc.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright 2020 Intel Corporation. All rights reserved. 
See COPYING file + */ + +#ifndef _DRI_H_ +#define _DRI_H_ + +#include <stdint.h> + +struct dmabuf; + +struct dmabuf *dmabuf_alloc(uint64_t size, int unit); +void dmabuf_free(struct dmabuf *dmabuf); +int dmabuf_get_dri_fd(struct dmabuf *dmabuf); +int dmabuf_get_fd(struct dmabuf *dmabuf); +uint64_t dmabuf_get_offset(struct dmabuf *dmabuf); + +#endif /* _DRI_H_ */ diff --git a/pyverbs/libibverbs.pxd b/pyverbs/libibverbs.pxd index 6fbba54..d76f633 100644 --- a/pyverbs/libibverbs.pxd +++ b/pyverbs/libibverbs.pxd @@ -507,6 +507,8 @@ cdef extern from 'infiniband/verbs.h': ibv_pd *ibv_alloc_pd(ibv_context *context) int ibv_dealloc_pd(ibv_pd *pd) ibv_mr *ibv_reg_mr(ibv_pd *pd, void *addr, size_t length, int access) + ibv_mr *ibv_reg_dmabuf_mr(ibv_pd *pd, uint64_t offset, size_t length, + uint64_t iova, int fd, int access) int ibv_dereg_mr(ibv_mr *mr) int ibv_advise_mr(ibv_pd *pd, uint32_t advice, uint32_t flags, ibv_sge *sg_list, uint32_t num_sge) diff --git a/pyverbs/mr.pxd b/pyverbs/mr.pxd index ebe8ada..d9a79ff 100644 --- a/pyverbs/mr.pxd +++ b/pyverbs/mr.pxd @@ -1,5 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) # Copyright (c) 2019, Mellanox Technologies. All rights reserved. See COPYING file +# Copyright (c) 2020, Intel Corporation. All rights reserved. See COPYING file
#cython: language_level=3
@@ -33,3 +34,8 @@ cdef class MW(PyverbsCM):
cdef class DMMR(MR): cdef object dm + +cdef class DmaBufMR(MR): + cdef object dmabuf + cdef unsigned long offset + cdef object is_dmabuf_internal diff --git a/pyverbs/mr.pyx b/pyverbs/mr.pyx index 7011da1..e4ed2dc 100644 --- a/pyverbs/mr.pyx +++ b/pyverbs/mr.pyx @@ -1,11 +1,12 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) # Copyright (c) 2019, Mellanox Technologies. All rights reserved. See COPYING file +# Copyright (c) 2020, Intel Corporation. All rights reserved. See COPYING file
import resource import logging
from posix.mman cimport mmap, munmap, MAP_PRIVATE, PROT_READ, PROT_WRITE, \ - MAP_ANONYMOUS, MAP_HUGETLB + MAP_ANONYMOUS, MAP_HUGETLB, MAP_SHARED from pyverbs.pyverbs_error import PyverbsError, PyverbsRDMAError, \ PyverbsUserError from libc.stdint cimport uintptr_t, SIZE_MAX @@ -14,9 +15,10 @@ from posix.stdlib cimport posix_memalign from libc.string cimport memcpy, memset cimport pyverbs.libibverbs_enums as e from pyverbs.device cimport DM -from libc.stdlib cimport free +from libc.stdlib cimport free, malloc from .cmid cimport CMID from .pd cimport PD +from .dmabuf cimport DmaBuf
cdef extern from 'sys/mman.h': cdef void* MAP_FAILED @@ -348,6 +350,103 @@ cdef class DMMR(MR): cpdef read(self, length, offset): return self.dm.copy_from_dm(offset, length)
+cdef class DmaBufMR(MR): + def __init__(self, PD pd not None, length, access, DmaBuf dmabuf=None, + offset=0): + """ + Initializes a DmaBufMR (DMA-BUF Memory Region) of the given length + and access flags using the given PD and DmaBuf objects. + :param pd: A PD object + :param length: Length in bytes + :param access: Access flags, see ibv_access_flags enum + :param dmabuf: A DmaBuf object. One will be allocated if absent + :param offset: Byte offset from the beginning of the dma-buf + :return: The newly created DMABUFMR + """ + self.logger = logging.getLogger(self.__class__.__name__) + if dmabuf is None: + self.is_dmabuf_internal = True + dmabuf = DmaBuf(length + offset) + self.mr = v.ibv_reg_dmabuf_mr(pd.pd, offset, length, offset, dmabuf.fd, access) + if self.mr == NULL: + raise PyverbsRDMAErrno(f'Failed to register a dma-buf MR. length: {length}, access flags: {access}') + super().__init__(pd, length, access) + self.pd = pd + self.dmabuf = dmabuf + self.offset = offset + pd.add_ref(self) + dmabuf.add_ref(self) + self.logger.debug(f'Registered dma-buf ibv_mr. Length: {length}, access flags {access}') + + def __dealloc__(self): + self.close() + + cpdef close(self): + """ + Closes the underlying C object of the MR and frees the memory allocated. + :return: None + """ + if self.mr != NULL: + self.logger.debug('Closing dma-buf MR') + rc = v.ibv_dereg_mr(self.mr) + if rc != 0: + raise PyverbsRDMAError('Failed to dereg dma-buf MR', rc) + self.pd = None + self.mr = NULL + # Set self.mr to NULL before closing dmabuf because this method is + # re-entered when close_weakrefs() is called inside dmabuf.close(). 
+ if self.is_dmabuf_internal: + self.dmabuf.close() + self.dmabuf = None + + @property + def offset(self): + return self.offset + + @property + def dmabuf(self): + return self.dmabuf + + def write(self, data, length, offset=0): + """ + Write user data to the dma-buf backing the MR + :param data: User data to write + :param length: Length of the data to write + :param offset: Writing offset + :return: None + """ + if isinstance(data, str): + data = data.encode() + cdef int off = offset + self.offset + cdef void *buf = mmap(NULL, length + off, PROT_READ | PROT_WRITE, + MAP_SHARED, self.dmabuf.dri_fd, + self.dmabuf.map_offset) + if buf == MAP_FAILED: + raise PyverbsError(f'Failed to map dma-buf of size {length}') + memcpy(<char*>(buf + off), <char *>data, length) + munmap(buf, length + off) + + cpdef read(self, length, offset): + """ + Reads data from the dma-buf backing the MR + :param length: Length of data to read + :param offset: Reading offset + :return: The data on the buffer in the requested offset + """ + cdef int off = offset + self.offset + cdef void *buf = mmap(NULL, length + off, PROT_READ | PROT_WRITE, + MAP_SHARED, self.dmabuf.dri_fd, + self.dmabuf.map_offset) + if buf == MAP_FAILED: + raise PyverbsError(f'Failed to map dma-buf of size {length}') + cdef char *data =<char*>malloc(length) + memset(data, 0, length) + memcpy(data, <char*>(buf + off), length) + munmap(buf, length + off) + res = data[:length] + free(data) + return res +
def mwtype2str(mw_type): mw_types = {1:'IBV_MW_TYPE_1', 2:'IBV_MW_TYPE_2'}
On Fri, Nov 27, 2020 at 12:55:41PM -0800, Jianxin Xiong wrote:
Define a new sub-class of 'MR' that uses a dma-buf object for the memory region. Define a new class 'DmaBuf' as a wrapper for the dma-buf allocation mechanism implemented in C.
Add a method to buildlib for building modules with mixed Cython and C source.
Signed-off-by: Jianxin Xiong <jianxin.xiong@intel.com>
buildlib/pyverbs_functions.cmake | 52 +++++++ pyverbs/CMakeLists.txt | 7 + pyverbs/dmabuf.pxd | 15 ++ pyverbs/dmabuf.pyx | 72 ++++++++++ pyverbs/dmabuf_alloc.c | 296 +++++++++++++++++++++++++++++++++++++++ pyverbs/dmabuf_alloc.h | 19 +++ pyverbs/libibverbs.pxd | 2 + pyverbs/mr.pxd | 6 + pyverbs/mr.pyx | 103 +++++++++++++- 9 files changed, 570 insertions(+), 2 deletions(-) create mode 100644 pyverbs/dmabuf.pxd create mode 100644 pyverbs/dmabuf.pyx create mode 100644 pyverbs/dmabuf_alloc.c create mode 100644 pyverbs/dmabuf_alloc.h
diff --git a/buildlib/pyverbs_functions.cmake b/buildlib/pyverbs_functions.cmake index 953cec2..2f6788e 100644 --- a/buildlib/pyverbs_functions.cmake +++ b/buildlib/pyverbs_functions.cmake @@ -1,5 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) # Copyright (c) 2018, Mellanox Technologies. All rights reserved. See COPYING file +# Copyright (c) 2020, Intel Corporation. All rights reserved. See COPYING file
function(rdma_cython_module PY_MODULE LINKER_FLAGS) foreach(PYX_FILE ${ARGN}) @@ -32,6 +33,57 @@ function(rdma_cython_module PY_MODULE LINKER_FLAGS) endforeach() endfunction()
+function(rdma_multifile_module PY_MODULE MODULE_NAME LINKER_FLAGS)
- set(ALL_CFILES "")
- foreach(SRC_FILE ${ARGN})
- get_filename_component(FILENAME ${SRC_FILE} NAME_WE)
- get_filename_component(DIR ${SRC_FILE} DIRECTORY)
- get_filename_component(EXT ${SRC_FILE} EXT)
- if (DIR)
set(SRC_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${DIR}")
- else()
set(SRC_PATH "${CMAKE_CURRENT_SOURCE_DIR}")
- endif()
- if (${EXT} STREQUAL ".pyx")
set(PYX "${SRC_PATH}/${FILENAME}.pyx")
set(CFILE "${CMAKE_CURRENT_BINARY_DIR}/${FILENAME}.c")
include_directories(${PYTHON_INCLUDE_DIRS})
add_custom_command(
OUTPUT "${CFILE}"
MAIN_DEPENDENCY "${PYX}"
COMMAND ${CYTHON_EXECUTABLE} "${PYX}" -o "${CFILE}"
"-I${PYTHON_INCLUDE_DIRS}"
COMMENT "Cythonizing ${PYX}"
)
set(ALL_CFILES "${ALL_CFILES};${CFILE}")
- elseif(${EXT} STREQUAL ".c")
set(CFILE_ORIG "${SRC_PATH}/${FILENAME}.c")
set(CFILE "${CMAKE_CURRENT_BINARY_DIR}/${FILENAME}.c")
if (NOT ${CFILE_ORIG} STREQUAL ${CFILE})
rdma_create_symlink("${CFILE_ORIG}" "${CFILE}")
endif()
set(ALL_CFILES "${ALL_CFILES};${CFILE}")
- elseif(${EXT} STREQUAL ".h")
set(HFILE_ORIG "${SRC_PATH}/${FILENAME}.h")
set(HFILE "${CMAKE_CURRENT_BINARY_DIR}/${FILENAME}.h")
if (NOT ${HFILE_ORIG} STREQUAL ${HFILE})
rdma_create_symlink("${HFILE_ORIG}" "${HFILE}")
endif()
- else()
continue()
- endif()
- endforeach()
- string(REGEX REPLACE "\.so$" "" SONAME "${MODULE_NAME}${CMAKE_PYTHON_SO_SUFFIX}")
- add_library(${SONAME} SHARED ${ALL_CFILES})
- set_target_properties(${SONAME} PROPERTIES
- COMPILE_FLAGS "${CMAKE_C_FLAGS} -fPIC -fno-strict-aliasing -Wno-unused-function -Wno-redundant-decls -Wno-shadow -Wno-cast-function-type -Wno-implicit-fallthrough -Wno-unknown-warning -Wno-unknown-warning-option -Wno-deprecated-declarations ${NO_VAR_TRACKING_FLAGS}"
- LIBRARY_OUTPUT_DIRECTORY "${BUILD_PYTHON}/${PY_MODULE}"
- PREFIX "")
- target_link_libraries(${SONAME} LINK_PRIVATE ${PYTHON_LIBRARIES} ibverbs rdmacm ${LINKER_FLAGS})
- install(TARGETS ${SONAME}
- DESTINATION ${CMAKE_INSTALL_PYTHON_ARCH_LIB}/${PY_MODULE})
+endfunction()
function(rdma_python_module PY_MODULE) foreach(PY_FILE ${ARGN}) get_filename_component(LINK "${CMAKE_CURRENT_SOURCE_DIR}/${PY_FILE}" ABSOLUTE) diff --git a/pyverbs/CMakeLists.txt b/pyverbs/CMakeLists.txt index 9542c4b..1b21e7b 100644 --- a/pyverbs/CMakeLists.txt +++ b/pyverbs/CMakeLists.txt @@ -1,5 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) # Copyright (c) 2019, Mellanox Technologies. All rights reserved. See COPYING file +# Copyright (c) 2020, Intel Corporation. All rights reserved. See COPYING file
rdma_cython_module(pyverbs "" addr.pyx @@ -24,6 +25,12 @@ rdma_python_module(pyverbs utils.py )
+rdma_multifile_module(pyverbs dmabuf ""
- dmabuf.pyx
- dmabuf_alloc.c
- dmabuf_alloc.h
- )
# mlx5 and efa providers are not built without coherent DMA, e.g. ARM32 build. if (HAVE_COHERENT_DMA) add_subdirectory(providers/mlx5) diff --git a/pyverbs/dmabuf.pxd b/pyverbs/dmabuf.pxd new file mode 100644 index 0000000..3ef5dfb --- /dev/null +++ b/pyverbs/dmabuf.pxd @@ -0,0 +1,15 @@ +# SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) +# Copyright (c) 2020, Intel Corporation. All rights reserved. See COPYING file
+#cython: language_level=3
+cdef class DmaBuf:
- cdef int dri_fd
- cdef int handle
- cdef int fd
- cdef unsigned long size
- cdef unsigned long map_offset
- cdef void *dmabuf
- cdef object dmabuf_mrs
- cdef add_ref(self, obj)
- cpdef close(self)
diff --git a/pyverbs/dmabuf.pyx b/pyverbs/dmabuf.pyx new file mode 100644 index 0000000..23d8e2a --- /dev/null +++ b/pyverbs/dmabuf.pyx @@ -0,0 +1,72 @@ +# SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) +# Copyright (c) 2020, Intel Corporation. All rights reserved. See COPYING file
+#cython: language_level=3
+import weakref
+from pyverbs.base cimport close_weakrefs +from pyverbs.base import PyverbsRDMAErrno +from pyverbs.mr cimport DmaBufMR
+cdef extern from "dmabuf_alloc.h":
- cdef struct dmabuf:
pass
- dmabuf *dmabuf_alloc(int unit, unsigned long size)
- void dmabuf_free(dmabuf *dmabuf)
- int dmabuf_get_dri_fd(dmabuf *dmabuf)
- int dmabuf_get_fd(dmabuf *dmabuf)
- unsigned long dmabuf_get_offset(dmabuf *dmabuf)
+cdef class DmaBuf:
- def __init__(self, size, unit=0):
"""
Allocate DmaBuf object from a GPU device. This is done through the
DRI device interface. Usually this requires the effective user id
being a member of the 'render' group.
:param size: The size (in number of bytes) of the buffer.
:param unit: The unit number of the GPU to allocate the buffer from.
:return: The newly created DmaBuf object on success.
"""
self.dmabuf_mrs = weakref.WeakSet()
self.dmabuf = dmabuf_alloc(size, unit)
if self.dmabuf == NULL:
raise PyverbsRDMAErrno(f'Failed to allocate dmabuf of size {size} on unit {unit}')
self.dri_fd = dmabuf_get_dri_fd(<dmabuf *>self.dmabuf)
dri_fd seems unused by the tests
self.fd = dmabuf_get_fd(<dmabuf *>self.dmabuf)
self.map_offset = dmabuf_get_offset(<dmabuf *>self.dmabuf)
- def __dealloc__(self):
self.close()
- cpdef close(self):
if self.dmabuf == NULL:
return None
close_weakrefs([self.dmabuf_mrs])
dmabuf_free(<dmabuf *>self.dmabuf)
self.dmabuf = NULL
- cdef add_ref(self, obj):
if isinstance(obj, DmaBufMR):
self.dmabuf_mrs.add(obj)
- @property
- def dri_fd(self):
return self.dri_fd
- @property
- def handle(self):
return self.handle
- @property
- def fd(self):
return self.fd
- @property
- def size(self):
return self.size
- @property
- def map_offset(self):
return self.map_offset
diff --git a/pyverbs/dmabuf_alloc.c b/pyverbs/dmabuf_alloc.c new file mode 100644 index 0000000..b958a3e --- /dev/null +++ b/pyverbs/dmabuf_alloc.c @@ -0,0 +1,296 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/*
- Copyright 2020 Intel Corporation. All rights reserved. See COPYING file
- */
+#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <drm/drm.h> +#include <drm/i915_drm.h> +#include <drm/amdgpu_drm.h> +#include <drm/radeon_drm.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include "dmabuf_alloc.h"
+/*
- Abstraction of the buffer allocation mechanism using the DRI interface.
- The interface is accessed by ioctl() calls over the '/dev/dri/renderD*'
- device. Successful access usually requires the effective user id being
- in the 'render' group.
- */
+struct dri {
- int fd;
- int (*alloc)(struct dri *dri, uint64_t size, uint32_t *handle);
- int (*mmap_offset)(struct dri *dri, uint32_t handle, uint64_t *offset);
+};
+static int i915_alloc(struct dri *dri, uint64_t size, uint32_t *handle) +{
- struct drm_i915_gem_create gem_create = {0};
- int err;
- gem_create.size = size;
- err = ioctl(dri->fd, DRM_IOCTL_I915_GEM_CREATE, &gem_create);
- if (err)
return err;
- *handle = gem_create.handle;
- return 0;
+}
+static int amdgpu_alloc(struct dri *dri, size_t size, uint32_t *handle) +{
- union drm_amdgpu_gem_create gem_create = {{0}};
- int err;
- gem_create.in.bo_size = size;
- gem_create.in.domains = AMDGPU_GEM_DOMAIN_VRAM;
I think that, at a minimum, you also need to set domain_flags = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; otherwise, if you are unlucky, the mmap can fail.
I think it would also be good to have a separate mode in which the buffer is forced to be in system memory (this needs AMDGPU_GEM_DOMAIN_GTT and, additionally, AMDGPU_GEM_CREATE_CPU_GTT_USWC). This should be useful for cases where p2p doesn't work but we still want to check the dma-buf functionality.
- err = ioctl(dri->fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &gem_create);
- if (err)
return err;
- *handle = gem_create.out.handle;
- return 0;
+}
+static int radeon_alloc(struct dri *dri, size_t size, uint32_t *handle)
To be honest, radeon chips are old enough that I wouldn't care about them. The radeon driver also doesn't support p2p dma-buf, so the buffer is always going to be in system memory when you share it. In addition, I think you would also need some more flags, like the ones I suggested above.
+{
- struct drm_radeon_gem_create gem_create = {0};
- int err;
- gem_create.size = size;
- gem_create.initial_domain = RADEON_GEM_DOMAIN_VRAM;
- err = ioctl(dri->fd, DRM_IOCTL_RADEON_GEM_CREATE, &gem_create);
- if (err)
return err;
- *handle = gem_create.handle;
- return 0;
+}
+static int i915_mmap_offset(struct dri *dri, uint32_t handle, uint64_t *offset) +{
- struct drm_i915_gem_mmap_gtt gem_mmap = {0};
- int err;
- gem_mmap.handle = handle;
- err = ioctl(dri->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &gem_mmap);
- if (err)
return err;
- *offset = gem_mmap.offset;
- return 0;
+}
+static int amdgpu_mmap_offset(struct dri *dri, uint32_t handle,
uint64_t *offset)
+{
- union drm_amdgpu_gem_mmap gem_mmap = {{0}};
- int err;
- gem_mmap.in.handle = handle;
- err = ioctl(dri->fd, DRM_IOCTL_AMDGPU_GEM_MMAP, &gem_mmap);
- if (err)
return err;
- *offset = gem_mmap.out.addr_ptr;
- return 0;
+}
+static int radeon_mmap_offset(struct dri *dri, uint32_t handle,
uint64_t *offset)
+{
- struct drm_radeon_gem_mmap gem_mmap = {0};
- int err;
- gem_mmap.handle = handle;
- err = ioctl(dri->fd, DRM_IOCTL_RADEON_GEM_MMAP, &gem_mmap);
- if (err)
return err;
- *offset = gem_mmap.addr_ptr;
- return 0;
+}
+static struct dri *dri_open(int unit) +{
- char path[32];
- struct drm_version version = {0};
- char name[16] = {0};
- int err;
- struct dri *dri;
- dri = malloc(sizeof(*dri));
- if (!dri)
return NULL;
- sprintf(path, "/dev/dri/renderD%d", unit + 128);
- dri->fd = open(path, O_RDWR);
- if (dri->fd < 0)
goto out_free;
- version.name = name;
- version.name_len = 16;
- err = ioctl(dri->fd, DRM_IOCTL_VERSION, &version);
- if (err)
goto out_close;
- if (!strcmp(name, "amdgpu")) {
dri->alloc = amdgpu_alloc;
dri->mmap_offset = amdgpu_mmap_offset;
- } else if (!strcmp(name, "i915")) {
dri->alloc = i915_alloc;
dri->mmap_offset = i915_mmap_offset;
- } else if (!strcmp(name, "radeon")) {
dri->alloc = radeon_alloc;
dri->mmap_offset = radeon_mmap_offset;
- } else {
goto out_close;
- }
- return dri;
+out_close:
- close(dri->fd);
+out_free:
- free(dri);
- return NULL;
+}
+static void dri_close(struct dri *dri) +{
- if (!dri || dri->fd < 0)
return;
- close(dri->fd);
- free(dri);
+}
+static void dri_free_buf(struct dri *dri, uint32_t handle) +{
- struct drm_gem_close close = {0};
- close.handle = handle;
- ioctl(dri->fd, DRM_IOCTL_GEM_CLOSE, &close);
+}
+static int dri_alloc_buf(struct dri *dri, size_t size, uint32_t *handle, int *fd) +{
- struct drm_prime_handle prime_handle = {0};
- int err;
- if (!dri || dri->fd < 0)
return -EINVAL;
- err = dri->alloc(dri, size, handle);
- if (err)
return err;
- prime_handle.handle = *handle;
- prime_handle.flags = O_RDWR;
- err = ioctl(dri->fd, DRM_IOCTL_PRIME_HANDLE_TO_FD, &prime_handle);
- if (err) {
dri_free_buf(dri, *handle);
return err;
- }
- *fd = prime_handle.fd;
- return 0;
+}
+static int dri_map_buf(struct dri *dri, uint32_t handle, uint64_t *offset) +{
- if (!dri || dri->fd < 0)
return -EINVAL;
- return dri->mmap_offset(dri, handle, offset);
+}
+/*
- Abstraction of dmabuf object, allocated using the DRI abstraction defined
- above.
- */
+struct dmabuf {
- struct dri *dri;
- int fd;
- uint32_t handle;
- uint64_t map_offset;
+};
+struct dmabuf *dmabuf_alloc(uint64_t size, int unit) +{
- struct dmabuf *dmabuf;
- int err;
- dmabuf = malloc(sizeof *dmabuf);
- if (!dmabuf)
return NULL;
- dmabuf->dri = dri_open(unit);
- if (!dmabuf->dri)
goto out_free;
- err = dri_alloc_buf(dmabuf->dri, size, &dmabuf->handle, &dmabuf->fd);
- if (err)
goto out_close;
- err = dri_map_buf(dmabuf->dri, dmabuf->handle, &dmabuf->map_offset);
- if (err)
goto out_free_buf;
- return dmabuf;
+out_free_buf:
- dri_free_buf(dmabuf->dri, dmabuf->handle);
+out_close:
- dri_close(dmabuf->dri);
+out_free:
- free(dmabuf);
- return NULL;
+}
+void dmabuf_free(struct dmabuf *dmabuf) +{
- if (!dmabuf)
return;
- close(dmabuf->fd);
- dri_free_buf(dmabuf->dri, dmabuf->handle);
- dri_close(dmabuf->dri);
- free(dmabuf);
+}
+int dmabuf_get_dri_fd(struct dmabuf *dmabuf) +{
- if (!dmabuf || !dmabuf->dri)
return -1;
- return dmabuf->dri->fd;
+}
+int dmabuf_get_fd(struct dmabuf *dmabuf) +{
- if (!dmabuf)
return -1;
- return dmabuf->fd;
+}
+uint64_t dmabuf_get_offset(struct dmabuf *dmabuf) +{
- if (!dmabuf)
return -1;
- return dmabuf->map_offset;
+}
diff --git a/pyverbs/dmabuf_alloc.h b/pyverbs/dmabuf_alloc.h new file mode 100644 index 0000000..f36c337 --- /dev/null +++ b/pyverbs/dmabuf_alloc.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/*
- Copyright 2020 Intel Corporation. All rights reserved. See COPYING file
- */
+#ifndef _DRI_H_ +#define _DRI_H_
+#include <stdint.h>
+struct dmabuf;
+struct dmabuf *dmabuf_alloc(uint64_t size, int unit); +void dmabuf_free(struct dmabuf *dmabuf); +int dmabuf_get_dri_fd(struct dmabuf *dmabuf); +int dmabuf_get_fd(struct dmabuf *dmabuf); +uint64_t dmabuf_get_offset(struct dmabuf *dmabuf);
+#endif /* _DRI_H_ */ diff --git a/pyverbs/libibverbs.pxd b/pyverbs/libibverbs.pxd index 6fbba54..d76f633 100644 --- a/pyverbs/libibverbs.pxd +++ b/pyverbs/libibverbs.pxd @@ -507,6 +507,8 @@ cdef extern from 'infiniband/verbs.h': ibv_pd *ibv_alloc_pd(ibv_context *context) int ibv_dealloc_pd(ibv_pd *pd) ibv_mr *ibv_reg_mr(ibv_pd *pd, void *addr, size_t length, int access)
- ibv_mr *ibv_reg_dmabuf_mr(ibv_pd *pd, uint64_t offset, size_t length,
int ibv_dereg_mr(ibv_mr *mr) int ibv_advise_mr(ibv_pd *pd, uint32_t advice, uint32_t flags, ibv_sge *sg_list, uint32_t num_sge)uint64_t iova, int fd, int access)
diff --git a/pyverbs/mr.pxd b/pyverbs/mr.pxd index ebe8ada..d9a79ff 100644 --- a/pyverbs/mr.pxd +++ b/pyverbs/mr.pxd @@ -1,5 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) # Copyright (c) 2019, Mellanox Technologies. All rights reserved. See COPYING file +# Copyright (c) 2020, Intel Corporation. All rights reserved. See COPYING file
#cython: language_level=3
@@ -33,3 +34,8 @@ cdef class MW(PyverbsCM):
cdef class DMMR(MR): cdef object dm
+cdef class DmaBufMR(MR):
- cdef object dmabuf
- cdef unsigned long offset
- cdef object is_dmabuf_internal
diff --git a/pyverbs/mr.pyx b/pyverbs/mr.pyx index 7011da1..e4ed2dc 100644 --- a/pyverbs/mr.pyx +++ b/pyverbs/mr.pyx @@ -1,11 +1,12 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) # Copyright (c) 2019, Mellanox Technologies. All rights reserved. See COPYING file +# Copyright (c) 2020, Intel Corporation. All rights reserved. See COPYING file
import resource import logging
from posix.mman cimport mmap, munmap, MAP_PRIVATE, PROT_READ, PROT_WRITE, \
- MAP_ANONYMOUS, MAP_HUGETLB
- MAP_ANONYMOUS, MAP_HUGETLB, MAP_SHARED
from pyverbs.pyverbs_error import PyverbsError, PyverbsRDMAError, \ PyverbsUserError from libc.stdint cimport uintptr_t, SIZE_MAX @@ -14,9 +15,10 @@ from posix.stdlib cimport posix_memalign from libc.string cimport memcpy, memset cimport pyverbs.libibverbs_enums as e from pyverbs.device cimport DM -from libc.stdlib cimport free +from libc.stdlib cimport free, malloc from .cmid cimport CMID from .pd cimport PD +from .dmabuf cimport DmaBuf
cdef extern from 'sys/mman.h': cdef void* MAP_FAILED @@ -348,6 +350,103 @@ cdef class DMMR(MR): cpdef read(self, length, offset): return self.dm.copy_from_dm(offset, length)
+cdef class DmaBufMR(MR):
- def __init__(self, PD pd not None, length, access, DmaBuf dmabuf=None,
offset=0):
"""
Initializes a DmaBufMR (DMA-BUF Memory Region) of the given length
and access flags using the given PD and DmaBuf objects.
:param pd: A PD object
:param length: Length in bytes
:param access: Access flags, see ibv_access_flags enum
:param dmabuf: A DmaBuf object. One will be allocated if absent
:param offset: Byte offset from the beginning of the dma-buf
:return: The newly created DMABUFMR
"""
self.logger = logging.getLogger(self.__class__.__name__)
if dmabuf is None:
self.is_dmabuf_internal = True
dmabuf = DmaBuf(length + offset)
self.mr = v.ibv_reg_dmabuf_mr(pd.pd, offset, length, offset, dmabuf.fd, access)
if self.mr == NULL:
raise PyverbsRDMAErrno(f'Failed to register a dma-buf MR. length: {length}, access flags: {access}')
super().__init__(pd, length, access)
self.pd = pd
self.dmabuf = dmabuf
self.offset = offset
pd.add_ref(self)
dmabuf.add_ref(self)
self.logger.debug(f'Registered dma-buf ibv_mr. Length: {length}, access flags {access}')
- def __dealloc__(self):
self.close()
- cpdef close(self):
"""
Closes the underlying C object of the MR and frees the memory allocated.
:return: None
"""
if self.mr != NULL:
self.logger.debug('Closing dma-buf MR')
rc = v.ibv_dereg_mr(self.mr)
if rc != 0:
raise PyverbsRDMAError('Failed to dereg dma-buf MR', rc)
self.pd = None
self.mr = NULL
# Set self.mr to NULL before closing dmabuf because this method is
# re-entered when close_weakrefs() is called inside dmabuf.close().
if self.is_dmabuf_internal:
self.dmabuf.close()
self.dmabuf = None
- @property
- def offset(self):
return self.offset
- @property
- def dmabuf(self):
return self.dmabuf
- def write(self, data, length, offset=0):
"""
Write user data to the dma-buf backing the MR
:param data: User data to write
:param length: Length of the data to write
:param offset: Writing offset
:return: None
"""
if isinstance(data, str):
data = data.encode()
cdef int off = offset + self.offset
cdef void *buf = mmap(NULL, length + off, PROT_READ | PROT_WRITE,
MAP_SHARED, self.dmabuf.dri_fd,
self.dmabuf.map_offset)
if buf == MAP_FAILED:
raise PyverbsError(f'Failed to map dma-buf of size {length}')
memcpy(<char*>(buf + off), <char *>data, length)
munmap(buf, length + off)
- cpdef read(self, length, offset):
Note reads are generally uncached so really slow. Maybe put that as a warning somewhere.
"""
Reads data from the dma-buf backing the MR
:param length: Length of data to read
:param offset: Reading offset
:return: The data on the buffer in the requested offset
"""
cdef int off = offset + self.offset
cdef void *buf = mmap(NULL, length + off, PROT_READ | PROT_WRITE,
MAP_SHARED, self.dmabuf.dri_fd,
self.dmabuf.map_offset)
if buf == MAP_FAILED:
raise PyverbsError(f'Failed to map dma-buf of size {length}')
cdef char *data =<char*>malloc(length)
memset(data, 0, length)
memcpy(data, <char*>(buf + off), length)
munmap(buf, length + off)
res = data[:length]
free(data)
return res
def mwtype2str(mw_type): mw_types = {1:'IBV_MW_TYPE_1', 2:'IBV_MW_TYPE_2'}
gpu side looks reasonable.
One bikeshed maybe: Kernel gpu drivers are drm (for direct rendering manager). DRI is the X11 protocols to support glx direct rendering (i.e. it's direct rendering infrastructure). devnodes being put into dri is an unfortunate historical accident. I'd rename all the dri_ to drm_ for consistency with other drm users, e.g. libdrm.
Cheers, Daniel
-- 1.8.3.1
dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
On Mon, Nov 30, 2020 at 03:57:41PM +0100, Daniel Vetter wrote:
- err = ioctl(dri->fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &gem_create);
- if (err)
return err;
- *handle = gem_create.out.handle;
- return 0;
+}
+static int radeon_alloc(struct dri *dri, size_t size, uint32_t *handle)
Tbh radeon chips are old enough I wouldn't care. Also doesn't support p2p dma-buf, so always going to be in system memory when you share. Plus you also need some more flags like I suggested above I think.
What about nouveau?
Jason
On Mon, Nov 30, 2020 at 11:55:44AM -0400, Jason Gunthorpe wrote:
On Mon, Nov 30, 2020 at 03:57:41PM +0100, Daniel Vetter wrote:
- err = ioctl(dri->fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &gem_create);
- if (err)
return err;
- *handle = gem_create.out.handle;
- return 0;
+}
+static int radeon_alloc(struct dri *dri, size_t size, uint32_t *handle)
Tbh radeon chips are old enough I wouldn't care. Also doesn't support p2p dma-buf, so always going to be in system memory when you share. Plus you also need some more flags like I suggested above I think.
What about nouveau?
Realistically, the chances that someone wants to use rdma together with the upstream nouveau driver are roughly nil. Imo it also needs someone with the right hardware to make sure it works (since the flags are all kinda arcane, driver-specific stuff, testing is really needed). -Daniel
On Mon, Nov 30, 2020 at 05:04:43PM +0100, Daniel Vetter wrote:
On Mon, Nov 30, 2020 at 11:55:44AM -0400, Jason Gunthorpe wrote:
On Mon, Nov 30, 2020 at 03:57:41PM +0100, Daniel Vetter wrote:
- err = ioctl(dri->fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &gem_create);
- if (err)
return err;
- *handle = gem_create.out.handle;
- return 0;
+}
+static int radeon_alloc(struct dri *dri, size_t size, uint32_t *handle)
Tbh radeon chips are old enough I wouldn't care. Also doesn't support p2p dma-buf, so always going to be in system memory when you share. Plus you also need some more flags like I suggested above I think.
What about nouveau?
Realistically, the chances that someone wants to use rdma together with the upstream nouveau driver are roughly nil. Imo it also needs someone with the right hardware to make sure it works (since the flags are all kinda arcane, driver-specific stuff, testing is really needed).
Well, it would be helpful if we can test the mlx5 part of the implementation, and I have a lab stocked with nouveau compatible HW..
But you are right someone needs to test/etc, so this does not seem like Jianxin should worry
Jason
On Mon, Nov 30, 2020 at 12:36:42PM -0400, Jason Gunthorpe wrote:
On Mon, Nov 30, 2020 at 05:04:43PM +0100, Daniel Vetter wrote:
On Mon, Nov 30, 2020 at 11:55:44AM -0400, Jason Gunthorpe wrote:
On Mon, Nov 30, 2020 at 03:57:41PM +0100, Daniel Vetter wrote:
- err = ioctl(dri->fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &gem_create);
- if (err)
return err;
- *handle = gem_create.out.handle;
- return 0;
+}
+static int radeon_alloc(struct dri *dri, size_t size, uint32_t *handle)
Tbh radeon chips are old enough I wouldn't care. Also doesn't support p2p dma-buf, so always going to be in system memory when you share. Plus you also need some more flags like I suggested above I think.
What about nouveau?
Realistically, the chances that someone wants to use rdma together with the upstream nouveau driver are roughly nil. Imo it also needs someone with the right hardware to make sure it works (since the flags are all kinda arcane, driver-specific stuff, testing is really needed).
Well, it would be helpful if we can test the mlx5 part of the implementation, and I have a lab stocked with nouveau compatible HW..
But you are right someone needs to test/etc, so this does not seem like Jianxin should worry
Ah yes, sounds good. I can help with trying to find how to allocate vram with nouveau if you don't find it. The caveat is that nouveau doesn't do dynamic dma-buf exports, and hence supports none of the interesting flows and also not p2p. Not sure how much work it would be to roll that out (iirc it wasn't that much amdgpu code really, just endless discussions on the interface semantics and how to roll it out without breaking any of the existing dma-buf users).
Another thing that just crossed my mind: Do we have a testcase for forcing the eviction? Should be fairly easy to provoke with something like this:
- register vram-only buffer with mlx5 and do something that binds it - allocate enough vram-only buffers to overfill vram (again figuring out how much vram you have is driver specific) - touch each buffer with mmap. that should force the mlx5 buffer out. it might be that eviction isn't lru but preferentially idle buffers (i.e. not used by hw, so anything register to mlx5 won't qualify as first victims). so we might need to instead register a ton of buffers with mlx5 and access them through ibverbs - do something with mlx5 again to force the rebinding and test it all keeps working
That entire invalidate/buffer move flow is the most complex interaction I think. -Daniel
-----Original Message----- From: Daniel Vetter daniel@ffwll.ch Sent: Monday, November 30, 2020 8:56 AM To: Jason Gunthorpe jgg@ziepe.ca Cc: Daniel Vetter daniel@ffwll.ch; Xiong, Jianxin jianxin.xiong@intel.com; linux-rdma@vger.kernel.org; dri- devel@lists.freedesktop.org; Leon Romanovsky leon@kernel.org; Doug Ledford dledford@redhat.com; Vetter, Daniel daniel.vetter@intel.com; Christian Koenig christian.koenig@amd.com Subject: Re: [PATCH rdma-core v3 4/6] pyverbs: Add dma-buf based MR support
On Mon, Nov 30, 2020 at 12:36:42PM -0400, Jason Gunthorpe wrote:
On Mon, Nov 30, 2020 at 05:04:43PM +0100, Daniel Vetter wrote:
On Mon, Nov 30, 2020 at 11:55:44AM -0400, Jason Gunthorpe wrote:
On Mon, Nov 30, 2020 at 03:57:41PM +0100, Daniel Vetter wrote:
- err = ioctl(dri->fd, DRM_IOCTL_AMDGPU_GEM_CREATE, &gem_create);
- if (err)
return err;
- *handle = gem_create.out.handle;
- return 0;
+}
+static int radeon_alloc(struct dri *dri, size_t size, +uint32_t *handle)
Tbh radeon chips are old enough I wouldn't care. Also doesn't support p2p dma-buf, so always going to be in system memory when you share. Plus you also need some more flags like I suggested above I think.
What about nouveau?
Realistically, the chances that someone wants to use rdma together with the upstream nouveau driver are roughly nil. Imo it also needs someone with the right hardware to make sure it works (since the flags are all kinda arcane, driver-specific stuff, testing is really needed).
Well, it would be helpful if we can test the mlx5 part of the implementation, and I have a lab stocked with nouveau compatible HW..
But you are right someone needs to test/etc, so this does not seem like Jianxin should worry
Ah yes, sounds good. I can help with trying to find how to allocate vram with nouveau if you don't find it. The caveat is that nouveau doesn't do dynamic dma-buf exports, and hence supports none of the interesting flows and also not p2p. Not sure how much work it would be to roll that out (iirc it wasn't that much amdgpu code really, just endless discussions on the interface semantics and how to roll it out without breaking any of the existing dma-buf users).
Another thing that just crossed my mind: Do we have a testcase for forcing the eviction? Should be fairly easy to provoke with something like this:
- register vram-only buffer with mlx5 and do something that binds it
- allocate enough vram-only buffers to overfill vram (again figuring out how much vram you have is driver specific)
- touch each buffer with mmap. that should force the mlx5 buffer out. it might be that eviction isn't lru but preferentially idle buffers (i.e. not used by hw, so anything register to mlx5 won't qualify as first victims). so we might need to instead register a ton of buffers with mlx5 and access them through ibverbs
- do something with mlx5 again to force the rebinding and test it all keeps working
That entire invalidate/buffer move flow is the most complex interaction I think.
Right now on my side the evict scenario is tested with the "timeout" feature of the AMD gpu. The GPU driver would move all VRAM allocations to a system buffer after a certain period of "inactivity" (10s by default). VRAM being accessed by peer DMA is not counted as activity from the GPU's POV. I can observe the invalidation/remapping sequence by running an RDMA test for a long enough time.
I agree having a more generic mechanism to force this scenario is going to be useful.
-Daniel
Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
-----Original Message----- From: Daniel Vetter daniel@ffwll.ch Sent: Monday, November 30, 2020 6:58 AM To: Xiong, Jianxin jianxin.xiong@intel.com Cc: linux-rdma@vger.kernel.org; dri-devel@lists.freedesktop.org; Leon Romanovsky leon@kernel.org; Jason Gunthorpe jgg@ziepe.ca; Doug Ledford dledford@redhat.com; Vetter, Daniel daniel.vetter@intel.com; Christian Koenig christian.koenig@amd.com Subject: Re: [PATCH rdma-core v3 4/6] pyverbs: Add dma-buf based MR support
+cdef class DmaBuf:
- def __init__(self, size, unit=0):
"""
Allocate DmaBuf object from a GPU device. This is done through the
DRI device interface. Usually this requires the effective user id
being a member of the 'render' group.
:param size: The size (in number of bytes) of the buffer.
:param unit: The unit number of the GPU to allocate the buffer from.
:return: The newly created DmaBuf object on success.
"""
self.dmabuf_mrs = weakref.WeakSet()
self.dmabuf = dmabuf_alloc(size, unit)
if self.dmabuf == NULL:
raise PyverbsRDMAErrno(f'Failed to allocate dmabuf of size {size} on unit {unit}')
self.dri_fd = dmabuf_get_dri_fd(<dmabuf *>self.dmabuf)
dri_fd seems unused by the tests
It's used by the read/write methods of the DmaBufMR class for performing mmap.
On Fri, Nov 27, 2020 at 12:55:41PM -0800, Jianxin Xiong wrote:
+function(rdma_multifile_module PY_MODULE MODULE_NAME LINKER_FLAGS)
I think just replace rdma_cython_module with this? No good reason I can see to have two APIs?
- set(ALL_CFILES "")
- foreach(SRC_FILE ${ARGN})
- get_filename_component(FILENAME ${SRC_FILE} NAME_WE)
- get_filename_component(DIR ${SRC_FILE} DIRECTORY)
- get_filename_component(EXT ${SRC_FILE} EXT)
- if (DIR)
set(SRC_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${DIR}")
- else()
set(SRC_PATH "${CMAKE_CURRENT_SOURCE_DIR}")
- endif()
- if (${EXT} STREQUAL ".pyx")
set(PYX "${SRC_PATH}/${FILENAME}.pyx")
set(CFILE "${CMAKE_CURRENT_BINARY_DIR}/${FILENAME}.c")
include_directories(${PYTHON_INCLUDE_DIRS})
add_custom_command(
OUTPUT "${CFILE}"
MAIN_DEPENDENCY "${PYX}"
COMMAND ${CYTHON_EXECUTABLE} "${PYX}" -o "${CFILE}"
"-I${PYTHON_INCLUDE_DIRS}"
COMMENT "Cythonizing ${PYX}"
)
set(ALL_CFILES "${ALL_CFILES};${CFILE}")
- elseif(${EXT} STREQUAL ".c")
set(CFILE_ORIG "${SRC_PATH}/${FILENAME}.c")
set(CFILE "${CMAKE_CURRENT_BINARY_DIR}/${FILENAME}.c")
if (NOT ${CFILE_ORIG} STREQUAL ${CFILE})
rdma_create_symlink("${CFILE_ORIG}" "${CFILE}")
endif()
Why does this need the create_symlink? The compiler should work OK from the source file?
set(ALL_CFILES "${ALL_CFILES};${CFILE}")
- elseif(${EXT} STREQUAL ".h")
set(HFILE_ORIG "${SRC_PATH}/${FILENAME}.h")
set(HFILE "${CMAKE_CURRENT_BINARY_DIR}/${FILENAME}.h")
if (NOT ${HFILE_ORIG} STREQUAL ${HFILE})
rdma_create_symlink("${HFILE_ORIG}" "${HFILE}")
Here too? You probably don't need to specify h files at all, at worst they should only be used with publish_internal_headers
endif()
- else()
continue()
- endif()
- endforeach()
- string(REGEX REPLACE "\.so$" "" SONAME "${MODULE_NAME}${CMAKE_PYTHON_SO_SUFFIX}")
- add_library(${SONAME} SHARED ${ALL_CFILES})
- set_target_properties(${SONAME} PROPERTIES
- COMPILE_FLAGS "${CMAKE_C_FLAGS} -fPIC -fno-strict-aliasing -Wno-unused-function -Wno-redundant-decls -Wno-shadow -Wno-cast-function-type -Wno-implicit-fallthrough -Wno-unknown-warning -Wno-unknown-warning-option -Wno-deprecated-declarations ${NO_VAR_TRACKING_FLAGS}"
Ugh, you copy and pasted this, but it shouldn't have existed in the first place. Compiler arguments like this should not be specified manually. I should fix it..
Also you should cc edward on all this pyverbs stuff, he knows it all very well
It all looks reasonable to me
Jason
-----Original Message----- From: Jason Gunthorpe jgg@ziepe.ca Sent: Monday, November 30, 2020 8:08 AM To: Xiong, Jianxin jianxin.xiong@intel.com Cc: linux-rdma@vger.kernel.org; dri-devel@lists.freedesktop.org; Doug Ledford dledford@redhat.com; Leon Romanovsky leon@kernel.org; Sumit Semwal sumit.semwal@linaro.org; Christian Koenig christian.koenig@amd.com; Vetter, Daniel daniel.vetter@intel.com Subject: Re: [PATCH rdma-core v3 4/6] pyverbs: Add dma-buf based MR support
On Fri, Nov 27, 2020 at 12:55:41PM -0800, Jianxin Xiong wrote:
+function(rdma_multifile_module PY_MODULE MODULE_NAME LINKER_FLAGS)
I think just replace rdma_cython_module with this? No good reason I can see to have two APIs?
rdma_cython_module can handle many modules, but this one is for a single module. If you agree, I can merge the two by slightly tweaking the logic: each module starts with a .pyx file, followed by 0 or more .c and .h files.
- set(ALL_CFILES "")
- foreach(SRC_FILE ${ARGN})
- get_filename_component(FILENAME ${SRC_FILE} NAME_WE)
- get_filename_component(DIR ${SRC_FILE} DIRECTORY)
- get_filename_component(EXT ${SRC_FILE} EXT)
- if (DIR)
set(SRC_PATH "${CMAKE_CURRENT_SOURCE_DIR}/${DIR}")
- else()
set(SRC_PATH "${CMAKE_CURRENT_SOURCE_DIR}")
- endif()
- if (${EXT} STREQUAL ".pyx")
set(PYX "${SRC_PATH}/${FILENAME}.pyx")
set(CFILE "${CMAKE_CURRENT_BINARY_DIR}/${FILENAME}.c")
include_directories(${PYTHON_INCLUDE_DIRS})
add_custom_command(
OUTPUT "${CFILE}"
MAIN_DEPENDENCY "${PYX}"
COMMAND ${CYTHON_EXECUTABLE} "${PYX}" -o "${CFILE}"
"-I${PYTHON_INCLUDE_DIRS}"
COMMENT "Cythonizing ${PYX}"
)
set(ALL_CFILES "${ALL_CFILES};${CFILE}")
- elseif(${EXT} STREQUAL ".c")
set(CFILE_ORIG "${SRC_PATH}/${FILENAME}.c")
set(CFILE "${CMAKE_CURRENT_BINARY_DIR}/${FILENAME}.c")
if (NOT ${CFILE_ORIG} STREQUAL ${CFILE})
rdma_create_symlink("${CFILE_ORIG}" "${CFILE}")
endif()
Why does this need the create_symlink? The compiler should work OK from the source file?
You are right, the link for .c is not necessary, but the link for .h is needed.
set(ALL_CFILES "${ALL_CFILES};${CFILE}")
- elseif(${EXT} STREQUAL ".h")
set(HFILE_ORIG "${SRC_PATH}/${FILENAME}.h")
set(HFILE "${CMAKE_CURRENT_BINARY_DIR}/${FILENAME}.h")
if (NOT ${HFILE_ORIG} STREQUAL ${HFILE})
rdma_create_symlink("${HFILE_ORIG}" "${HFILE}")
Here too? You probably don't need to specify h files at all, at worst they should only be used with publish_internal_headers
Without the .h link, the compiler fails to find the header file (both dmabuf_alloc.c and the generated "dmabuf.c" contain #include "dmabuf_alloc.h").
endif()
- else()
continue()
- endif()
- endforeach()
- string(REGEX REPLACE "\.so$" "" SONAME
- "${MODULE_NAME}${CMAKE_PYTHON_SO_SUFFIX}")
- add_library(${SONAME} SHARED ${ALL_CFILES})
- set_target_properties(${SONAME} PROPERTIES
- COMPILE_FLAGS "${CMAKE_C_FLAGS} -fPIC -fno-strict-aliasing -Wno-unused-function -Wno-redundant-decls -Wno-shadow -Wno-
cast-function-type -Wno-implicit-fallthrough -Wno-unknown-warning -Wno-unknown-warning-option -Wno-deprecated-declarations ${NO_VAR_TRACKING_FLAGS}"
Ugh, you copy and pasted this, but it shouldn't have existed in the first place. Compiler arguments like this should not be specified manually. I should fix it..
Also you should cc edward on all this pyverbs stuff, he knows it all very well
Will add Edward next time. He commented a lot on the PR at github. The current github PR is in sync with this version.
It all looks reasonable to me
Jason
On Mon, Nov 30, 2020 at 05:53:39PM +0000, Xiong, Jianxin wrote:
From: Jason Gunthorpe jgg@ziepe.ca Sent: Monday, November 30, 2020 8:08 AM To: Xiong, Jianxin jianxin.xiong@intel.com Cc: linux-rdma@vger.kernel.org; dri-devel@lists.freedesktop.org; Doug Ledford dledford@redhat.com; Leon Romanovsky leon@kernel.org; Sumit Semwal sumit.semwal@linaro.org; Christian Koenig christian.koenig@amd.com; Vetter, Daniel daniel.vetter@intel.com Subject: Re: [PATCH rdma-core v3 4/6] pyverbs: Add dma-buf based MR support
On Fri, Nov 27, 2020 at 12:55:41PM -0800, Jianxin Xiong wrote:
+function(rdma_multifile_module PY_MODULE MODULE_NAME LINKER_FLAGS)
I think just replace rdma_cython_module with this? No good reason I can see to have two APIs?
rdma_cython_module can handle many modules, but this one is for a single module. If you agree, I can merge the two by slightly tweaking the logic: each module starts with a .pyx file, followed by 0 or more .c and .h files.
Then have rdma_cython_module call some rdma_single_cython_module(), which contains the code below, multiple times?
Here too? You probably don't need to specify h files at all, at worst they should only be used with publish_internal_headers
Without the .h link, the compiler fails to find the header file (both dmabuf_alloc.c and the generated "dmabuf.c" contain #include "dmabuf_alloc.h").
Header files are made 'cross module' using the "publish_internal_headers" command
But we could also hack in a -I directive to fix up the "" include for the cython output..
But it should not be handled here in the cython module command
Jason
-----Original Message----- From: Jason Gunthorpe jgg@ziepe.ca Sent: Tuesday, December 01, 2020 4:39 PM To: Xiong, Jianxin jianxin.xiong@intel.com Cc: linux-rdma@vger.kernel.org; dri-devel@lists.freedesktop.org; Doug Ledford dledford@redhat.com; Leon Romanovsky leon@kernel.org; Sumit Semwal sumit.semwal@linaro.org; Christian Koenig christian.koenig@amd.com; Vetter, Daniel daniel.vetter@intel.com Subject: Re: [PATCH rdma-core v3 4/6] pyverbs: Add dma-buf based MR support
On Mon, Nov 30, 2020 at 05:53:39PM +0000, Xiong, Jianxin wrote:
From: Jason Gunthorpe jgg@ziepe.ca Sent: Monday, November 30, 2020 8:08 AM To: Xiong, Jianxin jianxin.xiong@intel.com Cc: linux-rdma@vger.kernel.org; dri-devel@lists.freedesktop.org; Doug Ledford dledford@redhat.com; Leon Romanovsky leon@kernel.org; Sumit Semwal sumit.semwal@linaro.org; Christian Koenig christian.koenig@amd.com; Vetter, Daniel daniel.vetter@intel.com Subject: Re: [PATCH rdma-core v3 4/6] pyverbs: Add dma-buf based MR support
On Fri, Nov 27, 2020 at 12:55:41PM -0800, Jianxin Xiong wrote:
+function(rdma_multifile_module PY_MODULE MODULE_NAME +LINKER_FLAGS)
I think just replace rdma_cython_module with this? No good reason I can see to have two APIs?
rdma_cython_module can handle many modules, but this one is for a single module. If you agree, I can merge the two by slightly tweaking the logic: each module starts with a .pyx file, followed by 0 or more .c and .h files.
Then have rdma_cython_module call some rdma_single_cython_module(), which contains the code below, multiple times?
Mostly like that. Here is an outline:
function(build_one_module PY_MODULE MODULE_NAME ALL_CFILES) string(REGEX_REPLACE "\.so$" "" SONAME ${MODULE_NAME}${CMAKE_PYTHON_SO_SUFFIX}") add_library(......) set_target_properties(......) target_link_libraries(......) install(......) endfunction()
function(rdma_cython_module .......) foreach(SRC_FILE ${ARGN}) ...... # commands to parse file name If (${EXT} STREQAL ".pyx") If (ALL_CFILES AND MODULE_NAME) build_one_module(${PY_MODUE} ${MODULE_NAME} ${ALL_CFILES}) set(ALL_CFILES "") set(MODULE_NAME "") endif() ...... # commands to convert .pyx to .c set(ALL_CFILES "${ALL_CFILES};${CFILE}") elseif (${EXT} STREQAL ".c") ...... set(ALL_CFILES "${ALL_CFILES};${CFILE}") else() continue() endif() endforeach() If (ALL_CFILES AND MODULE_NAME) build_one_module(${PY_MODULE} ${MODULE_NAME} ${ALL_CFILES}) endif() endfunction()
Here too? You probably don't need to specify h files at all, at worst they should only be used with publish_internal_headers
Without the .h link, the compiler fails to find the header file (both dmabuf_alloc.c and the generated "dmabuf.c" contain #include "dmabuf_alloc.h").
Header files are made 'cross module' using the "publish_internal_headers" command
But we could also hack in a -I directive to fix up the "" include for the cython output..
But it should not be handled here in the cython module command
Sure. That can be fixed.
Jason
Define a set of unit tests similar to regular MR tests and a set of tests for send/recv and rdma traffic using dma-buf MRs. Add a utility function to generate access flags for dma-buf based MRs because the set of supported flags is smaller.
Signed-off-by: Jianxin Xiong jianxin.xiong@intel.com --- tests/test_mr.py | 239 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- tests/utils.py | 26 ++++++ 2 files changed, 264 insertions(+), 1 deletion(-)
diff --git a/tests/test_mr.py b/tests/test_mr.py index adc649c..52cf20a 100644 --- a/tests/test_mr.py +++ b/tests/test_mr.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) # Copyright (c) 2019 Mellanox Technologies, Inc. All rights reserved. See COPYING file +# Copyright (c) 2020 Intel Corporation. All rights reserved. See COPYING file """ Test module for pyverbs' mr module. """ @@ -9,9 +10,10 @@ import errno
from tests.base import PyverbsAPITestCase, RCResources, RDMATestCase from pyverbs.pyverbs_error import PyverbsRDMAError, PyverbsError -from pyverbs.mr import MR, MW, DMMR, MWBindInfo, MWBind +from pyverbs.mr import MR, MW, DMMR, DmaBufMR, MWBindInfo, MWBind from pyverbs.qp import QPCap, QPInitAttr, QPAttr, QP from pyverbs.wr import SendWR +from pyverbs.dmabuf import DmaBuf import pyverbs.device as d from pyverbs.pd import PD import pyverbs.enums as e @@ -366,3 +368,238 @@ class DMMRTest(PyverbsAPITestCase): dm_mr = DMMR(pd, dm_mr_len, e.IBV_ACCESS_ZERO_BASED, dm=dm, offset=dm_mr_offset) dm_mr.close() + + +def check_dmabuf_support(): + """ + Check if dma-buf allocation is supported by the system. + Skip the test on failure. + """ + try: + DmaBuf(1) + except PyverbsRDMAError as ex: + if ex.error_code == errno.ENOENT: + raise unittest.SkipTest('Device /dev/dri/renderD* is not present') + if ex.error_code == errno.EACCES: + raise unittest.SkipTest('Lack of permission to access /dev/dri/renderD*') + + +def check_dmabuf_mr_support(pd): + """ + Check if dma-buf MR registration is supported by the driver. + Skip the test on failure + """ + try: + DmaBufMR(pd, 1, 0) + except PyverbsRDMAError as ex: + if ex.error_code == errno.EOPNOTSUPP: + raise unittest.SkipTest('Reg dma-buf MR is not supported') + + +class DmaBufMRTest(PyverbsAPITestCase): + """ + Test various functionalities of the DmaBufMR class. 
+ """ + def test_dmabuf_reg_mr(self): + """ + Test ibv_reg_dmabuf_mr() + """ + check_dmabuf_support() + for ctx, attr, attr_ex in self.devices: + with PD(ctx) as pd: + check_dmabuf_mr_support(pd) + flags = u.get_dmabuf_access_flags(ctx) + for f in flags: + len = u.get_mr_length() + for off in [0, len//2]: + with DmaBufMR(pd, len, f, offset=off) as mr: + pass + + def test_dmabuf_dereg_mr(self): + """ + Test ibv_dereg_mr() with DmaBufMR + """ + check_dmabuf_support() + for ctx, attr, attr_ex in self.devices: + with PD(ctx) as pd: + check_dmabuf_mr_support(pd) + flags = u.get_dmabuf_access_flags(ctx) + for f in flags: + len = u.get_mr_length() + for off in [0, len//2]: + with DmaBufMR(pd, len, f, offset=off) as mr: + mr.close() + + def test_dmabuf_dereg_mr_twice(self): + """ + Verify that explicit call to DmaBufMR's close() doesn't fail + """ + check_dmabuf_support() + for ctx, attr, attr_ex in self.devices: + with PD(ctx) as pd: + check_dmabuf_mr_support(pd) + flags = u.get_dmabuf_access_flags(ctx) + for f in flags: + len = u.get_mr_length() + for off in [0, len//2]: + with DmaBufMR(pd, len, f, offset=off) as mr: + # Pyverbs supports multiple destruction of objects, + # we are not expecting an exception here. 
+ mr.close() + mr.close() + + def test_dmabuf_reg_mr_bad_flags(self): + """ + Verify that illegal flags combination fails as expected + """ + check_dmabuf_support() + for ctx, attr, attr_ex in self.devices: + with PD(ctx) as pd: + check_dmabuf_mr_support(pd) + for i in range(5): + flags = random.sample([e.IBV_ACCESS_REMOTE_WRITE, + e.IBV_ACCESS_REMOTE_ATOMIC], + random.randint(1, 2)) + mr_flags = 0 + for i in flags: + mr_flags += i.value + try: + DmaBufMR(pd, u.get_mr_length(), mr_flags) + except PyverbsRDMAError as err: + assert 'Failed to register a dma-buf MR' in err.args[0] + else: + raise PyverbsRDMAError('Registered a dma-buf MR with illegal falgs') + + def test_dmabuf_write(self): + """ + Test writing to DmaBufMR's buffer + """ + check_dmabuf_support() + for ctx, attr, attr_ex in self.devices: + with PD(ctx) as pd: + check_dmabuf_mr_support(pd) + for i in range(10): + mr_len = u.get_mr_length() + flags = u.get_dmabuf_access_flags(ctx) + for f in flags: + for mr_off in [0, mr_len//2]: + with DmaBufMR(pd, mr_len, f, offset=mr_off) as mr: + write_len = min(random.randint(1, MAX_IO_LEN), + mr_len) + mr.write('a' * write_len, write_len) + + def test_dmabuf_read(self): + """ + Test reading from DmaBufMR's buffer + """ + check_dmabuf_support() + for ctx, attr, attr_ex in self.devices: + with PD(ctx) as pd: + check_dmabuf_mr_support(pd) + for i in range(10): + mr_len = u.get_mr_length() + flags = u.get_dmabuf_access_flags(ctx) + for f in flags: + for mr_off in [0, mr_len//2]: + with DmaBufMR(pd, mr_len, f, offset=mr_off) as mr: + write_len = min(random.randint(1, MAX_IO_LEN), + mr_len) + write_str = 'a' * write_len + mr.write(write_str, write_len) + read_len = random.randint(1, write_len) + offset = random.randint(0, write_len-read_len) + read_str = mr.read(read_len, offset).decode() + assert read_str in write_str + + def test_dmabuf_lkey(self): + """ + Test reading lkey property + """ + check_dmabuf_support() + for ctx, attr, attr_ex in self.devices: + with PD(ctx) 
as pd: + check_dmabuf_mr_support(pd) + length = u.get_mr_length() + flags = u.get_dmabuf_access_flags(ctx) + for f in flags: + with DmaBufMR(pd, length, f) as mr: + mr.lkey + + def test_dmabuf_rkey(self): + """ + Test reading rkey property + """ + check_dmabuf_support() + for ctx, attr, attr_ex in self.devices: + with PD(ctx) as pd: + check_dmabuf_mr_support(pd) + length = u.get_mr_length() + flags = u.get_dmabuf_access_flags(ctx) + for f in flags: + with DmaBufMR(pd, length, f) as mr: + mr.rkey + + +class DmaBufRC(RCResources): + def __init__(self, dev_name, ib_port, gid_index): + """ + Initialize an DmaBufRC object. + :param dev_name: Device name to be used + :param ib_port: IB port of the device to use + :param gid_index: Which GID index to use + """ + super(DmaBufRC, self).__init__(dev_name=dev_name, ib_port=ib_port, + gid_index=gid_index) + + def create_mr(self): + check_dmabuf_support() + check_dmabuf_mr_support(self.pd) + access = e.IBV_ACCESS_LOCAL_WRITE | e.IBV_ACCESS_REMOTE_WRITE + mr = DmaBufMR(self.pd, self.msg_size, access) + self.mr = mr + + def create_qp_attr(self): + qp_attr = QPAttr(port_num=self.ib_port) + qp_access = e.IBV_ACCESS_LOCAL_WRITE | e.IBV_ACCESS_REMOTE_WRITE + qp_attr.qp_access_flags = qp_access + return qp_attr + + +class DmaBufTestCase(RDMATestCase): + def setUp(self): + super(DmaBufTestCase, self).setUp() + self.iters = 100 + + def create_players(self, resource, **resource_arg): + """ + Init dma-buf tests resources. + :param resource: The RDMA resources to use. A class of type + BaseResources. + :param resource_arg: Dict of args that specify the resource specific + attributes. + :return: The (client, server) resources. 
+ """ + client = resource(**self.dev_info, **resource_arg) + server = resource(**self.dev_info, **resource_arg) + client.pre_run(server.psns, server.qps_num) + server.pre_run(client.psns, client.qps_num) + return client, server + + def test_dmabuf_rc_traffic(self): + """ + Test send/recv using dma-buf MR over RC + """ + client, server = self.create_players(DmaBufRC) + u.traffic(client, server, self.iters, self.gid_index, self.ib_port) + + def test_dmabuf_rdma_traffic(self): + """ + Test rdma write using dma-buf MR + """ + client, server = self.create_players(DmaBufRC) + server.rkey = client.mr.rkey + server.remote_addr = client.mr.offset + client.rkey = server.mr.rkey + client.remote_addr = server.mr.offset + u.rdma_traffic(client, server, self.iters, self.gid_index, self.ib_port, + send_op=e.IBV_WR_RDMA_WRITE) diff --git a/tests/utils.py b/tests/utils.py index 7039f41..d3d5c16 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) # Copyright (c) 2019 Mellanox Technologies, Inc. All rights reserved. See COPYING file +# Copyright (c) 2020 Intel Corporation. All rights reserved. See COPYING file """ Provide some useful helper function for pyverbs' tests. """ @@ -94,6 +95,31 @@ def get_access_flags(ctx): return arr
+def get_dmabuf_access_flags(ctx): + """ + Similar to get_access_flags, except that dma-buf MR only support + a subset of the flags. + :param ctx: Device Context to check capabilities + :return: A random legal value for MR flags + """ + attr = ctx.query_device() + vals = [e.IBV_ACCESS_LOCAL_WRITE, e.IBV_ACCESS_REMOTE_WRITE, + e.IBV_ACCESS_REMOTE_READ, e.IBV_ACCESS_REMOTE_ATOMIC, + e.IBV_ACCESS_RELAXED_ORDERING] + if not attr.atomic_caps & e.IBV_ATOMIC_HCA: + vals.remove(e.IBV_ACCESS_REMOTE_ATOMIC) + arr = [] + for i in range(1, len(vals)): + tmp = list(com(vals, i)) + tmp = filter(filter_illegal_access_flags, tmp) + for t in tmp: # Iterate legal combinations and bitwise OR them + val = 0 + for flag in t: + val += flag.value + arr.append(val) + return arr + + def get_dm_attrs(dm_len): """ Initializes an AllocDmAttr member with the given length and random
On Fri, Nov 27, 2020 at 12:55:42PM -0800, Jianxin Xiong wrote:
Define a set of unit tests similar to regular MR tests and a set of tests for send/recv and rdma traffic using dma-buf MRs. Add a utility function to generate access flags for dma-buf based MRs because the set of supported flags is smaller.
Signed-off-by: Jianxin Xiong jianxin.xiong@intel.com
tests/test_mr.py | 239 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- tests/utils.py | 26 ++++++ 2 files changed, 264 insertions(+), 1 deletion(-)
diff --git a/tests/test_mr.py b/tests/test_mr.py index adc649c..52cf20a 100644 --- a/tests/test_mr.py +++ b/tests/test_mr.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) # Copyright (c) 2019 Mellanox Technologies, Inc. All rights reserved. See COPYING file +# Copyright (c) 2020 Intel Corporation. All rights reserved. See COPYING file """ Test module for pyverbs' mr module. """ @@ -9,9 +10,10 @@ import errno
from tests.base import PyverbsAPITestCase, RCResources, RDMATestCase from pyverbs.pyverbs_error import PyverbsRDMAError, PyverbsError -from pyverbs.mr import MR, MW, DMMR, MWBindInfo, MWBind +from pyverbs.mr import MR, MW, DMMR, DmaBufMR, MWBindInfo, MWBind from pyverbs.qp import QPCap, QPInitAttr, QPAttr, QP from pyverbs.wr import SendWR +from pyverbs.dmabuf import DmaBuf import pyverbs.device as d from pyverbs.pd import PD import pyverbs.enums as e @@ -366,3 +368,238 @@ class DMMRTest(PyverbsAPITestCase): dm_mr = DMMR(pd, dm_mr_len, e.IBV_ACCESS_ZERO_BASED, dm=dm, offset=dm_mr_offset) dm_mr.close()
+def check_dmabuf_support():
- """
- Check if dma-buf allocation is supported by the system.
- Skip the test on failure.
- """
- try:
DmaBuf(1)
Hardcoding gpu unit 1 here (and in other places) is probably not quite what we want. Not sure what you want to do in the test framework here instead. -Daniel
- except PyverbsRDMAError as ex:
if ex.error_code == errno.ENOENT:
raise unittest.SkipTest('Device /dev/dri/renderD* is not present')
if ex.error_code == errno.EACCES:
raise unittest.SkipTest('Lack of permission to access /dev/dri/renderD*')
+def check_dmabuf_mr_support(pd):
- """
- Check if dma-buf MR registration is supported by the driver.
- Skip the test on failure
- """
- try:
DmaBufMR(pd, 1, 0)
- except PyverbsRDMAError as ex:
if ex.error_code == errno.EOPNOTSUPP:
raise unittest.SkipTest('Reg dma-buf MR is not supported')
+class DmaBufMRTest(PyverbsAPITestCase):
- """
- Test various functionalities of the DmaBufMR class.
- """
- def test_dmabuf_reg_mr(self):
"""
Test ibv_reg_dmabuf_mr()
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
len = u.get_mr_length()
for off in [0, len//2]:
with DmaBufMR(pd, len, f, offset=off) as mr:
pass
- def test_dmabuf_dereg_mr(self):
"""
Test ibv_dereg_mr() with DmaBufMR
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
len = u.get_mr_length()
for off in [0, len//2]:
with DmaBufMR(pd, len, f, offset=off) as mr:
mr.close()
- def test_dmabuf_dereg_mr_twice(self):
"""
Verify that explicit call to DmaBufMR's close() doesn't fail
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
len = u.get_mr_length()
for off in [0, len//2]:
with DmaBufMR(pd, len, f, offset=off) as mr:
# Pyverbs supports multiple destruction of objects,
# we are not expecting an exception here.
mr.close()
mr.close()
- def test_dmabuf_reg_mr_bad_flags(self):
"""
Verify that illegal flags combination fails as expected
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
for i in range(5):
flags = random.sample([e.IBV_ACCESS_REMOTE_WRITE,
e.IBV_ACCESS_REMOTE_ATOMIC],
random.randint(1, 2))
mr_flags = 0
for i in flags:
mr_flags += i.value
try:
DmaBufMR(pd, u.get_mr_length(), mr_flags)
except PyverbsRDMAError as err:
assert 'Failed to register a dma-buf MR' in err.args[0]
else:
raise PyverbsRDMAError('Registered a dma-buf MR with illegal falgs')
- def test_dmabuf_write(self):
"""
Test writing to DmaBufMR's buffer
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
for i in range(10):
mr_len = u.get_mr_length()
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
for mr_off in [0, mr_len//2]:
with DmaBufMR(pd, mr_len, f, offset=mr_off) as mr:
write_len = min(random.randint(1, MAX_IO_LEN),
mr_len)
mr.write('a' * write_len, write_len)
- def test_dmabuf_read(self):
"""
Test reading from DmaBufMR's buffer
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
for i in range(10):
mr_len = u.get_mr_length()
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
for mr_off in [0, mr_len//2]:
with DmaBufMR(pd, mr_len, f, offset=mr_off) as mr:
write_len = min(random.randint(1, MAX_IO_LEN),
mr_len)
write_str = 'a' * write_len
mr.write(write_str, write_len)
read_len = random.randint(1, write_len)
offset = random.randint(0, write_len-read_len)
read_str = mr.read(read_len, offset).decode()
assert read_str in write_str
- def test_dmabuf_lkey(self):
"""
Test reading lkey property
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
length = u.get_mr_length()
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
with DmaBufMR(pd, length, f) as mr:
mr.lkey
- def test_dmabuf_rkey(self):
"""
Test reading rkey property
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
length = u.get_mr_length()
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
with DmaBufMR(pd, length, f) as mr:
mr.rkey
+class DmaBufRC(RCResources):
- def __init__(self, dev_name, ib_port, gid_index):
"""
Initialize an DmaBufRC object.
:param dev_name: Device name to be used
:param ib_port: IB port of the device to use
:param gid_index: Which GID index to use
"""
super(DmaBufRC, self).__init__(dev_name=dev_name, ib_port=ib_port,
gid_index=gid_index)
- def create_mr(self):
check_dmabuf_support()
check_dmabuf_mr_support(self.pd)
access = e.IBV_ACCESS_LOCAL_WRITE | e.IBV_ACCESS_REMOTE_WRITE
mr = DmaBufMR(self.pd, self.msg_size, access)
self.mr = mr
- def create_qp_attr(self):
qp_attr = QPAttr(port_num=self.ib_port)
qp_access = e.IBV_ACCESS_LOCAL_WRITE | e.IBV_ACCESS_REMOTE_WRITE
qp_attr.qp_access_flags = qp_access
return qp_attr
+class DmaBufTestCase(RDMATestCase):
- def setUp(self):
super(DmaBufTestCase, self).setUp()
self.iters = 100
- def create_players(self, resource, **resource_arg):
"""
Init dma-buf tests resources.
:param resource: The RDMA resources to use. A class of type
BaseResources.
:param resource_arg: Dict of args that specify the resource specific
attributes.
:return: The (client, server) resources.
"""
client = resource(**self.dev_info, **resource_arg)
server = resource(**self.dev_info, **resource_arg)
client.pre_run(server.psns, server.qps_num)
server.pre_run(client.psns, client.qps_num)
return client, server
- def test_dmabuf_rc_traffic(self):
"""
Test send/recv using dma-buf MR over RC
"""
client, server = self.create_players(DmaBufRC)
u.traffic(client, server, self.iters, self.gid_index, self.ib_port)
- def test_dmabuf_rdma_traffic(self):
"""
Test rdma write using dma-buf MR
"""
client, server = self.create_players(DmaBufRC)
server.rkey = client.mr.rkey
server.remote_addr = client.mr.offset
client.rkey = server.mr.rkey
client.remote_addr = server.mr.offset
u.rdma_traffic(client, server, self.iters, self.gid_index, self.ib_port,
send_op=e.IBV_WR_RDMA_WRITE)
diff --git a/tests/utils.py b/tests/utils.py index 7039f41..d3d5c16 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) # Copyright (c) 2019 Mellanox Technologies, Inc. All rights reserved. See COPYING file +# Copyright (c) 2020 Intel Corporation. All rights reserved. See COPYING file """ Provide some useful helper function for pyverbs' tests. """ @@ -94,6 +95,31 @@ def get_access_flags(ctx): return arr
+def get_dmabuf_access_flags(ctx):
- """
- Similar to get_access_flags, except that dma-buf MR only support
- a subset of the flags.
- :param ctx: Device Context to check capabilities
- :return: A random legal value for MR flags
- """
- attr = ctx.query_device()
- vals = [e.IBV_ACCESS_LOCAL_WRITE, e.IBV_ACCESS_REMOTE_WRITE,
e.IBV_ACCESS_REMOTE_READ, e.IBV_ACCESS_REMOTE_ATOMIC,
e.IBV_ACCESS_RELAXED_ORDERING]
- if not attr.atomic_caps & e.IBV_ATOMIC_HCA:
vals.remove(e.IBV_ACCESS_REMOTE_ATOMIC)
- arr = []
- for i in range(1, len(vals)):
tmp = list(com(vals, i))
tmp = filter(filter_illegal_access_flags, tmp)
for t in tmp: # Iterate legal combinations and bitwise OR them
val = 0
for flag in t:
val += flag.value
arr.append(val)
- return arr
def get_dm_attrs(dm_len): """ Initializes an AllocDmAttr member with the given length and random -- 1.8.3.1
dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
-----Original Message----- From: Daniel Vetter daniel@ffwll.ch Sent: Monday, November 30, 2020 7:00 AM To: Xiong, Jianxin jianxin.xiong@intel.com Cc: linux-rdma@vger.kernel.org; dri-devel@lists.freedesktop.org; Leon Romanovsky leon@kernel.org; Jason Gunthorpe jgg@ziepe.ca; Doug Ledford dledford@redhat.com; Vetter, Daniel daniel.vetter@intel.com; Christian Koenig christian.koenig@amd.com Subject: Re: [PATCH rdma-core v3 5/6] tests: Add tests for dma-buf based memory regions
On Fri, Nov 27, 2020 at 12:55:42PM -0800, Jianxin Xiong wrote:
Define a set of unit tests similar to regular MR tests and a set of tests for send/recv and rdma traffic using dma-buf MRs. Add a utility function to generate access flags for dma-buf based MRs because the set of supported flags is smaller.
Signed-off-by: Jianxin Xiong jianxin.xiong@intel.com
tests/test_mr.py | 239 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- tests/utils.py | 26 ++++++ 2 files changed, 264 insertions(+), 1 deletion(-)
diff --git a/tests/test_mr.py b/tests/test_mr.py index adc649c..52cf20a 100644 --- a/tests/test_mr.py +++ b/tests/test_mr.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) # Copyright (c) 2019 Mellanox Technologies, Inc. All rights reserved. See COPYING file +# Copyright (c) 2020 Intel Corporation. All rights reserved. See +COPYING file """ Test module for pyverbs' mr module. """ @@ -9,9 +10,10 @@ import errno
from tests.base import PyverbsAPITestCase, RCResources, RDMATestCase from pyverbs.pyverbs_error import PyverbsRDMAError, PyverbsError -from pyverbs.mr import MR, MW, DMMR, MWBindInfo, MWBind +from pyverbs.mr import MR, MW, DMMR, DmaBufMR, MWBindInfo, MWBind from pyverbs.qp import QPCap, QPInitAttr, QPAttr, QP from pyverbs.wr import SendWR +from pyverbs.dmabuf import DmaBuf import pyverbs.device as d from pyverbs.pd import PD import pyverbs.enums as e @@ -366,3 +368,238 @@ class DMMRTest(PyverbsAPITestCase): dm_mr = DMMR(pd, dm_mr_len, e.IBV_ACCESS_ZERO_BASED, dm=dm, offset=dm_mr_offset) dm_mr.close()
+def check_dmabuf_support():
- """
- Check if dma-buf allocation is supported by the system.
- Skip the test on failure.
- """
- try:
DmaBuf(1)
Hardcoding gpu unit 1 here (and in other places) is probably not quite what we want. Not sure what you want to do in the test framework here instead.
'1' here is the buffer size. Unit is the default value 0. We could probably add a command line argument to the test to set the preferred gpu unit.
- except PyverbsRDMAError as ex:
if ex.error_code == errno.ENOENT:
raise unittest.SkipTest('Device /dev/dri/renderD* is not present')
if ex.error_code == errno.EACCES:
raise unittest.SkipTest('Lack of permission to access
- /dev/dri/renderD*')
+def check_dmabuf_mr_support(pd):
- """
- Check if dma-buf MR registration is supported by the driver.
- Skip the test on failure
- """
- try:
DmaBufMR(pd, 1, 0)
- except PyverbsRDMAError as ex:
if ex.error_code == errno.EOPNOTSUPP:
raise unittest.SkipTest('Reg dma-buf MR is not
+supported')
+class DmaBufMRTest(PyverbsAPITestCase):
- """
- Test various functionalities of the DmaBufMR class.
- """
- def test_dmabuf_reg_mr(self):
"""
Test ibv_reg_dmabuf_mr()
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
len = u.get_mr_length()
for off in [0, len//2]:
with DmaBufMR(pd, len, f, offset=off) as mr:
pass
- def test_dmabuf_dereg_mr(self):
"""
Test ibv_dereg_mr() with DmaBufMR
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
len = u.get_mr_length()
for off in [0, len//2]:
with DmaBufMR(pd, len, f, offset=off) as mr:
mr.close()
- def test_dmabuf_dereg_mr_twice(self):
"""
Verify that explicit call to DmaBufMR's close() doesn't fail
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
len = u.get_mr_length()
for off in [0, len//2]:
with DmaBufMR(pd, len, f, offset=off) as mr:
# Pyverbs supports multiple destruction of objects,
# we are not expecting an exception here.
mr.close()
mr.close()
- def test_dmabuf_reg_mr_bad_flags(self):
"""
Verify that illegal flags combination fails as expected
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
for i in range(5):
flags = random.sample([e.IBV_ACCESS_REMOTE_WRITE,
e.IBV_ACCESS_REMOTE_ATOMIC],
random.randint(1, 2))
mr_flags = 0
for i in flags:
mr_flags += i.value
try:
DmaBufMR(pd, u.get_mr_length(), mr_flags)
except PyverbsRDMAError as err:
assert 'Failed to register a dma-buf MR' in err.args[0]
else:
raise PyverbsRDMAError('Registered a dma-buf
- MR with illegal falgs')
- def test_dmabuf_write(self):
"""
Test writing to DmaBufMR's buffer
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
for i in range(10):
mr_len = u.get_mr_length()
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
for mr_off in [0, mr_len//2]:
with DmaBufMR(pd, mr_len, f, offset=mr_off) as mr:
write_len = min(random.randint(1, MAX_IO_LEN),
mr_len)
mr.write('a' * write_len, write_len)
- def test_dmabuf_read(self):
"""
Test reading from DmaBufMR's buffer
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
for i in range(10):
mr_len = u.get_mr_length()
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
for mr_off in [0, mr_len//2]:
with DmaBufMR(pd, mr_len, f, offset=mr_off) as mr:
write_len = min(random.randint(1, MAX_IO_LEN),
mr_len)
write_str = 'a' * write_len
mr.write(write_str, write_len)
read_len = random.randint(1, write_len)
offset = random.randint(0, write_len-read_len)
read_str = mr.read(read_len, offset).decode()
assert read_str in write_str
- def test_dmabuf_lkey(self):
"""
Test reading lkey property
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
length = u.get_mr_length()
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
with DmaBufMR(pd, length, f) as mr:
mr.lkey
- def test_dmabuf_rkey(self):
"""
Test reading rkey property
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
length = u.get_mr_length()
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
with DmaBufMR(pd, length, f) as mr:
mr.rkey
+class DmaBufRC(RCResources):
- def __init__(self, dev_name, ib_port, gid_index):
"""
Initialize an DmaBufRC object.
:param dev_name: Device name to be used
:param ib_port: IB port of the device to use
:param gid_index: Which GID index to use
"""
super(DmaBufRC, self).__init__(dev_name=dev_name, ib_port=ib_port,
gid_index=gid_index)
- def create_mr(self):
check_dmabuf_support()
check_dmabuf_mr_support(self.pd)
access = e.IBV_ACCESS_LOCAL_WRITE | e.IBV_ACCESS_REMOTE_WRITE
mr = DmaBufMR(self.pd, self.msg_size, access)
self.mr = mr
- def create_qp_attr(self):
qp_attr = QPAttr(port_num=self.ib_port)
qp_access = e.IBV_ACCESS_LOCAL_WRITE | e.IBV_ACCESS_REMOTE_WRITE
qp_attr.qp_access_flags = qp_access
return qp_attr
+class DmaBufTestCase(RDMATestCase):
- def setUp(self):
super(DmaBufTestCase, self).setUp()
self.iters = 100
- def create_players(self, resource, **resource_arg):
"""
Init dma-buf tests resources.
:param resource: The RDMA resources to use. A class of type
BaseResources.
:param resource_arg: Dict of args that specify the resource specific
attributes.
:return: The (client, server) resources.
"""
client = resource(**self.dev_info, **resource_arg)
server = resource(**self.dev_info, **resource_arg)
client.pre_run(server.psns, server.qps_num)
server.pre_run(client.psns, client.qps_num)
return client, server
- def test_dmabuf_rc_traffic(self):
"""
Test send/recv using dma-buf MR over RC
"""
client, server = self.create_players(DmaBufRC)
u.traffic(client, server, self.iters, self.gid_index,
- self.ib_port)
- def test_dmabuf_rdma_traffic(self):
"""
Test rdma write using dma-buf MR
"""
client, server = self.create_players(DmaBufRC)
server.rkey = client.mr.rkey
server.remote_addr = client.mr.offset
client.rkey = server.mr.rkey
client.remote_addr = server.mr.offset
u.rdma_traffic(client, server, self.iters, self.gid_index, self.ib_port,
send_op=e.IBV_WR_RDMA_WRITE)
diff --git a/tests/utils.py b/tests/utils.py index 7039f41..d3d5c16 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) # Copyright (c) 2019 Mellanox Technologies, Inc. All rights reserved. See COPYING file +# Copyright (c) 2020 Intel Corporation. All rights reserved. See +COPYING file """ Provide some useful helper function for pyverbs' tests. """ @@ -94,6 +95,31 @@ def get_access_flags(ctx): return arr
+def get_dmabuf_access_flags(ctx):
- """
- Similar to get_access_flags, except that dma-buf MR only support
- a subset of the flags.
- :param ctx: Device Context to check capabilities
- :return: A random legal value for MR flags
- """
- attr = ctx.query_device()
- vals = [e.IBV_ACCESS_LOCAL_WRITE, e.IBV_ACCESS_REMOTE_WRITE,
e.IBV_ACCESS_REMOTE_READ, e.IBV_ACCESS_REMOTE_ATOMIC,
e.IBV_ACCESS_RELAXED_ORDERING]
- if not attr.atomic_caps & e.IBV_ATOMIC_HCA:
vals.remove(e.IBV_ACCESS_REMOTE_ATOMIC)
- arr = []
- for i in range(1, len(vals)):
tmp = list(com(vals, i))
tmp = filter(filter_illegal_access_flags, tmp)
for t in tmp: # Iterate legal combinations and bitwise OR them
val = 0
for flag in t:
val += flag.value
arr.append(val)
- return arr
def get_dm_attrs(dm_len): """ Initializes an AllocDmAttr member with the given length and random -- 1.8.3.1
dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
-- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
On Mon, Nov 30, 2020 at 05:35:06PM +0000, Xiong, Jianxin wrote:
-----Original Message----- From: Daniel Vetter daniel@ffwll.ch Sent: Monday, November 30, 2020 7:00 AM To: Xiong, Jianxin jianxin.xiong@intel.com Cc: linux-rdma@vger.kernel.org; dri-devel@lists.freedesktop.org; Leon Romanovsky leon@kernel.org; Jason Gunthorpe jgg@ziepe.ca; Doug Ledford dledford@redhat.com; Vetter, Daniel daniel.vetter@intel.com; Christian Koenig christian.koenig@amd.com Subject: Re: [PATCH rdma-core v3 5/6] tests: Add tests for dma-buf based memory regions
On Fri, Nov 27, 2020 at 12:55:42PM -0800, Jianxin Xiong wrote:
Define a set of unit tests similar to regular MR tests and a set of tests for send/recv and rdma traffic using dma-buf MRs. Add a utility function to generate access flags for dma-buf based MRs because the set of supported flags is smaller.
Signed-off-by: Jianxin Xiong jianxin.xiong@intel.com
tests/test_mr.py | 239 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- tests/utils.py | 26 ++++++ 2 files changed, 264 insertions(+), 1 deletion(-)
diff --git a/tests/test_mr.py b/tests/test_mr.py index adc649c..52cf20a 100644 --- a/tests/test_mr.py +++ b/tests/test_mr.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) # Copyright (c) 2019 Mellanox Technologies, Inc. All rights reserved. See COPYING file +# Copyright (c) 2020 Intel Corporation. All rights reserved. See +COPYING file """ Test module for pyverbs' mr module. """ @@ -9,9 +10,10 @@ import errno
from tests.base import PyverbsAPITestCase, RCResources, RDMATestCase from pyverbs.pyverbs_error import PyverbsRDMAError, PyverbsError -from pyverbs.mr import MR, MW, DMMR, MWBindInfo, MWBind +from pyverbs.mr import MR, MW, DMMR, DmaBufMR, MWBindInfo, MWBind from pyverbs.qp import QPCap, QPInitAttr, QPAttr, QP from pyverbs.wr import SendWR +from pyverbs.dmabuf import DmaBuf import pyverbs.device as d from pyverbs.pd import PD import pyverbs.enums as e @@ -366,3 +368,238 @@ class DMMRTest(PyverbsAPITestCase): dm_mr = DMMR(pd, dm_mr_len, e.IBV_ACCESS_ZERO_BASED, dm=dm, offset=dm_mr_offset) dm_mr.close()
+def check_dmabuf_support():
- """
- Check if dma-buf allocation is supported by the system.
- Skip the test on failure.
- """
- try:
DmaBuf(1)
Hardcoding gpu unit 1 here (and in other places) is probably not quite what we want. Not sure what you want to do in the test framework here instead.
The '1' here is the buffer size, not the GPU unit; the unit is left at its default value of 0. We could probably add a command-line argument to the tests to set the preferred GPU unit.
Oh I mixed up my python, not really fluent in that :-) Some means to set the preferred unit would still be good I think. -Daniel
- except PyverbsRDMAError as ex:
if ex.error_code == errno.ENOENT:
raise unittest.SkipTest('Device /dev/dri/renderD* is not present')
if ex.error_code == errno.EACCES:
raise unittest.SkipTest('Lack of permission to access
- /dev/dri/renderD*')
+def check_dmabuf_mr_support(pd):
- """
- Check if dma-buf MR registration is supported by the driver.
- Skip the test on failure
- """
- try:
DmaBufMR(pd, 1, 0)
- except PyverbsRDMAError as ex:
if ex.error_code == errno.EOPNOTSUPP:
raise unittest.SkipTest('Reg dma-buf MR is not
+supported')
+class DmaBufMRTest(PyverbsAPITestCase):
- """
- Test various functionalities of the DmaBufMR class.
- """
- def test_dmabuf_reg_mr(self):
"""
Test ibv_reg_dmabuf_mr()
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
len = u.get_mr_length()
for off in [0, len//2]:
with DmaBufMR(pd, len, f, offset=off) as mr:
pass
- def test_dmabuf_dereg_mr(self):
"""
Test ibv_dereg_mr() with DmaBufMR
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
len = u.get_mr_length()
for off in [0, len//2]:
with DmaBufMR(pd, len, f, offset=off) as mr:
mr.close()
- def test_dmabuf_dereg_mr_twice(self):
"""
Verify that explicit call to DmaBufMR's close() doesn't fail
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
len = u.get_mr_length()
for off in [0, len//2]:
with DmaBufMR(pd, len, f, offset=off) as mr:
# Pyverbs supports multiple destruction of objects,
# we are not expecting an exception here.
mr.close()
mr.close()
- def test_dmabuf_reg_mr_bad_flags(self):
"""
Verify that illegal flags combination fails as expected
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
for i in range(5):
flags = random.sample([e.IBV_ACCESS_REMOTE_WRITE,
e.IBV_ACCESS_REMOTE_ATOMIC],
random.randint(1, 2))
mr_flags = 0
for i in flags:
mr_flags += i.value
try:
DmaBufMR(pd, u.get_mr_length(), mr_flags)
except PyverbsRDMAError as err:
assert 'Failed to register a dma-buf MR' in err.args[0]
else:
raise PyverbsRDMAError('Registered a dma-buf
- MR with illegal falgs')
- def test_dmabuf_write(self):
"""
Test writing to DmaBufMR's buffer
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
for i in range(10):
mr_len = u.get_mr_length()
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
for mr_off in [0, mr_len//2]:
with DmaBufMR(pd, mr_len, f, offset=mr_off) as mr:
write_len = min(random.randint(1, MAX_IO_LEN),
mr_len)
mr.write('a' * write_len, write_len)
- def test_dmabuf_read(self):
"""
Test reading from DmaBufMR's buffer
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
for i in range(10):
mr_len = u.get_mr_length()
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
for mr_off in [0, mr_len//2]:
with DmaBufMR(pd, mr_len, f, offset=mr_off) as mr:
write_len = min(random.randint(1, MAX_IO_LEN),
mr_len)
write_str = 'a' * write_len
mr.write(write_str, write_len)
read_len = random.randint(1, write_len)
offset = random.randint(0, write_len-read_len)
read_str = mr.read(read_len, offset).decode()
assert read_str in write_str
- def test_dmabuf_lkey(self):
"""
Test reading lkey property
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
length = u.get_mr_length()
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
with DmaBufMR(pd, length, f) as mr:
mr.lkey
- def test_dmabuf_rkey(self):
"""
Test reading rkey property
"""
check_dmabuf_support()
for ctx, attr, attr_ex in self.devices:
with PD(ctx) as pd:
check_dmabuf_mr_support(pd)
length = u.get_mr_length()
flags = u.get_dmabuf_access_flags(ctx)
for f in flags:
with DmaBufMR(pd, length, f) as mr:
mr.rkey
+class DmaBufRC(RCResources):
- def __init__(self, dev_name, ib_port, gid_index):
"""
Initialize an DmaBufRC object.
:param dev_name: Device name to be used
:param ib_port: IB port of the device to use
:param gid_index: Which GID index to use
"""
super(DmaBufRC, self).__init__(dev_name=dev_name, ib_port=ib_port,
gid_index=gid_index)
- def create_mr(self):
check_dmabuf_support()
check_dmabuf_mr_support(self.pd)
access = e.IBV_ACCESS_LOCAL_WRITE | e.IBV_ACCESS_REMOTE_WRITE
mr = DmaBufMR(self.pd, self.msg_size, access)
self.mr = mr
- def create_qp_attr(self):
qp_attr = QPAttr(port_num=self.ib_port)
qp_access = e.IBV_ACCESS_LOCAL_WRITE | e.IBV_ACCESS_REMOTE_WRITE
qp_attr.qp_access_flags = qp_access
return qp_attr
+class DmaBufTestCase(RDMATestCase):
- def setUp(self):
super(DmaBufTestCase, self).setUp()
self.iters = 100
- def create_players(self, resource, **resource_arg):
"""
Init dma-buf tests resources.
:param resource: The RDMA resources to use. A class of type
BaseResources.
:param resource_arg: Dict of args that specify the resource specific
attributes.
:return: The (client, server) resources.
"""
client = resource(**self.dev_info, **resource_arg)
server = resource(**self.dev_info, **resource_arg)
client.pre_run(server.psns, server.qps_num)
server.pre_run(client.psns, client.qps_num)
return client, server
- def test_dmabuf_rc_traffic(self):
"""
Test send/recv using dma-buf MR over RC
"""
client, server = self.create_players(DmaBufRC)
u.traffic(client, server, self.iters, self.gid_index,
- self.ib_port)
- def test_dmabuf_rdma_traffic(self):
"""
Test rdma write using dma-buf MR
"""
client, server = self.create_players(DmaBufRC)
server.rkey = client.mr.rkey
server.remote_addr = client.mr.offset
client.rkey = server.mr.rkey
client.remote_addr = server.mr.offset
u.rdma_traffic(client, server, self.iters, self.gid_index, self.ib_port,
send_op=e.IBV_WR_RDMA_WRITE)
diff --git a/tests/utils.py b/tests/utils.py index 7039f41..d3d5c16 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) # Copyright (c) 2019 Mellanox Technologies, Inc. All rights reserved. See COPYING file +# Copyright (c) 2020 Intel Corporation. All rights reserved. See +COPYING file """ Provide some useful helper function for pyverbs' tests. """ @@ -94,6 +95,31 @@ def get_access_flags(ctx): return arr
+def get_dmabuf_access_flags(ctx):
- """
- Similar to get_access_flags, except that dma-buf MR only support
- a subset of the flags.
- :param ctx: Device Context to check capabilities
- :return: A random legal value for MR flags
- """
- attr = ctx.query_device()
- vals = [e.IBV_ACCESS_LOCAL_WRITE, e.IBV_ACCESS_REMOTE_WRITE,
e.IBV_ACCESS_REMOTE_READ, e.IBV_ACCESS_REMOTE_ATOMIC,
e.IBV_ACCESS_RELAXED_ORDERING]
- if not attr.atomic_caps & e.IBV_ATOMIC_HCA:
vals.remove(e.IBV_ACCESS_REMOTE_ATOMIC)
- arr = []
- for i in range(1, len(vals)):
tmp = list(com(vals, i))
tmp = filter(filter_illegal_access_flags, tmp)
for t in tmp: # Iterate legal combinations and bitwise OR them
val = 0
for flag in t:
val += flag.value
arr.append(val)
- return arr
def get_dm_attrs(dm_len): """ Initializes an AllocDmAttr member with the given length and random -- 1.8.3.1
dri-devel mailing list dri-devel@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/dri-devel
-- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
The filter definition is wrong and causes get_access_flags() to always return an empty list. As a result, the MR tests using this function are effectively skipped (but report success).
Signed-off-by: Jianxin Xiong jianxin.xiong@intel.com --- tests/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/utils.py b/tests/utils.py index d3d5c16..8bd0c16 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -56,8 +56,8 @@ def filter_illegal_access_flags(element): :param element: A list of access flags to check :return: True if this list is legal, else False """ - if e.IBV_ACCESS_REMOTE_ATOMIC in element or e.IBV_ACCESS_REMOTE_WRITE: - if e.IBV_ACCESS_LOCAL_WRITE: + if e.IBV_ACCESS_REMOTE_ATOMIC in element or e.IBV_ACCESS_REMOTE_WRITE in element: + if not e.IBV_ACCESS_LOCAL_WRITE in element: return False return True
dri-devel@lists.freedesktop.org