From: Rob Clark robdclark@chromium.org
This doesn't remove *all* the struct_mutex, but it covers the worst of it, ie. shrinker/madvise/free/retire. The submit path still uses struct_mutex, but it still needs *something* serialize a portion of the submit path, and lock_stat mostly just shows the lock contention there being with other submits. And there are a few other bits of struct_mutex usage in less critical paths (debugfs, etc). But this seems like a reasonable step in the right direction.
v2: teach lockdep about shrinker locking patters (danvet) and convert to obj->resv locking (danvet) v3: fix get_vaddr locking for legacy userspace (relocs), devcoredump, and rd/hangrd
Rob Clark (23): drm/msm: Fix a couple incorrect usages of get_vaddr_active() drm/msm/gem: Add obj->lock wrappers drm/msm/gem: Rename internal get_iova_locked helper drm/msm/gem: Move prototypes to msm_gem.h drm/msm/gem: Add some _locked() helpers drm/msm/gem: Move locking in shrinker path drm/msm/submit: Move copy_from_user ahead of locking bos drm/msm: Do rpm get sooner in the submit path drm/msm/gem: Switch over to obj->resv for locking drm/msm: Use correct drm_gem_object_put() in fail case drm/msm: Drop chatty trace drm/msm: Move update_fences() drm/msm: Add priv->mm_lock to protect active/inactive lists drm/msm: Document and rename preempt_lock drm/msm: Protect ring->submits with it's own lock drm/msm: Refcount submits drm/msm: Remove obj->gpu drm/msm: Drop struct_mutex from the retire path drm/msm: Drop struct_mutex in free_object() path drm/msm: Remove msm_gem_free_work drm/msm: Drop struct_mutex in madvise path drm/msm: Drop struct_mutex in shrinker path drm/msm: Don't implicit-sync if only a single ring
drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 6 +- drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 12 +- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 6 +- drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c | 1 + drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c | 1 + drivers/gpu/drm/msm/dsi/dsi_host.c | 1 + drivers/gpu/drm/msm/msm_debugfs.c | 7 + drivers/gpu/drm/msm/msm_drv.c | 21 +- drivers/gpu/drm/msm/msm_drv.h | 73 +----- drivers/gpu/drm/msm/msm_fbdev.c | 1 + drivers/gpu/drm/msm/msm_gem.c | 266 +++++++++++----------- drivers/gpu/drm/msm/msm_gem.h | 133 +++++++++-- drivers/gpu/drm/msm/msm_gem_shrinker.c | 81 ++----- drivers/gpu/drm/msm/msm_gem_submit.c | 158 ++++++++----- drivers/gpu/drm/msm/msm_gpu.c | 110 +++++---- drivers/gpu/drm/msm/msm_gpu.h | 5 +- drivers/gpu/drm/msm/msm_rd.c | 2 +- drivers/gpu/drm/msm/msm_ringbuffer.c | 3 +- drivers/gpu/drm/msm/msm_ringbuffer.h | 13 +- 19 files changed, 495 insertions(+), 405 deletions(-)
From: Rob Clark robdclark@chromium.org
The microcode bo's should never be madvise(WONTNEED), so these should not be using msm_gem_get_vaddr_active().
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 2 +- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index d6804a802355..b2593c6bd2ac 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -426,7 +426,7 @@ static int a5xx_preempt_start(struct msm_gpu *gpu) static void a5xx_ucode_check_version(struct a5xx_gpu *a5xx_gpu, struct drm_gem_object *obj) { - u32 *buf = msm_gem_get_vaddr_active(obj); + u32 *buf = msm_gem_get_vaddr(obj);
if (IS_ERR(buf)) return; diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 948f3656c20c..0894703a742e 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -522,7 +522,7 @@ static int a6xx_cp_init(struct msm_gpu *gpu) static void a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu, struct drm_gem_object *obj) { - u32 *buf = msm_gem_get_vaddr_active(obj); + u32 *buf = msm_gem_get_vaddr(obj);
if (IS_ERR(buf)) return;
From: Rob Clark robdclark@chromium.org
This will make it easier to transition over to obj->resv locking for everything that is per-bo locking.
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/msm_gem.c | 99 ++++++++++++++++------------------- drivers/gpu/drm/msm/msm_gem.h | 28 ++++++++++ 2 files changed, 74 insertions(+), 53 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 14e14caf90f9..afef9c6b1a1c 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -178,15 +178,15 @@ struct page **msm_gem_get_pages(struct drm_gem_object *obj) struct msm_gem_object *msm_obj = to_msm_bo(obj); struct page **p;
- mutex_lock(&msm_obj->lock); + msm_gem_lock(obj);
if (WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED)) { - mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); return ERR_PTR(-EBUSY); }
p = get_pages(obj); - mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); return p; }
@@ -252,14 +252,14 @@ vm_fault_t msm_gem_fault(struct vm_fault *vmf) * vm_ops.open/drm_gem_mmap_obj and close get and put * a reference on obj. So, we dont need to hold one here. */ - err = mutex_lock_interruptible(&msm_obj->lock); + err = msm_gem_lock_interruptible(obj); if (err) { ret = VM_FAULT_NOPAGE; goto out; }
if (WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED)) { - mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); return VM_FAULT_SIGBUS; }
@@ -280,7 +280,7 @@ vm_fault_t msm_gem_fault(struct vm_fault *vmf)
ret = vmf_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV)); out_unlock: - mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); out: return ret; } @@ -289,10 +289,9 @@ vm_fault_t msm_gem_fault(struct vm_fault *vmf) static uint64_t mmap_offset(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; - struct msm_gem_object *msm_obj = to_msm_bo(obj); int ret;
- WARN_ON(!mutex_is_locked(&msm_obj->lock)); + WARN_ON(!msm_gem_is_locked(obj));
/* Make it mmapable */ ret = drm_gem_create_mmap_offset(obj); @@ -308,11 +307,10 @@ static uint64_t mmap_offset(struct drm_gem_object *obj) uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj) { uint64_t offset; - struct msm_gem_object *msm_obj = to_msm_bo(obj);
- mutex_lock(&msm_obj->lock); + msm_gem_lock(obj); offset = mmap_offset(obj); - mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); return offset; }
@@ -322,7 +320,7 @@ static struct msm_gem_vma *add_vma(struct drm_gem_object *obj, struct msm_gem_object *msm_obj = to_msm_bo(obj); struct msm_gem_vma *vma;
- WARN_ON(!mutex_is_locked(&msm_obj->lock)); + WARN_ON(!msm_gem_is_locked(obj));
vma = kzalloc(sizeof(*vma), GFP_KERNEL); if (!vma) @@ -341,7 +339,7 @@ static struct msm_gem_vma *lookup_vma(struct drm_gem_object *obj, struct msm_gem_object *msm_obj = to_msm_bo(obj); struct msm_gem_vma *vma;
- WARN_ON(!mutex_is_locked(&msm_obj->lock)); + WARN_ON(!msm_gem_is_locked(obj));
list_for_each_entry(vma, &msm_obj->vmas, list) { if (vma->aspace == aspace) @@ -360,14 +358,14 @@ static void del_vma(struct msm_gem_vma *vma) kfree(vma); }
-/* Called with msm_obj->lock locked */ +/* Called with msm_obj locked */ static void put_iova(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj); struct msm_gem_vma *vma, *tmp;
- WARN_ON(!mutex_is_locked(&msm_obj->lock)); + WARN_ON(!msm_gem_is_locked(obj));
list_for_each_entry_safe(vma, tmp, &msm_obj->vmas, list) { if (vma->aspace) { @@ -382,11 +380,10 @@ static int msm_gem_get_iova_locked(struct drm_gem_object *obj, struct msm_gem_address_space *aspace, uint64_t *iova, u64 range_start, u64 range_end) { - struct msm_gem_object *msm_obj = to_msm_bo(obj); struct msm_gem_vma *vma; int ret = 0;
- WARN_ON(!mutex_is_locked(&msm_obj->lock)); + WARN_ON(!msm_gem_is_locked(obj));
vma = lookup_vma(obj, aspace);
@@ -421,7 +418,7 @@ static int msm_gem_pin_iova(struct drm_gem_object *obj, if (msm_obj->flags & MSM_BO_MAP_PRIV) prot |= IOMMU_PRIV;
- WARN_ON(!mutex_is_locked(&msm_obj->lock)); + WARN_ON(!msm_gem_is_locked(obj));
if (WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED)) return -EBUSY; @@ -446,11 +443,10 @@ int msm_gem_get_and_pin_iova_range(struct drm_gem_object *obj, struct msm_gem_address_space *aspace, uint64_t *iova, u64 range_start, u64 range_end) { - struct msm_gem_object *msm_obj = to_msm_bo(obj); u64 local; int ret;
- mutex_lock(&msm_obj->lock); + msm_gem_lock(obj);
ret = msm_gem_get_iova_locked(obj, aspace, &local, range_start, range_end); @@ -461,7 +457,7 @@ int msm_gem_get_and_pin_iova_range(struct drm_gem_object *obj, if (!ret) *iova = local;
- mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); return ret; }
@@ -479,12 +475,11 @@ int msm_gem_get_and_pin_iova(struct drm_gem_object *obj, int msm_gem_get_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace, uint64_t *iova) { - struct msm_gem_object *msm_obj = to_msm_bo(obj); int ret;
- mutex_lock(&msm_obj->lock); + msm_gem_lock(obj); ret = msm_gem_get_iova_locked(obj, aspace, iova, 0, U64_MAX); - mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj);
return ret; } @@ -495,12 +490,11 @@ int msm_gem_get_iova(struct drm_gem_object *obj, uint64_t msm_gem_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace) { - struct msm_gem_object *msm_obj = to_msm_bo(obj); struct msm_gem_vma *vma;
- mutex_lock(&msm_obj->lock); + msm_gem_lock(obj); vma = lookup_vma(obj, aspace); - mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); WARN_ON(!vma);
return vma ? vma->iova : 0; @@ -514,16 +508,15 @@ uint64_t msm_gem_iova(struct drm_gem_object *obj, void msm_gem_unpin_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace) { - struct msm_gem_object *msm_obj = to_msm_bo(obj); struct msm_gem_vma *vma;
- mutex_lock(&msm_obj->lock); + msm_gem_lock(obj); vma = lookup_vma(obj, aspace);
if (!WARN_ON(!vma)) msm_gem_unmap_vma(aspace, vma);
- mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); }
int msm_gem_dumb_create(struct drm_file *file, struct drm_device *dev, @@ -564,20 +557,20 @@ static void *get_vaddr(struct drm_gem_object *obj, unsigned madv) if (obj->import_attach) return ERR_PTR(-ENODEV);
- mutex_lock(&msm_obj->lock); + msm_gem_lock(obj);
if (WARN_ON(msm_obj->madv > madv)) { DRM_DEV_ERROR(obj->dev->dev, "Invalid madv state: %u vs %u\n", msm_obj->madv, madv); - mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); return ERR_PTR(-EBUSY); }
/* increment vmap_count *before* vmap() call, so shrinker can - * check vmap_count (is_vunmapable()) outside of msm_obj->lock. + * check vmap_count (is_vunmapable()) outside of msm_obj lock. * This guarantees that we won't try to msm_gem_vunmap() this * same object from within the vmap() call (while we already - * hold msm_obj->lock) + * hold msm_obj lock) */ msm_obj->vmap_count++;
@@ -595,12 +588,12 @@ static void *get_vaddr(struct drm_gem_object *obj, unsigned madv) } }
- mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); return msm_obj->vaddr;
fail: msm_obj->vmap_count--; - mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); return ERR_PTR(ret); }
@@ -624,10 +617,10 @@ void msm_gem_put_vaddr(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj);
- mutex_lock(&msm_obj->lock); + msm_gem_lock(obj); WARN_ON(msm_obj->vmap_count < 1); msm_obj->vmap_count--; - mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); }
/* Update madvise status, returns true if not purged, else @@ -637,7 +630,7 @@ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv) { struct msm_gem_object *msm_obj = to_msm_bo(obj);
- mutex_lock(&msm_obj->lock); + msm_gem_lock(obj);
WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex));
@@ -646,7 +639,7 @@ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv)
madv = msm_obj->madv;
- mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj);
return (madv != __MSM_MADV_PURGED); } @@ -683,14 +676,14 @@ void msm_gem_purge(struct drm_gem_object *obj, enum msm_gem_lock subclass) invalidate_mapping_pages(file_inode(obj->filp)->i_mapping, 0, (loff_t)-1);
- mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); }
static void msm_gem_vunmap_locked(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj);
- WARN_ON(!mutex_is_locked(&msm_obj->lock)); + WARN_ON(!msm_gem_is_locked(obj));
if (!msm_obj->vaddr || WARN_ON(!is_vunmapable(msm_obj))) return; @@ -705,7 +698,7 @@ void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass)
mutex_lock_nested(&msm_obj->lock, subclass); msm_gem_vunmap_locked(obj); - mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); }
/* must be called before _move_to_active().. */ @@ -816,7 +809,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m) uint64_t off = drm_vma_node_start(&obj->vma_node); const char *madv;
- mutex_lock(&msm_obj->lock); + msm_gem_lock(obj);
switch (msm_obj->madv) { case __MSM_MADV_PURGED: @@ -884,7 +877,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m) describe_fence(fence, "Exclusive", m); rcu_read_unlock();
- mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); }
void msm_gem_describe_objects(struct list_head *list, struct seq_file *m) @@ -929,7 +922,7 @@ static void free_object(struct msm_gem_object *msm_obj)
list_del(&msm_obj->mm_list);
- mutex_lock(&msm_obj->lock); + msm_gem_lock(obj);
put_iova(obj);
@@ -950,7 +943,7 @@ static void free_object(struct msm_gem_object *msm_obj)
drm_gem_object_release(obj);
- mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); kfree(msm_obj); }
@@ -1070,10 +1063,10 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, struct msm_gem_vma *vma; struct page **pages;
- mutex_lock(&msm_obj->lock); + msm_gem_lock(obj);
vma = add_vma(obj, NULL); - mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); if (IS_ERR(vma)) { ret = PTR_ERR(vma); goto fail; @@ -1157,22 +1150,22 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev, npages = size / PAGE_SIZE;
msm_obj = to_msm_bo(obj); - mutex_lock(&msm_obj->lock); + msm_gem_lock(obj); msm_obj->sgt = sgt; msm_obj->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); if (!msm_obj->pages) { - mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); ret = -ENOMEM; goto fail; }
ret = drm_prime_sg_to_page_addr_arrays(sgt, msm_obj->pages, NULL, npages); if (ret) { - mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj); goto fail; }
- mutex_unlock(&msm_obj->lock); + msm_gem_unlock(obj);
mutex_lock(&dev->struct_mutex); list_add_tail(&msm_obj->mm_list, &priv->inactive_list); diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index a1bf741b9b89..f6482154e8bb 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -93,6 +93,34 @@ struct msm_gem_object { }; #define to_msm_bo(x) container_of(x, struct msm_gem_object, base)
+static inline void +msm_gem_lock(struct drm_gem_object *obj) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + mutex_lock(&msm_obj->lock); +} + +static inline int +msm_gem_lock_interruptible(struct drm_gem_object *obj) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + return mutex_lock_interruptible(&msm_obj->lock); +} + +static inline void +msm_gem_unlock(struct drm_gem_object *obj) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + mutex_unlock(&msm_obj->lock); +} + +static inline bool +msm_gem_is_locked(struct drm_gem_object *obj) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + return mutex_is_locked(&msm_obj->lock); +} + static inline bool is_active(struct msm_gem_object *msm_obj) { return atomic_read(&msm_obj->active_count);
From: Rob Clark robdclark@chromium.org
We'll need to introduce a _locked() version of msm_gem_get_iova(), so we need to make that name available.
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/msm_gem.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index afef9c6b1a1c..dec89fe79025 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -376,7 +376,7 @@ put_iova(struct drm_gem_object *obj) } }
-static int msm_gem_get_iova_locked(struct drm_gem_object *obj, +static int get_iova_locked(struct drm_gem_object *obj, struct msm_gem_address_space *aspace, uint64_t *iova, u64 range_start, u64 range_end) { @@ -448,7 +448,7 @@ int msm_gem_get_and_pin_iova_range(struct drm_gem_object *obj,
msm_gem_lock(obj);
- ret = msm_gem_get_iova_locked(obj, aspace, &local, + ret = get_iova_locked(obj, aspace, &local, range_start, range_end);
if (!ret) @@ -478,7 +478,7 @@ int msm_gem_get_iova(struct drm_gem_object *obj, int ret;
msm_gem_lock(obj); - ret = msm_gem_get_iova_locked(obj, aspace, iova, 0, U64_MAX); + ret = get_iova_locked(obj, aspace, iova, 0, U64_MAX); msm_gem_unlock(obj);
return ret;
From: Rob Clark robdclark@chromium.org
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c | 1 + drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c | 1 + drivers/gpu/drm/msm/dsi/dsi_host.c | 1 + drivers/gpu/drm/msm/msm_drv.h | 54 ---------------------- drivers/gpu/drm/msm/msm_fbdev.c | 1 + drivers/gpu/drm/msm/msm_gem.h | 56 +++++++++++++++++++++++ 6 files changed, 60 insertions(+), 54 deletions(-)
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c index a0253297bc76..b65b2329cc8d 100644 --- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c @@ -11,6 +11,7 @@ #include <drm/drm_vblank.h>
#include "mdp4_kms.h" +#include "msm_gem.h"
struct mdp4_crtc { struct drm_crtc base; diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c index c39dad151bb6..81fbd52ad7e7 100644 --- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c +++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c @@ -15,6 +15,7 @@ #include <drm/drm_vblank.h>
#include "mdp5_kms.h" +#include "msm_gem.h"
#define CURSOR_WIDTH 64 #define CURSOR_HEIGHT 64 diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c index cee5c50c8e52..71160b4d77a0 100644 --- a/drivers/gpu/drm/msm/dsi/dsi_host.c +++ b/drivers/gpu/drm/msm/dsi/dsi_host.c @@ -26,6 +26,7 @@ #include "sfpb.xml.h" #include "dsi_cfg.h" #include "msm_kms.h" +#include "msm_gem.h"
#define DSI_RESET_TOGGLE_DELAY_MS 20
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 7fbcdaebeff8..713a0ae28125 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -273,28 +273,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, void msm_gem_shrinker_init(struct drm_device *dev); void msm_gem_shrinker_cleanup(struct drm_device *dev);
-int msm_gem_mmap_obj(struct drm_gem_object *obj, - struct vm_area_struct *vma); -int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma); -vm_fault_t msm_gem_fault(struct vm_fault *vmf); -uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj); -int msm_gem_get_iova(struct drm_gem_object *obj, - struct msm_gem_address_space *aspace, uint64_t *iova); -int msm_gem_get_and_pin_iova_range(struct drm_gem_object *obj, - struct msm_gem_address_space *aspace, uint64_t *iova, - u64 range_start, u64 range_end); -int msm_gem_get_and_pin_iova(struct drm_gem_object *obj, - struct msm_gem_address_space *aspace, uint64_t *iova); -uint64_t msm_gem_iova(struct drm_gem_object *obj, - struct msm_gem_address_space *aspace); -void msm_gem_unpin_iova(struct drm_gem_object *obj, - struct msm_gem_address_space *aspace); -struct page **msm_gem_get_pages(struct drm_gem_object *obj); -void msm_gem_put_pages(struct drm_gem_object *obj); -int msm_gem_dumb_create(struct drm_file *file, struct drm_device *dev, - struct drm_mode_create_dumb *args); -int msm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev, - uint32_t handle, uint64_t *offset); struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj); void *msm_gem_prime_vmap(struct drm_gem_object *obj); void msm_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); @@ -303,38 +281,8 @@ struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg); int msm_gem_prime_pin(struct drm_gem_object *obj); void msm_gem_prime_unpin(struct drm_gem_object *obj); -void *msm_gem_get_vaddr(struct drm_gem_object *obj); -void *msm_gem_get_vaddr_active(struct drm_gem_object *obj); -void msm_gem_put_vaddr(struct drm_gem_object *obj); -int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv); -int msm_gem_sync_object(struct drm_gem_object *obj, - struct msm_fence_context *fctx, bool exclusive); -void msm_gem_active_get(struct drm_gem_object *obj, struct msm_gpu *gpu); -void msm_gem_active_put(struct drm_gem_object *obj); -int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout); -int msm_gem_cpu_fini(struct drm_gem_object *obj); -void msm_gem_free_object(struct drm_gem_object *obj); -int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, - uint32_t size, uint32_t flags, uint32_t *handle, char *name); -struct drm_gem_object *msm_gem_new(struct drm_device *dev, - uint32_t size, uint32_t flags); -struct drm_gem_object *msm_gem_new_locked(struct drm_device *dev, - uint32_t size, uint32_t flags); -void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size, - uint32_t flags, struct msm_gem_address_space *aspace, - struct drm_gem_object **bo, uint64_t *iova); -void *msm_gem_kernel_new_locked(struct drm_device *dev, uint32_t size, - uint32_t flags, struct msm_gem_address_space *aspace, - struct drm_gem_object **bo, uint64_t *iova); -void msm_gem_kernel_put(struct drm_gem_object *bo, - struct msm_gem_address_space *aspace, bool locked); -struct drm_gem_object *msm_gem_import(struct drm_device *dev, - struct dma_buf *dmabuf, struct sg_table *sgt); void msm_gem_free_work(struct work_struct *work);
-__printf(2, 3) -void msm_gem_object_set_name(struct drm_gem_object *bo, const char *fmt, ...); - int msm_framebuffer_prepare(struct drm_framebuffer *fb, struct msm_gem_address_space *aspace); void msm_framebuffer_cleanup(struct drm_framebuffer *fb, @@ -447,8 +395,6 @@ void __init msm_dpu_register(void); void __exit msm_dpu_unregister(void);
#ifdef CONFIG_DEBUG_FS -void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m); -void msm_gem_describe_objects(struct list_head *list, struct seq_file *m); void msm_framebuffer_describe(struct drm_framebuffer *fb, struct seq_file *m); int msm_debugfs_late_init(struct drm_device *dev); int msm_rd_debugfs_init(struct drm_minor *minor); diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c index 47235f8c5922..678dba1725a6 100644 --- a/drivers/gpu/drm/msm/msm_fbdev.c +++ b/drivers/gpu/drm/msm/msm_fbdev.c @@ -9,6 +9,7 @@ #include <drm/drm_fourcc.h>
#include "msm_drv.h" +#include "msm_gem.h" #include "msm_kms.h"
extern int msm_gem_mmap_obj(struct drm_gem_object *obj, diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index f6482154e8bb..fbad08badf43 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -93,6 +93,62 @@ struct msm_gem_object { }; #define to_msm_bo(x) container_of(x, struct msm_gem_object, base)
+int msm_gem_mmap_obj(struct drm_gem_object *obj, + struct vm_area_struct *vma); +int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma); +vm_fault_t msm_gem_fault(struct vm_fault *vmf); +uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj); +int msm_gem_get_iova(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace, uint64_t *iova); +int msm_gem_get_and_pin_iova_range(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace, uint64_t *iova, + u64 range_start, u64 range_end); +int msm_gem_get_and_pin_iova(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace, uint64_t *iova); +uint64_t msm_gem_iova(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace); +void msm_gem_unpin_iova(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace); +struct page **msm_gem_get_pages(struct drm_gem_object *obj); +void msm_gem_put_pages(struct drm_gem_object *obj); +int msm_gem_dumb_create(struct drm_file *file, struct drm_device *dev, + struct drm_mode_create_dumb *args); +int msm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev, + uint32_t handle, uint64_t *offset); +void *msm_gem_get_vaddr(struct drm_gem_object *obj); +void *msm_gem_get_vaddr_active(struct drm_gem_object *obj); +void msm_gem_put_vaddr(struct drm_gem_object *obj); +int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv); +int msm_gem_sync_object(struct drm_gem_object *obj, + struct msm_fence_context *fctx, bool exclusive); +void msm_gem_active_get(struct drm_gem_object *obj, struct msm_gpu *gpu); +void msm_gem_active_put(struct drm_gem_object *obj); +int msm_gem_cpu_prep(struct drm_gem_object *obj, uint32_t op, ktime_t *timeout); +int msm_gem_cpu_fini(struct drm_gem_object *obj); +void msm_gem_free_object(struct drm_gem_object *obj); +int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, + uint32_t size, uint32_t flags, uint32_t *handle, char *name); +struct drm_gem_object *msm_gem_new(struct drm_device *dev, + uint32_t size, uint32_t flags); +struct drm_gem_object *msm_gem_new_locked(struct drm_device *dev, + uint32_t size, uint32_t flags); +void *msm_gem_kernel_new(struct drm_device *dev, uint32_t size, + uint32_t flags, struct msm_gem_address_space *aspace, + struct drm_gem_object **bo, uint64_t *iova); +void *msm_gem_kernel_new_locked(struct drm_device *dev, uint32_t size, + uint32_t flags, struct msm_gem_address_space *aspace, + struct drm_gem_object **bo, uint64_t *iova); +void msm_gem_kernel_put(struct drm_gem_object *bo, + struct msm_gem_address_space *aspace, bool locked); +struct drm_gem_object *msm_gem_import(struct drm_device *dev, + struct dma_buf *dmabuf, struct sg_table *sgt); +__printf(2, 3) +void msm_gem_object_set_name(struct drm_gem_object *bo, const char *fmt, ...); +#ifdef CONFIG_DEBUG_FS +void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m); +void msm_gem_describe_objects(struct list_head *list, struct seq_file *m); +#endif + static inline void msm_gem_lock(struct drm_gem_object *obj) {
From: Rob Clark robdclark@chromium.org
When we cut-over to using dma_resv_lock/etc instead of msm_obj->lock, we'll need these for the submit path (where resv->lock is already held).
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/msm_gem.c | 89 +++++++++++++++++++++++++++-------- drivers/gpu/drm/msm/msm_gem.h | 6 +++ 2 files changed, 75 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index dec89fe79025..e0d8d739b068 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -435,18 +435,14 @@ static int msm_gem_pin_iova(struct drm_gem_object *obj, msm_obj->sgt, obj->size >> PAGE_SHIFT); }
-/* - * get iova and pin it. Should have a matching put - * limits iova to specified range (in pages) - */ -int msm_gem_get_and_pin_iova_range(struct drm_gem_object *obj, +static int get_and_pin_iova_range_locked(struct drm_gem_object *obj, struct msm_gem_address_space *aspace, uint64_t *iova, u64 range_start, u64 range_end) { u64 local; int ret;
- msm_gem_lock(obj); + WARN_ON(!msm_gem_is_locked(obj));
ret = get_iova_locked(obj, aspace, &local, range_start, range_end); @@ -457,10 +453,32 @@ int msm_gem_get_and_pin_iova_range(struct drm_gem_object *obj, if (!ret) *iova = local;
+ return ret; +} + +/* + * get iova and pin it. Should have a matching put + * limits iova to specified range (in pages) + */ +int msm_gem_get_and_pin_iova_range(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace, uint64_t *iova, + u64 range_start, u64 range_end) +{ + int ret; + + msm_gem_lock(obj); + ret = get_and_pin_iova_range_locked(obj, aspace, iova, range_start, range_end); msm_gem_unlock(obj); + return ret; }
+int msm_gem_get_and_pin_iova_locked(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace, uint64_t *iova) +{ + return get_and_pin_iova_range_locked(obj, aspace, iova, 0, U64_MAX); +} + /* get iova and pin it. Should have a matching put */ int msm_gem_get_and_pin_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace, uint64_t *iova) @@ -501,21 +519,31 @@ uint64_t msm_gem_iova(struct drm_gem_object *obj, }
/* - * Unpin a iova by updating the reference counts. The memory isn't actually - * purged until something else (shrinker, mm_notifier, destroy, etc) decides - * to get rid of it + * Locked variant of msm_gem_unpin_iova() */ -void msm_gem_unpin_iova(struct drm_gem_object *obj, +void msm_gem_unpin_iova_locked(struct drm_gem_object *obj, struct msm_gem_address_space *aspace) { struct msm_gem_vma *vma;
- msm_gem_lock(obj); + WARN_ON(!msm_gem_is_locked(obj)); + vma = lookup_vma(obj, aspace);
if (!WARN_ON(!vma)) msm_gem_unmap_vma(aspace, vma); +}
+/* + * Unpin a iova by updating the reference counts. The memory isn't actually + * purged until something else (shrinker, mm_notifier, destroy, etc) decides + * to get rid of it + */ +void msm_gem_unpin_iova(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace) +{ + msm_gem_lock(obj); + msm_gem_unpin_iova_locked(obj, aspace); msm_gem_unlock(obj); }
@@ -554,15 +582,14 @@ static void *get_vaddr(struct drm_gem_object *obj, unsigned madv) struct msm_gem_object *msm_obj = to_msm_bo(obj); int ret = 0;
+ WARN_ON(!msm_gem_is_locked(obj)); + if (obj->import_attach) return ERR_PTR(-ENODEV);
- msm_gem_lock(obj); - if (WARN_ON(msm_obj->madv > madv)) { DRM_DEV_ERROR(obj->dev->dev, "Invalid madv state: %u vs %u\n", msm_obj->madv, madv); - msm_gem_unlock(obj); return ERR_PTR(-EBUSY); }
@@ -588,20 +615,29 @@ static void *get_vaddr(struct drm_gem_object *obj, unsigned madv) } }
- msm_gem_unlock(obj); return msm_obj->vaddr;
fail: msm_obj->vmap_count--; - msm_gem_unlock(obj); return ERR_PTR(ret); }
-void *msm_gem_get_vaddr(struct drm_gem_object *obj) +void *msm_gem_get_vaddr_locked(struct drm_gem_object *obj) { return get_vaddr(obj, MSM_MADV_WILLNEED); }
+void *msm_gem_get_vaddr(struct drm_gem_object *obj) +{ + void *ret; + + msm_gem_lock(obj); + ret = msm_gem_get_vaddr_locked(obj); + msm_gem_unlock(obj); + + return ret; +} + /* * Don't use this! It is for the very special case of dumping * submits from GPU hangs or faults, were the bo may already @@ -610,16 +646,29 @@ void *msm_gem_get_vaddr(struct drm_gem_object *obj) */ void *msm_gem_get_vaddr_active(struct drm_gem_object *obj) { - return get_vaddr(obj, __MSM_MADV_PURGED); + void *ret; + + msm_gem_lock(obj); + ret = get_vaddr(obj, __MSM_MADV_PURGED); + msm_gem_unlock(obj); + + return ret; }
-void msm_gem_put_vaddr(struct drm_gem_object *obj) +void msm_gem_put_vaddr_locked(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj);
- msm_gem_lock(obj); + WARN_ON(!msm_gem_is_locked(obj)); WARN_ON(msm_obj->vmap_count < 1); + msm_obj->vmap_count--; +} + +void msm_gem_put_vaddr(struct drm_gem_object *obj) +{ + msm_gem_lock(obj); + msm_gem_put_vaddr_locked(obj); msm_gem_unlock(obj); }
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index fbad08badf43..d55d5401a2d2 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -103,10 +103,14 @@ int msm_gem_get_iova(struct drm_gem_object *obj, int msm_gem_get_and_pin_iova_range(struct drm_gem_object *obj, struct msm_gem_address_space *aspace, uint64_t *iova, u64 range_start, u64 range_end); +int msm_gem_get_and_pin_iova_locked(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace, uint64_t *iova); int msm_gem_get_and_pin_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace, uint64_t *iova); uint64_t msm_gem_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace); +void msm_gem_unpin_iova_locked(struct drm_gem_object *obj, + struct msm_gem_address_space *aspace); void msm_gem_unpin_iova(struct drm_gem_object *obj, struct msm_gem_address_space *aspace); struct page **msm_gem_get_pages(struct drm_gem_object *obj); @@ -115,8 +119,10 @@ int msm_gem_dumb_create(struct drm_file *file, struct drm_device *dev, struct drm_mode_create_dumb *args); int msm_gem_dumb_map_offset(struct drm_file *file, struct drm_device *dev, uint32_t handle, uint64_t *offset); +void *msm_gem_get_vaddr_locked(struct drm_gem_object *obj); void *msm_gem_get_vaddr(struct drm_gem_object *obj); void *msm_gem_get_vaddr_active(struct drm_gem_object *obj); +void msm_gem_put_vaddr_locked(struct drm_gem_object *obj); void msm_gem_put_vaddr(struct drm_gem_object *obj); int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv); int msm_gem_sync_object(struct drm_gem_object *obj,
From: Rob Clark robdclark@chromium.org
Move grabbing the bo lock into shrinker, with a msm_gem_trylock() to skip over bo's that are already locked. This gets rid of the nested lock classes.
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/msm_gem.c | 24 +++++---------------- drivers/gpu/drm/msm/msm_gem.h | 29 ++++++++++---------------- drivers/gpu/drm/msm/msm_gem_shrinker.c | 27 +++++++++++++++++------- 3 files changed, 35 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index e0d8d739b068..1195847714ba 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -17,8 +17,6 @@ #include "msm_gpu.h" #include "msm_mmu.h"
-static void msm_gem_vunmap_locked(struct drm_gem_object *obj); -
static dma_addr_t physaddr(struct drm_gem_object *obj) { @@ -693,20 +691,19 @@ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv) return (madv != __MSM_MADV_PURGED); }
-void msm_gem_purge(struct drm_gem_object *obj, enum msm_gem_lock subclass) +void msm_gem_purge(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; struct msm_gem_object *msm_obj = to_msm_bo(obj);
WARN_ON(!mutex_is_locked(&dev->struct_mutex)); + WARN_ON(!msm_gem_is_locked(obj)); WARN_ON(!is_purgeable(msm_obj)); WARN_ON(obj->import_attach);
- mutex_lock_nested(&msm_obj->lock, subclass); - put_iova(obj);
- msm_gem_vunmap_locked(obj); + msm_gem_vunmap(obj);
put_pages(obj);
@@ -724,11 +721,9 @@ void msm_gem_purge(struct drm_gem_object *obj, enum msm_gem_lock subclass)
invalidate_mapping_pages(file_inode(obj->filp)->i_mapping, 0, (loff_t)-1); - - msm_gem_unlock(obj); }
-static void msm_gem_vunmap_locked(struct drm_gem_object *obj) +void msm_gem_vunmap(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj);
@@ -741,15 +736,6 @@ static void msm_gem_vunmap_locked(struct drm_gem_object *obj) msm_obj->vaddr = NULL; }
-void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass) -{ - struct msm_gem_object *msm_obj = to_msm_bo(obj); - - mutex_lock_nested(&msm_obj->lock, subclass); - msm_gem_vunmap_locked(obj); - msm_gem_unlock(obj); -} - /* must be called before _move_to_active().. */ int msm_gem_sync_object(struct drm_gem_object *obj, struct msm_fence_context *fctx, bool exclusive) @@ -986,7 +972,7 @@ static void free_object(struct msm_gem_object *msm_obj)
drm_prime_gem_destroy(obj, msm_obj->sgt); } else { - msm_gem_vunmap_locked(obj); + msm_gem_vunmap(obj); put_pages(obj); }
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index d55d5401a2d2..c5232b8da794 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -162,6 +162,13 @@ msm_gem_lock(struct drm_gem_object *obj) mutex_lock(&msm_obj->lock); }
+static inline bool __must_check +msm_gem_trylock(struct drm_gem_object *obj) +{ + struct msm_gem_object *msm_obj = to_msm_bo(obj); + return mutex_trylock_recursive(&msm_obj->lock) == MUTEX_TRYLOCK_SUCCESS; +} + static inline int msm_gem_lock_interruptible(struct drm_gem_object *obj) { @@ -190,6 +197,7 @@ static inline bool is_active(struct msm_gem_object *msm_obj)
static inline bool is_purgeable(struct msm_gem_object *msm_obj) { + WARN_ON(!msm_gem_is_locked(&msm_obj->base)); WARN_ON(!mutex_is_locked(&msm_obj->base.dev->struct_mutex)); return (msm_obj->madv == MSM_MADV_DONTNEED) && msm_obj->sgt && !msm_obj->base.dma_buf && !msm_obj->base.import_attach; @@ -197,27 +205,12 @@ static inline bool is_purgeable(struct msm_gem_object *msm_obj)
static inline bool is_vunmapable(struct msm_gem_object *msm_obj) { + WARN_ON(!msm_gem_is_locked(&msm_obj->base)); return (msm_obj->vmap_count == 0) && msm_obj->vaddr; }
-/* The shrinker can be triggered while we hold objA->lock, and need - * to grab objB->lock to purge it. Lockdep just sees these as a single - * class of lock, so we use subclasses to teach it the difference. - * - * OBJ_LOCK_NORMAL is implicit (ie. normal mutex_lock() call), and - * OBJ_LOCK_SHRINKER is used by shrinker. - * - * It is *essential* that we never go down paths that could trigger the - * shrinker for a purgable object. This is ensured by checking that - * msm_obj->madv == MSM_MADV_WILLNEED. - */ -enum msm_gem_lock { - OBJ_LOCK_NORMAL, - OBJ_LOCK_SHRINKER, -}; - -void msm_gem_purge(struct drm_gem_object *obj, enum msm_gem_lock subclass); -void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass); +void msm_gem_purge(struct drm_gem_object *obj); +void msm_gem_vunmap(struct drm_gem_object *obj); void msm_gem_free_work(struct work_struct *work);
/* Created per submit-ioctl, to track bo's and cmdstream bufs, etc, diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 482576d7a39a..2dc0ffa925b4 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -52,8 +52,11 @@ msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) return 0;
list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) { + if (!msm_gem_trylock(&msm_obj->base)) + continue; if (is_purgeable(msm_obj)) count += msm_obj->base.size >> PAGE_SHIFT; + msm_gem_unlock(&msm_obj->base); }
if (unlock) @@ -78,10 +81,13 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) { if (freed >= sc->nr_to_scan) break; + if (!msm_gem_trylock(&msm_obj->base)) + continue; if (is_purgeable(msm_obj)) { - msm_gem_purge(&msm_obj->base, OBJ_LOCK_SHRINKER); + msm_gem_purge(&msm_obj->base); freed += msm_obj->base.size >> PAGE_SHIFT; } + msm_gem_unlock(&msm_obj->base); }
if (unlock) @@ -107,15 +113,20 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) return NOTIFY_DONE;
list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) { + if (!msm_gem_trylock(&msm_obj->base)) + continue; if (is_vunmapable(msm_obj)) { - msm_gem_vunmap(&msm_obj->base, OBJ_LOCK_SHRINKER); - /* since we don't know any better, lets bail after a few - * and if necessary the shrinker will be invoked again. - * Seems better than unmapping *everything* - */ - if (++unmapped >= 15) - break; + msm_gem_vunmap(&msm_obj->base); + unmapped++; } + msm_gem_unlock(&msm_obj->base); + + /* since we don't know any better, lets bail after a few + * and if necessary the shrinker will be invoked again. + * Seems better than unmapping *everything* + */ + if (++unmapped >= 15) + break; }
if (unlock)
On Mon, Oct 19, 2020 at 10:45 PM Rob Clark robdclark@gmail.com wrote:
From: Rob Clark robdclark@chromium.org
Move grabbing the bo lock into shrinker, with a msm_gem_trylock() to skip over bo's that are already locked. This gets rid of the nested lock classes.
Signed-off-by: Rob Clark robdclark@chromium.org
drivers/gpu/drm/msm/msm_gem.c | 24 +++++---------------- drivers/gpu/drm/msm/msm_gem.h | 29 ++++++++++---------------- drivers/gpu/drm/msm/msm_gem_shrinker.c | 27 +++++++++++++++++------- 3 files changed, 35 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index e0d8d739b068..1195847714ba 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -17,8 +17,6 @@ #include "msm_gpu.h" #include "msm_mmu.h"
-static void msm_gem_vunmap_locked(struct drm_gem_object *obj);
static dma_addr_t physaddr(struct drm_gem_object *obj) { @@ -693,20 +691,19 @@ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv) return (madv != __MSM_MADV_PURGED); }
-void msm_gem_purge(struct drm_gem_object *obj, enum msm_gem_lock subclass) +void msm_gem_purge(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; struct msm_gem_object *msm_obj = to_msm_bo(obj);
WARN_ON(!mutex_is_locked(&dev->struct_mutex));
WARN_ON(!msm_gem_is_locked(obj)); WARN_ON(!is_purgeable(msm_obj)); WARN_ON(obj->import_attach);
mutex_lock_nested(&msm_obj->lock, subclass);
put_iova(obj);
msm_gem_vunmap_locked(obj);
msm_gem_vunmap(obj); put_pages(obj);
@@ -724,11 +721,9 @@ void msm_gem_purge(struct drm_gem_object *obj, enum msm_gem_lock subclass)
invalidate_mapping_pages(file_inode(obj->filp)->i_mapping, 0, (loff_t)-1);
msm_gem_unlock(obj);
}
-static void msm_gem_vunmap_locked(struct drm_gem_object *obj) +void msm_gem_vunmap(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj);
@@ -741,15 +736,6 @@ static void msm_gem_vunmap_locked(struct drm_gem_object *obj) msm_obj->vaddr = NULL; }
-void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass) -{
struct msm_gem_object *msm_obj = to_msm_bo(obj);
mutex_lock_nested(&msm_obj->lock, subclass);
msm_gem_vunmap_locked(obj);
msm_gem_unlock(obj);
-}
/* must be called before _move_to_active().. */ int msm_gem_sync_object(struct drm_gem_object *obj, struct msm_fence_context *fctx, bool exclusive) @@ -986,7 +972,7 @@ static void free_object(struct msm_gem_object *msm_obj)
drm_prime_gem_destroy(obj, msm_obj->sgt); } else {
msm_gem_vunmap_locked(obj);
msm_gem_vunmap(obj); put_pages(obj); }
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index d55d5401a2d2..c5232b8da794 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -162,6 +162,13 @@ msm_gem_lock(struct drm_gem_object *obj) mutex_lock(&msm_obj->lock); }
+static inline bool __must_check +msm_gem_trylock(struct drm_gem_object *obj) +{
struct msm_gem_object *msm_obj = to_msm_bo(obj);
return mutex_trylock_recursive(&msm_obj->lock) == MUTEX_TRYLOCK_SUCCESS;
This can just be
return mutex_trylock(&msm_obj->lock) == 1;
now, right?
+}
static inline int msm_gem_lock_interruptible(struct drm_gem_object *obj) { @@ -190,6 +197,7 @@ static inline bool is_active(struct msm_gem_object *msm_obj)
static inline bool is_purgeable(struct msm_gem_object *msm_obj) {
WARN_ON(!msm_gem_is_locked(&msm_obj->base)); WARN_ON(!mutex_is_locked(&msm_obj->base.dev->struct_mutex)); return (msm_obj->madv == MSM_MADV_DONTNEED) && msm_obj->sgt && !msm_obj->base.dma_buf && !msm_obj->base.import_attach;
@@ -197,27 +205,12 @@ static inline bool is_purgeable(struct msm_gem_object *msm_obj)
static inline bool is_vunmapable(struct msm_gem_object *msm_obj) {
WARN_ON(!msm_gem_is_locked(&msm_obj->base)); return (msm_obj->vmap_count == 0) && msm_obj->vaddr;
}
-/* The shrinker can be triggered while we hold objA->lock, and need
- to grab objB->lock to purge it. Lockdep just sees these as a single
- class of lock, so we use subclasses to teach it the difference.
- OBJ_LOCK_NORMAL is implicit (ie. normal mutex_lock() call), and
- OBJ_LOCK_SHRINKER is used by shrinker.
- It is *essential* that we never go down paths that could trigger the
- shrinker for a purgable object. This is ensured by checking that
- msm_obj->madv == MSM_MADV_WILLNEED.
- */
-enum msm_gem_lock {
OBJ_LOCK_NORMAL,
OBJ_LOCK_SHRINKER,
-};
-void msm_gem_purge(struct drm_gem_object *obj, enum msm_gem_lock subclass); -void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass); +void msm_gem_purge(struct drm_gem_object *obj); +void msm_gem_vunmap(struct drm_gem_object *obj); void msm_gem_free_work(struct work_struct *work);
/* Created per submit-ioctl, to track bo's and cmdstream bufs, etc, diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 482576d7a39a..2dc0ffa925b4 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -52,8 +52,11 @@ msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) return 0;
list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) {
if (!msm_gem_trylock(&msm_obj->base))
continue; if (is_purgeable(msm_obj)) count += msm_obj->base.size >> PAGE_SHIFT;
msm_gem_unlock(&msm_obj->base); } if (unlock)
@@ -78,10 +81,13 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) { if (freed >= sc->nr_to_scan) break;
if (!msm_gem_trylock(&msm_obj->base))
continue; if (is_purgeable(msm_obj)) {
msm_gem_purge(&msm_obj->base, OBJ_LOCK_SHRINKER);
msm_gem_purge(&msm_obj->base); freed += msm_obj->base.size >> PAGE_SHIFT; }
msm_gem_unlock(&msm_obj->base); } if (unlock)
@@ -107,15 +113,20 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) return NOTIFY_DONE;
list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) {
if (!msm_gem_trylock(&msm_obj->base))
continue; if (is_vunmapable(msm_obj)) {
msm_gem_vunmap(&msm_obj->base, OBJ_LOCK_SHRINKER);
/* since we don't know any better, lets bail after a few
* and if necessary the shrinker will be invoked again.
* Seems better than unmapping *everything*
*/
if (++unmapped >= 15)
break;
msm_gem_vunmap(&msm_obj->base);
unmapped++; }
msm_gem_unlock(&msm_obj->base);
/* since we don't know any better, lets bail after a few
* and if necessary the shrinker will be invoked again.
* Seems better than unmapping *everything*
*/
if (++unmapped >= 15)
break; } if (unlock)
-- 2.26.2
Freedreno mailing list Freedreno@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno
On Fri, Oct 23, 2020 at 1:55 AM Kristian Høgsberg hoegsberg@gmail.com wrote:
On Mon, Oct 19, 2020 at 10:45 PM Rob Clark robdclark@gmail.com wrote:
From: Rob Clark robdclark@chromium.org
Move grabbing the bo lock into shrinker, with a msm_gem_trylock() to skip over bo's that are already locked. This gets rid of the nested lock classes.
Signed-off-by: Rob Clark robdclark@chromium.org
drivers/gpu/drm/msm/msm_gem.c | 24 +++++---------------- drivers/gpu/drm/msm/msm_gem.h | 29 ++++++++++---------------- drivers/gpu/drm/msm/msm_gem_shrinker.c | 27 +++++++++++++++++------- 3 files changed, 35 insertions(+), 45 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index e0d8d739b068..1195847714ba 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -17,8 +17,6 @@ #include "msm_gpu.h" #include "msm_mmu.h"
-static void msm_gem_vunmap_locked(struct drm_gem_object *obj);
static dma_addr_t physaddr(struct drm_gem_object *obj) { @@ -693,20 +691,19 @@ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv) return (madv != __MSM_MADV_PURGED); }
-void msm_gem_purge(struct drm_gem_object *obj, enum msm_gem_lock subclass) +void msm_gem_purge(struct drm_gem_object *obj) { struct drm_device *dev = obj->dev; struct msm_gem_object *msm_obj = to_msm_bo(obj);
WARN_ON(!mutex_is_locked(&dev->struct_mutex));
WARN_ON(!msm_gem_is_locked(obj)); WARN_ON(!is_purgeable(msm_obj)); WARN_ON(obj->import_attach);
mutex_lock_nested(&msm_obj->lock, subclass);
put_iova(obj);
msm_gem_vunmap_locked(obj);
msm_gem_vunmap(obj); put_pages(obj);
@@ -724,11 +721,9 @@ void msm_gem_purge(struct drm_gem_object *obj, enum msm_gem_lock subclass)
invalidate_mapping_pages(file_inode(obj->filp)->i_mapping, 0, (loff_t)-1);
msm_gem_unlock(obj);
}
-static void msm_gem_vunmap_locked(struct drm_gem_object *obj) +void msm_gem_vunmap(struct drm_gem_object *obj) { struct msm_gem_object *msm_obj = to_msm_bo(obj);
@@ -741,15 +736,6 @@ static void msm_gem_vunmap_locked(struct drm_gem_object *obj) msm_obj->vaddr = NULL; }
-void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass) -{
struct msm_gem_object *msm_obj = to_msm_bo(obj);
mutex_lock_nested(&msm_obj->lock, subclass);
msm_gem_vunmap_locked(obj);
msm_gem_unlock(obj);
-}
/* must be called before _move_to_active().. */ int msm_gem_sync_object(struct drm_gem_object *obj, struct msm_fence_context *fctx, bool exclusive) @@ -986,7 +972,7 @@ static void free_object(struct msm_gem_object *msm_obj)
drm_prime_gem_destroy(obj, msm_obj->sgt); } else {
msm_gem_vunmap_locked(obj);
msm_gem_vunmap(obj); put_pages(obj); }
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index d55d5401a2d2..c5232b8da794 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -162,6 +162,13 @@ msm_gem_lock(struct drm_gem_object *obj) mutex_lock(&msm_obj->lock); }
+static inline bool __must_check +msm_gem_trylock(struct drm_gem_object *obj) +{
struct msm_gem_object *msm_obj = to_msm_bo(obj);
return mutex_trylock_recursive(&msm_obj->lock) == MUTEX_TRYLOCK_SUCCESS;
This can just be
return mutex_trylock(&msm_obj->lock) == 1;
now, right?
yeah, I suppose.. although this is just transient and gets replaced in a later patch
BR, -R
+}
static inline int msm_gem_lock_interruptible(struct drm_gem_object *obj) { @@ -190,6 +197,7 @@ static inline bool is_active(struct msm_gem_object *msm_obj)
static inline bool is_purgeable(struct msm_gem_object *msm_obj) {
WARN_ON(!msm_gem_is_locked(&msm_obj->base)); WARN_ON(!mutex_is_locked(&msm_obj->base.dev->struct_mutex)); return (msm_obj->madv == MSM_MADV_DONTNEED) && msm_obj->sgt && !msm_obj->base.dma_buf && !msm_obj->base.import_attach;
@@ -197,27 +205,12 @@ static inline bool is_purgeable(struct msm_gem_object *msm_obj)
static inline bool is_vunmapable(struct msm_gem_object *msm_obj) {
WARN_ON(!msm_gem_is_locked(&msm_obj->base)); return (msm_obj->vmap_count == 0) && msm_obj->vaddr;
}
-/* The shrinker can be triggered while we hold objA->lock, and need
- to grab objB->lock to purge it. Lockdep just sees these as a single
- class of lock, so we use subclasses to teach it the difference.
- OBJ_LOCK_NORMAL is implicit (ie. normal mutex_lock() call), and
- OBJ_LOCK_SHRINKER is used by shrinker.
- It is *essential* that we never go down paths that could trigger the
- shrinker for a purgable object. This is ensured by checking that
- msm_obj->madv == MSM_MADV_WILLNEED.
- */
-enum msm_gem_lock {
OBJ_LOCK_NORMAL,
OBJ_LOCK_SHRINKER,
-};
-void msm_gem_purge(struct drm_gem_object *obj, enum msm_gem_lock subclass); -void msm_gem_vunmap(struct drm_gem_object *obj, enum msm_gem_lock subclass); +void msm_gem_purge(struct drm_gem_object *obj); +void msm_gem_vunmap(struct drm_gem_object *obj); void msm_gem_free_work(struct work_struct *work);
/* Created per submit-ioctl, to track bo's and cmdstream bufs, etc, diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 482576d7a39a..2dc0ffa925b4 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -52,8 +52,11 @@ msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) return 0;
list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) {
if (!msm_gem_trylock(&msm_obj->base))
continue; if (is_purgeable(msm_obj)) count += msm_obj->base.size >> PAGE_SHIFT;
msm_gem_unlock(&msm_obj->base); } if (unlock)
@@ -78,10 +81,13 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) { if (freed >= sc->nr_to_scan) break;
if (!msm_gem_trylock(&msm_obj->base))
continue; if (is_purgeable(msm_obj)) {
msm_gem_purge(&msm_obj->base, OBJ_LOCK_SHRINKER);
msm_gem_purge(&msm_obj->base); freed += msm_obj->base.size >> PAGE_SHIFT; }
msm_gem_unlock(&msm_obj->base); } if (unlock)
@@ -107,15 +113,20 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) return NOTIFY_DONE;
list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) {
if (!msm_gem_trylock(&msm_obj->base))
continue; if (is_vunmapable(msm_obj)) {
msm_gem_vunmap(&msm_obj->base, OBJ_LOCK_SHRINKER);
/* since we don't know any better, lets bail after a few
* and if necessary the shrinker will be invoked again.
* Seems better than unmapping *everything*
*/
if (++unmapped >= 15)
break;
msm_gem_vunmap(&msm_obj->base);
unmapped++; }
msm_gem_unlock(&msm_obj->base);
/* since we don't know any better, lets bail after a few
* and if necessary the shrinker will be invoked again.
* Seems better than unmapping *everything*
*/
if (++unmapped >= 15)
break; } if (unlock)
-- 2.26.2
Freedreno mailing list Freedreno@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno
From: Rob Clark robdclark@chromium.org
We cannot switch to using obj->resv for locking without first moving all the copy_from_user() ahead of submit_lock_objects(). Otherwise in the mm fault path we aquire mm->mmap_sem before obj lock, but in the submit path the order is reversed.
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/msm_gem.h | 3 + drivers/gpu/drm/msm/msm_gem_submit.c | 121 ++++++++++++++++----------- 2 files changed, 76 insertions(+), 48 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index c5232b8da794..0b7dda312992 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -240,7 +240,10 @@ struct msm_gem_submit { uint32_t type; uint32_t size; /* in dwords */ uint64_t iova; + uint32_t offset;/* in dwords */ uint32_t idx; /* cmdstream buffer idx in bos[] */ + uint32_t nr_relocs; + struct drm_msm_gem_submit_reloc *relocs; } *cmd; /* array of size nr_cmds */ struct { uint32_t flags; diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index aa5c60a7132d..002130d826aa 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -62,11 +62,16 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
void msm_gem_submit_free(struct msm_gem_submit *submit) { + unsigned i; + dma_fence_put(submit->fence); list_del(&submit->node); put_pid(submit->pid); msm_submitqueue_put(submit->queue);
+ for (i = 0; i < submit->nr_cmds; i++) + kfree(submit->cmd[i].relocs); + kfree(submit); }
@@ -150,6 +155,60 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, return ret; }
+static int submit_lookup_cmds(struct msm_gem_submit *submit, + struct drm_msm_gem_submit *args, struct drm_file *file) +{ + unsigned i, sz; + int ret = 0; + + for (i = 0; i < args->nr_cmds; i++) { + struct drm_msm_gem_submit_cmd submit_cmd; + void __user *userptr = + u64_to_user_ptr(args->cmds + (i * sizeof(submit_cmd))); + + ret = copy_from_user(&submit_cmd, userptr, sizeof(submit_cmd)); + if (ret) { + ret = -EFAULT; + goto out; + } + + /* validate input from userspace: */ + switch (submit_cmd.type) { + case MSM_SUBMIT_CMD_BUF: + case MSM_SUBMIT_CMD_IB_TARGET_BUF: + case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: + break; + default: + DRM_ERROR("invalid type: %08x\n", submit_cmd.type); + return -EINVAL; + } + + if (submit_cmd.size % 4) { + DRM_ERROR("non-aligned cmdstream buffer size: %u\n", + submit_cmd.size); + ret = -EINVAL; + goto out; + } + + submit->cmd[i].type = submit_cmd.type; + submit->cmd[i].size = submit_cmd.size / 4; + submit->cmd[i].offset = submit_cmd.submit_offset / 4; + submit->cmd[i].idx = submit_cmd.submit_idx; + submit->cmd[i].nr_relocs = submit_cmd.nr_relocs; + + sz = sizeof(struct drm_msm_gem_submit_reloc) * submit_cmd.nr_relocs; + submit->cmd[i].relocs = kmalloc(sz, GFP_KERNEL); + ret = copy_from_user(submit->cmd[i].relocs, userptr, sz); + if (ret) { + ret = -EFAULT; + goto out; + } + } + +out: + return ret; +} + static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i, bool backoff) { @@ -301,7 +360,7 @@ static int submit_bo(struct msm_gem_submit *submit, uint32_t idx,
/* process the reloc's and patch up the cmdstream as needed: */ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *obj, - uint32_t offset, uint32_t nr_relocs, uint64_t relocs) + uint32_t offset, uint32_t nr_relocs, struct drm_msm_gem_submit_reloc *relocs) { uint32_t i, last_offset = 0; uint32_t *ptr; @@ -327,18 +386,11 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob }
for (i = 0; i < nr_relocs; i++) { - struct drm_msm_gem_submit_reloc submit_reloc; - void __user *userptr = - u64_to_user_ptr(relocs + (i * sizeof(submit_reloc))); + struct drm_msm_gem_submit_reloc submit_reloc = relocs[i]; uint32_t off; uint64_t iova; bool valid;
- if (copy_from_user(&submit_reloc, userptr, sizeof(submit_reloc))) { - ret = -EFAULT; - goto out; - } - if (submit_reloc.submit_offset % 4) { DRM_ERROR("non-aligned reloc offset: %u\n", submit_reloc.submit_offset); @@ -694,6 +746,10 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (ret) goto out;
+ ret = submit_lookup_cmds(submit, args, file); + if (ret) + goto out; + /* copy_*_user while holding a ww ticket upsets lockdep */ ww_acquire_init(&submit->ticket, &reservation_ww_class); has_ww_ticket = true; @@ -710,60 +766,29 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, goto out;
for (i = 0; i < args->nr_cmds; i++) { - struct drm_msm_gem_submit_cmd submit_cmd; - void __user *userptr = - u64_to_user_ptr(args->cmds + (i * sizeof(submit_cmd))); struct msm_gem_object *msm_obj; uint64_t iova;
- ret = copy_from_user(&submit_cmd, userptr, sizeof(submit_cmd)); - if (ret) { - ret = -EFAULT; - goto out; - } - - /* validate input from userspace: */ - switch (submit_cmd.type) { - case MSM_SUBMIT_CMD_BUF: - case MSM_SUBMIT_CMD_IB_TARGET_BUF: - case MSM_SUBMIT_CMD_CTX_RESTORE_BUF: - break; - default: - DRM_ERROR("invalid type: %08x\n", submit_cmd.type); - ret = -EINVAL; - goto out; - } - - ret = submit_bo(submit, submit_cmd.submit_idx, + ret = submit_bo(submit, submit->cmd[i].idx, &msm_obj, &iova, NULL); if (ret) goto out;
- if (submit_cmd.size % 4) { - DRM_ERROR("non-aligned cmdstream buffer size: %u\n", - submit_cmd.size); + if (!submit->cmd[i].size || + ((submit->cmd[i].size + submit->cmd[i].offset) > + msm_obj->base.size / 4)) { + DRM_ERROR("invalid cmdstream size: %u\n", submit->cmd[i].size * 4); ret = -EINVAL; goto out; }
- if (!submit_cmd.size || - ((submit_cmd.size + submit_cmd.submit_offset) > - msm_obj->base.size)) { - DRM_ERROR("invalid cmdstream size: %u\n", submit_cmd.size); - ret = -EINVAL; - goto out; - } - - submit->cmd[i].type = submit_cmd.type; - submit->cmd[i].size = submit_cmd.size / 4; - submit->cmd[i].iova = iova + submit_cmd.submit_offset; - submit->cmd[i].idx = submit_cmd.submit_idx; + submit->cmd[i].iova = iova + (submit->cmd[i].offset * 4);
if (submit->valid) continue;
- ret = submit_reloc(submit, msm_obj, submit_cmd.submit_offset, - submit_cmd.nr_relocs, submit_cmd.relocs); + ret = submit_reloc(submit, msm_obj, submit->cmd[i].offset * 4, + submit->cmd[i].nr_relocs, submit->cmd[i].relocs); if (ret) goto out; }
On Mon, Oct 19, 2020 at 10:45 PM Rob Clark robdclark@gmail.com wrote:
From: Rob Clark robdclark@chromium.org
We cannot switch to using obj->resv for locking without first moving all the copy_from_user() ahead of submit_lock_objects(). Otherwise in the mm fault path we aquire mm->mmap_sem before obj lock, but in the submit path the order is reversed.
Signed-off-by: Rob Clark robdclark@chromium.org
drivers/gpu/drm/msm/msm_gem.h | 3 + drivers/gpu/drm/msm/msm_gem_submit.c | 121 ++++++++++++++++----------- 2 files changed, 76 insertions(+), 48 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index c5232b8da794..0b7dda312992 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -240,7 +240,10 @@ struct msm_gem_submit { uint32_t type; uint32_t size; /* in dwords */ uint64_t iova;
uint32_t offset;/* in dwords */ uint32_t idx; /* cmdstream buffer idx in bos[] */
uint32_t nr_relocs;
struct drm_msm_gem_submit_reloc *relocs; } *cmd; /* array of size nr_cmds */ struct { uint32_t flags;
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index aa5c60a7132d..002130d826aa 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -62,11 +62,16 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev,
void msm_gem_submit_free(struct msm_gem_submit *submit) {
unsigned i;
dma_fence_put(submit->fence); list_del(&submit->node); put_pid(submit->pid); msm_submitqueue_put(submit->queue);
for (i = 0; i < submit->nr_cmds; i++)
kfree(submit->cmd[i].relocs);
kfree(submit);
}
@@ -150,6 +155,60 @@ static int submit_lookup_objects(struct msm_gem_submit *submit, return ret; }
+static int submit_lookup_cmds(struct msm_gem_submit *submit,
struct drm_msm_gem_submit *args, struct drm_file *file)
+{
unsigned i, sz;
int ret = 0;
for (i = 0; i < args->nr_cmds; i++) {
struct drm_msm_gem_submit_cmd submit_cmd;
void __user *userptr =
u64_to_user_ptr(args->cmds + (i * sizeof(submit_cmd)));
ret = copy_from_user(&submit_cmd, userptr, sizeof(submit_cmd));
if (ret) {
ret = -EFAULT;
goto out;
}
/* validate input from userspace: */
switch (submit_cmd.type) {
case MSM_SUBMIT_CMD_BUF:
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
break;
default:
DRM_ERROR("invalid type: %08x\n", submit_cmd.type);
return -EINVAL;
}
if (submit_cmd.size % 4) {
DRM_ERROR("non-aligned cmdstream buffer size: %u\n",
submit_cmd.size);
ret = -EINVAL;
goto out;
}
submit->cmd[i].type = submit_cmd.type;
submit->cmd[i].size = submit_cmd.size / 4;
submit->cmd[i].offset = submit_cmd.submit_offset / 4;
submit->cmd[i].idx = submit_cmd.submit_idx;
submit->cmd[i].nr_relocs = submit_cmd.nr_relocs;
sz = sizeof(struct drm_msm_gem_submit_reloc) * submit_cmd.nr_relocs;
submit->cmd[i].relocs = kmalloc(sz, GFP_KERNEL);
kmalloc_array() or check_mul_overflow() here for the integer overflow check.
ret = copy_from_user(submit->cmd[i].relocs, userptr, sz);
if (ret) {
ret = -EFAULT;
goto out;
}
}
+out:
return ret;
+}
static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i, bool backoff) { @@ -301,7 +360,7 @@ static int submit_bo(struct msm_gem_submit *submit, uint32_t idx,
/* process the reloc's and patch up the cmdstream as needed: */ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *obj,
uint32_t offset, uint32_t nr_relocs, uint64_t relocs)
uint32_t offset, uint32_t nr_relocs, struct drm_msm_gem_submit_reloc *relocs)
{ uint32_t i, last_offset = 0; uint32_t *ptr; @@ -327,18 +386,11 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob }
for (i = 0; i < nr_relocs; i++) {
struct drm_msm_gem_submit_reloc submit_reloc;
void __user *userptr =
u64_to_user_ptr(relocs + (i * sizeof(submit_reloc)));
struct drm_msm_gem_submit_reloc submit_reloc = relocs[i]; uint32_t off; uint64_t iova; bool valid;
if (copy_from_user(&submit_reloc, userptr, sizeof(submit_reloc))) {
ret = -EFAULT;
goto out;
}
if (submit_reloc.submit_offset % 4) { DRM_ERROR("non-aligned reloc offset: %u\n", submit_reloc.submit_offset);
@@ -694,6 +746,10 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (ret) goto out;
ret = submit_lookup_cmds(submit, args, file);
if (ret)
goto out;
/* copy_*_user while holding a ww ticket upsets lockdep */ ww_acquire_init(&submit->ticket, &reservation_ww_class); has_ww_ticket = true;
@@ -710,60 +766,29 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, goto out;
for (i = 0; i < args->nr_cmds; i++) {
struct drm_msm_gem_submit_cmd submit_cmd;
void __user *userptr =
u64_to_user_ptr(args->cmds + (i * sizeof(submit_cmd))); struct msm_gem_object *msm_obj; uint64_t iova;
ret = copy_from_user(&submit_cmd, userptr, sizeof(submit_cmd));
if (ret) {
ret = -EFAULT;
goto out;
}
/* validate input from userspace: */
switch (submit_cmd.type) {
case MSM_SUBMIT_CMD_BUF:
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
break;
default:
DRM_ERROR("invalid type: %08x\n", submit_cmd.type);
ret = -EINVAL;
goto out;
}
ret = submit_bo(submit, submit_cmd.submit_idx,
ret = submit_bo(submit, submit->cmd[i].idx, &msm_obj, &iova, NULL); if (ret) goto out;
if (submit_cmd.size % 4) {
DRM_ERROR("non-aligned cmdstream buffer size: %u\n",
submit_cmd.size);
if (!submit->cmd[i].size ||
((submit->cmd[i].size + submit->cmd[i].offset) >
msm_obj->base.size / 4)) {
DRM_ERROR("invalid cmdstream size: %u\n", submit->cmd[i].size * 4); ret = -EINVAL; goto out; }
if (!submit_cmd.size ||
((submit_cmd.size + submit_cmd.submit_offset) >
msm_obj->base.size)) {
DRM_ERROR("invalid cmdstream size: %u\n", submit_cmd.size);
ret = -EINVAL;
goto out;
}
submit->cmd[i].type = submit_cmd.type;
submit->cmd[i].size = submit_cmd.size / 4;
submit->cmd[i].iova = iova + submit_cmd.submit_offset;
submit->cmd[i].idx = submit_cmd.submit_idx;
submit->cmd[i].iova = iova + (submit->cmd[i].offset * 4); if (submit->valid) continue;
ret = submit_reloc(submit, msm_obj, submit_cmd.submit_offset,
submit_cmd.nr_relocs, submit_cmd.relocs);
ret = submit_reloc(submit, msm_obj, submit->cmd[i].offset * 4,
submit->cmd[i].nr_relocs, submit->cmd[i].relocs); if (ret) goto out; }
-- 2.26.2
Freedreno mailing list Freedreno@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno
From: Rob Clark robdclark@chromium.org
Unfortunately, due to an dev_pm_opp locking interaction with mm->mmap_sem, we need to do pm get before aquiring obj locks, otherwise we can have anger lockdep with the chain:
opp_table_lock --> &mm->mmap_sem --> reservation_ww_class_mutex
For an explicit fencing userspace, the impact should be minimal as we do all the fence waits before this point. It could result in some needless resumes in error cases, etc.
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/msm_gem_submit.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 002130d826aa..a9422d043bfe 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -744,11 +744,20 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
ret = submit_lookup_objects(submit, args, file); if (ret) - goto out; + goto out_pre_pm;
ret = submit_lookup_cmds(submit, args, file); if (ret) - goto out; + goto out_pre_pm; + + /* + * Thanks to dev_pm_opp opp_table_lock interactions with mm->mmap_sem + * in the resume path, we need to to rpm get before we lock objs. + * Which unfortunately might involve powering up the GPU sooner than + * is necessary. But at least in the explicit fencing case, we will + * have already done all the fence waiting. + */ + pm_runtime_get_sync(&gpu->pdev->dev);
/* copy_*_user while holding a ww ticket upsets lockdep */ ww_acquire_init(&submit->ticket, &reservation_ww_class); @@ -825,6 +834,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
out: + pm_runtime_put(&gpu->pdev->dev); +out_pre_pm: submit_cleanup(submit); if (has_ww_ticket) ww_acquire_fini(&submit->ticket);
From: Rob Clark robdclark@chromium.org
This also converts the special msm_gem_get_vaddr_active() to expect the lock to already be held. There are two call-sites for this, one already has the lock held, so it is more straightforward to just open-code the locking for the other caller.
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/msm_gem.c | 12 ++---------- drivers/gpu/drm/msm/msm_gem.h | 16 +++++----------- drivers/gpu/drm/msm/msm_gem_submit.c | 8 ++++---- drivers/gpu/drm/msm/msm_gpu.c | 14 ++++++++++++-- drivers/gpu/drm/msm/msm_rd.c | 2 +- 5 files changed, 24 insertions(+), 28 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 1195847714ba..6abcf9fe480d 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -644,13 +644,7 @@ void *msm_gem_get_vaddr(struct drm_gem_object *obj) */ void *msm_gem_get_vaddr_active(struct drm_gem_object *obj) { - void *ret; - - msm_gem_lock(obj); - ret = get_vaddr(obj, __MSM_MADV_PURGED); - msm_gem_unlock(obj); - - return ret; + return get_vaddr(obj, __MSM_MADV_PURGED); }
void msm_gem_put_vaddr_locked(struct drm_gem_object *obj) @@ -976,9 +970,9 @@ static void free_object(struct msm_gem_object *msm_obj) put_pages(obj); }
+ msm_gem_unlock(obj); drm_gem_object_release(obj);
- msm_gem_unlock(obj); kfree(msm_obj); }
@@ -1050,8 +1044,6 @@ static int msm_gem_new_impl(struct drm_device *dev, if (!msm_obj) return -ENOMEM;
- mutex_init(&msm_obj->lock); - msm_obj->flags = flags; msm_obj->madv = MSM_MADV_WILLNEED;
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 0b7dda312992..f0608d96ef03 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -85,7 +85,6 @@ struct msm_gem_object { * an IOMMU. Also used for stolen/splashscreen buffer. */ struct drm_mm_node *vram_node; - struct mutex lock; /* Protects resources associated with bo */
char name[32]; /* Identifier to print for the debugfs files */
@@ -158,36 +157,31 @@ void msm_gem_describe_objects(struct list_head *list, struct seq_file *m); static inline void msm_gem_lock(struct drm_gem_object *obj) { - struct msm_gem_object *msm_obj = to_msm_bo(obj); - mutex_lock(&msm_obj->lock); + dma_resv_lock(obj->resv, NULL); }
static inline bool __must_check msm_gem_trylock(struct drm_gem_object *obj) { - struct msm_gem_object *msm_obj = to_msm_bo(obj); - return mutex_trylock_recursive(&msm_obj->lock) == MUTEX_TRYLOCK_SUCCESS; + return dma_resv_trylock(obj->resv); }
static inline int msm_gem_lock_interruptible(struct drm_gem_object *obj) { - struct msm_gem_object *msm_obj = to_msm_bo(obj); - return mutex_lock_interruptible(&msm_obj->lock); + return dma_resv_lock_interruptible(obj->resv, NULL); }
static inline void msm_gem_unlock(struct drm_gem_object *obj) { - struct msm_gem_object *msm_obj = to_msm_bo(obj); - mutex_unlock(&msm_obj->lock); + dma_resv_unlock(obj->resv); }
static inline bool msm_gem_is_locked(struct drm_gem_object *obj) { - struct msm_gem_object *msm_obj = to_msm_bo(obj); - return mutex_is_locked(&msm_obj->lock); + return dma_resv_is_locked(obj->resv); }
static inline bool is_active(struct msm_gem_object *msm_obj) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index a9422d043bfe..50ecc8455197 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -215,7 +215,7 @@ static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, struct msm_gem_object *msm_obj = submit->bos[i].obj;
if (submit->bos[i].flags & BO_PINNED) - msm_gem_unpin_iova(&msm_obj->base, submit->aspace); + msm_gem_unpin_iova_locked(&msm_obj->base, submit->aspace);
if (submit->bos[i].flags & BO_LOCKED) dma_resv_unlock(msm_obj->base.resv); @@ -318,7 +318,7 @@ static int submit_pin_objects(struct msm_gem_submit *submit) uint64_t iova;
/* if locking succeeded, pin bo: */ - ret = msm_gem_get_and_pin_iova(&msm_obj->base, + ret = msm_gem_get_and_pin_iova_locked(&msm_obj->base, submit->aspace, &iova);
if (ret) @@ -377,7 +377,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob /* For now, just map the entire thing. Eventually we probably * to do it page-by-page, w/ kmap() if not vmap()d.. */ - ptr = msm_gem_get_vaddr(&obj->base); + ptr = msm_gem_get_vaddr_locked(&obj->base);
if (IS_ERR(ptr)) { ret = PTR_ERR(ptr); @@ -428,7 +428,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob }
out: - msm_gem_put_vaddr(&obj->base); + msm_gem_put_vaddr_locked(&obj->base);
return ret; } diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 55d16489d0f3..015f6b884e2e 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -326,7 +326,9 @@ static void msm_gpu_crashstate_get_bo(struct msm_gpu_state *state, if (!state_bo->data) goto out;
+ msm_gem_lock(&obj->base); ptr = msm_gem_get_vaddr_active(&obj->base); + msm_gem_unlock(&obj->base); if (IS_ERR(ptr)) { kvfree(state_bo->data); state_bo->data = NULL; @@ -470,14 +472,22 @@ static void recover_worker(struct work_struct *work) put_task_struct(task); }
+ /* msm_rd_dump_submit() needs bo locked to dump: */ + for (i = 0; i < submit->nr_bos; i++) + msm_gem_lock(&submit->bos[i].obj->base); + if (comm && cmd) { DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n", gpu->name, comm, cmd);
msm_rd_dump_submit(priv->hangrd, submit, "offending task: %s (%s)", comm, cmd); - } else + } else { msm_rd_dump_submit(priv->hangrd, submit, NULL); + } + + for (i = 0; i < submit->nr_bos; i++) + msm_gem_unlock(&submit->bos[i].obj->base); }
/* Record the crash state */ @@ -784,7 +794,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
/* submit takes a reference to the bo and iova until retired: */ drm_gem_object_get(&msm_obj->base); - msm_gem_get_and_pin_iova(&msm_obj->base, submit->aspace, &iova); + msm_gem_get_and_pin_iova_locked(&msm_obj->base, submit->aspace, &iova);
if (submit->bos[i].flags & MSM_SUBMIT_BO_WRITE) dma_resv_add_excl_fence(drm_obj->resv, submit->fence); diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c index fea30e7aa9e8..659e5cc4b40a 100644 --- a/drivers/gpu/drm/msm/msm_rd.c +++ b/drivers/gpu/drm/msm/msm_rd.c @@ -333,7 +333,7 @@ static void snapshot_buf(struct msm_rd_state *rd,
rd_write_section(rd, RD_BUFFER_CONTENTS, buf, size);
- msm_gem_put_vaddr(&obj->base); + msm_gem_put_vaddr_locked(&obj->base); }
/* called under struct_mutex */
From: Rob Clark robdclark@chromium.org
We only want to use the _unlocked() variant in the unlocked case.
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/msm_gem.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 6abcf9fe480d..3dcb2ef4740f 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -1135,7 +1135,11 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, return obj;
fail: - drm_gem_object_put(obj); + if (struct_mutex_locked) { + drm_gem_object_put_locked(obj); + } else { + drm_gem_object_put(obj); + } return ERR_PTR(ret); }
From: Rob Clark robdclark@chromium.org
It is somewhat redundant with the gpu tracepoints, and anyways not too useful to justify spamming the log when debug traces are enabled.
Signed-off-by: Rob Clark robdclark@chromium.org Reviewed-by: Jordan Crouse jcrouse@codeaurora.org --- drivers/gpu/drm/msm/msm_gpu.c | 1 - 1 file changed, 1 deletion(-)
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 015f6b884e2e..ed6645aa0ae5 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -545,7 +545,6 @@ static void recover_worker(struct work_struct *work)
static void hangcheck_timer_reset(struct msm_gpu *gpu) { - DBG("%s", gpu->name); mod_timer(&gpu->hangcheck_timer, round_jiffies_up(jiffies + DRM_MSM_HANGCHECK_JIFFIES)); }
From: Rob Clark robdclark@chromium.org
Small cleanup, update_fences() is used in the hangcheck path, but also in the normal retire path.
Signed-off-by: Rob Clark robdclark@chromium.org Reviewed-by: Jordan Crouse jcrouse@codeaurora.org --- drivers/gpu/drm/msm/msm_gpu.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index ed6645aa0ae5..1667d8066897 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -265,6 +265,20 @@ int msm_gpu_hw_init(struct msm_gpu *gpu) return ret; }
+static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, + uint32_t fence) +{ + struct msm_gem_submit *submit; + + list_for_each_entry(submit, &ring->submits, node) { + if (submit->seqno > fence) + break; + + msm_update_fence(submit->ring->fctx, + submit->fence->seqno); + } +} + #ifdef CONFIG_DEV_COREDUMP static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) @@ -413,20 +427,6 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu, * Hangcheck detection for locked gpu: */
-static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, - uint32_t fence) -{ - struct msm_gem_submit *submit; - - list_for_each_entry(submit, &ring->submits, node) { - if (submit->seqno > fence) - break; - - msm_update_fence(submit->ring->fctx, - submit->fence->seqno); - } -} - static struct msm_gem_submit * find_submit(struct msm_ringbuffer *ring, uint32_t fence) {
From: Rob Clark robdclark@chromium.org
Rather than relying on the big dev->struct_mutex hammer, introduce a more specific lock for protecting the bo lists.
Signed-off-by: Rob Clark robdclark@chromium.org Reviewed-by: Jordan Crouse jcrouse@codeaurora.org --- drivers/gpu/drm/msm/msm_debugfs.c | 7 +++++++ drivers/gpu/drm/msm/msm_drv.c | 7 +++++++ drivers/gpu/drm/msm/msm_drv.h | 13 +++++++++++- drivers/gpu/drm/msm/msm_gem.c | 28 +++++++++++++++----------- drivers/gpu/drm/msm/msm_gem_shrinker.c | 12 +++++++++++ drivers/gpu/drm/msm/msm_gpu.h | 5 ++++- 6 files changed, 58 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c index ee2e270f464c..64afbed89821 100644 --- a/drivers/gpu/drm/msm/msm_debugfs.c +++ b/drivers/gpu/drm/msm/msm_debugfs.c @@ -112,6 +112,11 @@ static int msm_gem_show(struct drm_device *dev, struct seq_file *m) { struct msm_drm_private *priv = dev->dev_private; struct msm_gpu *gpu = priv->gpu; + int ret; + + ret = mutex_lock_interruptible(&priv->mm_lock); + if (ret) + return ret;
if (gpu) { seq_printf(m, "Active Objects (%s):\n", gpu->name); @@ -121,6 +126,8 @@ static int msm_gem_show(struct drm_device *dev, struct seq_file *m) seq_printf(m, "Inactive Objects:\n"); msm_gem_describe_objects(&priv->inactive_list, m);
+ mutex_unlock(&priv->mm_lock); + return 0; }
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 70bc4bb69edc..15c41786d018 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -7,6 +7,7 @@
#include <linux/dma-mapping.h> #include <linux/kthread.h> +#include <linux/sched/mm.h> #include <linux/uaccess.h> #include <uapi/linux/sched/types.h>
@@ -468,6 +469,12 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv) init_llist_head(&priv->free_list);
INIT_LIST_HEAD(&priv->inactive_list); + mutex_init(&priv->mm_lock); + + /* Teach lockdep about lock ordering wrt. shrinker: */ + fs_reclaim_acquire(GFP_KERNEL); + might_lock(&priv->mm_lock); + fs_reclaim_release(GFP_KERNEL);
drm_mode_config_init(ddev);
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 713a0ae28125..7431d68ea102 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -174,8 +174,19 @@ struct msm_drm_private { struct msm_rd_state *hangrd; /* debugfs to dump hanging submits */ struct msm_perf_state *perf;
- /* list of GEM objects: */ + /* + * List of inactive GEM objects. Every bo is either in the inactive_list + * or gpu->active_list (for the gpu it is active on[1]) + * + * These lists are protected by mm_lock. If struct_mutex is involved, it + * should be aquired prior to mm_lock. One should *not* hold mm_lock in + * get_pages()/vmap()/etc paths, as they can trigger the shrinker. + * + * [1] if someone ever added support for the old 2d cores, there could be + * more than one gpu object + */ struct list_head inactive_list; + struct mutex mm_lock;
/* worker for delayed free of objects: */ struct work_struct free_work; diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 3dcb2ef4740f..092ed152999e 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -768,13 +768,17 @@ int msm_gem_sync_object(struct drm_gem_object *obj, void msm_gem_active_get(struct drm_gem_object *obj, struct msm_gpu *gpu) { struct msm_gem_object *msm_obj = to_msm_bo(obj); - WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); + struct msm_drm_private *priv = obj->dev->dev_private; + + might_sleep(); WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED);
if (!atomic_fetch_inc(&msm_obj->active_count)) { + mutex_lock(&priv->mm_lock); msm_obj->gpu = gpu; list_del_init(&msm_obj->mm_list); list_add_tail(&msm_obj->mm_list, &gpu->active_list); + mutex_unlock(&priv->mm_lock); } }
@@ -783,12 +787,14 @@ void msm_gem_active_put(struct drm_gem_object *obj) struct msm_gem_object *msm_obj = to_msm_bo(obj); struct msm_drm_private *priv = obj->dev->dev_private;
- WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); + might_sleep();
if (!atomic_dec_return(&msm_obj->active_count)) { + mutex_lock(&priv->mm_lock); msm_obj->gpu = NULL; list_del_init(&msm_obj->mm_list); list_add_tail(&msm_obj->mm_list, &priv->inactive_list); + mutex_unlock(&priv->mm_lock); } }
@@ -943,13 +949,16 @@ static void free_object(struct msm_gem_object *msm_obj) { struct drm_gem_object *obj = &msm_obj->base; struct drm_device *dev = obj->dev; + struct msm_drm_private *priv = dev->dev_private;
WARN_ON(!mutex_is_locked(&dev->struct_mutex));
/* object should not be on active list: */ WARN_ON(is_active(msm_obj));
+ mutex_lock(&priv->mm_lock); list_del(&msm_obj->mm_list); + mutex_unlock(&priv->mm_lock);
msm_gem_lock(obj);
@@ -1123,14 +1132,9 @@ static struct drm_gem_object *_msm_gem_new(struct drm_device *dev, mapping_set_gfp_mask(obj->filp->f_mapping, GFP_HIGHUSER); }
- if (struct_mutex_locked) { - WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - list_add_tail(&msm_obj->mm_list, &priv->inactive_list); - } else { - mutex_lock(&dev->struct_mutex); - list_add_tail(&msm_obj->mm_list, &priv->inactive_list); - mutex_unlock(&dev->struct_mutex); - } + mutex_lock(&priv->mm_lock); + list_add_tail(&msm_obj->mm_list, &priv->inactive_list); + mutex_unlock(&priv->mm_lock);
return obj;
@@ -1198,9 +1202,9 @@ struct drm_gem_object *msm_gem_import(struct drm_device *dev,
msm_gem_unlock(obj);
- mutex_lock(&dev->struct_mutex); + mutex_lock(&priv->mm_lock); list_add_tail(&msm_obj->mm_list, &priv->inactive_list); - mutex_unlock(&dev->struct_mutex); + mutex_unlock(&priv->mm_lock);
return obj;
diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 2dc0ffa925b4..6be073b8ca08 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -51,6 +51,8 @@ msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) if (!msm_gem_shrinker_lock(dev, &unlock)) return 0;
+ mutex_lock(&priv->mm_lock); + list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) { if (!msm_gem_trylock(&msm_obj->base)) continue; @@ -59,6 +61,8 @@ msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) msm_gem_unlock(&msm_obj->base); }
+ mutex_unlock(&priv->mm_lock); + if (unlock) mutex_unlock(&dev->struct_mutex);
@@ -78,6 +82,8 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) if (!msm_gem_shrinker_lock(dev, &unlock)) return SHRINK_STOP;
+ mutex_lock(&priv->mm_lock); + list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) { if (freed >= sc->nr_to_scan) break; @@ -90,6 +96,8 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) msm_gem_unlock(&msm_obj->base); }
+ mutex_unlock(&priv->mm_lock); + if (unlock) mutex_unlock(&dev->struct_mutex);
@@ -112,6 +120,8 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) if (!msm_gem_shrinker_lock(dev, &unlock)) return NOTIFY_DONE;
+ mutex_lock(&priv->mm_lock); + list_for_each_entry(msm_obj, &priv->inactive_list, mm_list) { if (!msm_gem_trylock(&msm_obj->base)) continue; @@ -129,6 +139,8 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) break; }
+ mutex_unlock(&priv->mm_lock); + if (unlock) mutex_unlock(&dev->struct_mutex);
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index 6c9e1fdc1a76..1806e87600c0 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -94,7 +94,10 @@ struct msm_gpu { struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS]; int nr_rings;
- /* list of GEM active objects: */ + /* + * List of GEM active objects on this gpu. Protected by + * msm_drm_private::mm_lock + */ struct list_head active_list;
/* does gpu need hw_init? */
From: Rob Clark robdclark@chromium.org
Before adding another lock, give ring->lock a more descriptive name.
Signed-off-by: Rob Clark robdclark@chromium.org Reviewed-by: Jordan Crouse jcrouse@codeaurora.org --- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 4 ++-- drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 12 ++++++------ drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 4 ++-- drivers/gpu/drm/msm/msm_ringbuffer.c | 2 +- drivers/gpu/drm/msm/msm_ringbuffer.h | 7 ++++++- 5 files changed, 17 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index b2593c6bd2ac..2befaf304f04 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -36,7 +36,7 @@ void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring))); }
- spin_lock_irqsave(&ring->lock, flags); + spin_lock_irqsave(&ring->preempt_lock, flags);
/* Copy the shadow to the actual register */ ring->cur = ring->next; @@ -44,7 +44,7 @@ void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, /* Make sure to wrap wptr if we need to */ wptr = get_wptr(ring);
- spin_unlock_irqrestore(&ring->lock, flags); + spin_unlock_irqrestore(&ring->preempt_lock, flags);
/* Make sure everything is posted before making a decision */ mb(); diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c index 7e04509c4e1f..183de1139eeb 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c @@ -45,9 +45,9 @@ static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring) if (!ring) return;
- spin_lock_irqsave(&ring->lock, flags); + spin_lock_irqsave(&ring->preempt_lock, flags); wptr = get_wptr(ring); - spin_unlock_irqrestore(&ring->lock, flags); + spin_unlock_irqrestore(&ring->preempt_lock, flags);
gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr); } @@ -62,9 +62,9 @@ static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu) bool empty; struct msm_ringbuffer *ring = gpu->rb[i];
- spin_lock_irqsave(&ring->lock, flags); + spin_lock_irqsave(&ring->preempt_lock, flags); empty = (get_wptr(ring) == ring->memptrs->rptr); - spin_unlock_irqrestore(&ring->lock, flags); + spin_unlock_irqrestore(&ring->preempt_lock, flags);
if (!empty) return ring; @@ -132,9 +132,9 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu) }
/* Make sure the wptr doesn't update while we're in motion */ - spin_lock_irqsave(&ring->lock, flags); + spin_lock_irqsave(&ring->preempt_lock, flags); a5xx_gpu->preempt[ring->id]->wptr = get_wptr(ring); - spin_unlock_irqrestore(&ring->lock, flags); + spin_unlock_irqrestore(&ring->preempt_lock, flags);
/* Set the address of the incoming preemption record */ gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO, diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c index 0894703a742e..5dddb9163bd3 100644 --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c @@ -65,7 +65,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring))); }
- spin_lock_irqsave(&ring->lock, flags); + spin_lock_irqsave(&ring->preempt_lock, flags);
/* Copy the shadow to the actual register */ ring->cur = ring->next; @@ -73,7 +73,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) /* Make sure to wrap wptr if we need to */ wptr = get_wptr(ring);
- spin_unlock_irqrestore(&ring->lock, flags); + spin_unlock_irqrestore(&ring->preempt_lock, flags);
/* Make sure everything is posted before making a decision */ mb(); diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 935bf9b1d941..1b6958e908dc 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -46,7 +46,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, ring->memptrs_iova = memptrs_iova;
INIT_LIST_HEAD(&ring->submits); - spin_lock_init(&ring->lock); + spin_lock_init(&ring->preempt_lock);
snprintf(name, sizeof(name), "gpu-ring-%d", ring->id);
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index 0987d6bf848c..4956d1bc5d0e 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -46,7 +46,12 @@ struct msm_ringbuffer { struct msm_rbmemptrs *memptrs; uint64_t memptrs_iova; struct msm_fence_context *fctx; - spinlock_t lock; + + /* + * preempt_lock protects preemption and serializes wptr updates against + * preemption. Can be aquired from irq context. + */ + spinlock_t preempt_lock; };
struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id,
From: Rob Clark robdclark@chromium.org
One less place to rely on dev->struct_mutex.
Signed-off-by: Rob Clark robdclark@chromium.org Reviewed-by: Jordan Crouse jcrouse@codeaurora.org --- drivers/gpu/drm/msm/msm_gem_submit.c | 2 ++ drivers/gpu/drm/msm/msm_gpu.c | 37 ++++++++++++++++++++++------ drivers/gpu/drm/msm/msm_ringbuffer.c | 1 + drivers/gpu/drm/msm/msm_ringbuffer.h | 6 +++++ 4 files changed, 39 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index 50ecc8455197..c078b58d9c10 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -65,7 +65,9 @@ void msm_gem_submit_free(struct msm_gem_submit *submit) unsigned i;
dma_fence_put(submit->fence); + spin_lock(&submit->ring->submit_lock); list_del(&submit->node); + spin_unlock(&submit->ring->submit_lock); put_pid(submit->pid); msm_submitqueue_put(submit->queue);
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 1667d8066897..1d6f3dc3fe78 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -270,6 +270,7 @@ static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, { struct msm_gem_submit *submit;
+ spin_lock(&ring->submit_lock); list_for_each_entry(submit, &ring->submits, node) { if (submit->seqno > fence) break; @@ -277,6 +278,7 @@ static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, msm_update_fence(submit->ring->fctx, submit->fence->seqno); } + spin_unlock(&ring->submit_lock); }
#ifdef CONFIG_DEV_COREDUMP @@ -432,11 +434,14 @@ find_submit(struct msm_ringbuffer *ring, uint32_t fence) { struct msm_gem_submit *submit;
- WARN_ON(!mutex_is_locked(&ring->gpu->dev->struct_mutex)); - - list_for_each_entry(submit, &ring->submits, node) - if (submit->seqno == fence) + spin_lock(&ring->submit_lock); + list_for_each_entry(submit, &ring->submits, node) { + if (submit->seqno == fence) { + spin_unlock(&ring->submit_lock); return submit; + } + } + spin_unlock(&ring->submit_lock);
return NULL; } @@ -533,8 +538,10 @@ static void recover_worker(struct work_struct *work) for (i = 0; i < gpu->nr_rings; i++) { struct msm_ringbuffer *ring = gpu->rb[i];
+ spin_lock(&ring->submit_lock); list_for_each_entry(submit, &ring->submits, node) gpu->funcs->submit(gpu, submit); + spin_unlock(&ring->submit_lock); } }
@@ -721,7 +728,6 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring, static void retire_submits(struct msm_gpu *gpu) { struct drm_device *dev = gpu->dev; - struct msm_gem_submit *submit, *tmp; int i;
WARN_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -730,9 +736,24 @@ static void retire_submits(struct msm_gpu *gpu) for (i = 0; i < gpu->nr_rings; i++) { struct msm_ringbuffer *ring = gpu->rb[i];
- list_for_each_entry_safe(submit, tmp, &ring->submits, node) { - if (dma_fence_is_signaled(submit->fence)) + while (true) { + struct msm_gem_submit *submit = NULL; + + spin_lock(&ring->submit_lock); + submit = list_first_entry_or_null(&ring->submits, + struct msm_gem_submit, node); + spin_unlock(&ring->submit_lock); + + /* + * If no submit, we are done. If submit->fence hasn't + * been signalled, then later submits are not signalled + * either, so we are also done. + */ + if (submit && dma_fence_is_signaled(submit->fence)) { retire_submit(gpu, ring, submit); + } else { + break; + } } } } @@ -775,7 +796,9 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
submit->seqno = ++ring->seqno;
+ spin_lock(&ring->submit_lock); list_add_tail(&submit->node, &ring->submits); + spin_unlock(&ring->submit_lock);
msm_rd_dump_submit(priv->rd, submit, NULL);
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index 1b6958e908dc..4d2a2a4abef8 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -46,6 +46,7 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, ring->memptrs_iova = memptrs_iova;
INIT_LIST_HEAD(&ring->submits); + spin_lock_init(&ring->submit_lock); spin_lock_init(&ring->preempt_lock);
snprintf(name, sizeof(name), "gpu-ring-%d", ring->id); diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index 4956d1bc5d0e..fe55d4a1aa16 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -39,7 +39,13 @@ struct msm_ringbuffer { int id; struct drm_gem_object *bo; uint32_t *start, *end, *cur, *next; + + /* + * List of in-flight submits on this ring. Protected by submit_lock. + */ struct list_head submits; + spinlock_t submit_lock; + uint64_t iova; uint32_t seqno; uint32_t hangcheck_fence;
From: Rob Clark robdclark@chromium.org
Before we remove dev->struct_mutex from the retire path, we have to deal with the situation of a submit retiring before the submit ioctl returns.
To deal with this, ring->submits will hold a reference to the submit, which is dropped when the submit is retired. And the submit ioctl path holds it's own ref, which it drops when it is done with the submit.
Also, add to submit list *after* getting/pinning bo's, to prevent badness in case the completed fence is corrupted, and retire_worker mistakenly believes the submit is done too early.
Signed-off-by: Rob Clark robdclark@chromium.org Reviewed-by: Jordan Crouse jcrouse@codeaurora.org --- drivers/gpu/drm/msm/msm_drv.h | 1 - drivers/gpu/drm/msm/msm_gem.h | 13 +++++++++++++ drivers/gpu/drm/msm/msm_gem_submit.c | 11 +++++------ drivers/gpu/drm/msm/msm_gpu.c | 21 ++++++++++++++++----- 4 files changed, 34 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 7431d68ea102..7e6fb4af4964 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -277,7 +277,6 @@ void msm_unregister_mmu(struct drm_device *dev, struct msm_mmu *mmu);
bool msm_use_mmu(struct drm_device *dev);
-void msm_gem_submit_free(struct msm_gem_submit *submit); int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct drm_file *file);
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index f0608d96ef03..2f289c436ddd 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -213,6 +213,7 @@ void msm_gem_free_work(struct work_struct *work); * lasts for the duration of the submit-ioctl. */ struct msm_gem_submit { + struct kref ref; struct drm_device *dev; struct msm_gpu *gpu; struct msm_gem_address_space *aspace; @@ -249,6 +250,18 @@ struct msm_gem_submit { } bos[]; };
+void __msm_gem_submit_destroy(struct kref *kref); + +static inline void msm_gem_submit_get(struct msm_gem_submit *submit) +{ + kref_get(&submit->ref); +} + +static inline void msm_gem_submit_put(struct msm_gem_submit *submit) +{ + kref_put(&submit->ref, __msm_gem_submit_destroy); +} + /* helper to determine of a buffer in submit should be dumped, used for both * devcoredump and debugfs cmdstream dumping: */ diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index c078b58d9c10..d784e97f233f 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -42,6 +42,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, if (!submit) return NULL;
+ kref_init(&submit->ref); submit->dev = dev; submit->aspace = queue->ctx->aspace; submit->gpu = gpu; @@ -60,14 +61,13 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, return submit; }
-void msm_gem_submit_free(struct msm_gem_submit *submit) +void __msm_gem_submit_destroy(struct kref *kref) { + struct msm_gem_submit *submit = + container_of(kref, struct msm_gem_submit, ref); unsigned i;
dma_fence_put(submit->fence); - spin_lock(&submit->ring->submit_lock); - list_del(&submit->node); - spin_unlock(&submit->ring->submit_lock); put_pid(submit->pid); msm_submitqueue_put(submit->queue);
@@ -841,8 +841,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, submit_cleanup(submit); if (has_ww_ticket) ww_acquire_fini(&submit->ticket); - if (ret) - msm_gem_submit_free(submit); + msm_gem_submit_put(submit); out_unlock: if (ret && (out_fence_fd >= 0)) put_unused_fd(out_fence_fd); diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 1d6f3dc3fe78..bcd9b4fa98b2 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -722,7 +722,12 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
pm_runtime_mark_last_busy(&gpu->pdev->dev); pm_runtime_put_autosuspend(&gpu->pdev->dev); - msm_gem_submit_free(submit); + + spin_lock(&ring->submit_lock); + list_del(&submit->node); + spin_unlock(&ring->submit_lock); + + msm_gem_submit_put(submit); }
static void retire_submits(struct msm_gpu *gpu) @@ -796,10 +801,6 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
submit->seqno = ++ring->seqno;
- spin_lock(&ring->submit_lock); - list_add_tail(&submit->node, &ring->submits); - spin_unlock(&ring->submit_lock); - msm_rd_dump_submit(priv->rd, submit, NULL);
update_sw_cntrs(gpu); @@ -826,6 +827,16 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) msm_gem_active_get(drm_obj, gpu); }
+ /* + * ring->submits holds a ref to the submit, to deal with the case + * that a submit completes before msm_ioctl_gem_submit() returns. + */ + msm_gem_submit_get(submit); + + spin_lock(&ring->submit_lock); + list_add_tail(&submit->node, &ring->submits); + spin_unlock(&ring->submit_lock); + gpu->funcs->submit(gpu, submit); priv->lastctx = submit->queue->ctx;
From: Rob Clark robdclark@chromium.org
It cannot be atomically updated with obj->active_count, and the only purpose is a useless WARN_ON() (which becomes a buggy WARN_ON() once retire_submits() is not serialized with incoming submits via struct_mutex)
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/msm_gem.c | 2 -- drivers/gpu/drm/msm/msm_gem.h | 1 - drivers/gpu/drm/msm/msm_gpu.c | 5 ----- 3 files changed, 8 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 092ed152999e..e4876498be47 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -775,7 +775,6 @@ void msm_gem_active_get(struct drm_gem_object *obj, struct msm_gpu *gpu)
if (!atomic_fetch_inc(&msm_obj->active_count)) { mutex_lock(&priv->mm_lock); - msm_obj->gpu = gpu; list_del_init(&msm_obj->mm_list); list_add_tail(&msm_obj->mm_list, &gpu->active_list); mutex_unlock(&priv->mm_lock); @@ -791,7 +790,6 @@ void msm_gem_active_put(struct drm_gem_object *obj)
if (!atomic_dec_return(&msm_obj->active_count)) { mutex_lock(&priv->mm_lock); - msm_obj->gpu = NULL; list_del_init(&msm_obj->mm_list); list_add_tail(&msm_obj->mm_list, &priv->inactive_list); mutex_unlock(&priv->mm_lock); diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index 2f289c436ddd..f4e73c6f07bf 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -64,7 +64,6 @@ struct msm_gem_object { * */ struct list_head mm_list; - struct msm_gpu *gpu; /* non-null if active */
/* Transiently in the process of submit ioctl, objects associated * with the submit are on submit->bo_list.. this only lasts for diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index bcd9b4fa98b2..d0f625112a97 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -810,11 +810,6 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) struct drm_gem_object *drm_obj = &msm_obj->base; uint64_t iova;
- /* can't happen yet.. but when we add 2d support we'll have - * to deal w/ cross-ring synchronization: - */ - WARN_ON(is_active(msm_obj) && (msm_obj->gpu != gpu)); - /* submit takes a reference to the bo and iova until retired: */ drm_gem_object_get(&msm_obj->base); msm_gem_get_and_pin_iova_locked(&msm_obj->base, submit->aspace, &iova);
From: Rob Clark robdclark@chromium.org
Now that we are not relying on dev->struct_mutex to protect the ring->submits lists, drop the struct_mutex lock.
Signed-off-by: Rob Clark robdclark@chromium.org Reviewed-by: Jordan Crouse jcrouse@codeaurora.org --- drivers/gpu/drm/msm/msm_gpu.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index d0f625112a97..30ba3beaad0a 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -717,7 +717,7 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
msm_gem_active_put(&msm_obj->base); msm_gem_unpin_iova(&msm_obj->base, submit->aspace); - drm_gem_object_put_locked(&msm_obj->base); + drm_gem_object_put(&msm_obj->base); }
pm_runtime_mark_last_busy(&gpu->pdev->dev); @@ -732,11 +732,8 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
static void retire_submits(struct msm_gpu *gpu) { - struct drm_device *dev = gpu->dev; int i;
- WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - /* Retire the commits starting with highest priority */ for (i = 0; i < gpu->nr_rings; i++) { struct msm_ringbuffer *ring = gpu->rb[i]; @@ -766,15 +763,12 @@ static void retire_submits(struct msm_gpu *gpu) static void retire_worker(struct work_struct *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); - struct drm_device *dev = gpu->dev; int i;
for (i = 0; i < gpu->nr_rings; i++) update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence);
- mutex_lock(&dev->struct_mutex); retire_submits(gpu); - mutex_unlock(&dev->struct_mutex); }
/* call from irq handler to schedule work to retire bo's */
From: Rob Clark robdclark@chromium.org
Now that active_list/inactive_list is protected by mm_lock, we no longer need dev->struct_mutex in the free_object() path.
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/msm_gem.c | 8 -------- 1 file changed, 8 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index e4876498be47..af1abddca78e 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -949,8 +949,6 @@ static void free_object(struct msm_gem_object *msm_obj) struct drm_device *dev = obj->dev; struct msm_drm_private *priv = dev->dev_private;
- WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - /* object should not be on active list: */ WARN_ON(is_active(msm_obj));
@@ -987,20 +985,14 @@ void msm_gem_free_work(struct work_struct *work) { struct msm_drm_private *priv = container_of(work, struct msm_drm_private, free_work); - struct drm_device *dev = priv->dev; struct llist_node *freed; struct msm_gem_object *msm_obj, *next;
while ((freed = llist_del_all(&priv->free_list))) { - - mutex_lock(&dev->struct_mutex); - llist_for_each_entry_safe(msm_obj, next, freed, freed) free_object(msm_obj);
- mutex_unlock(&dev->struct_mutex); - if (need_resched()) break; }
From: Rob Clark robdclark@chromium.org
Now that we don't need struct_mutex in the free path, we can get rid of the asynchronous free altogether.
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/msm_drv.c | 3 --- drivers/gpu/drm/msm/msm_drv.h | 5 ----- drivers/gpu/drm/msm/msm_gem.c | 27 --------------------------- drivers/gpu/drm/msm/msm_gem.h | 1 - 4 files changed, 36 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index 15c41786d018..ebcd8e827363 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -465,9 +465,6 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
priv->wq = alloc_ordered_workqueue("msm", 0);
- INIT_WORK(&priv->free_work, msm_gem_free_work); - init_llist_head(&priv->free_list); - INIT_LIST_HEAD(&priv->inactive_list); mutex_init(&priv->mm_lock);
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 7e6fb4af4964..5308e636a90c 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -188,10 +188,6 @@ struct msm_drm_private { struct list_head inactive_list; struct mutex mm_lock;
- /* worker for delayed free of objects: */ - struct work_struct free_work; - struct llist_head free_list; - struct workqueue_struct *wq;
unsigned int num_planes; @@ -291,7 +287,6 @@ struct drm_gem_object *msm_gem_prime_import_sg_table(struct drm_device *dev, struct dma_buf_attachment *attach, struct sg_table *sg); int msm_gem_prime_pin(struct drm_gem_object *obj); void msm_gem_prime_unpin(struct drm_gem_object *obj); -void msm_gem_free_work(struct work_struct *work);
int msm_framebuffer_prepare(struct drm_framebuffer *fb, struct msm_gem_address_space *aspace); diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index af1abddca78e..827c7397ed12 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -939,16 +939,6 @@ void msm_gem_free_object(struct drm_gem_object *obj) struct drm_device *dev = obj->dev; struct msm_drm_private *priv = dev->dev_private;
- if (llist_add(&msm_obj->freed, &priv->free_list)) - queue_work(priv->wq, &priv->free_work); -} - -static void free_object(struct msm_gem_object *msm_obj) -{ - struct drm_gem_object *obj = &msm_obj->base; - struct drm_device *dev = obj->dev; - struct msm_drm_private *priv = dev->dev_private; - /* object should not be on active list: */ WARN_ON(is_active(msm_obj));
@@ -981,23 +971,6 @@ static void free_object(struct msm_gem_object *msm_obj) kfree(msm_obj); }
-void msm_gem_free_work(struct work_struct *work) -{ - struct msm_drm_private *priv = - container_of(work, struct msm_drm_private, free_work); - struct llist_node *freed; - struct msm_gem_object *msm_obj, *next; - - while ((freed = llist_del_all(&priv->free_list))) { - llist_for_each_entry_safe(msm_obj, next, - freed, freed) - free_object(msm_obj); - - if (need_resched()) - break; - } -} - /* convenience method to construct a GEM buffer object, and userspace handle */ int msm_gem_new_handle(struct drm_device *dev, struct drm_file *file, uint32_t size, uint32_t flags, uint32_t *handle, diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index f4e73c6f07bf..ffa2130ee97d 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -204,7 +204,6 @@ static inline bool is_vunmapable(struct msm_gem_object *msm_obj)
void msm_gem_purge(struct drm_gem_object *obj); void msm_gem_vunmap(struct drm_gem_object *obj); -void msm_gem_free_work(struct work_struct *work);
/* Created per submit-ioctl, to track bo's and cmdstream bufs, etc, * associated with the cmdstream submission for synchronization (and
From: Rob Clark robdclark@chromium.org
The obj->lock is sufficient for what we need.
This *does* have the implication that userspace can try to shoot themselves in the foot by racing madvise(DONTNEED) with submit. But the result will be about the same if they did madvise(DONTNEED) before the submit ioctl, ie. they might not get want they want if they race with shrinker. But iova fault handling is robust enough, and userspace is only shooting it's own foot.
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/msm_drv.c | 11 ++--------- drivers/gpu/drm/msm/msm_gem.c | 4 +--- drivers/gpu/drm/msm/msm_gem.h | 2 -- 3 files changed, 3 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c index ebcd8e827363..4d808769e6ed 100644 --- a/drivers/gpu/drm/msm/msm_drv.c +++ b/drivers/gpu/drm/msm/msm_drv.c @@ -939,14 +939,9 @@ static int msm_ioctl_gem_madvise(struct drm_device *dev, void *data, return -EINVAL; }
- ret = mutex_lock_interruptible(&dev->struct_mutex); - if (ret) - return ret; - obj = drm_gem_object_lookup(file, args->handle); if (!obj) { - ret = -ENOENT; - goto unlock; + return -ENOENT; }
ret = msm_gem_madvise(obj, args->madv); @@ -955,10 +950,8 @@ static int msm_ioctl_gem_madvise(struct drm_device *dev, void *data, ret = 0; }
- drm_gem_object_put_locked(obj); + drm_gem_object_put(obj);
-unlock: - mutex_unlock(&dev->struct_mutex); return ret; }
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 827c7397ed12..c39ba9030001 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -673,8 +673,6 @@ int msm_gem_madvise(struct drm_gem_object *obj, unsigned madv)
msm_gem_lock(obj);
- WARN_ON(!mutex_is_locked(&obj->dev->struct_mutex)); - if (msm_obj->madv != __MSM_MADV_PURGED) msm_obj->madv = madv;
@@ -691,7 +689,6 @@ void msm_gem_purge(struct drm_gem_object *obj) struct msm_gem_object *msm_obj = to_msm_bo(obj);
WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - WARN_ON(!msm_gem_is_locked(obj)); WARN_ON(!is_purgeable(msm_obj)); WARN_ON(obj->import_attach);
@@ -771,6 +768,7 @@ void msm_gem_active_get(struct drm_gem_object *obj, struct msm_gpu *gpu) struct msm_drm_private *priv = obj->dev->dev_private;
might_sleep(); + WARN_ON(!msm_gem_is_locked(obj)); WARN_ON(msm_obj->madv != MSM_MADV_WILLNEED);
if (!atomic_fetch_inc(&msm_obj->active_count)) { diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index ffa2130ee97d..d79e7019cc88 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -190,8 +190,6 @@ static inline bool is_active(struct msm_gem_object *msm_obj)
static inline bool is_purgeable(struct msm_gem_object *msm_obj) { - WARN_ON(!msm_gem_is_locked(&msm_obj->base)); - WARN_ON(!mutex_is_locked(&msm_obj->base.dev->struct_mutex)); return (msm_obj->madv == MSM_MADV_DONTNEED) && msm_obj->sgt && !msm_obj->base.dma_buf && !msm_obj->base.import_attach; }
From: Rob Clark robdclark@chromium.org
Now that the inactive_list is protected by mm_lock, and everything else on per-obj basis is protected by obj->lock, we no longer depend on struct_mutex.
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/msm_gem.c | 1 - drivers/gpu/drm/msm/msm_gem_shrinker.c | 54 -------------------------- 2 files changed, 55 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index c39ba9030001..cf17c79d99ae 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -688,7 +688,6 @@ void msm_gem_purge(struct drm_gem_object *obj) struct drm_device *dev = obj->dev; struct msm_gem_object *msm_obj = to_msm_bo(obj);
- WARN_ON(!mutex_is_locked(&dev->struct_mutex)); WARN_ON(!is_purgeable(msm_obj)); WARN_ON(obj->import_attach);
diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index 6be073b8ca08..6f4b1355725f 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -8,48 +8,13 @@ #include "msm_gem.h" #include "msm_gpu_trace.h"
-static bool msm_gem_shrinker_lock(struct drm_device *dev, bool *unlock) -{ - /* NOTE: we are *closer* to being able to get rid of - * mutex_trylock_recursive().. the msm_gem code itself does - * not need struct_mutex, although codepaths that can trigger - * shrinker are still called in code-paths that hold the - * struct_mutex. - * - * Also, msm_obj->madv is protected by struct_mutex. - * - * The next step is probably split out a seperate lock for - * protecting inactive_list, so that shrinker does not need - * struct_mutex. - */ - switch (mutex_trylock_recursive(&dev->struct_mutex)) { - case MUTEX_TRYLOCK_FAILED: - return false; - - case MUTEX_TRYLOCK_SUCCESS: - *unlock = true; - return true; - - case MUTEX_TRYLOCK_RECURSIVE: - *unlock = false; - return true; - } - - BUG(); -} - static unsigned long msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) { struct msm_drm_private *priv = container_of(shrinker, struct msm_drm_private, shrinker); - struct drm_device *dev = priv->dev; struct msm_gem_object *msm_obj; unsigned long count = 0; - bool unlock; - - if (!msm_gem_shrinker_lock(dev, &unlock)) - return 0;
mutex_lock(&priv->mm_lock);
@@ -63,9 +28,6 @@ msm_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
mutex_unlock(&priv->mm_lock);
- if (unlock) - mutex_unlock(&dev->struct_mutex); - return count; }
@@ -74,13 +36,8 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { struct msm_drm_private *priv = container_of(shrinker, struct msm_drm_private, shrinker); - struct drm_device *dev = priv->dev; struct msm_gem_object *msm_obj; unsigned long freed = 0; - bool unlock; - - if (!msm_gem_shrinker_lock(dev, &unlock)) - return SHRINK_STOP;
mutex_lock(&priv->mm_lock);
@@ -98,9 +55,6 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
mutex_unlock(&priv->mm_lock);
- if (unlock) - mutex_unlock(&dev->struct_mutex); - if (freed > 0) trace_msm_gem_purge(freed << PAGE_SHIFT);
@@ -112,13 +66,8 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr) { struct msm_drm_private *priv = container_of(nb, struct msm_drm_private, vmap_notifier); - struct drm_device *dev = priv->dev; struct msm_gem_object *msm_obj; unsigned unmapped = 0; - bool unlock; - - if (!msm_gem_shrinker_lock(dev, &unlock)) - return NOTIFY_DONE;
mutex_lock(&priv->mm_lock);
@@ -141,9 +90,6 @@ msm_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
mutex_unlock(&priv->mm_lock);
- if (unlock) - mutex_unlock(&dev->struct_mutex); - *(unsigned long *)ptr += unmapped;
if (unmapped > 0)
From: Rob Clark robdclark@chromium.org
If there is only a single ring (no-preemption), everything is FIFO order and there is no need to implicit-sync.
Mesa should probably just always use MSM_SUBMIT_NO_IMPLICIT, as behavior is undefined when fences are not used to synchronize buffer usage across contexts (which is the only case where multiple different priority rings could come into play).
Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/msm_gem_submit.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index d784e97f233f..96832debc3b6 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -277,7 +277,7 @@ static int submit_lock_objects(struct msm_gem_submit *submit) return ret; }
-static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) +static int submit_fence_sync(struct msm_gem_submit *submit, bool implicit_sync) { int i, ret = 0;
@@ -297,7 +297,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit) return ret; }
- if (no_implicit) + if (!implicit_sync) continue;
ret = msm_gem_sync_object(&msm_obj->base, submit->ring->fctx, @@ -768,7 +768,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (ret) goto out;
- ret = submit_fence_sync(submit, !!(args->flags & MSM_SUBMIT_NO_IMPLICIT)); + ret = submit_fence_sync(submit, (gpu->nr_rings > 1) && + !(args->flags & MSM_SUBMIT_NO_IMPLICIT)); if (ret) goto out;
On Mon, Oct 19, 2020 at 10:45 PM Rob Clark robdclark@gmail.com wrote:
From: Rob Clark robdclark@chromium.org
This doesn't remove *all* the struct_mutex, but it covers the worst of it, ie. shrinker/madvise/free/retire. The submit path still uses struct_mutex, but it still needs *something* serialize a portion of the submit path, and lock_stat mostly just shows the lock contention there being with other submits. And there are a few other bits of struct_mutex usage in less critical paths (debugfs, etc). But this seems like a reasonable step in the right direction.
v2: teach lockdep about shrinker locking patters (danvet) and convert to obj->resv locking (danvet) v3: fix get_vaddr locking for legacy userspace (relocs), devcoredump, and rd/hangrd
For the series:
Reviewed-by: Kristian H. Kristensen hoegsberg@google.com
Rob Clark (23): drm/msm: Fix a couple incorrect usages of get_vaddr_active() drm/msm/gem: Add obj->lock wrappers drm/msm/gem: Rename internal get_iova_locked helper drm/msm/gem: Move prototypes to msm_gem.h drm/msm/gem: Add some _locked() helpers drm/msm/gem: Move locking in shrinker path drm/msm/submit: Move copy_from_user ahead of locking bos drm/msm: Do rpm get sooner in the submit path drm/msm/gem: Switch over to obj->resv for locking drm/msm: Use correct drm_gem_object_put() in fail case drm/msm: Drop chatty trace drm/msm: Move update_fences() drm/msm: Add priv->mm_lock to protect active/inactive lists drm/msm: Document and rename preempt_lock drm/msm: Protect ring->submits with it's own lock drm/msm: Refcount submits drm/msm: Remove obj->gpu drm/msm: Drop struct_mutex from the retire path drm/msm: Drop struct_mutex in free_object() path drm/msm: Remove msm_gem_free_work drm/msm: Drop struct_mutex in madvise path drm/msm: Drop struct_mutex in shrinker path drm/msm: Don't implicit-sync if only a single ring
drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 6 +- drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 12 +- drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 6 +- drivers/gpu/drm/msm/disp/mdp4/mdp4_crtc.c | 1 + drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c | 1 + drivers/gpu/drm/msm/dsi/dsi_host.c | 1 + drivers/gpu/drm/msm/msm_debugfs.c | 7 + drivers/gpu/drm/msm/msm_drv.c | 21 +- drivers/gpu/drm/msm/msm_drv.h | 73 +----- drivers/gpu/drm/msm/msm_fbdev.c | 1 + drivers/gpu/drm/msm/msm_gem.c | 266 +++++++++++----------- drivers/gpu/drm/msm/msm_gem.h | 133 +++++++++-- drivers/gpu/drm/msm/msm_gem_shrinker.c | 81 ++----- drivers/gpu/drm/msm/msm_gem_submit.c | 158 ++++++++----- drivers/gpu/drm/msm/msm_gpu.c | 110 +++++---- drivers/gpu/drm/msm/msm_gpu.h | 5 +- drivers/gpu/drm/msm/msm_rd.c | 2 +- drivers/gpu/drm/msm/msm_ringbuffer.c | 3 +- drivers/gpu/drm/msm/msm_ringbuffer.h | 13 +- 19 files changed, 495 insertions(+), 405 deletions(-)
-- 2.26.2
Freedreno mailing list Freedreno@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/freedreno
dri-devel@lists.freedesktop.org