On Thu, Dec 22, 2016 at 08:36:04AM +0000, Chris Wilson wrote:
When we evict from the GTT to make room for an object, the hole we create is put onto the MRU stack inside the drm_mm range manager. On the next search pass, we can speed up a PIN_HIGH allocation by referencing that stack for the new hole.
v2: Pull together the 3 identical implementations (ahem, a couple were outdated) into a common routine for allocating a node and evicting as necessary.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Since it doesn't apply directly to drm-misc I'm leaving this one out for now. I guess best to merge through drm-intel?
-Daniel
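For context on the drm_mm behaviour the commit message relies on: when a node is evicted, the hole it leaves behind is pushed onto a most-recently-used stack of free holes, so the very next search tends to hit that fresh hole first. What follows is a minimal user-space sketch of the MRU idea only; the names (struct hole, hole_push, hole_take) are invented for illustration and are not the drm_mm API, which additionally handles colouring, alignment and top-down placement.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy MRU free-hole stack; names invented for illustration only. */
struct hole {
        unsigned long start, size;
        struct hole *next;      /* list head is the most recent hole */
};

/* Freeing a range (e.g. after an eviction) pushes it onto the head. */
static void hole_push(struct hole **head, unsigned long start,
                      unsigned long size)
{
        struct hole *h = malloc(sizeof(*h));

        h->start = start;
        h->size = size;
        h->next = *head;
        *head = h;
}

/*
 * Allocation scans from the MRU head, so a search issued right after
 * an eviction finds the freshly created hole without walking the rest
 * of the address space.
 */
static bool hole_take(struct hole **head, unsigned long size,
                      unsigned long *start)
{
        struct hole **p;

        for (p = head; *p; p = &(*p)->next) {
                struct hole *h = *p;

                if (h->size < size)
                        continue;

                *start = h->start;      /* carve from the front */
                h->start += size;
                h->size -= size;
                if (h->size == 0) {     /* hole fully consumed: unlink */
                        *p = h->next;
                        free(h);
                }
                return true;
        }
        return false;
}

int main(void)
{
        struct hole *free_list = NULL;
        unsigned long addr;

        hole_push(&free_list, 0x0000, 0x1000);  /* an old, stale hole */
        hole_push(&free_list, 0x8000, 0x1000);  /* hole from the eviction */

        if (hole_take(&free_list, 0x1000, &addr))
                printf("allocated at 0x%lx\n", addr);   /* 0x8000 */
        return 0;
}

Running this prints "allocated at 0x8000": the hole pushed last (the fresh eviction) satisfies the request before any older hole is considered, which is why the evict-then-retry path in the new i915_gem_gtt_insert() below is cheap.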
 drivers/gpu/drm/i915/gvt/aperture_gm.c | 33 +++++-----------
 drivers/gpu/drm/i915/i915_gem_gtt.c    | 72 ++++++++++++++++++++++++----------
 drivers/gpu/drm/i915/i915_gem_gtt.h    |  5 +++
 drivers/gpu/drm/i915/i915_vma.c        | 40 ++-----------------
 4 files changed, 70 insertions(+), 80 deletions(-)
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index 7d33b607bc89..1bb7a5b80d47 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -48,47 +48,34 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
 {
         struct intel_gvt *gvt = vgpu->gvt;
         struct drm_i915_private *dev_priv = gvt->dev_priv;
-        u32 alloc_flag, search_flag;
+        unsigned int flags;
         u64 start, end, size;
         struct drm_mm_node *node;
-        int retried = 0;
         int ret;
 
         if (high_gm) {
-                search_flag = DRM_MM_SEARCH_BELOW;
-                alloc_flag = DRM_MM_CREATE_TOP;
                 node = &vgpu->gm.high_gm_node;
                 size = vgpu_hidden_sz(vgpu);
                 start = gvt_hidden_gmadr_base(gvt);
                 end = gvt_hidden_gmadr_end(gvt);
+                flags = PIN_HIGH;
         } else {
-                search_flag = DRM_MM_SEARCH_DEFAULT;
-                alloc_flag = DRM_MM_CREATE_DEFAULT;
                 node = &vgpu->gm.low_gm_node;
                 size = vgpu_aperture_sz(vgpu);
                 start = gvt_aperture_gmadr_base(gvt);
                 end = gvt_aperture_gmadr_end(gvt);
+                flags = PIN_MAPPABLE;
         }
 
         mutex_lock(&dev_priv->drm.struct_mutex);
-search_again:
-        ret = drm_mm_insert_node_in_range_generic(&dev_priv->ggtt.base.mm,
-                                                  node, size, 4096,
-                                                  I915_COLOR_UNEVICTABLE,
-                                                  start, end, search_flag,
-                                                  alloc_flag);
-        if (ret) {
-                ret = i915_gem_evict_something(&dev_priv->ggtt.base,
-                                               size, 4096,
-                                               I915_COLOR_UNEVICTABLE,
-                                               start, end, 0);
-                if (ret == 0 && ++retried < 3)
-                        goto search_again;
-
-                gvt_err("fail to alloc %s gm space from host, retried %d\n",
-                        high_gm ? "high" : "low", retried);
-        }
+        ret = i915_gem_gtt_insert(&dev_priv->ggtt.base, node,
+                                  size, 4096, I915_COLOR_UNEVICTABLE,
+                                  start, end, flags);
         mutex_unlock(&dev_priv->drm.struct_mutex);
+        if (ret)
+                gvt_err("fail to alloc %s gm space from host\n",
+                        high_gm ? "high" : "low");
+
         return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 6af9311f72f5..c8f1675852a7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2044,7 +2044,6 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
         struct i915_address_space *vm = &ppgtt->base;
         struct drm_i915_private *dev_priv = ppgtt->base.i915;
         struct i915_ggtt *ggtt = &dev_priv->ggtt;
-        bool retried = false;
         int ret;
 
         /* PPGTT PDEs reside in the GGTT and consists of 512 entries. The
@@ -2057,29 +2056,14 @@ static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
         if (ret)
                 return ret;
 
-alloc:
-        ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm, &ppgtt->node,
-                                                  GEN6_PD_SIZE, GEN6_PD_ALIGN,
-                                                  I915_COLOR_UNEVICTABLE,
-                                                  0, ggtt->base.total,
-                                                  DRM_MM_TOPDOWN);
-        if (ret == -ENOSPC && !retried) {
-                ret = i915_gem_evict_something(&ggtt->base,
-                                               GEN6_PD_SIZE, GEN6_PD_ALIGN,
-                                               I915_COLOR_UNEVICTABLE,
-                                               0, ggtt->base.total,
-                                               0);
-                if (ret)
-                        goto err_out;
-
-                retried = true;
-                goto alloc;
-        }
-
+        ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node,
+                                  GEN6_PD_SIZE, GEN6_PD_ALIGN,
+                                  I915_COLOR_UNEVICTABLE,
+                                  0, ggtt->base.total,
+                                  PIN_HIGH);
         if (ret)
                 goto err_out;
 
         if (ppgtt->node.start < ggtt->mappable_end)
                 DRM_DEBUG("Forced to use aperture for PDEs\n");
@@ -3553,3 +3537,49 @@ i915_get_ggtt_vma_pages(struct i915_vma *vma)
 
         return ret;
 }
+
+int i915_gem_gtt_insert(struct i915_address_space *vm,
+                        struct drm_mm_node *node,
+                        u64 size, u64 alignment, unsigned long color,
+                        u64 start, u64 end, unsigned int flags)
+{
+        u32 search_flag, alloc_flag;
+        int err;
+
+        lockdep_assert_held(&vm->i915->drm.struct_mutex);
+
+        if (flags & PIN_HIGH) {
+                search_flag = DRM_MM_SEARCH_BELOW;
+                alloc_flag = DRM_MM_CREATE_TOP;
+        } else {
+                search_flag = DRM_MM_SEARCH_DEFAULT;
+                alloc_flag = DRM_MM_CREATE_DEFAULT;
+        }
+
+        /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
+         * so we know that we always have a minimum alignment of 4096.
+         * The drm_mm range manager is optimised to return results
+         * with zero alignment, so where possible use the optimal
+         * path.
+         */
+        GEM_BUG_ON(size & 4095);
+        if (alignment <= 4096)
+                alignment = 0;
+
+        err = drm_mm_insert_node_in_range_generic(&vm->mm, node,
+                                                  size, alignment, color,
+                                                  start, end,
+                                                  search_flag, alloc_flag);
+        if (err != -ENOSPC)
+                return err;
+
+        err = i915_gem_evict_something(vm, size, alignment, color,
+                                       start, end, flags);
+        if (err)
+                return err;
+
+        search_flag = DRM_MM_SEARCH_DEFAULT;
+        return drm_mm_insert_node_in_range_generic(&vm->mm, node,
+                                                   size, alignment, color,
+                                                   start, end,
+                                                   search_flag, alloc_flag);
+}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 0055b8567a43..4c7bef07e38a 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -528,6 +528,11 @@ int __must_check i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
 void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
                                struct sg_table *pages);
 
+int i915_gem_gtt_insert(struct i915_address_space *vm,
+                        struct drm_mm_node *node,
+                        u64 size, u64 alignment, unsigned long color,
+                        u64 start, u64 end, unsigned int flags);
+
 /* Flags used by pin/bind&friends. */
 #define PIN_NONBLOCK            BIT(0)
 #define PIN_MAPPABLE            BIT(1)
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index fd75d5704287..608008d2d999 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -415,43 +415,11 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags)
                         goto err_unpin;
                 }
         } else {
-                u32 search_flag, alloc_flag;
-
-                if (flags & PIN_HIGH) {
-                        search_flag = DRM_MM_SEARCH_BELOW;
-                        alloc_flag = DRM_MM_CREATE_TOP;
-                } else {
-                        search_flag = DRM_MM_SEARCH_DEFAULT;
-                        alloc_flag = DRM_MM_CREATE_DEFAULT;
-                }
-
-                /* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
-                 * so we know that we always have a minimum alignment of 4096.
-                 * The drm_mm range manager is optimised to return results
-                 * with zero alignment, so where possible use the optimal
-                 * path.
-                 */
-                if (alignment <= 4096)
-                        alignment = 0;
-
-search_free:
-                ret = drm_mm_insert_node_in_range_generic(&vma->vm->mm,
-                                                          &vma->node,
-                                                          size, alignment,
-                                                          obj->cache_level,
-                                                          start, end,
-                                                          search_flag,
-                                                          alloc_flag);
-                if (ret) {
-                        ret = i915_gem_evict_something(vma->vm, size, alignment,
-                                                       obj->cache_level,
-                                                       start, end,
-                                                       flags);
-                        if (ret == 0)
-                                goto search_free;
-
+                ret = i915_gem_gtt_insert(vma->vm, &vma->node,
+                                          size, alignment, obj->cache_level,
+                                          start, end, flags);
+                if (ret)
                         goto err_unpin;
-                }
 
                 GEM_BUG_ON(vma->node.start < start);
                 GEM_BUG_ON(vma->node.start + vma->node.size > end);
-- 
2.11.0