On 8/7/2015 11:56 AM, Michał Winiarski wrote:
On Fri, Aug 07, 2015 at 10:45:21AM +0100, Michel Thierry wrote:
Gen8+ supports 48-bit virtual addresses, but some objects must always be allocated inside the 32-bit address range.
In specific, any resource used with flat/heapless (0x00000000-0xfffff000) General State Heap (GSH) or Instruction State Heap (ISH) must be in a 32-bit range, because the General State Offset and Instruction State Offset are limited to 32-bits.
The i915 driver has been modified to provide a flag to set when the 4GB limit is not necessary in a given bo (EXEC_OBJECT_SUPPORTS_48B_ADDRESS). 48-bit range will only be used when explicitly requested.
Calls to the new drm_intel_bo_emit_reloc_48bit function will have this flag set automatically, while calls to drm_intel_bo_emit_reloc will clear it.
v2: Make set/clear functions nops on pre-gen8 platforms, and use them internally in emit_reloc functions (Ben) s/48BADDRESS/48B_ADDRESS/ (Dave) v3: Keep set/clear functions internal, no-one needs to use them directly.
References: http://lists.freedesktop.org/archives/intel-gfx/2015-July/072612.html Cc: Ben Widawsky ben@bwidawsk.net Cc: Dave Gordon david.s.gordon@intel.com Signed-off-by: Michel Thierry michel.thierry@intel.com
include/drm/i915_drm.h | 3 ++- intel/intel_bufmgr.c | 16 ++++++++++++++ intel/intel_bufmgr.h | 6 +++++- intel/intel_bufmgr_gem.c | 54 +++++++++++++++++++++++++++++++++++++++++++---- intel/intel_bufmgr_priv.h | 11 ++++++++++ 5 files changed, 84 insertions(+), 6 deletions(-)
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index ded43b1..426b25c 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -680,7 +680,8 @@ struct drm_i915_gem_exec_object2 { #define EXEC_OBJECT_NEEDS_FENCE (1<<0) #define EXEC_OBJECT_NEEDS_GTT (1<<1) #define EXEC_OBJECT_WRITE (1<<2) -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1) +#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3) +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_SUPPORTS_48B_ADDRESS<<1) __u64 flags;
__u64 rsvd1; diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c index 14ea9f9..0bd5191 100644 --- a/intel/intel_bufmgr.c +++ b/intel/intel_bufmgr.c @@ -202,6 +202,22 @@ drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, drm_intel_bo *target_bo, uint32_t target_offset, uint32_t read_domains, uint32_t write_domain) {
- if (bo->bufmgr->bo_clear_supports_48b_address)
bo->bufmgr->bo_clear_supports_48b_address(target_bo);
This is always true - you assign func to this func pointer unconditionally. Also - why not return some meaningful value if the user does not have enable_ppgtt=3 set? You can get that from I915_PARAM_HAS_ALIASING_PPGTT right now. Check for gen >= 8 seems rather inadequate, and even then you're not returning anything useful to the caller.
- return bo->bufmgr->bo_emit_reloc(bo, offset,
target_bo, target_offset,
read_domains, write_domain);
+}
Using emit_reloc to set a BO flag seems confusing and can be error prone, emit_reloc can be called many times and caller needs to be careful and call the right function for each reloc.
+int +drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
drm_intel_bo *target_bo, uint32_t target_offset,
uint32_t read_domains, uint32_t write_domain)
+{
- if (bo->bufmgr->bo_set_supports_48b_address)
bo->bufmgr->bo_set_supports_48b_address(target_bo);
Same situation as with clear_supports_48b_address.
- return bo->bufmgr->bo_emit_reloc(bo, offset, target_bo, target_offset, read_domains, write_domain);
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h index 285919e..8f91ffe 100644 --- a/intel/intel_bufmgr.h +++ b/intel/intel_bufmgr.h @@ -87,7 +87,8 @@ struct _drm_intel_bo { /** * Last seen card virtual address (offset from the beginning of the * aperture) for the object. This should be used to fill relocation
* entries when calling drm_intel_bo_emit_reloc()
* entries when calling drm_intel_bo_emit_reloc() or
*/ uint64_t offset64; };* drm_intel_bo_emit_reloc_48bit()
@@ -147,6 +148,9 @@ int drm_intel_bufmgr_check_aperture_space(drm_intel_bo ** bo_array, int count); int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, drm_intel_bo *target_bo, uint32_t target_offset, uint32_t read_domains, uint32_t write_domain); +int drm_intel_bo_emit_reloc_48bit(drm_intel_bo *bo, uint32_t offset,
drm_intel_bo *target_bo, uint32_t target_offset,
int drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, drm_intel_bo *target_bo, uint32_t target_offset,uint32_t read_domains, uint32_t write_domain);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c index 41de396..713ed4e 100644 --- a/intel/intel_bufmgr_gem.c +++ b/intel/intel_bufmgr_gem.c @@ -140,6 +140,7 @@ typedef struct _drm_intel_bufmgr_gem { } drm_intel_bufmgr_gem;
#define DRM_INTEL_RELOC_FENCE (1<<0) +#define DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS (2<<0)
Why are you duplicating information about support for 48B in both emit reloc function argument and struct bo_gem?
typedef struct _drm_intel_reloc_target_info { drm_intel_bo *bo; @@ -237,6 +238,14 @@ struct _drm_intel_bo_gem { bool is_userptr;
/**
* Boolean of whether this buffer can be in the whole 48-bit address.
*
* By default, buffers will be keep in a 32-bit range, unless this
* flag is explicitly set.
*/
- bool supports_48b_address;
- /**
- Size in bytes of this buffer and its relocation descendents.
- Used to avoid costly tree walking in
@@ -463,13 +472,18 @@ drm_intel_add_validate_buffer(drm_intel_bo *bo) }
static void -drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) +drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence,
int supports_48b_address)
{ drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; int index;
if (bo_gem->validate_index != -1) {
if (supports_48b_address) {
bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
}
if (need_fence) bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= EXEC_OBJECT_NEEDS_FENCE;
@@ -508,6 +522,10 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) bufmgr_gem->exec2_objects[index].flags |= EXEC_OBJECT_NEEDS_FENCE; }
- if (supports_48b_address) {
bufmgr_gem->exec2_objects[index].flags |=
EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
- } bufmgr_gem->exec_count++; }
@@ -1925,11 +1943,33 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, else bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
if (target_bo_gem->supports_48b_address)
bo_gem->reloc_target_info[bo_gem->reloc_count].flags |=
DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS;
bo_gem->reloc_count++;
return 0; }
+static void drm_intel_gem_bo_set_supports_48b_address(drm_intel_bo *bo) +{
- drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
- drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
- if (bufmgr_gem->gen >= 8)
bo_gem->supports_48b_address = 1;
+}
+static void drm_intel_gem_bo_clear_supports_48b_address(drm_intel_bo *bo) +{
- drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
- drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
- if (bufmgr_gem->gen >= 8)
bo_gem->supports_48b_address = 0;
+}
- static int drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, drm_intel_bo *target_bo, uint32_t target_offset,
@@ -2043,7 +2083,7 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
for (i = 0; i < bo_gem->reloc_count; i++) { drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
int need_fence;
int need_fence, supports_48b_addr;
if (target_bo == bo) continue;
@@ -2056,8 +2096,12 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) need_fence = (bo_gem->reloc_target_info[i].flags & DRM_INTEL_RELOC_FENCE);
supports_48b_addr = (bo_gem->reloc_target_info[i].flags &
DRM_INTEL_RELOC_SUPPORTS_48B_ADDRESS);
- /* Add the target to the validate list */
drm_intel_add_validate_buffer2(target_bo, need_fence);
drm_intel_add_validate_buffer2(target_bo, need_fence,
} }supports_48b_addr);
@@ -2217,7 +2261,7 @@ do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, /* Add the batch buffer to the validation list. There are no relocations * pointing to it. */
- drm_intel_add_validate_buffer2(bo, 0);
drm_intel_add_validate_buffer2(bo, 0, 0);
memclear(execbuf); execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
@@ -3299,6 +3343,8 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering; bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc; bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
- bufmgr_gem->bufmgr.bo_set_supports_48b_address = drm_intel_gem_bo_set_supports_48b_address;
- bufmgr_gem->bufmgr.bo_clear_supports_48b_address = drm_intel_gem_bo_clear_supports_48b_address; bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin; bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin; bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h index 59ebd18..774fa02 100644 --- a/intel/intel_bufmgr_priv.h +++ b/intel/intel_bufmgr_priv.h @@ -152,6 +152,17 @@ struct _drm_intel_bufmgr { void (*destroy) (drm_intel_bufmgr *bufmgr);
/**
* Set/Clear 48-bit address support flag in a given bo.
*
* Any resource used with flat/heapless (0x00000000-0xfffff000)
* General State Heap (GSH) or Intructions State Heap (ISH) must
* be in a 32-bit range. 48-bit range will only be used when explicitly
* requested.
*/
- void (*bo_set_supports_48b_address) (drm_intel_bo *bo);
- void (*bo_clear_supports_48b_address) (drm_intel_bo *bo);
- /**
- Add relocation entry in reloc_buf, which will be updated with the
- target buffer's real offset on on command submission.
-- 2.5.0
You also haven't updated the dump_validation_list with proper format specifier (addresses are truncated otherwise).
I really don't like hiding set_supports_48b_address/clear_supports_48b_address in emit reloc - making this explicit would be a better approach. Of course this is just my opinion, but let me post a bit different version of this API - just as an RFC so you can see how it would look with explicit set on bo and without adding another emit_reloc variant.
Hi Michał,
Ben suggested having the set/clear in emit reloc as this is the only place mesa cares about these wa.
But I see your point, this will be used not only by mesa, so we should have something that is good for all the other projects.
-Michel