Minimum set of patches to enable GuC submission on DG1 and enable it by default.
A little difficult to test as IGTs do not work with DG1 due to a bunch of uAPI features being disabled (e.g. relocations, caching memory options, etc...).
Tested with the loading the driver and 'live' selftests. Submissions seem to work.
Signed-off-by: Matthew Brost matthew.brost@intel.com
Daniele Ceraolo Spurio (1): drm/i915/guc: put all guc objects in lmem when available
Matthew Brost (2): drm/i915/guc: Add DG1 GuC / HuC firmware defs drm/i915/guc: Enable GuC submission by default on DG1
Venkata Sandeep Dhanalakota (1): drm/i915: Do not define vma on stack
drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 26 +++++++ drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 4 + drivers/gpu/drm/i915/gt/uc/intel_guc.c | 9 ++- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 11 ++- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 14 +++- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 90 ++++++++++++++++++++--- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 2 + 8 files changed, 138 insertions(+), 20 deletions(-)
From: Venkata Sandeep Dhanalakota venkata.s.dhanalakota@intel.com
Defining vma on stack can cause stack overflow, if vma gets populated with new fields.
Cc: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Cc: Tvrtko Ursulin tvrtko.ursulin@intel.com Signed-off-by: Venkata Sandeep Dhanalakota venkata.s.dhanalakota@intel.com Signef-off-by: Matthew Brost matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 18 +++++++++--------- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 2 ++ 2 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 3a16d08608a5..f632dbd32b42 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -413,20 +413,20 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) { struct drm_i915_gem_object *obj = uc_fw->obj; struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; - struct i915_vma dummy = { - .node.start = uc_fw_ggtt_offset(uc_fw), - .node.size = obj->base.size, - .pages = obj->mm.pages, - .vm = &ggtt->vm, - }; + struct i915_vma *dummy = &uc_fw->dummy; + + dummy->node.start = uc_fw_ggtt_offset(uc_fw); + dummy->node.size = obj->base.size; + dummy->pages = obj->mm.pages; + dummy->vm = &ggtt->vm;
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - GEM_BUG_ON(dummy.node.size > ggtt->uc_fw.size); + GEM_BUG_ON(dummy->node.size > ggtt->uc_fw.size);
/* uc_fw->obj cache domains were not controlled across suspend */ - drm_clflush_sg(dummy.pages); + drm_clflush_sg(dummy->pages);
- ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0); + ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, 0); }
static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 99bb1fe1af66..693cc0ebcd63 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -10,6 +10,7 @@ #include "intel_uc_fw_abi.h" #include "intel_device_info.h" #include "i915_gem.h" +#include "i915_vma.h"
struct drm_printer; struct drm_i915_private; @@ -75,6 +76,7 @@ struct intel_uc_fw { bool user_overridden; size_t size; struct drm_i915_gem_object *obj; + struct i915_vma dummy;
/* * The firmware build process will generate a version header file with major and
On Mon, Aug 02, 2021 at 10:11:18PM -0700, Matthew Brost wrote:
From: Venkata Sandeep Dhanalakota venkata.s.dhanalakota@intel.com
Defining vma on stack can cause stack overflow, if vma gets populated with new fields.
Cc: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Cc: Tvrtko Ursulin tvrtko.ursulin@intel.com Signed-off-by: Venkata Sandeep Dhanalakota venkata.s.dhanalakota@intel.com Signef-off-by: Matthew Brost matthew.brost@intel.com
Reviewed-by: Matthew Brost matthew.brost@intel.com
drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 18 +++++++++--------- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 2 ++ 2 files changed, 11 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 3a16d08608a5..f632dbd32b42 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -413,20 +413,20 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) { struct drm_i915_gem_object *obj = uc_fw->obj; struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt;
- struct i915_vma dummy = {
.node.start = uc_fw_ggtt_offset(uc_fw),
.node.size = obj->base.size,
.pages = obj->mm.pages,
.vm = &ggtt->vm,
- };
struct i915_vma *dummy = &uc_fw->dummy;
dummy->node.start = uc_fw_ggtt_offset(uc_fw);
dummy->node.size = obj->base.size;
dummy->pages = obj->mm.pages;
dummy->vm = &ggtt->vm;
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
- GEM_BUG_ON(dummy.node.size > ggtt->uc_fw.size);
GEM_BUG_ON(dummy->node.size > ggtt->uc_fw.size);
/* uc_fw->obj cache domains were not controlled across suspend */
- drm_clflush_sg(dummy.pages);
- drm_clflush_sg(dummy->pages);
- ggtt->vm.insert_entries(&ggtt->vm, &dummy, I915_CACHE_NONE, 0);
- ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, 0);
}
static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h index 99bb1fe1af66..693cc0ebcd63 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h @@ -10,6 +10,7 @@ #include "intel_uc_fw_abi.h" #include "intel_device_info.h" #include "i915_gem.h" +#include "i915_vma.h"
struct drm_printer; struct drm_i915_private; @@ -75,6 +76,7 @@ struct intel_uc_fw { bool user_overridden; size_t size; struct drm_i915_gem_object *obj;
struct i915_vma dummy;
/*
- The firmware build process will generate a version header file with major and
-- 2.28.0
From: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com
The firmware binary has to be loaded from lmem and the recommendation is to put all other objects in there as well. Note that we don't fall back to system memory if the allocation in lmem fails because all objects are allocated during driver load and if we have issues with lmem at that point something is seriously wrong with the system, so no point in trying to handle it.
Cc: Matthew Auld matthew.auld@intel.com Cc: Abdiel Janulgue abdiel.janulgue@linux.intel.com Cc: Michal Wajdeczko michal.wajdeczko@intel.com Cc: Vinay Belgaumkar vinay.belgaumkar@intel.com Cc: Radoslaw Szwichtenberg radoslaw.szwichtenberg@intel.com Signed-off-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Matthew Brost matthew.brost@intel.com --- drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 26 ++++++++ drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 4 ++ drivers/gpu/drm/i915/gt/uc/intel_guc.c | 9 ++- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 11 +++- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 14 ++++- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 75 +++++++++++++++++++++-- 6 files changed, 127 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index eb345305dc52..034226c5d4d0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -103,6 +103,32 @@ __i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915, size, page_size, flags); }
+struct drm_i915_gem_object * +i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915, + const void *data, size_t size) +{ + struct drm_i915_gem_object *obj; + void *map; + + obj = i915_gem_object_create_lmem(i915, + round_up(size, PAGE_SIZE), + I915_BO_ALLOC_CONTIGUOUS); + if (IS_ERR(obj)) + return obj; + + map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); + if (IS_ERR(map)) { + i915_gem_object_put(obj); + return map; + } + + memcpy(map, data, size); + + i915_gem_object_unpin_map(obj); + + return obj; +} + struct drm_i915_gem_object * i915_gem_object_create_lmem(struct drm_i915_private *i915, resource_size_t size, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index 4ee81fc66302..1b88ea13435c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -23,6 +23,10 @@ bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj);
bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj);
+struct drm_i915_gem_object * +i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915, + const void *data, size_t size); + struct drm_i915_gem_object * __i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915, resource_size_t size, diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 979128e28372..55160d3e401a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -3,6 +3,7 @@ * Copyright © 2014-2019 Intel Corporation */
+#include "gem/i915_gem_lmem.h" #include "gt/intel_gt.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm_irq.h" @@ -630,7 +631,13 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) u64 flags; int ret;
- obj = i915_gem_object_create_shmem(gt->i915, size); + if (HAS_LMEM(gt->i915)) + obj = i915_gem_object_create_lmem(gt->i915, size, + I915_BO_ALLOC_CPU_CLEAR | + I915_BO_ALLOC_CONTIGUOUS); + else + obj = i915_gem_object_create_shmem(gt->i915, size); + if (IS_ERR(obj)) return ERR_CAST(obj);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 76fe766ad1bc..962be0c12208 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -41,7 +41,7 @@ static void guc_prepare_xfer(struct intel_uncore *uncore) }
/* Copy RSA signature from the fw image to HW for verification */ -static void guc_xfer_rsa(struct intel_uc_fw *guc_fw, +static int guc_xfer_rsa(struct intel_uc_fw *guc_fw, struct intel_uncore *uncore) { u32 rsa[UOS_RSA_SCRATCH_COUNT]; @@ -49,10 +49,13 @@ static void guc_xfer_rsa(struct intel_uc_fw *guc_fw, int i;
copied = intel_uc_fw_copy_rsa(guc_fw, rsa, sizeof(rsa)); - GEM_BUG_ON(copied < sizeof(rsa)); + if (copied < sizeof(rsa)) + return -ENOMEM;
for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) intel_uncore_write(uncore, UOS_RSA_SCRATCH(i), rsa[i]); + + return 0; }
/* @@ -141,7 +144,9 @@ int intel_guc_fw_upload(struct intel_guc *guc) * by the DMA engine in one operation, whereas the RSA signature is * loaded via MMIO. */ - guc_xfer_rsa(&guc->fw, uncore); + ret = guc_xfer_rsa(&guc->fw, uncore); + if (ret) + goto out;
/* * Current uCode expects the code to be loaded at 8k; locations below diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index fc5387b410a2..ff4b6869b80b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -87,17 +87,25 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc) vma->obj, true)); if (IS_ERR(vaddr)) { i915_vma_unpin_and_release(&vma, 0); - return PTR_ERR(vaddr); + err = PTR_ERR(vaddr); + goto unpin_out; }
copied = intel_uc_fw_copy_rsa(&huc->fw, vaddr, vma->size); - GEM_BUG_ON(copied < huc->fw.rsa_size); - i915_gem_object_unpin_map(vma->obj);
+ if (copied < huc->fw.rsa_size) { + err = -ENOMEM; + goto unpin_out; + } + huc->rsa_data = vma;
return 0; + +unpin_out: + i915_vma_unpin_and_release(&vma, 0); + return err; }
static void intel_huc_rsa_data_destroy(struct intel_huc *huc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index f632dbd32b42..f8cb00ffb506 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -7,6 +7,7 @@ #include <linux/firmware.h> #include <drm/drm_print.h>
+#include "gem/i915_gem_lmem.h" #include "intel_uc_fw.h" #include "intel_uc_fw_abi.h" #include "i915_drv.h" @@ -370,7 +371,11 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) uc_fw->private_data_size = css->private_data_size;
- obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size); + if (HAS_LMEM(i915)) + obj = i915_gem_object_create_lmem_from_data(i915, fw->data, fw->size); + else + obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size); + if (IS_ERR(obj)) { err = PTR_ERR(obj); goto fail; @@ -414,6 +419,7 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) struct drm_i915_gem_object *obj = uc_fw->obj; struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; struct i915_vma *dummy = &uc_fw->dummy; + u32 pte_flags = 0;
dummy->node.start = uc_fw_ggtt_offset(uc_fw); dummy->node.size = obj->base.size; @@ -424,9 +430,13 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) GEM_BUG_ON(dummy->node.size > ggtt->uc_fw.size);
/* uc_fw->obj cache domains were not controlled across suspend */ - drm_clflush_sg(dummy->pages); + if (i915_gem_object_has_struct_page(obj)) + drm_clflush_sg(dummy->pages); + + if (i915_gem_object_is_lmem(obj)) + pte_flags |= PTE_LM;
- ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, 0); + ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags); }
static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw) @@ -585,13 +595,68 @@ void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw) */ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len) { - struct sg_table *pages = uc_fw->obj->mm.pages; + struct intel_memory_region *mr = uc_fw->obj->mm.region; u32 size = min_t(u32, uc_fw->rsa_size, max_len); u32 offset = sizeof(struct uc_css_header) + uc_fw->ucode_size; + struct sgt_iter iter; + size_t count = 0; + int idx;
+ /* Called during reset handling, must be atomic [no fs_reclaim] */ GEM_BUG_ON(!intel_uc_fw_is_available(uc_fw));
- return sg_pcopy_to_buffer(pages->sgl, pages->nents, dst, size, offset); + idx = offset >> PAGE_SHIFT; + offset = offset_in_page(offset); + if (i915_gem_object_has_struct_page(uc_fw->obj)) { + struct page *page; + + for_each_sgt_page(page, iter, uc_fw->obj->mm.pages) { + u32 len = min_t(u32, size, PAGE_SIZE - offset); + void *vaddr; + + if (idx > 0) { + idx--; + continue; + } + + vaddr = kmap_atomic(page); + memcpy(dst, vaddr + offset, len); + kunmap_atomic(vaddr); + + offset = 0; + dst += len; + size -= len; + count += len; + if (!size) + break; + } + } else { + dma_addr_t addr; + + for_each_sgt_daddr(addr, iter, uc_fw->obj->mm.pages) { + u32 len = min_t(u32, size, PAGE_SIZE - offset); + void __iomem *vaddr; + + if (idx > 0) { + idx--; + continue; + } + + vaddr = io_mapping_map_atomic_wc(&mr->iomap, + addr - mr->region.start); + memcpy_fromio(dst, vaddr + offset, len); + io_mapping_unmap_atomic(vaddr); + + offset = 0; + dst += len; + size -= len; + count += len; + if (!size) + break; + } + } + + return count; }
/**
On 8/2/2021 22:11, Matthew Brost wrote:
From: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com
The firmware binary has to be loaded from lmem and the recommendation is to put all other objects in there as well. Note that we don't fall back to system memory if the allocation in lmem fails because all objects are allocated during driver load and if we have issues with lmem at that point something is seriously wrong with the system, so no point in trying to handle it.
Cc: Matthew Auld matthew.auld@intel.com Cc: Abdiel Janulgue abdiel.janulgue@linux.intel.com Cc: Michal Wajdeczko michal.wajdeczko@intel.com Cc: Vinay Belgaumkar vinay.belgaumkar@intel.com Cc: Radoslaw Szwichtenberg radoslaw.szwichtenberg@intel.com Signed-off-by: Daniele Ceraolo Spurio daniele.ceraolospurio@intel.com Signed-off-by: Matthew Brost matthew.brost@intel.com
drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 26 ++++++++ drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 4 ++ drivers/gpu/drm/i915/gt/uc/intel_guc.c | 9 ++- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 11 +++- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 14 ++++- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 75 +++++++++++++++++++++-- 6 files changed, 127 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c index eb345305dc52..034226c5d4d0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c @@ -103,6 +103,32 @@ __i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915, size, page_size, flags); }
+struct drm_i915_gem_object * +i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915,
const void *data, size_t size)
+{
- struct drm_i915_gem_object *obj;
- void *map;
- obj = i915_gem_object_create_lmem(i915,
round_up(size, PAGE_SIZE),
I915_BO_ALLOC_CONTIGUOUS);
- if (IS_ERR(obj))
return obj;
- map = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
- if (IS_ERR(map)) {
i915_gem_object_put(obj);
return map;
- }
- memcpy(map, data, size);
- i915_gem_object_unpin_map(obj);
- return obj;
+}
- struct drm_i915_gem_object * i915_gem_object_create_lmem(struct drm_i915_private *i915, resource_size_t size,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h index 4ee81fc66302..1b88ea13435c 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h @@ -23,6 +23,10 @@ bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj);
bool __i915_gem_object_is_lmem(struct drm_i915_gem_object *obj);
+struct drm_i915_gem_object * +i915_gem_object_create_lmem_from_data(struct drm_i915_private *i915,
const void *data, size_t size);
- struct drm_i915_gem_object * __i915_gem_object_create_lmem_with_ps(struct drm_i915_private *i915, resource_size_t size,
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 979128e28372..55160d3e401a 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -3,6 +3,7 @@
- Copyright © 2014-2019 Intel Corporation
*/
+#include "gem/i915_gem_lmem.h" #include "gt/intel_gt.h" #include "gt/intel_gt_irq.h" #include "gt/intel_gt_pm_irq.h" @@ -630,7 +631,13 @@ struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size) u64 flags; int ret;
- obj = i915_gem_object_create_shmem(gt->i915, size);
- if (HAS_LMEM(gt->i915))
obj = i915_gem_object_create_lmem(gt->i915, size,
I915_BO_ALLOC_CPU_CLEAR |
I915_BO_ALLOC_CONTIGUOUS);
- else
obj = i915_gem_object_create_shmem(gt->i915, size);
- if (IS_ERR(obj)) return ERR_CAST(obj);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index 76fe766ad1bc..962be0c12208 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -41,7 +41,7 @@ static void guc_prepare_xfer(struct intel_uncore *uncore) }
/* Copy RSA signature from the fw image to HW for verification */ -static void guc_xfer_rsa(struct intel_uc_fw *guc_fw, +static int guc_xfer_rsa(struct intel_uc_fw *guc_fw, struct intel_uncore *uncore) { u32 rsa[UOS_RSA_SCRATCH_COUNT]; @@ -49,10 +49,13 @@ static void guc_xfer_rsa(struct intel_uc_fw *guc_fw, int i;
copied = intel_uc_fw_copy_rsa(guc_fw, rsa, sizeof(rsa));
- GEM_BUG_ON(copied < sizeof(rsa));
if (copied < sizeof(rsa))
return -ENOMEM;
for (i = 0; i < UOS_RSA_SCRATCH_COUNT; i++) intel_uncore_write(uncore, UOS_RSA_SCRATCH(i), rsa[i]);
return 0; }
/*
@@ -141,7 +144,9 @@ int intel_guc_fw_upload(struct intel_guc *guc) * by the DMA engine in one operation, whereas the RSA signature is * loaded via MMIO. */
- guc_xfer_rsa(&guc->fw, uncore);
ret = guc_xfer_rsa(&guc->fw, uncore);
if (ret)
goto out;
/*
- Current uCode expects the code to be loaded at 8k; locations below
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_huc.c b/drivers/gpu/drm/i915/gt/uc/intel_huc.c index fc5387b410a2..ff4b6869b80b 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_huc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_huc.c @@ -87,17 +87,25 @@ static int intel_huc_rsa_data_create(struct intel_huc *huc) vma->obj, true)); if (IS_ERR(vaddr)) { i915_vma_unpin_and_release(&vma, 0);
return PTR_ERR(vaddr);
err = PTR_ERR(vaddr);
goto unpin_out;
}
copied = intel_uc_fw_copy_rsa(&huc->fw, vaddr, vma->size);
- GEM_BUG_ON(copied < huc->fw.rsa_size);
- i915_gem_object_unpin_map(vma->obj);
if (copied < huc->fw.rsa_size) {
err = -ENOMEM;
goto unpin_out;
}
huc->rsa_data = vma;
return 0;
+unpin_out:
i915_vma_unpin_and_release(&vma, 0);
return err; }
static void intel_huc_rsa_data_destroy(struct intel_huc *huc)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index f632dbd32b42..f8cb00ffb506 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -7,6 +7,7 @@ #include <linux/firmware.h> #include <drm/drm_print.h>
+#include "gem/i915_gem_lmem.h" #include "intel_uc_fw.h" #include "intel_uc_fw_abi.h" #include "i915_drv.h" @@ -370,7 +371,11 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw) if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) uc_fw->private_data_size = css->private_data_size;
- obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
- if (HAS_LMEM(i915))
obj = i915_gem_object_create_lmem_from_data(i915, fw->data, fw->size);
- else
obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
- if (IS_ERR(obj)) { err = PTR_ERR(obj); goto fail;
@@ -414,6 +419,7 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) struct drm_i915_gem_object *obj = uc_fw->obj; struct i915_ggtt *ggtt = __uc_fw_to_gt(uc_fw)->ggtt; struct i915_vma *dummy = &uc_fw->dummy;
u32 pte_flags = 0;
dummy->node.start = uc_fw_ggtt_offset(uc_fw); dummy->node.size = obj->base.size;
@@ -424,9 +430,13 @@ static void uc_fw_bind_ggtt(struct intel_uc_fw *uc_fw) GEM_BUG_ON(dummy->node.size > ggtt->uc_fw.size);
/* uc_fw->obj cache domains were not controlled across suspend */
- drm_clflush_sg(dummy->pages);
- if (i915_gem_object_has_struct_page(obj))
drm_clflush_sg(dummy->pages);
- if (i915_gem_object_is_lmem(obj))
pte_flags |= PTE_LM;
- ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, 0);
ggtt->vm.insert_entries(&ggtt->vm, dummy, I915_CACHE_NONE, pte_flags); }
static void uc_fw_unbind_ggtt(struct intel_uc_fw *uc_fw)
@@ -585,13 +595,68 @@ void intel_uc_fw_cleanup_fetch(struct intel_uc_fw *uc_fw) */ size_t intel_uc_fw_copy_rsa(struct intel_uc_fw *uc_fw, void *dst, u32 max_len) {
- struct sg_table *pages = uc_fw->obj->mm.pages;
struct intel_memory_region *mr = uc_fw->obj->mm.region; u32 size = min_t(u32, uc_fw->rsa_size, max_len); u32 offset = sizeof(struct uc_css_header) + uc_fw->ucode_size;
struct sgt_iter iter;
size_t count = 0;
int idx;
/* Called during reset handling, must be atomic [no fs_reclaim] */ GEM_BUG_ON(!intel_uc_fw_is_available(uc_fw));
- return sg_pcopy_to_buffer(pages->sgl, pages->nents, dst, size, offset);
- idx = offset >> PAGE_SHIFT;
- offset = offset_in_page(offset);
- if (i915_gem_object_has_struct_page(uc_fw->obj)) {
struct page *page;
for_each_sgt_page(page, iter, uc_fw->obj->mm.pages) {
Why can't this just use 'sg_pcopy_to_buffer' as before?
John.
u32 len = min_t(u32, size, PAGE_SIZE - offset);
void *vaddr;
if (idx > 0) {
idx--;
continue;
}
vaddr = kmap_atomic(page);
memcpy(dst, vaddr + offset, len);
kunmap_atomic(vaddr);
offset = 0;
dst += len;
size -= len;
count += len;
if (!size)
break;
}
} else {
dma_addr_t addr;
for_each_sgt_daddr(addr, iter, uc_fw->obj->mm.pages) {
u32 len = min_t(u32, size, PAGE_SIZE - offset);
void __iomem *vaddr;
if (idx > 0) {
idx--;
continue;
}
vaddr = io_mapping_map_atomic_wc(&mr->iomap,
addr - mr->region.start);
memcpy_fromio(dst, vaddr + offset, len);
io_mapping_unmap_atomic(vaddr);
offset = 0;
dst += len;
size -= len;
count += len;
if (!size)
break;
}
}
return count; }
/**
Signed-off-by: Matthew Brost matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index f8cb00ffb506..a685d563df72 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -51,6 +51,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3), huc_def(tgl, 7, 9, 3)) \ fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ + fw_def(DG1, 0, guc_def(dg1, 62, 0, 0), huc_def(dg1, 7, 9, 3)) \ fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \
On 8/2/2021 22:11, Matthew Brost wrote:
Signed-off-by: Matthew Brost matthew.brost@intel.com
drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 1 + 1 file changed, 1 insertion(+)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index f8cb00ffb506..a685d563df72 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -51,6 +51,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \ fw_def(ALDERLAKE_P, 0, guc_def(adlp, 62, 0, 3), huc_def(tgl, 7, 9, 3)) \ fw_def(ALDERLAKE_S, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \
- fw_def(DG1, 0, guc_def(dg1, 62, 0, 0), huc_def(dg1, 7, 9, 3)) \ fw_def(ROCKETLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ fw_def(TIGERLAKE, 0, guc_def(tgl, 62, 0, 0), huc_def(tgl, 7, 9, 3)) \ fw_def(JASPERLAKE, 0, guc_def(ehl, 62, 0, 0), huc_def(ehl, 9, 0, 0)) \
Reviewed-by: John Harrison John.C.Harrison@Intel.com
Signed-off-by: Matthew Brost matthew.brost@intel.com --- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index da57d18d9f6b..fc2fc8d111d8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -35,7 +35,7 @@ static void uc_expand_default_options(struct intel_uc *uc) }
/* Intermediate platforms are HuC authentication only */ - if (IS_DG1(i915) || IS_ALDERLAKE_S(i915)) { + if (IS_ALDERLAKE_S(i915)) { i915->params.enable_guc = ENABLE_GUC_LOAD_HUC; return; }
On 8/2/2021 22:11, Matthew Brost wrote:
Signed-off-by: Matthew Brost matthew.brost@intel.com
drivers/gpu/drm/i915/gt/uc/intel_uc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index da57d18d9f6b..fc2fc8d111d8 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -35,7 +35,7 @@ static void uc_expand_default_options(struct intel_uc *uc) }
/* Intermediate platforms are HuC authentication only */
- if (IS_DG1(i915) || IS_ALDERLAKE_S(i915)) {
- if (IS_ALDERLAKE_S(i915)) { i915->params.enable_guc = ENABLE_GUC_LOAD_HUC; return; }
Reviewed-by: John Harrison John.C.Harrison@Intel.com
On Tue, Aug 3, 2021 at 6:53 AM Matthew Brost matthew.brost@intel.com wrote:
Minimum set of patches to enable GuC submission on DG1 and enable it by default.
A little difficult to test as IGTs do not work with DG1 due to a bunch of uAPI features being disabled (e.g. relocations, caching memory options, etc...).
Matt Auld has an igt series which fixes a lot of this stuff, would be good to do at least a Test-With run with that.
Also I'm assuming that for ADL-P we'll get this equivalent patch set soon, and there we should be able to get real results? -Daniel
Tested with the loading the driver and 'live' selftests. Submissions seem to work.
Signed-off-by: Matthew Brost matthew.brost@intel.com
Daniele Ceraolo Spurio (1): drm/i915/guc: put all guc objects in lmem when available
Matthew Brost (2): drm/i915/guc: Add DG1 GuC / HuC firmware defs drm/i915/guc: Enable GuC submission by default on DG1
Venkata Sandeep Dhanalakota (1): drm/i915: Do not define vma on stack
drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 26 +++++++ drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 4 + drivers/gpu/drm/i915/gt/uc/intel_guc.c | 9 ++- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 11 ++- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 14 +++- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 90 ++++++++++++++++++++--- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 2 + 8 files changed, 138 insertions(+), 20 deletions(-)
-- 2.28.0
On Tue, Aug 03, 2021 at 02:15:13PM +0200, Daniel Vetter wrote:
On Tue, Aug 3, 2021 at 6:53 AM Matthew Brost matthew.brost@intel.com wrote:
Minimum set of patches to enable GuC submission on DG1 and enable it by default.
A little difficult to test as IGTs do not work with DG1 due to a bunch of uAPI features being disabled (e.g. relocations, caching memory options, etc...).
Matt Auld has an igt series which fixes a lot of this stuff, would be good to do at least a Test-With run with that.
If I'm understanding his series correct it only fixes the mmap issues not relocs so I don't think it will help all that much. I probably could send a trybot series letting relocs work on DG1 + his series and that might work? I guess I'll find out.
Also I'm assuming that for ADL-P we'll get this equivalent patch set
GuC submission is enabled by default on ADL-P. We don't have ADL-P in pre-merge CI currently though :(, just offline runs of about 1800 tests. Still a awaiting results for that though.
For what it is worth I've run all 1800 locally, via a script, and gotten solid results back with my IGT updates. There are some failures but I believe I understand all of them as either test issues or known differences in behavior between execlists and GuC submission with are opens (e.g. persistence, semaphores, SUBMIT fence, etc...).
Matt
soon, and there we should be able to get real results? -Daniel
Tested with the loading the driver and 'live' selftests. Submissions seem to work.
Signed-off-by: Matthew Brost matthew.brost@intel.com
Daniele Ceraolo Spurio (1): drm/i915/guc: put all guc objects in lmem when available
Matthew Brost (2): drm/i915/guc: Add DG1 GuC / HuC firmware defs drm/i915/guc: Enable GuC submission by default on DG1
Venkata Sandeep Dhanalakota (1): drm/i915: Do not define vma on stack
drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 26 +++++++ drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 4 + drivers/gpu/drm/i915/gt/uc/intel_guc.c | 9 ++- drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c | 11 ++- drivers/gpu/drm/i915/gt/uc/intel_huc.c | 14 +++- drivers/gpu/drm/i915/gt/uc/intel_uc.c | 2 +- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 90 ++++++++++++++++++++--- drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h | 2 + 8 files changed, 138 insertions(+), 20 deletions(-)
-- 2.28.0
-- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch
Hi,
On 8/3/21 7:26 PM, Matthew Brost wrote:
On Tue, Aug 03, 2021 at 02:15:13PM +0200, Daniel Vetter wrote:
On Tue, Aug 3, 2021 at 6:53 AM Matthew Brost matthew.brost@intel.com wrote:
Minimum set of patches to enable GuC submission on DG1 and enable it by default.
A little difficult to test as IGTs do not work with DG1 due to a bunch of uAPI features being disabled (e.g. relocations, caching memory options, etc...).
Matt Auld has an igt series which fixes a lot of this stuff, would be good to do at least a Test-With run with that.
It looks like Maarten now merged Matt's series to IGT.
There is a series on IGT trybot with pending work to have some igt tests support relocations,
https://patchwork.freedesktop.org/series/92043/
One of the tests that have WIP fixes is gem_exec_whisper, and that particular test has historically shown occasional hangs with GuC submission on DG1 so it would be very desirable if we could make that test in particular work (I haven't verified that that's the case) reliably.
Also the following series:
https://patchwork.freedesktop.org/series/93455/
tries a bit harder to get some more tests running, squashing the above series on top of latest IGT.
Thanks, /Thomas
On 8/6/21 1:34 PM, Thomas Hellström (Intel) wrote:
Hi,
On 8/3/21 7:26 PM, Matthew Brost wrote:
On Tue, Aug 03, 2021 at 02:15:13PM +0200, Daniel Vetter wrote:
On Tue, Aug 3, 2021 at 6:53 AM Matthew Brost matthew.brost@intel.com wrote:
Minimum set of patches to enable GuC submission on DG1 and enable it by default.
A little difficult to test as IGTs do not work with DG1 due to a bunch of uAPI features being disabled (e.g. relocations, caching memory options, etc...).
Matt Auld has an igt series which fixes a lot of this stuff, would be good to do at least a Test-With run with that.
It looks like Maarten now merged Matt's series to IGT.
There is a series on IGT trybot with pending work to have some igt tests support relocations,
https://patchwork.freedesktop.org/series/92043/
One of the tests that have WIP fixes is gem_exec_whisper, and that particular test has historically shown occasional hangs with GuC submission on DG1 so it would be very desirable if we could make that test in particular work (I haven't verified that that's the case) reliably.
Also the following series:
https://patchwork.freedesktop.org/series/93455/
tries a bit harder to get some more tests running, squashing the above series on top of latest IGT.
Thanks, /Thomas
And also while we're working on getting igt adapted to uapi changes and to get more LMEM coverage in there, an IMO relevant test case to run manually is "piglit quick"on top of DG1-enabled OpenGL checking for regressions and significant changes in execution time.
/Thomas
On 8/6/21 1:34 PM, Thomas Hellström (Intel) wrote:
Hi,
On 8/3/21 7:26 PM, Matthew Brost wrote:
On Tue, Aug 03, 2021 at 02:15:13PM +0200, Daniel Vetter wrote:
On Tue, Aug 3, 2021 at 6:53 AM Matthew Brost matthew.brost@intel.com wrote:
Minimum set of patches to enable GuC submission on DG1 and enable it by default.
A little difficult to test as IGTs do not work with DG1 due to a bunch of uAPI features being disabled (e.g. relocations, caching memory options, etc...).
Matt Auld has an igt series which fixes a lot of this stuff, would be good to do at least a Test-With run with that.
It looks like Maarten now merged Matt's series to IGT.
There is a series on IGT trybot with pending work to have some igt tests support relocations,
https://patchwork.freedesktop.org/series/92043/
One of the tests that have WIP fixes is gem_exec_whisper, and that particular test has historically shown occasional hangs with GuC submission on DG1 so it would be very desirable if we could make that test in particular work (I haven't verified that that's the case) reliably.
Also the following series:
https://patchwork.freedesktop.org/series/93455/
tries a bit harder to get some more tests running, squashing the above series on top of latest IGT.
Thanks, /Thomas
Just verified that gem-exec-whisper is running now on DG1 on latest igt and the above series. Haven't tried with GuC submission, though.
/Thomas
On Fri, Aug 06, 2021 at 01:34:33PM +0200, Thomas Hellström (Intel) wrote:
Hi,
On 8/3/21 7:26 PM, Matthew Brost wrote:
On Tue, Aug 03, 2021 at 02:15:13PM +0200, Daniel Vetter wrote:
On Tue, Aug 3, 2021 at 6:53 AM Matthew Brost matthew.brost@intel.com wrote:
Minimum set of patches to enable GuC submission on DG1 and enable it by default.
A little difficult to test as IGTs do not work with DG1 due to a bunch of uAPI features being disabled (e.g. relocations, caching memory options, etc...).
Matt Auld has an igt series which fixes a lot of this stuff, would be good to do at least a Test-With run with that.
It looks like Maarten now merged Matt's series to IGT.
Great.
There is a series on IGT trybot with pending work to have some igt tests support relocations,
Will take a look but at the moment we are blocked because SLPC won't init on the DG1 in CI but it works just fine on all other parts I've tried in RIL. We suspect the DG1 in CI is an early stepping and we may be missing workarounds. Have a possible fix, just need to try it out. We also might just want to replace the DG1 part in CI with a newer stepping so we don't have to upstream WAs for non-shipping parts.
One of the tests that have WIP fixes is gem_exec_whisper, and that particular test has historically shown occasional hangs with GuC submission on DG1 so it would be very desirable if we could make that test in particular work (I haven't verified that that's the case) reliably.
I just ran gem_exec_whisper on DG1, with GuC submission, and kernel hacked to allow relocs. It passed for me. Only 1 run though so it is possible there are still intermittent issues. We really need to get CI up and running ASAP on all platforms where GuC submission is POR. Perhaps on all gen11+ platforms as well because GuC submission is supported and the more coverage we can get, the better.
Also the following series:
I'll check this series out too.
Matt
tries a bit harder to get some more tests running, squashing the above series on top of latest IGT.
Thanks, /Thomas
dri-devel@lists.freedesktop.org