For discrete, users of pin_map() need to obey the same rules as the TTM backend, where we map system-memory-only objects as WB, and everything else as WC. The simplest approach for now is to just force the correct mapping type as per the new rules for discrete.
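To illustrate the net effect for callers (a rough sketch only, not part of the diff below; obj is assumed to be an lmem-capable object created by userspace):

	void *vaddr;

	/*
	 * On discrete, requesting WB here is now rejected with -ENODEV,
	 * since the object could end up in lmem. A kernel-internal object
	 * (no userspace placements) would instead be transparently forced
	 * to WC, and a system-memory-only object forced to WB.
	 */
	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	memset(vaddr, 0, obj->base.size);
	i915_gem_object_unpin_map(obj);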
Suggested-by: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_object.c | 34 ++++++++++++++++++++++
 drivers/gpu/drm/i915/gem/i915_gem_object.h |  4 +++
 drivers/gpu/drm/i915/gem/i915_gem_pages.c  | 22 ++++++++++++--
 3 files changed, 58 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 547cc9dad90d..9da7b288b7ed 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -625,6 +625,40 @@ int i915_gem_object_migrate(struct drm_i915_gem_object *obj,
 	return obj->ops->migrate(obj, mr);
 }
+/**
+ * i915_gem_object_placement_possible - Check whether the object can be
+ * placed in a certain memory type
+ * @obj: Pointer to the object
+ * @type: The memory type to check
+ *
+ * Return: True if the object can be placed in @type. False otherwise.
+ */
+bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
+					enum intel_memory_type type)
+{
+	unsigned int i;
+
+	if (!obj->mm.n_placements) {
+		switch (type) {
+		case INTEL_MEMORY_LOCAL:
+			return i915_gem_object_has_iomem(obj);
+		case INTEL_MEMORY_SYSTEM:
+			return i915_gem_object_has_pages(obj);
+		default:
+			/* Ignore stolen for now */
+			GEM_BUG_ON(1);
+			return false;
+		}
+	}
+
+	for (i = 0; i < obj->mm.n_placements; i++) {
+		if (obj->mm.placements[i]->type == type)
+			return true;
+	}
+
+	return false;
+}
+
 void i915_gem_init__objects(struct drm_i915_private *i915)
 {
 	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index d423d8cac4f2..8be4fadeee48 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -12,6 +12,7 @@
 #include <drm/drm_device.h>
#include "display/intel_frontbuffer.h" +#include "intel_memory_region.h" #include "i915_gem_object_types.h" #include "i915_gem_gtt.h" #include "i915_gem_ww.h" @@ -607,6 +608,9 @@ bool i915_gem_object_can_migrate(struct drm_i915_gem_object *obj, int i915_gem_object_wait_migration(struct drm_i915_gem_object *obj, unsigned int flags);
+bool i915_gem_object_placement_possible(struct drm_i915_gem_object *obj,
+					enum intel_memory_type type);
+
 #ifdef CONFIG_MMU_NOTIFIER
 static inline bool
 i915_gem_object_is_userptr(struct drm_i915_gem_object *obj)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index f2f850e31b8e..810a157a18f8 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -321,8 +321,7 @@ static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj,
 	dma_addr_t addr;
 	void *vaddr;
-	if (type != I915_MAP_WC)
-		return ERR_PTR(-ENODEV);
+	GEM_BUG_ON(type != I915_MAP_WC);
 	if (n_pfn > ARRAY_SIZE(stack)) {
 		/* Too big for stack -- allocate temporary array instead */
@@ -374,6 +373,25 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 	}
 	GEM_BUG_ON(!i915_gem_object_has_pages(obj));
+	/*
+	 * For discrete our CPU mappings need to be consistent in order to
+	 * function correctly on !x86. When mapping things through TTM, we use
+	 * the same rules to determine the caching type.
+	 *
+	 * Internal users of lmem are already expected to get this right, so no
+	 * fudging needed there.
+	 */
+	if (i915_gem_object_placement_possible(obj, INTEL_MEMORY_LOCAL)) {
+		if (type != I915_MAP_WC && !obj->mm.n_placements) {
+			ptr = ERR_PTR(-ENODEV);
+			goto err_unpin;
+		}
+
+		type = I915_MAP_WC;
+	} else if (IS_DGFX(to_i915(obj->base.dev))) {
+		type = I915_MAP_WB;
+	}
+
 	ptr = page_unpack_bits(obj->mm.mapping, &has_type);
 	if (ptr && has_type != type) {
 		if (pinned) {
Convert all the drm_i915_gem_caching bits to proper kernel doc.
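For context, the ioctl being documented is exercised from userspace roughly as follows (an illustrative sketch only, not part of this patch; fd and handle are assumed to be a valid DRM fd and GEM handle):

	#include <errno.h>
	#include <sys/ioctl.h>
	#include <drm/i915_drm.h>

	/* Query the current caching level of a GEM object (sketch). */
	static int get_caching(int fd, __u32 handle, __u32 *caching)
	{
		struct drm_i915_gem_caching arg = { .handle = handle };

		if (ioctl(fd, DRM_IOCTL_I915_GEM_GET_CACHING, &arg))
			return -errno;

		*caching = arg.caching; /* one of the I915_CACHING_* values */
		return 0;
	}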
Suggested-by: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Jordan Justen <jordan.l.justen@intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 include/uapi/drm/i915_drm.h | 70 +++++++++++++++++++++++--------------
 1 file changed, 43 insertions(+), 27 deletions(-)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2f70c48567c0..d13c6c5fad04 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1363,43 +1363,59 @@ struct drm_i915_gem_busy {
 };
 /**
- * I915_CACHING_NONE
+ * struct drm_i915_gem_caching - Set or get the caching for given object
+ * handle.
  *
- * GPU access is not coherent with cpu caches. Default for machines without an
- * LLC.
- */
-#define I915_CACHING_NONE		0
-/**
- * I915_CACHING_CACHED
+ * Allow userspace to control the GTT caching bits for a given object when the
+ * object is later mapped through the ppGTT (or GGTT on older platforms lacking
+ * ppGTT support, or if the object is used for scanout). Note that this might
+ * require unbinding the object from the GTT first, if its current caching value
+ * doesn't match.
  *
- * GPU access is coherent with cpu caches and furthermore the data is cached in
- * last-level caches shared between cpu cores and the gpu GT. Default on
- * machines with HAS_LLC.
- */
-#define I915_CACHING_CACHED		1
-/**
- * I915_CACHING_DISPLAY
- *
- * Special GPU caching mode which is coherent with the scanout engines.
- * Transparently falls back to I915_CACHING_NONE on platforms where no special
- * cache mode (like write-through or gfdt flushing) is available. The kernel
- * automatically sets this mode when using a buffer as a scanout target.
- * Userspace can manually set this mode to avoid a costly stall and clflush in
- * the hotpath of drawing the first frame.
  */
-#define I915_CACHING_DISPLAY	2
-
 struct drm_i915_gem_caching {
 	/**
-	 * Handle of the buffer to set/get the caching level of. */
+	 * @handle: Handle of the buffer to set/get the caching level.
+	 */
 	__u32 handle;
 	/**
-	 * Cacheing level to apply or return value
+	 * @caching: The GTT caching level to apply or possible return value.
+	 *
+	 * The supported @caching values:
 	 *
-	 * bits0-15 are for generic caching control (i.e. the above defined
-	 * values). bits16-31 are reserved for platform-specific variations
-	 * (e.g. l3$ caching on gen7). */
+	 * I915_CACHING_NONE:
+	 *
+	 * GPU access is not coherent with CPU caches. Default for machines
+	 * without an LLC. This means we need to manually clflush, if we want
+	 * GPU access to be coherent.
+	 *
+	 * I915_CACHING_CACHED:
+	 *
+	 * GPU access is coherent with CPU caches and furthermore the data is
+	 * cached in last-level caches shared between CPU cores and the GPU GT.
+	 * Default on machines with HAS_LLC. In general the fast shared
+	 * last-level cache (HAS_LLC) is considered much faster than platforms
+	 * which only support snooping (HAS_SNOOP), hence this is preferred by
+	 * default where available.
+	 *
+	 * I915_CACHING_DISPLAY:
+	 *
+	 * Special GPU caching mode which is coherent with the scanout engines.
+	 * Transparently falls back to I915_CACHING_NONE on platforms where no
+	 * special cache mode (like write-through or gfdt flushing) is
+	 * available. The kernel automatically sets this mode when using a
+	 * buffer as a scanout target. Userspace can manually set this mode to
+	 * avoid a costly stall and clflush in the hotpath of drawing the first
+	 * frame.
+	 *
+	 * Side note: On gen8+ this no longer does much since we lost the GGTT
+	 * caching bits. Although setting this is harmless, since it still
+	 * effectively falls back to I915_CACHING_NONE.
+	 */
+#define I915_CACHING_NONE		0
+#define I915_CACHING_CACHED		1
+#define I915_CACHING_DISPLAY		2
 	__u32 caching;
 };
On 2021-07-05 at 14:53:07 +0100, Matthew Auld wrote:
> Convert all the drm_i915_gem_caching bits to proper kernel doc.
>
> Suggested-by: Daniel Vetter <daniel@ffwll.ch>
> Signed-off-by: Matthew Auld <matthew.auld@intel.com>
LGTM.
Reviewed-by: Ramalingam C <ramalingam.c@intel.com>
It's a noop on DG1, and in the future when we need to support other devices which let us control the coherency, it should be an immutable creation-time property for the BO. This will likely be controlled through a new gem_create_ext extension.
v2: add some kernel doc for the discrete changes, and document the implicit rules
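From the userspace side the new behaviour then looks roughly like the below (illustrative sketch only; fd and handle are assumed to be a valid DRM fd and GEM handle):

	struct drm_i915_gem_caching arg = {
		.handle = handle,
		.caching = I915_CACHING_CACHED,
	};

	/*
	 * On DG1+ this is now rejected with -ENODEV; the CPU caching
	 * attributes are an implicit, immutable property of the object
	 * placements instead.
	 */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) && errno == ENODEV)
		; /* discrete: nothing to do, coherent by default */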
Suggested-by: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Jordan Justen <jordan.l.justen@intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c |  6 +++++
 include/uapi/drm/i915_drm.h                | 29 +++++++++++++++++++++-
 2 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 7d1400b13429..43004bef55cb 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -268,6 +268,9 @@ int i915_gem_get_caching_ioctl(struct drm_device *dev, void *data,
 	struct drm_i915_gem_object *obj;
 	int err = 0;
+	if (IS_DGFX(to_i915(dev)))
+		return -ENODEV;
+
 	rcu_read_lock();
 	obj = i915_gem_object_lookup_rcu(file, args->handle);
 	if (!obj) {
@@ -303,6 +306,9 @@ int i915_gem_set_caching_ioctl(struct drm_device *dev, void *data,
 	enum i915_cache_level level;
 	int ret = 0;
+	if (IS_DGFX(i915))
+		return -ENODEV;
+
 	switch (args->caching) {
 	case I915_CACHING_NONE:
 		level = I915_CACHE_NONE;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index d13c6c5fad04..a4faceeb8c32 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1372,7 +1372,34 @@ struct drm_i915_gem_busy {
  * require unbinding the object from the GTT first, if its current caching value
  * doesn't match.
  *
- *
+ * Note that this all changes on discrete platforms, starting from DG1, the
+ * set/get caching is no longer supported, and is now rejected. Instead the CPU
+ * caching attributes (WB vs WC) will become an immutable creation time property
+ * for the object, along with the GTT caching level. For now we don't expose any
+ * new uAPI for this, instead on DG1 this is all implicit, although this largely
+ * shouldn't matter since DG1 is coherent by default (without any way of
+ * controlling it).
+ *
+ * Implicit caching rules, starting from DG1:
+ *
+ * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
+ *   contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
+ *   mapped as write-combined only.
+ *
+ * - Everything else is always allocated and mapped as write-back, with the
+ *   guarantee that everything is also coherent with the GPU.
+ *
+ * Note that this is likely to change in the future again, where we might need
+ * more flexibility on future devices, so making this all explicit as part of a
+ * new &drm_i915_gem_create_ext extension is probable.
+ *
+ * Side note: Part of the reason for this is that changing the at-allocation-time
+ * CPU caching attributes for the pages might be required (and is expensive) if
+ * we need to then CPU map the pages later with different caching attributes.
+ * This inconsistent caching behaviour, while supported on x86, is not
+ * universally supported on other architectures. So for simplicity we opt for
+ * setting everything at creation time, whilst also making it immutable, on
+ * discrete platforms.
  */
 struct drm_i915_gem_caching {
 	/**
On 2021-07-05 at 14:53:08 +0100, Matthew Auld wrote:
> It's a noop on DG1, and in the future when we need to support other
> devices which let us control the coherency, it should be an immutable
> creation-time property for the BO. This will likely be controlled
> through a new gem_create_ext extension.
>
> v2: add some kernel doc for the discrete changes, and document the
> implicit rules
LGTM
Reviewed-by: Ramalingam C <ramalingam.c@intel.com>
On Monday, July 5, 2021 6:53:08 AM PDT Matthew Auld wrote:
> It's a noop on DG1, and in the future when we need to support other
> devices which let us control the coherency, it should be an immutable
> creation-time property for the BO. This will likely be controlled
> through a new gem_create_ext extension.
>
> v2: add some kernel doc for the discrete changes, and document the
> implicit rules
This caching ioctl patch is:
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Convert all the drm_i915_gem_set_domain bits to proper kernel doc.
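For reference, typical usage of the documented struct from userspace looks something like this (illustrative sketch only; fd and handle are assumed to be a valid DRM fd and GEM handle):

	struct drm_i915_gem_set_domain arg = {
		.handle = handle,
		.read_domains = I915_GEM_DOMAIN_CPU,
		.write_domain = I915_GEM_DOMAIN_CPU, /* implies the CPU read domain */
	};

	/*
	 * Flushes the object out of any previous domain (waiting for the
	 * GPU if the object is still active) before CPU access through a
	 * WB mapping.
	 */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &arg))
		return -errno;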
Suggested-by: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Jordan Justen <jordan.l.justen@intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 include/uapi/drm/i915_drm.h | 34 +++++++++++++++++++++++++++++++---
 1 file changed, 31 insertions(+), 3 deletions(-)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index a4faceeb8c32..6f94e5e7569a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -880,14 +880,42 @@ struct drm_i915_gem_mmap_offset {
 	__u64 extensions;
 };
+
+/**
+ * struct drm_i915_gem_set_domain - Adjust the object's write or read domain, in
+ * preparation for accessing the pages via some CPU domain.
+ *
+ * Specifying a new write or read domain will flush the object out of the
+ * previous domain (if required), before then updating the object's domain
+ * tracking with the new domain.
+ *
+ * Note this might involve waiting for the object first if it is still active on
+ * the GPU.
+ *
+ * Supported values for @read_domains and @write_domain:
+ *
+ * - I915_GEM_DOMAIN_WC: Uncached write-combined domain
+ * - I915_GEM_DOMAIN_CPU: CPU cache domain
+ * - I915_GEM_DOMAIN_GTT: Mappable aperture domain
+ *
+ * All other domains are rejected.
+ */
 struct drm_i915_gem_set_domain {
-	/** Handle for the object */
+	/** @handle: Handle for the object. */
 	__u32 handle;
-	/** New read domains */
+	/**
+	 * @read_domains: New read domains.
+	 */
 	__u32 read_domains;
-	/** New write domain */
+	/**
+	 * @write_domain: New write domain.
+	 *
+	 * Note that having something in the write domain implies it's in the
+	 * read domain, and only that read domain.
+	 */
 	__u32 write_domain;
 };
On 2021-07-05 at 14:53:09 +0100, Matthew Auld wrote:
> Convert all the drm_i915_gem_set_domain bits to proper kernel doc.
LGTM.
Reviewed-by: Ramalingam C <ramalingam.c@intel.com>
The CPU domain should be static for discrete, and on DG1 we don't need any flushing since everything is already coherent, so really all this does is an object wait, for which we have an ioctl. Longer term the desired caching should be an immutable creation time property for the BO, which can be set with something like gem_create_ext.
One other user is iris + userptr, which uses set_domain to probe all the pages to check if the GUP succeeds; however, keeping set_domain around just for that seems rather scuffed. We could equally just submit a dummy batch, which should hopefully be good enough, otherwise adding a new creation time flag for userptr might be an option. Longer term we will also have vm_bind, which should be a nice fit for this, so adding a whole new flag is likely overkill.
v2: add some more kernel doc, also add the implicit rules with caching
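For userspace that only relied on set_domain for the implicit wait, the wait ioctl is the direct replacement on discrete (illustrative sketch only; fd and handle are assumed to be a valid DRM fd and GEM handle):

	struct drm_i915_gem_wait wait = {
		.bo_handle = handle,
		.timeout_ns = -1, /* block until idle */
	};

	/*
	 * set_domain now returns -ENODEV on DG1+; an explicit wait covers
	 * the old use case, and no flushing is needed since everything is
	 * already coherent.
	 */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_WAIT, &wait))
		return -errno;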
Suggested-by: Daniel Vetter <daniel@ffwll.ch>
Signed-off-by: Matthew Auld <matthew.auld@intel.com>
Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@linux.intel.com>
Cc: Jordan Justen <jordan.l.justen@intel.com>
Cc: Kenneth Graunke <kenneth@whitecape.org>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Ramalingam C <ramalingam.c@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_domain.c |  3 +++
 include/uapi/drm/i915_drm.h                | 18 ++++++++++++++++++
 2 files changed, 21 insertions(+)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
index 43004bef55cb..b684a62bf3b0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c
@@ -490,6 +490,9 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
 	u32 write_domain = args->write_domain;
 	int err;
+	if (IS_DGFX(to_i915(dev)))
+		return -ENODEV;
+
 	/* Only handle setting domains to types used by the CPU. */
 	if ((write_domain | read_domains) & I915_GEM_GPU_DOMAINS)
 		return -EINVAL;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 6f94e5e7569a..fd1a9878730c 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -900,6 +900,24 @@ struct drm_i915_gem_mmap_offset {
  *
  * All other domains are rejected.
  *
+ * Note that for discrete, starting from DG1, this is no longer supported, and
+ * is instead rejected. On such platforms the CPU domain is effectively static,
+ * where we also only support a single &drm_i915_gem_mmap_offset cache mode,
+ * which can't be set explicitly and instead depends on the object placements,
+ * as per the below.
+ *
+ * Implicit caching rules, starting from DG1:
+ *
+ * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions)
+ *   contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and
+ *   mapped as write-combined only.
+ *
+ * - Everything else is always allocated and mapped as write-back, with the
+ *   guarantee that everything is also coherent with the GPU.
+ *
+ * Note that this is likely to change in the future again, where we might need
+ * more flexibility on future devices, so making this all explicit as part of a
+ * new &drm_i915_gem_create_ext extension is probable.
  */
 struct drm_i915_gem_set_domain {
 	/** @handle: Handle for the object. */
On 2021-07-05 at 14:53:10 +0100, Matthew Auld wrote:
> The CPU domain should be static for discrete, and on DG1 we don't need
> any flushing since everything is already coherent, so really all this
> does is an object wait, for which we have an ioctl. Longer term the
> desired caching should be an immutable creation time property for the
> BO, which can be set with something like gem_create_ext.
>
> One other user is iris + userptr, which uses set_domain to probe all
> the pages to check if the GUP succeeds; however, keeping set_domain
> around just for that seems rather scuffed. We could equally just submit
> a dummy batch, which should hopefully be good enough, otherwise adding
> a new creation time flag for userptr might be an option. Longer term we
> will also have vm_bind, which should be a nice fit for this, so adding
> a whole new flag is likely overkill.
>
> v2: add some more kernel doc, also add the implicit rules with caching
LGTM
Reviewed-by: Ramalingam C <ramalingam.c@intel.com>
On 2021-07-05 at 14:53:06 +0100, Matthew Auld wrote:
> For discrete, users of pin_map() need to obey the same rules as the TTM
> backend, where we map system-memory-only objects as WB, and everything
> else as WC. The simplest approach for now is to just force the correct
> mapping type as per the new rules for discrete.
Looks good to me.
Reviewed-by: Ramalingam C <ramalingam.c@intel.com>
On Mon, Jul 05, 2021 at 02:53:06PM +0100, Matthew Auld wrote:
> For discrete, users of pin_map() need to obey the same rules as the TTM
> backend, where we map system-memory-only objects as WB, and everything
> else as WC. The simplest approach for now is to just force the correct
> mapping type as per the new rules for discrete.
Huge thanks for all the uapi kerneldoc work you're doing in this series, this should help a lot with umd conversions since we can just point them at the docs and tell them to pls update code.
Yes I know there's no kerneldoc here, but I didn't see the cover letter :-)
Cheers, Daniel