New subject: [PATCH 01/10] drm/doc: add rfc section for small BAR uapi

25 May 2022

* @probed_size: Memory probed by the driver (-1 = unknown)

*

* Note that it should not be possible to ever encounter a zero value

* here, also note that no current region type will ever return -1 here.

* Although for future region types, this might be a possibility. The

* same applies to the other size fields.

*/

* @unallocated_size: Estimate of memory remaining (-1 = unknown)

*

* Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable accounting.

* Without this (or if this is an older kernel) the value here will

* always equal the @probed_size. Note this is only currently tracked

* for I915_MEMORY_CLASS_DEVICE regions (for other types the value here

* will always equal the @probed_size).

*/

/** @rsvd1: MBZ */

__u64 rsvd1[8];

struct {

	/**

	 * @probed_cpu_visible_size: Memory probed by the driver

	 * that is CPU accessible. (-1 = unknown).

	 *

	 * This will always be <= @probed_size, and the

	 * remainder (if there is any) will not be CPU

	 * accessible.

	 *

	 * On systems without small BAR, the @probed_size will

	 * always equal the @probed_cpu_visible_size, since all

	 * of it will be CPU accessible.

	 *

	 * Note this is only tracked for

	 * I915_MEMORY_CLASS_DEVICE regions (for other types the

	 * value here will always equal the @probed_size).

	 *

	 * Note that if the value returned here is zero, then

	 * this must be an old kernel which lacks the relevant

	 * small-bar uAPI support (including

	 * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS), but on

	 * such systems we should never actually end up with a

	 * small BAR configuration, assuming we are able to load

	 * the kernel module. Hence it should be safe to treat

	 * this the same as when @probed_cpu_visible_size ==

	 * @probed_size.

	 */

	__u64 probed_cpu_visible_size;

	/**

	 * @unallocated_cpu_visible_size: Estimate of CPU

	 * visible memory remaining (-1 = unknown).

	 *

	 * Note this is only tracked for

	 * I915_MEMORY_CLASS_DEVICE regions (for other types the

	 * value here will always equal the

	 * @probed_cpu_visible_size).

	 *

	 * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable

	 * accounting.  Without this the value here will always

	 * equal the @probed_cpu_visible_size. Note this is only

	 * currently tracked for I915_MEMORY_CLASS_DEVICE

	 * regions (for other types the value here will also

	 * always equal the @probed_cpu_visible_size).

	 *

	 * If this is an older kernel the value here will be

	 * zero, see also @probed_cpu_visible_size.

	 */

	__u64 unallocated_cpu_visible_size;

};

* @size: Requested size for the object.

*

* The (page-aligned) allocated size for the object will be returned.

*

* Note that for some devices we might have further minimum

* page-size restrictions (larger than 4K), like for device local-memory.

* However in general the final size here should always reflect any

* rounding up, if for example using the I915_GEM_CREATE_EXT_MEMORY_REGIONS

* extension to place the object in device local-memory. The kernel will

* always select the largest minimum page-size for the set of possible

* placements as the value to use when rounding up the @size.

*/

* @handle: Returned handle for the object.

*

* Object handles are nonzero.

*/

* @flags: Optional flags.

*

* Supported values:

*

* I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that

* the object will need to be accessed via the CPU.

*

* Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and only

* strictly required on configurations where some subset of the device

* memory is directly visible/mappable through the CPU (which we also

* call small BAR), like on some DG2+ systems. Note that this is quite

* undesirable, but due to various factors like the client CPU, BIOS etc

* it's something we can expect to see in the wild. See

* &__drm_i915_memory_region_info.probed_cpu_visible_size for how to

* determine if this applies to the system.

*

* Note that one of the placements MUST be I915_MEMORY_CLASS_SYSTEM, to

* ensure the kernel can always spill the allocation to system memory,

* if the object can't be allocated in the mappable part of

* I915_MEMORY_CLASS_DEVICE.

*

* Also note that since the kernel only supports flat-CCS on objects

* that can *only* be placed in I915_MEMORY_CLASS_DEVICE, we therefore

* don't support I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS together with

* flat-CCS.

*

* Without this hint, the kernel will assume that non-mappable

* I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the

* kernel can still migrate the object to the mappable part, as a last

* resort, if userspace ever CPU faults this object, but this might be

* expensive, and so ideally should be avoided.

*

* On older kernels which lack the relevant small-bar uAPI support (see

* also &__drm_i915_memory_region_info.probed_cpu_visible_size),

* usage of the flag will result in an error, but it should NEVER be

* possible to end up with a small BAR configuration, assuming we can

* also successfully load the i915 kernel module. In such cases the

* entire I915_MEMORY_CLASS_DEVICE region will be CPU accessible, and as

* such there are zero restrictions on where the object can be placed.

*/

* @extensions: The chain of extensions to apply to this object.

*

* This will be useful in the future when we need to support several

* different extensions, and we need to apply more than one when

* creating the object. See struct i915_user_extension.

*

* If we don't supply any extensions then we get the same old gem_create

* behaviour.

*

* For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see

* struct drm_i915_gem_create_ext_memory_regions.

*

* For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see

* struct drm_i915_gem_create_ext_protected_content.

*/

  i915_scheduler.rst

if (mr->type == INTEL_MEMORY_LOCAL)

	info.probed_cpu_visible_size = mr->io_size;

else

	info.probed_cpu_visible_size = mr->total;

struct drm_i915_gem_memory_class_instance region;

union {

	__u32 rsvd0;

	__u32 new_thing1;

};

...

union {

	__u64 rsvd1[8];

	struct {

		__u64 new_thing2;

		__u64 new_thing3;

		...

	};

};

* @probed_size: Memory probed by the driver (-1 = unknown)

*

* Note that it should not be possible to ever encounter a zero value

* here, also note that no current region type will ever return -1 here.

* Although for future region types, this might be a possibility. The

* same applies to the other size fields.

*/

/** @rsvd1: MBZ */

__u64 rsvd1[8];

struct {

	/**

	 * @probed_cpu_visible_size: Memory probed by the driver

	 * that is CPU accessible. (-1 = unknown).

	 *

	 * This will always be <= @probed_size, and the

	 * remainder (if there is any) will not be CPU

	 * accessible.

	 *

	 * On systems without small BAR, the @probed_size will

	 * always equal the @probed_cpu_visible_size, since all

	 * of it will be CPU accessible.

	 *

	 * Note this is only tracked for

	 * I915_MEMORY_CLASS_DEVICE regions (for other types the

	 * value here will always equal the @probed_size).

	 *

	 * Note that if the value returned here is zero, then

	 * this must be an old kernel which lacks the relevant

	 * small-bar uAPI support (including

	 * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS), but on

	 * such systems we should never actually end up with a

	 * small BAR configuration, assuming we are able to load

	 * the kernel module. Hence it should be safe to treat

	 * this the same as when @probed_cpu_visible_size ==

	 * @probed_size.

	 */

	__u64 probed_cpu_visible_size;

};

info.unallocated_size = mr->avail;

if (perfmon_capable()) {

	intel_memory_region_avail(mr,

				  &info.unallocated_size,

				  &info.unallocated_cpu_visible_size);

} else {

	info.unallocated_size = info.probed_size;

	info.unallocated_cpu_visible_size =

		info.probed_cpu_visible_size;

}

	     u64 *avail, u64 *visible_avail)

	      u64 *avail, u64 *avail_visible);

	       u64 *avail, u64 *visible_avail)

i915_ttm_buddy_man_avail(mr->region_private,

			 avail, visible_avail);

*avail <<= PAGE_SHIFT;

*visible_avail <<= PAGE_SHIFT;

*avail = mr->total;

*visible_avail = mr->total;

	       u64 *avail, u64 *visible_avail);

* @unallocated_size: Estimate of memory remaining (-1 = unknown)

*

* Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable accounting.

* Without this (or if this is an older kernel) the value here will

* always equal the @probed_size. Note this is only currently tracked

* for I915_MEMORY_CLASS_DEVICE regions (for other types the value here

* will always equal the @probed_size).

*/

	/**

	 * @unallocated_cpu_visible_size: Estimate of CPU

	 * visible memory remaining (-1 = unknown).

	 *

	 * Note this is only tracked for

	 * I915_MEMORY_CLASS_DEVICE regions (for other types the

	 * value here will always equal the

	 * @probed_cpu_visible_size).

	 *

	 * Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable

	 * accounting.  Without this the value here will always

	 * equal the @probed_cpu_visible_size. Note this is only

	 * currently tracked for I915_MEMORY_CLASS_DEVICE

	 * regions (for other types the value here will also

	 * always equal the @probed_cpu_visible_size).

	 *

	 * If this is an older kernel the value here will be

	 * zero, see also @probed_cpu_visible_size.

	 */

	__u64 unallocated_cpu_visible_size;

drm_printf(printer, "total:%pa, available:%pa bytes\n",

	   &mr->total, &mr->avail);

drm_printf(printer, "total:%pa bytes\n", &mr->total);

* TODO: add a userspace hint to force CPU_ACCESS for the object, which

* can override this.

*/

   ext_data.placements[0]->type != INTEL_MEMORY_SYSTEM)

ext_data.flags |= I915_BO_ALLOC_GPU_ONLY;

if (ext_data.n_placements == 1)

	return -EINVAL;

/*

 * We always need to be able to spill to system memory, if we

 * can't place in the mappable part of LMEM.

 */

if (!(ext_data.placement_mask & BIT(INTEL_REGION_SMEM)))

	return -EINVAL;

if (ext_data.n_placements > 1 ||

    ext_data.placements[0]->type != INTEL_MEMORY_SYSTEM)

	ext_data.flags |= I915_BO_ALLOC_GPU_ONLY;

*

* such as DG2.

* such as DG2. The kernel will always select the largest minimum

* page-size for the set of possible placements as the value to use when

* rounding up the @size.

* @flags: Optional flags.

*

* Supported values:

*

* I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that

* the object will need to be accessed via the CPU.

*

* Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and only

* strictly required on configurations where some subset of the device

* memory is directly visible/mappable through the CPU (which we also

* call small BAR), like on some DG2+ systems. Note that this is quite

* undesirable, but due to various factors like the client CPU, BIOS etc

* it's something we can expect to see in the wild. See

* &drm_i915_memory_region_info.probed_cpu_visible_size for how to

* determine if this applies to the system.

*

* Note that one of the placements MUST be I915_MEMORY_CLASS_SYSTEM, to

* ensure the kernel can always spill the allocation to system memory,

* if the object can't be allocated in the mappable part of

* I915_MEMORY_CLASS_DEVICE.

*

* Also note that since the kernel only supports flat-CCS on objects

* that can *only* be placed in I915_MEMORY_CLASS_DEVICE, we therefore

* don't support I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS together with

* flat-CCS.

*

* Without this hint, the kernel will assume that non-mappable

* I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the

* kernel can still migrate the object to the mappable part, as a last

* resort, if userspace ever CPU faults this object, but this might be

* expensive, and so ideally should be avoided.

*

* On older kernels which lack the relevant small-bar uAPI support (see

* also &drm_i915_memory_region_info.probed_cpu_visible_size),

* usage of the flag will result in an error, but it should NEVER be

* possible to end up with a small BAR configuration, assuming we can

* also successfully load the i915 kernel module. In such cases the

* entire I915_MEMORY_CLASS_DEVICE region will be CPU accessible, and as

* such there are zero restrictions on where the object can be placed.

*/

* TODO: add a userspace hint to force CPU_ACCESS for the object, which

* can override this.

*/

   ext_data.placements[0]->type != INTEL_MEMORY_SYSTEM)

ext_data.flags |= I915_BO_ALLOC_GPU_ONLY;

if (ext_data.n_placements == 1)

	return -EINVAL;

/*

 * We always need to be able to spill to system memory, if we

 * can't place in the mappable part of LMEM.

 */

if (!(ext_data.placement_mask & BIT(INTEL_REGION_SMEM)))

	return -EINVAL;

if (ext_data.n_placements > 1 ||

    ext_data.placements[0]->type != INTEL_MEMORY_SYSTEM)

	ext_data.flags |= I915_BO_ALLOC_GPU_ONLY;

*

* such as DG2.

* such as DG2. The kernel will always select the largest minimum

* page-size for the set of possible placements as the value to use when

* rounding up the @size.

* @flags: Optional flags.

*

* Supported values:

*

* I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that

* the object will need to be accessed via the CPU.

*

* Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and only

* strictly required on configurations where some subset of the device

* memory is directly visible/mappable through the CPU (which we also

* call small BAR), like on some DG2+ systems. Note that this is quite

* undesirable, but due to various factors like the client CPU, BIOS etc

* it's something we can expect to see in the wild. See

* &drm_i915_memory_region_info.probed_cpu_visible_size for how to

* determine if this applies to the system.

*

* Note that one of the placements MUST be I915_MEMORY_CLASS_SYSTEM, to

* ensure the kernel can always spill the allocation to system memory,

* if the object can't be allocated in the mappable part of

* I915_MEMORY_CLASS_DEVICE.

*

* Also note that since the kernel only supports flat-CCS on objects

* that can *only* be placed in I915_MEMORY_CLASS_DEVICE, we therefore

* don't support I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS together with

* flat-CCS.

*

* Without this hint, the kernel will assume that non-mappable

* I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the

* kernel can still migrate the object to the mappable part, as a last

* resort, if userspace ever CPU faults this object, but this might be

* expensive, and so ideally should be avoided.

*

* On older kernels which lack the relevant small-bar uAPI support (see

* also &drm_i915_memory_region_info.probed_cpu_visible_size),

* usage of the flag will result in an error, but it should NEVER be

* possible to end up with a small BAR configuration, assuming we can

* also successfully load the i915 kernel module. In such cases the

* entire I915_MEMORY_CLASS_DEVICE region will be CPU accessible, and as

* such there are zero restrictions on where the object can be placed.

*/

for_each_sgt_daddr(dma, iter, vma_res->bi.pages) {

	dma_addr_t offset = dma - mem->region.start;
void __iomem *s;

	s = io_mapping_map_wc(&mem->iomap,

			      dma - mem->region.start,

			      PAGE_SIZE);

	if (offset + PAGE_SIZE > mem->io_size) {

		ret = -EINVAL;

		break;

	}

	s = io_mapping_map_wc(&mem->iomap, offset, PAGE_SIZE);
ret = compress_page(compress,
		    (void __force *)s, dst,
		    true);

for_each_sgt_daddr(dma, iter, vma_res->bi.pages) {

	dma_addr_t offset = dma - mem->region.start;
void __iomem *s;

	s = io_mapping_map_wc(&mem->iomap,

			      dma - mem->region.start,

			      PAGE_SIZE);

	if (offset + PAGE_SIZE > mem->io_size) {

		ret = -EINVAL;

		break;

	}

	s = io_mapping_map_wc(&mem->iomap, offset, PAGE_SIZE);
ret = compress_page(compress,
		    (void __force *)s, dst,
		    true);

if (i915_gem_context_is_recoverable(eb->gem_context) &&

    IS_DGFX(eb->i915))

	return -EINVAL;

/* FIXME: Remove this when we have small-bar enabled */

if (pci_resource_len(pdev, 2) < lmem_size) {

	drm_err(&i915->drm, "System requires small-BAR support, which is currently unsupported on this kernel\n");

	return ERR_PTR(-EINVAL);

}

drm_info(&i915->drm, "Using a reduced BAR size of %lluMiB. Consider enabling the full BAR size if available in the BIOS.\n",

	 (u64)io_size >> 20);

[PATCH 00/10] small BAR uapi bits