Am 26.02.2014 19:25, schrieb Marek Olšák:
From: Marek Olšák marek.olsak@amd.com
Userspace should set the first 4 bits of drm_radeon_cs_reloc::flags to a number from 0 to 15. The higher the number, the higher the priority, which means a buffer with a higher number will be validated sooner.
The old behavior is preserved: Buffers used for write are prioritized over read-only buffers if the userspace doesn't set the number.
v2: add buffers to buckets directly, then concatenate them
Signed-off-by: Marek Olšák marek.olsak@amd.com
drivers/gpu/drm/radeon/radeon.h | 2 +- drivers/gpu/drm/radeon/radeon_cs.c | 68 ++++++++++++++++++++++++++++++++-- drivers/gpu/drm/radeon/radeon_object.c | 10 ----- drivers/gpu/drm/radeon/radeon_object.h | 2 - 4 files changed, 66 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index d37a57a..f7a3174 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -481,7 +481,7 @@ struct radeon_bo_list { struct ttm_validate_buffer tv; struct radeon_bo *bo; uint64_t gpu_offset;
- bool written;
- unsigned priority;
As far as I can see priority is only used temporary in radeon_cs_parser_relocs, so we probably don't need it in here.
Apart from that patch looks good to me, Christian.
unsigned domain; unsigned alt_domain; u32 tiling_flags; diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c index d49a3f7..779fa02 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -31,10 +31,57 @@ #include "radeon.h" #include "radeon_trace.h"
+#define RADEON_CS_MAX_PRIORITY 33u +#define RADEON_CS_MAX_BUCKET (RADEON_CS_MAX_PRIORITY / 2u) +#define RADEON_CS_NUM_BUCKETS (RADEON_CS_MAX_BUCKET + 1u)
+/* This is based on the bucket sort with O(n) time complexity.
- Relocations are sorted from the highest to the lowest priority as
- they are added.
- Odd numbers are added at the head of a bucket and even numbers are
- added at the tail, therefore all buckets are always sorted. */
+struct radeon_cs_buckets {
- struct list_head bucket[RADEON_CS_NUM_BUCKETS];
+};
+static void radeon_cs_buckets_init(struct radeon_cs_buckets *b) +{
- unsigned i;
- for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
INIT_LIST_HEAD(&b->bucket[i]);
+}
+static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
struct radeon_bo_list *reloc)
+{
- unsigned priority = reloc->priority;
- unsigned i = min(priority / 2u, RADEON_CS_MAX_BUCKET);
- if (priority % 2 == 1) {
list_add(&reloc->tv.head, &b->bucket[i]);
- } else {
list_add_tail(&reloc->tv.head, &b->bucket[i]);
- }
+}
+static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
struct list_head *out_list)
+{
- unsigned i;
- /* Connect the sorted buckets in the output list. */
- for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
list_splice(&b->bucket[i], out_list);
- }
+}
- static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) { struct drm_device *ddev = p->rdev->ddev; struct radeon_cs_chunk *chunk;
- struct radeon_cs_buckets buckets; unsigned i, j; bool duplicate;
@@ -53,6 +100,9 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) if (p->relocs == NULL) { return -ENOMEM; }
- radeon_cs_buckets_init(&buckets);
- for (i = 0; i < p->nrelocs; i++) { struct drm_radeon_cs_reloc *r;
@@ -80,7 +130,15 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) p->relocs_ptr[i] = &p->relocs[i]; p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj); p->relocs[i].lobj.bo = p->relocs[i].robj;
p->relocs[i].lobj.written = !!r->write_domain;
/* The userspace buffer priorities are from 0 to 15. A higher
* number means the buffer is more important.
* Also, the buffers used for write have a higher priority than
* the buffers used for read only, which doubles the range
* to 0 to 31. Numbers 32 and 33 are reserved for the kernel
* driver.
*/
p->relocs[i].lobj.priority = (r->flags & 0xf) * 2 + !!r->write_domain;
/* the first reloc of an UVD job is the msg and that must be in VRAM, also but everything into VRAM on AGP cards to avoid
@@ -94,6 +152,8 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) p->relocs[i].lobj.alt_domain = RADEON_GEM_DOMAIN_VRAM;
/* prioritize this over any other relocation */
} else { uint32_t domain = r->write_domain ? r->write_domain : r->read_domains;p->relocs[i].lobj.priority = 32;
@@ -107,9 +167,11 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p) p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo; p->relocs[i].handle = r->handle;
radeon_bo_list_add_object(&p->relocs[i].lobj,
&p->validated);
}radeon_cs_buckets_add(&buckets, &p->relocs[i].lobj);
- radeon_cs_buckets_get_list(&buckets, &p->validated);
- return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring); }
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c index d676ee2..19042ae 100644 --- a/drivers/gpu/drm/radeon/radeon_object.c +++ b/drivers/gpu/drm/radeon/radeon_object.c @@ -368,16 +368,6 @@ void radeon_bo_fini(struct radeon_device *rdev) arch_phys_wc_del(rdev->mc.vram_mtrr); }
-void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
struct list_head *head)
-{
- if (lobj->written) {
list_add(&lobj->tv.head, head);
- } else {
list_add_tail(&lobj->tv.head, head);
- }
-}
- int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, struct list_head *head, int ring) {
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h index a9a8c11..6c3ca9e 100644 --- a/drivers/gpu/drm/radeon/radeon_object.h +++ b/drivers/gpu/drm/radeon/radeon_object.h @@ -138,8 +138,6 @@ extern int radeon_bo_evict_vram(struct radeon_device *rdev); extern void radeon_bo_force_delete(struct radeon_device *rdev); extern int radeon_bo_init(struct radeon_device *rdev); extern void radeon_bo_fini(struct radeon_device *rdev); -extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket, struct list_head *head, int ring); extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,struct list_head *head);