lgtm, I just have a few nits and questions below:
Regardless, this is
Reviewed-by: Umesh Nerlige Ramappa umesh.nerlige.ramappa@intel.com
Umesh
On Tue, Feb 22, 2022 at 01:55:57PM +0000, Tvrtko Ursulin wrote:
From: Tvrtko Ursulin tvrtko.ursulin@intel.com
Use the i915 exported data in /proc/<pid>/fdinfo to show GPU utilization per DRM client.
Example of the output:
intel-gpu-top: Intel Tigerlake (Gen12) @ /dev/dri/card0 - 220/ 221 MHz 70% RC6; 0.62/ 7.08 W; 760 irqs/s
ENGINES BUSY MI_SEMA MI_WAIT Render/3D 23.06% |██████▊ | 0% 0% Blitter 0.00% | | 0% 0% Video 5.40% |█▋ | 0% 0%
VideoEnhance 20.67% |██████ | 0% 0%
PID NAME Render/3D Blitter Video VideoEnhance 3082 mpv | || ||▌ ||██ | 3117 neverball |█▉ || || || | 1 systemd |▍ || || || | 2338 gnome-shell | || || || |
Signed-off-by: Tvrtko Ursulin tvrtko.ursulin@intel.com
man/intel_gpu_top.rst | 4 + tools/intel_gpu_top.c | 801 +++++++++++++++++++++++++++++++++++++++++- tools/meson.build | 2 +- 3 files changed, 804 insertions(+), 3 deletions(-)
diff --git a/man/intel_gpu_top.rst b/man/intel_gpu_top.rst index b3b765b05feb..f4dbfc5b44d9 100644 --- a/man/intel_gpu_top.rst +++ b/man/intel_gpu_top.rst @@ -56,6 +56,10 @@ Supported keys: 'q' Exit from the tool. 'h' Show interactive help. '1' Toggle between aggregated engine class and physical engine mode.
- 'n' Toggle display of numeric client busyness overlay.
- 's' Toggle between sort modes (runtime, total runtime, pid, client id).
- 'i' Toggle display of clients which used no GPU time.
- 'H' Toggle between per PID aggregation and individual clients.
DEVICE SELECTION
diff --git a/tools/intel_gpu_top.c b/tools/intel_gpu_top.c index bc11fce2bb1e..73815cdea8aa 100644 --- a/tools/intel_gpu_top.c +++ b/tools/intel_gpu_top.c @@ -43,8 +43,10 @@ #include <sys/types.h> #include <unistd.h> #include <termios.h> +#include <sys/sysmacros.h>
#include "igt_perf.h" +#include "igt_drm_fdinfo.h"
#define ARRAY_SIZE(arr) (sizeof(arr)/sizeof(arr[0]))
@@ -311,7 +313,8 @@ static int engine_cmp(const void *__a, const void *__b) return a->instance - b->instance; }
-#define is_igpu_pci(x) (strcmp(x, "0000:00:02.0") == 0) +#define IGPU_PCI "0000:00:02.0" +#define is_igpu_pci(x) (strcmp(x, IGPU_PCI) == 0) #define is_igpu(x) (strcmp(x, "i915") == 0)
static struct engines *discover_engines(char *device) @@ -635,6 +638,547 @@ static void pmu_sample(struct engines *engines) } }
+enum client_status {
- FREE = 0, /* mbz */
- ALIVE,
- PROBE
+};
+struct clients;
+struct client {
- struct clients *clients;
- enum client_status status;
- unsigned int id;
- unsigned int pid;
- char name[24];
- char print_name[24];
- unsigned int samples;
- unsigned long total_runtime;
- unsigned long last_runtime;
- unsigned long *val;
- uint64_t *last;
+};
+struct clients {
- unsigned int num_clients;
- unsigned int active_clients;
- unsigned int num_classes;
- struct engine_class *class;
- char pci_slot[64];
- struct client *client;
+};
+#define for_each_client(clients, c, tmp) \
- for ((tmp) = (clients)->num_clients, c = (clients)->client; \
(tmp > 0); (tmp)--, (c)++)
+static struct clients *init_clients(const char *pci_slot) +{
- struct clients *clients;
- clients = malloc(sizeof(*clients));
- if (!clients)
return NULL;
- memset(clients, 0, sizeof(*clients));
- strncpy(clients->pci_slot, pci_slot, sizeof(clients->pci_slot));
- return clients;
+}
+static struct client * +find_client(struct clients *clients, enum client_status status, unsigned int id) +{
- unsigned int start, num;
- struct client *c;
- start = status == FREE ? clients->active_clients : 0; /* Free block at the end. */
- num = clients->num_clients - start;
- for (c = &clients->client[start]; num; c++, num--) {
if (status != c->status)
continue;
if (status == FREE || c->id == id)
return c;
- }
- return NULL;
+}
+static void +update_client(struct client *c, unsigned int pid, char *name, uint64_t val[16]) +{
- unsigned int i;
- if (c->pid != pid)
c->pid = pid;
- if (strcmp(c->name, name)) {
char *p;
strncpy(c->name, name, sizeof(c->name) - 1);
strncpy(c->print_name, name, sizeof(c->print_name) - 1);
p = c->print_name;
while (*p) {
if (!isprint(*p))
*p = '*';
p++;
}
- }
- c->last_runtime = 0;
- c->total_runtime = 0;
assert(c->clients->num_classes <= 16);
- for (i = 0; i < c->clients->num_classes; i++) {
if (val[i] < c->last[i])
continue; /* It will catch up soon. */
c->total_runtime += val[i];
As far as busyness is concerned c->total_runtime += val[i] should be it.
c->val[i] = val[i] - c->last[i];
c->last_runtime += c->val[i];
c->last[i] = val[i];
Can you describe what these 3 lines are doing?
- }
- c->samples++;
- c->status = ALIVE;
+}
+static void +add_client(struct clients *clients, unsigned int id, unsigned int pid,
char *name, uint64_t busy[16])
+{
- struct client *c;
- assert(!find_client(clients, ALIVE, id));
- c = find_client(clients, FREE, 0);
- if (!c) {
unsigned int idx = clients->num_clients;
clients->num_clients += (clients->num_clients + 2) / 2;
clients->client = realloc(clients->client,
clients->num_clients * sizeof(*c));
assert(clients->client);
c = &clients->client[idx];
memset(c, 0, (clients->num_clients - idx) * sizeof(*c));
- }
- c->id = id;
- c->clients = clients;
- c->val = calloc(clients->num_classes, sizeof(c->val));
- c->last = calloc(clients->num_classes, sizeof(c->last));
- assert(c->val && c->last);
- update_client(c, pid, name, busy);
+}
+static void free_client(struct client *c) +{
- free(c->val);
- free(c->last);
- memset(c, 0, sizeof(*c));
+}
+static int client_last_cmp(const void *_a, const void *_b) +{
- const struct client *a = _a;
- const struct client *b = _b;
- long tot_a, tot_b;
- /*
* Sort clients in descending order of runtime in the previous sampling
* period for active ones, followed by inactive. Tie-breaker is client
* id.
*/
- tot_a = a->status == ALIVE ? a->last_runtime : -1;
- tot_b = b->status == ALIVE ? b->last_runtime : -1;
- tot_b -= tot_a;
- if (tot_b > 0)
return 1;
- if (tot_b < 0)
return -1;
- return (int)b->id - a->id;
nit: the compare functions could use a single if, if you agree on just returning tot_b.
if (!tot_b) return (int)b->id - a->id;
return tot_b;
+}
+static int client_total_cmp(const void *_a, const void *_b) +{
- const struct client *a = _a;
- const struct client *b = _b;
- long tot_a, tot_b;
- tot_a = a->status == ALIVE ? a->total_runtime : -1;
- tot_b = b->status == ALIVE ? b->total_runtime : -1;
- tot_b -= tot_a;
- if (tot_b > 0)
return 1;
- if (tot_b < 0)
return -1;
- return (int)b->id - a->id;
+}
+static int client_id_cmp(const void *_a, const void *_b) +{
- const struct client *a = _a;
- const struct client *b = _b;
- int id_a, id_b;
- id_a = a->status == ALIVE ? a->id : -1;
- id_b = b->status == ALIVE ? b->id : -1;
- id_b -= id_a;
- if (id_b > 0)
return 1;
- if (id_b < 0)
return -1;
- return (int)b->id - a->id;
+}
+static int client_pid_cmp(const void *_a, const void *_b) +{
- const struct client *a = _a;
- const struct client *b = _b;
- int pid_a, pid_b;
- pid_a = a->status == ALIVE ? a->pid : INT_MAX;
- pid_b = b->status == ALIVE ? b->pid : INT_MAX;
- pid_b -= pid_a;
- if (pid_b > 0)
return -1;
- if (pid_b < 0)
return 1;
- return (int)a->id - b->id;
+}
+static int (*client_cmp)(const void *, const void *) = client_last_cmp;
+static struct clients *sort_clients(struct clients *clients,
int (*cmp)(const void *, const void *))
+{
- unsigned int active, free;
- struct client *c;
- int tmp;
- if (!clients)
return clients;
- qsort(clients->client, clients->num_clients, sizeof(*clients->client),
cmp);
- /* Trim excessive array space. */
- active = 0;
- for_each_client(clients, c, tmp) {
if (c->status != ALIVE)
break; /* Active clients are first in the array. */
active++;
- }
- clients->active_clients = active;
- free = clients->num_clients - active;
- if (free > clients->num_clients / 2) {
active = clients->num_clients - free / 2;
if (active != clients->num_clients) {
clients->num_clients = active;
clients->client = realloc(clients->client,
clients->num_clients *
sizeof(*c));
}
- }
- return clients;
+}
+static bool aggregate_pids = true;
+static struct clients *display_clients(struct clients *clients) +{
- struct client *ac, *c, *cp = NULL;
- struct clients *aggregated;
- int tmp, num = 0;
- if (!aggregate_pids)
goto out;
- /* Sort by pid first to make it easy to aggregate while walking. */
- sort_clients(clients, client_pid_cmp);
- aggregated = calloc(1, sizeof(*clients));
- assert(aggregated);
- ac = calloc(clients->num_clients, sizeof(*c));
- assert(ac);
- aggregated->num_classes = clients->num_classes;
- aggregated->class = clients->class;
- aggregated->client = ac;
- for_each_client(clients, c, tmp) {
unsigned int i;
if (c->status == FREE)
break;
assert(c->status == ALIVE);
if ((cp && c->pid != cp->pid) || !cp) {
same as: if (!cp || c->pid != cp->pid), fine either ways
ac = &aggregated->client[num++];
/* New pid. */
ac->clients = aggregated;
ac->status = ALIVE;
ac->id = -c->pid;
ac->pid = c->pid;
strcpy(ac->name, c->name);
strcpy(ac->print_name, c->print_name);
ac->val = calloc(clients->num_classes,
sizeof(ac->val[0]));
assert(ac->val);
ac->samples = 1;
}
cp = c;
if (c->samples < 2)
continue;
ac->samples = 2; /* All what matters for display. */
ac->total_runtime += c->total_runtime;
ac->last_runtime += c->last_runtime;
for (i = 0; i < clients->num_classes; i++)
ac->val[i] += c->val[i];
- }
- aggregated->num_clients = num;
- aggregated->active_clients = num;
- clients = aggregated;
+out:
- return sort_clients(clients, client_cmp);
+}
+static void free_clients(struct clients *clients) +{
- struct client *c;
- unsigned int tmp;
- for_each_client(clients, c, tmp) {
free(c->val);
free(c->last);
- }
- free(clients->client);
- free(clients);
+}
+static bool is_drm_fd(DIR *fd_dir, const char *name) +{
- struct stat stat;
- int ret;
- ret = fstatat(dirfd(fd_dir), name, &stat, 0);
- return ret == 0 &&
(stat.st_mode & S_IFMT) == S_IFCHR &&
major(stat.st_rdev) == 226;
+}
+static bool get_task_name(const char *buffer, char *out, unsigned long sz) +{
- char *s = index(buffer, '(');
- char *e = rindex(buffer, ')');
- unsigned int len;
- if (!s || !e)
return false;
maybe assert(e > s);
- len = --e - ++s + 1;
len = e - ++s;
Thanks, Umesh