Hello everyone,
I'm currently working on the issue that when device drivers allocate memory on behalf of an application the OOM killer usually doesn't knew about that unless the application also get this memory mapped into their address space.
This is especially annoying for graphics drivers where a lot of the VRAM usually isn't CPU accessible and so doesn't make sense to map into the address space of the process using it.
The problem now is that when an application starts to use a lot of VRAM those buffers objects sooner or later get swapped out to system memory, but when we now run into an out of memory situation the OOM killer obviously doesn't knew anything about that memory and so usually kills the wrong process.
The following set of patches tries to address this problem by introducing a per file OOM badness score, which device drivers can use to give the OOM killer a hint how many resources are bound to a file descriptor so that it can make better decisions which process to kill.
So question at every one: What do you think about this approach?
My biggest concern right now is the patches are messing with a core kernel structure (adding a field to struct file). Any better idea? I'm considering to put a callback into file_ops instead.
Best regards and feel free to tear this idea apart, Christian.
From: Christian König christian.koenig@amd.com
This allows device drivers to specify an additional badness for the OOM and low memory killer when they allocate memory on behalf of userspace.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Alex Deucher alexander.deucher@amd.com --- fs/file_table.c | 1 + include/linux/fs.h | 1 + 2 files changed, 2 insertions(+)
diff --git a/fs/file_table.c b/fs/file_table.c index 294174d..23b797d 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -136,6 +136,7 @@ struct file *get_empty_filp(void) spin_lock_init(&f->f_lock); mutex_init(&f->f_pos_lock); eventpoll_init_file(f); + atomic_long_set(&f->f_oom_badness, 0); /* f->f_version: 0 */ return f;
diff --git a/include/linux/fs.h b/include/linux/fs.h index 35ec87e..5a5d20a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -861,6 +861,7 @@ struct file { struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; + atomic_long_t f_oom_badness; } __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
struct file_handle {
From: Christian König christian.koenig@amd.com
Large amounts of VRAM are usually not CPU accessible, so they are not mapped into the processes address space. But since the device drivers usually support swapping buffers from VRAM to system memory we can still run into an out of memory situation when userspace starts to allocate to much.
This patch gives the OOM and lower memory killer another hint which process is holding how many resources.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Alex Deucher alexander.deucher@amd.com --- drivers/gpu/drm/drm_gem.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 16a1647..17d14c1 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -282,6 +282,9 @@ drm_gem_handle_delete(struct drm_file *filp, u32 handle)
if (dev->driver->gem_close_object) dev->driver->gem_close_object(obj, filp); + + atomic_long_sub(obj->size >> PAGE_SHIFT, &filp->filp->f_oom_badness); + drm_gem_object_handle_unreference_unlocked(obj);
return 0; @@ -358,6 +361,9 @@ drm_gem_handle_create_tail(struct drm_file *file_priv, } }
+ atomic_long_add(obj->size >> PAGE_SHIFT, + &file_priv->filp->f_oom_badness); + return 0; }
@@ -717,6 +723,9 @@ drm_gem_object_release_handle(int id, void *ptr, void *data) if (dev->driver->gem_close_object) dev->driver->gem_close_object(obj, file_priv);
+ atomic_long_sub(obj->size >> PAGE_SHIFT, + &file_priv->filp->f_oom_badness); + drm_gem_object_handle_unreference_unlocked(obj);
return 0;
From: Christian König christian.koenig@amd.com
Try to make better decisions which process to kill based on per file OOM badness.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Alex Deucher alexander.deucher@amd.com --- mm/oom_kill.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+)
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 2b665da..4bcb3f4 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -35,6 +35,7 @@ #include <linux/freezer.h> #include <linux/ftrace.h> #include <linux/ratelimit.h> +#include <linux/fdtable.h>
#define CREATE_TRACE_POINTS #include <trace/events/oom.h> @@ -138,6 +139,19 @@ static bool oom_unkillable_task(struct task_struct *p, }
/** + * oom_file_badness - add per file badness + * @points: pointer to summed up badness points + * @file: tasks open file + * @n: file descriptor id (unused) + */ +static int oom_file_badness(const void *points, struct file *file, unsigned n) +{ + *((long*)points) += atomic_long_read(&file->f_oom_badness); + + return 0; +} + +/** * oom_badness - heuristic function to determine which candidate task to kill * @p: task struct of which task we should calculate * @totalpages: total present RAM allowed for page allocation @@ -171,6 +185,12 @@ unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, */ points = get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS) + atomic_long_read(&p->mm->nr_ptes) + mm_nr_pmds(p->mm); + + /* + * Add how much memory a task uses in opened files, e.g. device drivers. + */ + iterate_fd(p->files, 0, oom_file_badness, &points); + task_unlock(p);
/*
From: Christian König christian.koenig@amd.com
Try to make better decisions which process to kill based on per file OOM badness.
Signed-off-by: Christian König christian.koenig@amd.com Reviewed-by: Alex Deucher alexander.deucher@amd.com --- drivers/staging/android/lowmemorykiller.c | 9 +++++++++ 1 file changed, 9 insertions(+)
diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index feafa17..baefe8f 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -41,6 +41,7 @@ #include <linux/rcupdate.h> #include <linux/profile.h> #include <linux/notifier.h> +#include <linux/fdtable.h>
static uint32_t lowmem_debug_level = 1; static short lowmem_adj[6] = { @@ -75,6 +76,13 @@ static unsigned long lowmem_count(struct shrinker *s, global_page_state(NR_INACTIVE_FILE); }
+static int lowmem_file_badness(const void *tasksize, struct file *file, unsigned n) +{ + *((int*)tasksize) += atomic_long_read(&file->f_oom_badness); + + return 0; +} + static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) { struct task_struct *tsk; @@ -139,6 +147,7 @@ static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc) continue; } tasksize = get_mm_rss(p->mm); + iterate_fd(p->files, 0, lowmem_file_badness, &tasksize); task_unlock(p); if (tasksize <= 0) continue;
dri-devel@lists.freedesktop.org