On Wed, Jul 21, 2021 at 10:17 PM Jason Ekstrand <jason@jlekstrand.net> wrote:
On Wed, Jul 21, 2021 at 1:32 PM Daniel Vetter <daniel.vetter@ffwll.ch> wrote:
This essentially reverts
commit 84a1074920523430f9dc30ff907f4801b4820072
Author: Chris Wilson <chris@chris-wilson.co.uk>
Date:   Wed Jan 24 11:36:08 2018 +0000

    drm/i915: Shrink the GEM kmem_caches upon idling
mm/vmscan.c:do_shrink_slab() is a thing, if there's an issue with it then we need to fix that there, not hand-roll our own slab shrinking code in i915.
Noticed while reviewing a patch set from Jason to fix up some issues in our i915_init() and i915_exit() module load/cleanup code. Now that i915_globals.c isn't any different than normal init/exit functions, we should convert them over to one unified table and remove i915_globals.[hc] entirely.
Mind throwing in a comment somewhere about how i915 is one of only two users of kmem_cache_shrink() in the entire kernel? That also seems to be pretty good evidence that it's not useful.
I missed one, there's also one in kunit (I think it just got in, it's from this year at least per the commit). That one actually seems legit: it's a selftest for some statistics around slabs, I think, so it has a legit reason to carefully control the state and trim anything that just hangs around.
I'll add something and push when CI approves.
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Feel free to land at-will and I'll deal with merge conflicts on my end.
I think if we can land this, then yours, then I type the conversion to explicit init/exit and we're done. -Daniel
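(For context, a rough sketch of the kind of unified init/exit table that conversion could look like -- purely illustrative, with hypothetical names, not the actual follow-up series:)

/*
 * Hypothetical sketch only: assumes the per-subsystem init/exit functions
 * (i915_global_active_init()/exit() and friends) get exported from their
 * respective files instead of being registered on the globals list.
 */
static const struct {
	int (*init)(void);
	void (*exit)(void);
} init_funcs[] = {
	{ i915_global_active_init, i915_global_active_exit },
	{ i915_global_context_init, i915_global_context_exit },
	{ i915_global_gem_context_init, i915_global_gem_context_exit },
	{ i915_global_objects_init, i915_global_objects_exit },
	{ i915_global_request_init, i915_global_request_exit },
	{ i915_global_scheduler_init, i915_global_scheduler_exit },
	{ i915_global_vma_init, i915_global_vma_exit },
};

static int __init i915_init(void)
{
	int err, i;

	for (i = 0; i < ARRAY_SIZE(init_funcs); i++) {
		err = init_funcs[i].init();
		if (err) {
			/* unwind the entries that already succeeded */
			while (i--)
				init_funcs[i].exit();
			return err;
		}
	}

	return 0;
}

static void __exit i915_exit(void)
{
	int i;

	for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--)
		init_funcs[i].exit();
}

The existing module_init()/module_exit() hooks would keep pointing at i915_init()/i915_exit(); only the hand-rolled globals list goes away.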
Cc: David Airlie <airlied@linux.ie>
Cc: Jason Ekstrand <jason@jlekstrand.net>
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c |  6 --
 drivers/gpu/drm/i915/gem/i915_gem_object.c  |  6 --
 drivers/gpu/drm/i915/gt/intel_context.c     |  6 --
 drivers/gpu/drm/i915/gt/intel_gt_pm.c       |  4 -
 drivers/gpu/drm/i915/i915_active.c          |  6 --
 drivers/gpu/drm/i915/i915_globals.c         | 95 ---------------------
 drivers/gpu/drm/i915/i915_globals.h         |  3 -
 drivers/gpu/drm/i915/i915_request.c         |  7 --
 drivers/gpu/drm/i915/i915_scheduler.c       |  7 --
 drivers/gpu/drm/i915/i915_vma.c             |  6 --
 10 files changed, 146 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 7d6f52d8a801..bf2a2319353a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -2280,18 +2280,12 @@ i915_gem_engines_iter_next(struct i915_gem_engines_iter *it)
 #include "selftests/i915_gem_context.c"
 #endif
 
-static void i915_global_gem_context_shrink(void)
-{
-	kmem_cache_shrink(global.slab_luts);
-}
-
 static void i915_global_gem_context_exit(void)
 {
 	kmem_cache_destroy(global.slab_luts);
 }
 
 static struct i915_global_gem_context global = { {
-	.shrink = i915_global_gem_context_shrink,
 	.exit = i915_global_gem_context_exit,
 } };
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 9da7b288b7ed..5c21cff33199 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -664,18 +664,12 @@ void i915_gem_init__objects(struct drm_i915_private *i915)
 	INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
 }
 
-static void i915_global_objects_shrink(void)
-{
-	kmem_cache_shrink(global.slab_objects);
-}
-
 static void i915_global_objects_exit(void)
 {
 	kmem_cache_destroy(global.slab_objects);
 }
 
 static struct i915_global_object global = { {
-	.shrink = i915_global_objects_shrink,
 	.exit = i915_global_objects_exit,
 } };
 
diff --git a/drivers/gpu/drm/i915/gt/intel_context.c b/drivers/gpu/drm/i915/gt/intel_context.c
index bd63813c8a80..c1338441cc1d 100644
--- a/drivers/gpu/drm/i915/gt/intel_context.c
+++ b/drivers/gpu/drm/i915/gt/intel_context.c
@@ -398,18 +398,12 @@ void intel_context_fini(struct intel_context *ce)
 	i915_active_fini(&ce->active);
 }
 
-static void i915_global_context_shrink(void)
-{
-	kmem_cache_shrink(global.slab_ce);
-}
-
 static void i915_global_context_exit(void)
 {
 	kmem_cache_destroy(global.slab_ce);
 }
 
 static struct i915_global_context global = { {
-	.shrink = i915_global_context_shrink,
 	.exit = i915_global_context_exit,
 } };
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index aef3084e8b16..d86825437516 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -67,8 +67,6 @@ static int __gt_unpark(struct intel_wakeref *wf)
 
 	GT_TRACE(gt, "\n");
 
-	i915_globals_unpark();
-
 	/*
 	 * It seems that the DMC likes to transition between the DC states a lot
 	 * when there are no connected displays (no active power domains) during
@@ -116,8 +114,6 @@ static int __gt_park(struct intel_wakeref *wf)
 	GEM_BUG_ON(!wakeref);
 	intel_display_power_put_async(i915, POWER_DOMAIN_GT_IRQ, wakeref);
 
-	i915_globals_park();
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index b1aa1c482c32..91723123ae9f 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -1176,18 +1176,12 @@ struct i915_active *i915_active_create(void)
 #include "selftests/i915_active.c"
 #endif
 
-static void i915_global_active_shrink(void)
-{
-	kmem_cache_shrink(global.slab_cache);
-}
-
 static void i915_global_active_exit(void)
 {
 	kmem_cache_destroy(global.slab_cache);
 }
 
 static struct i915_global_active global = { {
-	.shrink = i915_global_active_shrink,
 	.exit = i915_global_active_exit,
 } };
 
diff --git a/drivers/gpu/drm/i915/i915_globals.c b/drivers/gpu/drm/i915/i915_globals.c
index 77f1911c463b..7fe2e503897b 100644
--- a/drivers/gpu/drm/i915/i915_globals.c
+++ b/drivers/gpu/drm/i915/i915_globals.c
@@ -17,61 +17,8 @@
 
 static LIST_HEAD(globals);
 
-static atomic_t active;
-static atomic_t epoch;
-static struct park_work {
-	struct delayed_work work;
-	struct rcu_head rcu;
-	unsigned long flags;
-#define PENDING 0
-	int epoch;
-} park;
-
-static void i915_globals_shrink(void)
-{
-	struct i915_global *global;
-
-	/*
-	 * kmem_cache_shrink() discards empty slabs and reorders partially
-	 * filled slabs to prioritise allocating from the mostly full slabs,
-	 * with the aim of reducing fragmentation.
-	 */
-	list_for_each_entry(global, &globals, link)
-		global->shrink();
-}
-
-static void __i915_globals_grace(struct rcu_head *rcu)
-{
-	/* Ratelimit parking as shrinking is quite slow */
-	schedule_delayed_work(&park.work, round_jiffies_up_relative(2 * HZ));
-}
-
-static void __i915_globals_queue_rcu(void)
-{
-	park.epoch = atomic_inc_return(&epoch);
-	if (!atomic_read(&active)) {
-		init_rcu_head(&park.rcu);
-		call_rcu(&park.rcu, __i915_globals_grace);
-	}
-}
-
-static void __i915_globals_park(struct work_struct *work)
-{
-	destroy_rcu_head(&park.rcu);
-
-	/* Confirm nothing woke up in the last grace period */
-	if (park.epoch != atomic_read(&epoch)) {
-		__i915_globals_queue_rcu();
-		return;
-	}
-
-	clear_bit(PENDING, &park.flags);
-	i915_globals_shrink();
-}
-
 void __init i915_global_register(struct i915_global *global)
 {
-	GEM_BUG_ON(!global->shrink);
 	GEM_BUG_ON(!global->exit);
 
 	list_add_tail(&global->link, &globals);
@@ -109,52 +56,10 @@ int __init i915_globals_init(void)
 		}
 	}
 
-	INIT_DELAYED_WORK(&park.work, __i915_globals_park);
-
 	return 0;
 }
 
-void i915_globals_park(void)
-{
-	/*
-	 * Defer shrinking the global slab caches (and other work) until
-	 * after a RCU grace period has completed with no activity. This
-	 * is to try and reduce the latency impact on the consumers caused
-	 * by us shrinking the caches the same time as they are trying to
-	 * allocate, with the assumption being that if we idle long enough
-	 * for an RCU grace period to elapse since the last use, it is likely
-	 * to be longer until we need the caches again.
-	 */
-	if (!atomic_dec_and_test(&active))
-		return;
-
-	/* Queue cleanup after the next RCU grace period has freed slabs */
-	if (!test_and_set_bit(PENDING, &park.flags))
-		__i915_globals_queue_rcu();
-}
-
-void i915_globals_unpark(void)
-{
-	atomic_inc(&epoch);
-	atomic_inc(&active);
-}
-
-static void __exit __i915_globals_flush(void)
-{
-	atomic_inc(&active); /* skip shrinking */
-
-	rcu_barrier(); /* wait for the work to be queued */
-	flush_delayed_work(&park.work);
-
-	atomic_dec(&active);
-}
-
 void __exit i915_globals_exit(void)
 {
-	GEM_BUG_ON(atomic_read(&active));
-
-	__i915_globals_flush();
 	__i915_globals_cleanup();
 
 	/* And ensure that our DESTROY_BY_RCU slabs are truly destroyed */
 	rcu_barrier();
 }
diff --git a/drivers/gpu/drm/i915/i915_globals.h b/drivers/gpu/drm/i915/i915_globals.h
index 2d199f411a4a..9e6b4fd07528 100644
--- a/drivers/gpu/drm/i915/i915_globals.h
+++ b/drivers/gpu/drm/i915/i915_globals.h
@@ -14,15 +14,12 @@ typedef void (*i915_global_func_t)(void);
 struct i915_global {
 	struct list_head link;
 
-	i915_global_func_t shrink;
 	i915_global_func_t exit;
 };
 
 void i915_global_register(struct i915_global *global);
 
 int i915_globals_init(void);
-void i915_globals_park(void);
-void i915_globals_unpark(void);
 void i915_globals_exit(void);
 
 /* constructors */
diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c
index 09ebea9a0090..d3de9f60e03a 100644
--- a/drivers/gpu/drm/i915/i915_request.c
+++ b/drivers/gpu/drm/i915/i915_request.c
@@ -2077,12 +2077,6 @@ void i915_request_show(struct drm_printer *m,
 #include "selftests/i915_request.c"
 #endif
 
-static void i915_global_request_shrink(void)
-{
-	kmem_cache_shrink(global.slab_execute_cbs);
-	kmem_cache_shrink(global.slab_requests);
-}
-
 static void i915_global_request_exit(void)
 {
 	kmem_cache_destroy(global.slab_execute_cbs);
@@ -2090,7 +2084,6 @@ static void i915_global_request_exit(void)
 }
 
 static struct i915_global_request global = { {
-	.shrink = i915_global_request_shrink,
 	.exit = i915_global_request_exit,
 } };
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 3a58a9130309..561c649e59f7 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -475,12 +475,6 @@ i915_sched_engine_create(unsigned int subclass)
 	return sched_engine;
 }
 
-static void i915_global_scheduler_shrink(void)
-{
-	kmem_cache_shrink(global.slab_dependencies);
-	kmem_cache_shrink(global.slab_priorities);
-}
-
 static void i915_global_scheduler_exit(void)
 {
 	kmem_cache_destroy(global.slab_dependencies);
@@ -488,7 +482,6 @@ static void i915_global_scheduler_exit(void)
 }
 
 static struct i915_global_scheduler global = { {
-	.shrink = i915_global_scheduler_shrink,
 	.exit = i915_global_scheduler_exit,
 } };
 
diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
index 5b9dce0f443b..09a7c47926f7 100644
--- a/drivers/gpu/drm/i915/i915_vma.c
+++ b/drivers/gpu/drm/i915/i915_vma.c
@@ -1414,18 +1414,12 @@ void i915_vma_make_purgeable(struct i915_vma *vma)
 #include "selftests/i915_vma.c"
 #endif
 
-static void i915_global_vma_shrink(void)
-{
-	kmem_cache_shrink(global.slab_vmas);
-}
-
 static void i915_global_vma_exit(void)
 {
 	kmem_cache_destroy(global.slab_vmas);
 }
 
 static struct i915_global_vma global = { {
-	.shrink = i915_global_vma_shrink,
 	.exit = i915_global_vma_exit,
 } };
 
-- 
2.32.0