From: Peter Zijlstra <peterz@infradead.org> Date: Mon, 7 Oct 2024 09:46:14 +0200 Subject: [PATCH 5/5] sched: Add laziest preempt model Much like LAZY, except lazier still. It will not promote LAZY to full preempt on tick and compete with None for suckage. (do we really want this?) Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lore.kernel.org/r/20241007075055.671722644@infradead.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- include/linux/preempt.h | 10 ++++++++- kernel/Kconfig.preempt | 12 +++++++++++ kernel/sched/core.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++- kernel/sched/debug.c | 4 +-- 4 files changed, 71 insertions(+), 4 deletions(-) @ include/linux/preempt.h:490 @ extern bool preempt_model_none(void); extern bool preempt_model_voluntary(void); extern bool preempt_model_full(void); extern bool preempt_model_lazy(void); +extern bool preempt_model_laziest(void); #else @ include/linux/preempt.h:511 @ static inline bool preempt_model_lazy(vo { return IS_ENABLED(CONFIG_PREEMPT_LAZY); } +static inline bool preempt_model_laziest(void) +{ + return IS_ENABLED(CONFIG_PREEMPT_LAZIEST); +} #endif @ include/linux/preempt.h:533 @ static inline bool preempt_model_rt(void */ static inline bool preempt_model_preemptible(void) { - return preempt_model_full() || preempt_model_lazy() || preempt_model_rt(); + return preempt_model_full() || + preempt_model_lazy() || + preempt_model_laziest() || + preempt_model_rt(); } #endif /* __LINUX_PREEMPT_H */ --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @ include/linux/preempt.h:87 @ config PREEMPT_LAZY reduce lock holder preemption and recover some of the performance gains seen from using Voluntary preemption. 
+config PREEMPT_LAZIEST + bool "Scheduler controlled preemption model" + depends on !ARCH_NO_PREEMPT + depends on ARCH_HAS_PREEMPT_LAZY + select PREEMPT_BUILD if !PREEMPT_DYNAMIC + help + This option provides a scheduler driven preemption model that + is fundamentally similar to full preemption, but is least + eager to preempt SCHED_NORMAL tasks in an attempt to + reduce lock holder preemption and recover some of the performance + gains seen from using no preemption. + endchoice config PREEMPT_RT --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @ include/linux/preempt.h:1111 @ void resched_curr(struct rq *rq) #ifdef CONFIG_PREEMPT_DYNAMIC static DEFINE_STATIC_KEY_FALSE(sk_dynamic_preempt_lazy); +static DEFINE_STATIC_KEY_FALSE(sk_dynamic_preempt_promote); static __always_inline bool dynamic_preempt_lazy(void) { return static_branch_unlikely(&sk_dynamic_preempt_lazy); } +static __always_inline bool dynamic_preempt_promote(void) +{ + return static_branch_unlikely(&sk_dynamic_preempt_promote); +} #else static __always_inline bool dynamic_preempt_lazy(void) { + return IS_ENABLED(CONFIG_PREEMPT_LAZY) || IS_ENABLED(CONFIG_PREEMPT_LAZIEST); +} +static __always_inline bool dynamic_preempt_promote(void) +{ - return IS_ENABLED(PREEMPT_LAZY); + return IS_ENABLED(CONFIG_PREEMPT_LAZY); } #endif @ include/linux/preempt.h:5640 @ void sched_tick(void) hw_pressure = arch_scale_hw_pressure(cpu_of(rq)); update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure); - if (dynamic_preempt_lazy() && tif_test_bit(TIF_NEED_RESCHED_LAZY)) + if (dynamic_preempt_promote() && tif_test_bit(TIF_NEED_RESCHED_LAZY)) resched_curr(rq); curr->sched_class->task_tick(rq, curr, 0); @ include/linux/preempt.h:7380 @ EXPORT_SYMBOL(__cond_resched_rwlock_writ * preempt_schedule_notrace <- NOP * irqentry_exit_cond_resched <- NOP * dynamic_preempt_lazy <- false + * dynamic_preempt_promote <- false * * VOLUNTARY: * cond_resched <- __cond_resched @ include/linux/preempt.h:7389 @ EXPORT_SYMBOL(__cond_resched_rwlock_writ * preempt_schedule_notrace <- NOP * 
irqentry_exit_cond_resched <- NOP * dynamic_preempt_lazy <- false + * dynamic_preempt_promote <- false * * FULL: * cond_resched <- RET0 @ include/linux/preempt.h:7398 @ EXPORT_SYMBOL(__cond_resched_rwlock_writ * preempt_schedule_notrace <- preempt_schedule_notrace * irqentry_exit_cond_resched <- irqentry_exit_cond_resched * dynamic_preempt_lazy <- false + * dynamic_preempt_promote <- false * * LAZY: * cond_resched <- RET0 @ include/linux/preempt.h:7407 @ EXPORT_SYMBOL(__cond_resched_rwlock_writ * preempt_schedule_notrace <- preempt_schedule_notrace * irqentry_exit_cond_resched <- irqentry_exit_cond_resched * dynamic_preempt_lazy <- true + * dynamic_preempt_promote <- true + * + * LAZIEST: + * cond_resched <- RET0 + * might_resched <- RET0 + * preempt_schedule <- preempt_schedule + * preempt_schedule_notrace <- preempt_schedule_notrace + * irqentry_exit_cond_resched <- irqentry_exit_cond_resched + * dynamic_preempt_lazy <- true + * dynamic_preempt_promote <- false */ enum { @ include/linux/preempt.h:7425 @ enum { preempt_dynamic_voluntary, preempt_dynamic_full, preempt_dynamic_lazy, + preempt_dynamic_laziest, }; int preempt_dynamic_mode = preempt_dynamic_undefined; @ include/linux/preempt.h:7446 @ int sched_dynamic_mode(const char *str) #ifdef CONFIG_ARCH_HAS_PREEMPT_LAZY if (!strcmp(str, "lazy")) return preempt_dynamic_lazy; + + if (!strcmp(str, "laziest")) + return preempt_dynamic_laziest; #endif return -EINVAL; @ include/linux/preempt.h:7483 @ static void __sched_dynamic_update(int m preempt_dynamic_enable(preempt_schedule_notrace); preempt_dynamic_enable(irqentry_exit_cond_resched); preempt_dynamic_key_disable(preempt_lazy); + preempt_dynamic_key_disable(preempt_promote); switch (mode) { case preempt_dynamic_none: @ include/linux/preempt.h:7494 @ static void __sched_dynamic_update(int m preempt_dynamic_disable(preempt_schedule_notrace); preempt_dynamic_disable(irqentry_exit_cond_resched); preempt_dynamic_key_disable(preempt_lazy); + 
preempt_dynamic_key_disable(preempt_promote); if (mode != preempt_dynamic_mode) pr_info("Dynamic Preempt: none\n"); break; @ include/linux/preempt.h:7507 @ static void __sched_dynamic_update(int m preempt_dynamic_disable(preempt_schedule_notrace); preempt_dynamic_disable(irqentry_exit_cond_resched); preempt_dynamic_key_disable(preempt_lazy); + preempt_dynamic_key_disable(preempt_promote); if (mode != preempt_dynamic_mode) pr_info("Dynamic Preempt: voluntary\n"); break; @ include/linux/preempt.h:7520 @ static void __sched_dynamic_update(int m preempt_dynamic_enable(preempt_schedule_notrace); preempt_dynamic_enable(irqentry_exit_cond_resched); preempt_dynamic_key_disable(preempt_lazy); + preempt_dynamic_key_disable(preempt_promote); if (mode != preempt_dynamic_mode) pr_info("Dynamic Preempt: full\n"); break; @ include/linux/preempt.h:7533 @ static void __sched_dynamic_update(int m preempt_dynamic_enable(preempt_schedule_notrace); preempt_dynamic_enable(irqentry_exit_cond_resched); preempt_dynamic_key_enable(preempt_lazy); + preempt_dynamic_key_enable(preempt_promote); if (mode != preempt_dynamic_mode) pr_info("Dynamic Preempt: lazy\n"); break; + + case preempt_dynamic_laziest: + if (!klp_override) + preempt_dynamic_disable(cond_resched); + preempt_dynamic_disable(might_resched); + preempt_dynamic_enable(preempt_schedule); + preempt_dynamic_enable(preempt_schedule_notrace); + preempt_dynamic_enable(irqentry_exit_cond_resched); + preempt_dynamic_key_enable(preempt_lazy); + preempt_dynamic_key_disable(preempt_promote); + if (mode != preempt_dynamic_mode) + pr_info("Dynamic Preempt: laziest\n"); + break; } preempt_dynamic_mode = mode; @ include/linux/preempt.h:7614 @ static void __init preempt_dynamic_init( sched_dynamic_update(preempt_dynamic_voluntary); } else if (IS_ENABLED(CONFIG_PREEMPT_LAZY)) { sched_dynamic_update(preempt_dynamic_lazy); + } else if (IS_ENABLED(CONFIG_PREEMPT_LAZIEST)) { + sched_dynamic_update(preempt_dynamic_laziest); } else { /* Default static 
call setting, nothing to do */ WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT)); @ include/linux/preempt.h:7637 @ PREEMPT_MODEL_ACCESSOR(none); PREEMPT_MODEL_ACCESSOR(voluntary); PREEMPT_MODEL_ACCESSOR(full); PREEMPT_MODEL_ACCESSOR(lazy); +PREEMPT_MODEL_ACCESSOR(laziest); #else /* !CONFIG_PREEMPT_DYNAMIC: */ --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @ include/linux/preempt.h:248 @ static ssize_t sched_dynamic_write(struc static int sched_dynamic_show(struct seq_file *m, void *v) { static const char * preempt_modes[] = { - "none", "voluntary", "full", "lazy", + "none", "voluntary", "full", "lazy", "laziest", }; - int j = ARRAY_SIZE(preempt_modes) - !IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY); + int j = ARRAY_SIZE(preempt_modes) - 2*!IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY); int i = IS_ENABLED(CONFIG_PREEMPT_RT) * 2; for (; i < j; i++) {