From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 7 Oct 2024 09:46:11 +0200
Subject: [PATCH 2/5] sched: Add Lazy preemption model

Change fair to use resched_curr_lazy(), which, when the lazy
preemption model is selected, will set TIF_NEED_RESCHED_LAZY.

This LAZY bit will be promoted to the full NEED_RESCHED bit on tick.
As such, the average delay between setting LAZY and actually
rescheduling will be TICK_NSEC/2.

In short, Lazy preemption will delay preemption for fair class but
will function as Full preemption for all the other classes, most
notably the realtime (RR/FIFO/DEADLINE) classes.

The goal is to bridge the performance gap with Voluntary, such that we
might eventually remove that option entirely.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20241007075055.331243614@infradead.org
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 include/linux/preempt.h |    8 ++++-
 kernel/Kconfig.preempt  |   15 +++++++++
 kernel/sched/core.c     |   76 ++++++++++++++++++++++++++++++++++++++++++++++--
 kernel/sched/debug.c    |    5 +--
 kernel/sched/fair.c     |    6 +--
 kernel/sched/sched.h    |    1 
 6 files changed, 103 insertions(+), 8 deletions(-)

--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@ include/linux/preempt.h:489 @ DEFINE_LOCK_GUARD_0(migrate, migrate_dis
 extern bool preempt_model_none(void);
 extern bool preempt_model_voluntary(void);
 extern bool preempt_model_full(void);
+extern bool preempt_model_lazy(void);
 
 #else
@ include/linux/preempt.h:506 @ static inline bool preempt_model_full(vo
 	return IS_ENABLED(CONFIG_PREEMPT);
 }
 
+static inline bool preempt_model_lazy(void)
+{
+	return IS_ENABLED(CONFIG_PREEMPT_LAZY);
+}
+
 #endif
 
 static inline bool preempt_model_rt(void)
@ include/linux/preempt.h:528 @ static inline bool preempt_model_rt(void
  */
 static inline bool preempt_model_preemptible(void)
 {
-	return preempt_model_full() || preempt_model_rt();
+	return preempt_model_full() || preempt_model_lazy() || preempt_model_rt();
 }
 
 #endif /* __LINUX_PREEMPT_H */
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@ kernel/Kconfig.preempt:14 @ config PREEMPT_BUILD
 	select PREEMPTION
 	select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
 
+config ARCH_HAS_PREEMPT_LAZY
+	bool
+
 choice
 	prompt "Preemption Model"
 	default PREEMPT_NONE
@ kernel/Kconfig.preempt:73 @ config PREEMPT
 	  embedded system with latency requirements in the milliseconds
 	  range.
 
+config PREEMPT_LAZY
+	bool "Scheduler controlled preemption model"
+	depends on !ARCH_NO_PREEMPT
+	depends on ARCH_HAS_PREEMPT_LAZY
+	select PREEMPT_BUILD
+	help
+	  This option provides a scheduler driven preemption model that
+	  is fundamentally similar to full preemption, but is less
+	  eager to preempt SCHED_NORMAL tasks in an attempt to
+	  reduce lock holder preemption and recover some of the performance
+	  gains seen from using Voluntary preemption.
+
 config PREEMPT_RT
 	bool "Fully Preemptible Kernel (Real-Time)"
 	depends on EXPERT && ARCH_SUPPORTS_RT
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@ kernel/sched/core.c:1081 @ static void __resched_curr(struct rq *rq
 	lockdep_assert_rq_held(rq);
 
+	if (is_idle_task(curr) && tif == TIF_NEED_RESCHED_LAZY)
+		tif = TIF_NEED_RESCHED;
+
 	if (cti->flags & ((1 << tif) | _TIF_NEED_RESCHED))
 		return;
@ kernel/sched/core.c:1109 @ void resched_curr(struct rq *rq)
 	__resched_curr(rq, TIF_NEED_RESCHED);
 }
 
+#ifdef CONFIG_PREEMPT_DYNAMIC
+static DEFINE_STATIC_KEY_FALSE(sk_dynamic_preempt_lazy);
+static __always_inline bool dynamic_preempt_lazy(void)
+{
+	return static_branch_unlikely(&sk_dynamic_preempt_lazy);
+}
+#else
+static __always_inline bool dynamic_preempt_lazy(void)
+{
+	return IS_ENABLED(PREEMPT_LAZY);
+}
+#endif
+
+static __always_inline int tif_need_resched_lazy(void)
+{
+	if (dynamic_preempt_lazy())
+		return TIF_NEED_RESCHED_LAZY;
+
+	return TIF_NEED_RESCHED;
+}
+
+void resched_curr_lazy(struct rq *rq)
+{
+	__resched_curr(rq, tif_need_resched_lazy());
+}
+
 void resched_cpu(int cpu)
 {
 	struct rq *rq = cpu_rq(cpu);
@ kernel/sched/core.c:5630 @ void sched_tick(void)
 	update_rq_clock(rq);
 	hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
 	update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure);
+
+	if (dynamic_preempt_lazy() && tif_test_bit(TIF_NEED_RESCHED_LAZY))
+		resched_curr(rq);
+
 	curr->sched_class->task_tick(rq, curr, 0);
 	if (sched_feat(LATENCY_WARN))
 		resched_latency = cpu_resched_latency(rq);
@ kernel/sched/core.c:7370 @ EXPORT_SYMBOL(__cond_resched_rwlock_writ
 *   preempt_schedule           <- NOP
 *   preempt_schedule_notrace   <- NOP
 *   irqentry_exit_cond_resched <- NOP
+*   dynamic_preempt_lazy       <- false
 *
 * VOLUNTARY:
 *   cond_resched               <- __cond_resched
@ kernel/sched/core.c:7378 @ EXPORT_SYMBOL(__cond_resched_rwlock_writ
 *   preempt_schedule           <- NOP
 *   preempt_schedule_notrace   <- NOP
 *   irqentry_exit_cond_resched <- NOP
+*   dynamic_preempt_lazy       <- false
 *
 * FULL:
 *   cond_resched               <- RET0
@ kernel/sched/core.c:7386 @ EXPORT_SYMBOL(__cond_resched_rwlock_writ
 *   preempt_schedule           <- preempt_schedule
 *   preempt_schedule_notrace   <- preempt_schedule_notrace
 *   irqentry_exit_cond_resched <- irqentry_exit_cond_resched
+*   dynamic_preempt_lazy       <- false
+*
+* LAZY:
+*   cond_resched               <- RET0
+*   might_resched              <- RET0
+*   preempt_schedule           <- preempt_schedule
+*   preempt_schedule_notrace   <- preempt_schedule_notrace
+*   irqentry_exit_cond_resched <- irqentry_exit_cond_resched
+*   dynamic_preempt_lazy       <- true
 */
 
 enum {
@ kernel/sched/core.c:7402 @ enum {
 	preempt_dynamic_none,
 	preempt_dynamic_voluntary,
 	preempt_dynamic_full,
+	preempt_dynamic_lazy,
 };
 
 int preempt_dynamic_mode = preempt_dynamic_undefined;
@ kernel/sched/core.c:7418 @ int sched_dynamic_mode(const char *str)
 	if (!strcmp(str, "full"))
 		return preempt_dynamic_full;
 
+#ifdef CONFIG_ARCH_HAS_PREEMPT_LAZY
+	if (!strcmp(str, "lazy"))
+		return preempt_dynamic_lazy;
+#endif
+
 	return -EINVAL;
 }
 
+#define preempt_dynamic_key_enable(f)	static_key_enable(&sk_dynamic_##f.key)
+#define preempt_dynamic_key_disable(f)	static_key_disable(&sk_dynamic_##f.key)
+
 #if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
 #define preempt_dynamic_enable(f)	static_call_update(f, f##_dynamic_enabled)
 #define preempt_dynamic_disable(f)	static_call_update(f, f##_dynamic_disabled)
 #elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
-#define preempt_dynamic_enable(f)	static_key_enable(&sk_dynamic_##f.key)
-#define preempt_dynamic_disable(f)	static_key_disable(&sk_dynamic_##f.key)
+#define preempt_dynamic_enable(f)	preempt_dynamic_key_enable(f)
+#define preempt_dynamic_disable(f)	preempt_dynamic_key_disable(f)
 #else
 #error "Unsupported PREEMPT_DYNAMIC mechanism"
 #endif
@ kernel/sched/core.c:7454 @ static void __sched_dynamic_update(int m
 	preempt_dynamic_enable(preempt_schedule);
 	preempt_dynamic_enable(preempt_schedule_notrace);
 	preempt_dynamic_enable(irqentry_exit_cond_resched);
+	preempt_dynamic_key_disable(preempt_lazy);
 
 	switch (mode) {
 	case preempt_dynamic_none:
@ kernel/sched/core.c:7464 @ static void __sched_dynamic_update(int m
 		preempt_dynamic_disable(preempt_schedule);
 		preempt_dynamic_disable(preempt_schedule_notrace);
 		preempt_dynamic_disable(irqentry_exit_cond_resched);
+		preempt_dynamic_key_disable(preempt_lazy);
 		if (mode != preempt_dynamic_mode)
 			pr_info("Dynamic Preempt: none\n");
 		break;
@ kernel/sched/core.c:7476 @ static void __sched_dynamic_update(int m
 		preempt_dynamic_disable(preempt_schedule);
 		preempt_dynamic_disable(preempt_schedule_notrace);
 		preempt_dynamic_disable(irqentry_exit_cond_resched);
+		preempt_dynamic_key_disable(preempt_lazy);
 		if (mode != preempt_dynamic_mode)
 			pr_info("Dynamic Preempt: voluntary\n");
 		break;
@ kernel/sched/core.c:7488 @ static void __sched_dynamic_update(int m
 		preempt_dynamic_enable(preempt_schedule);
 		preempt_dynamic_enable(preempt_schedule_notrace);
 		preempt_dynamic_enable(irqentry_exit_cond_resched);
+		preempt_dynamic_key_disable(preempt_lazy);
 		if (mode != preempt_dynamic_mode)
 			pr_info("Dynamic Preempt: full\n");
 		break;
+
+	case preempt_dynamic_lazy:
+		if (!klp_override)
+			preempt_dynamic_disable(cond_resched);
+		preempt_dynamic_disable(might_resched);
+		preempt_dynamic_enable(preempt_schedule);
+		preempt_dynamic_enable(preempt_schedule_notrace);
+		preempt_dynamic_enable(irqentry_exit_cond_resched);
+		preempt_dynamic_key_enable(preempt_lazy);
+		if (mode != preempt_dynamic_mode)
+			pr_info("Dynamic Preempt: lazy\n");
+		break;
 	}
 
 	preempt_dynamic_mode = mode;
@ kernel/sched/core.c:7566 @ static void __init preempt_dynamic_init(
 		sched_dynamic_update(preempt_dynamic_none);
 	} else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
 		sched_dynamic_update(preempt_dynamic_voluntary);
+	} else if (IS_ENABLED(CONFIG_PREEMPT_LAZY)) {
+		sched_dynamic_update(preempt_dynamic_lazy);
 	} else {
 		/* Default static call setting, nothing to do */
 		WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT));
@ kernel/sched/core.c:7588 @ static void __init preempt_dynamic_init(
 PREEMPT_MODEL_ACCESSOR(none);
 PREEMPT_MODEL_ACCESSOR(voluntary);
 PREEMPT_MODEL_ACCESSOR(full);
+PREEMPT_MODEL_ACCESSOR(lazy);
 
 #else /* !CONFIG_PREEMPT_DYNAMIC: */
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@ kernel/sched/debug.c:248 @ static ssize_t sched_dynamic_write(struc
 static int sched_dynamic_show(struct seq_file *m, void *v)
 {
 	static const char * preempt_modes[] = {
-		"none", "voluntary", "full"
+		"none", "voluntary", "full", "lazy",
 	};
+	int j = ARRAY_SIZE(preempt_modes) - !IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY);
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(preempt_modes); i++) {
+	for (i = 0; i < j; i++) {
 		if (preempt_dynamic_mode == i)
 			seq_puts(m, "(");
 		seq_puts(m, preempt_modes[i]);
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@ kernel/sched/fair.c:1254 @ static void update_curr(struct cfs_rq *c
 		return;
 
 	if (resched || did_preempt_short(cfs_rq, curr)) {
-		resched_curr(rq);
+		resched_curr_lazy(rq);
 		clear_buddies(cfs_rq, curr);
 	}
 }
@ kernel/sched/fair.c:5680 @ entity_tick(struct cfs_rq *cfs_rq, struc
 	 * validating it and just reschedule.
 	 */
 	if (queued) {
-		resched_curr(rq_of(cfs_rq));
+		resched_curr_lazy(rq_of(cfs_rq));
 		return;
 	}
 	/*
@ kernel/sched/fair.c:8835 @ static void check_preempt_wakeup_fair(st
 		return;
 
 preempt:
-	resched_curr(rq);
+	resched_curr_lazy(rq);
 }
 
 static struct task_struct *pick_task_fair(struct rq *rq)
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@ kernel/sched/sched.h:2695 @ extern void init_sched_rt_class(void);
 extern void init_sched_fair_class(void);
 
 extern void resched_curr(struct rq *rq);
+extern void resched_curr_lazy(struct rq *rq);
 extern void resched_cpu(int cpu);
 
 extern void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime);
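
A small aside for reviewers, not part of the patch: the behaviour described in
the changelog (the fair path only requests a lazy reschedule, and the tick
promotes that request to a full NEED_RESCHED) can be modelled with a few lines
of stand-alone user-space C. All names below (task_flags,
set_need_resched_lazy(), tick(), should_resched_now(), the FAKE_* bits) are
made up for illustration and are not kernel interfaces; the sketch only covers
the in-kernel preemption side, where the LAZY bit by itself does not trigger a
preemption.

/* Toy model of the lazy resched promotion -- NOT kernel code. */
#include <stdbool.h>
#include <stdio.h>

#define FAKE_TIF_NEED_RESCHED		(1U << 0)
#define FAKE_TIF_NEED_RESCHED_LAZY	(1U << 1)

static unsigned int task_flags;

/* Rough analogue of what resched_curr_lazy() asks for under PREEMPT_LAZY. */
static void set_need_resched_lazy(void)
{
	task_flags |= FAKE_TIF_NEED_RESCHED_LAZY;
}

/* Rough analogue of the sched_tick() hunk: promote LAZY to the full bit. */
static void tick(void)
{
	if (task_flags & FAKE_TIF_NEED_RESCHED_LAZY)
		task_flags |= FAKE_TIF_NEED_RESCHED;
}

/* In this model, in-kernel preemption points only honour the full bit. */
static bool should_resched_now(void)
{
	return task_flags & FAKE_TIF_NEED_RESCHED;
}

int main(void)
{
	set_need_resched_lazy();
	printf("after lazy request: preempt now? %d\n", should_resched_now());
	tick();
	printf("after next tick:    preempt now? %d\n", should_resched_now());
	return 0;
}

With the lazy request uniformly distributed within a tick period, the
promotion lands after half a tick on average, which is the TICK_NSEC/2 figure
quoted in the changelog.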