From 0a3061d9c53ef3e251a4dedbe25909babd64e8aa Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Fri, 8 Sep 2023 18:22:52 +0200
Subject: [PATCH 005/204] locking/rtmutex: Use rt_mutex specific scheduler
 helpers

Have rt_mutex use the rt_mutex specific scheduler helpers to avoid
recursion vs rtlock on the PI state.

[[ peterz: adapted to new names ]]

Reported-by: Crystal Wood <swood@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20230908162254.999499-6-bigeasy@linutronix.de
---
 kernel/futex/pi.c            | 11 +++++++++++
 kernel/locking/rtmutex.c     | 14 ++++++++++++--
 kernel/locking/rwbase_rt.c   |  6 ++++++
 kernel/locking/rwsem.c       |  8 +++++++-
 kernel/locking/spinlock_rt.c |  4 ++++
 5 files changed, 40 insertions(+), 3 deletions(-)

Index: linux-6.6.58-rt45/kernel/futex/pi.c
===================================================================
--- linux-6.6.58-rt45.orig/kernel/futex/pi.c
+++ linux-6.6.58-rt45/kernel/futex/pi.c
@ linux-6.6.58-rt45/kernel/futex/pi.c:4 @
 // SPDX-License-Identifier: GPL-2.0-or-later

 #include <linux/slab.h>
+#include <linux/sched/rt.h>
 #include <linux/sched/task.h>

 #include "futex.h"
@ linux-6.6.58-rt45/kernel/futex/pi.c:1006 @ retry_private:
 		goto no_block;
 	}

+	/*
+	 * Must be done before we enqueue the waiter, here is unfortunately
+	 * under the hb lock, but that *should* work because it does nothing.
+	 */
+	rt_mutex_pre_schedule();
+
 	rt_mutex_init_waiter(&rt_waiter);

 	/*
@ linux-6.6.58-rt45/kernel/futex/pi.c:1062 @ cleanup:
 	if (ret && !rt_mutex_cleanup_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter))
 		ret = 0;

+	/*
+	 * Waiter is unqueued.
+	 */
+	rt_mutex_post_schedule();
 no_block:
 	/*
 	 * Fixup the pi_state owner and possibly acquire the lock if we
Index: linux-6.6.58-rt45/kernel/locking/rtmutex.c
===================================================================
--- linux-6.6.58-rt45.orig/kernel/locking/rtmutex.c
+++ linux-6.6.58-rt45/kernel/locking/rtmutex.c
@ linux-6.6.58-rt45/kernel/locking/rtmutex.c:1635 @ static int __sched rt_mutex_slowlock_blo
 		raw_spin_unlock_irq(&lock->wait_lock);

 		if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner))
-			schedule();
+			rt_mutex_schedule();

 		raw_spin_lock_irq(&lock->wait_lock);
 		set_current_state(state);
@ linux-6.6.58-rt45/kernel/locking/rtmutex.c:1665 @ static void __sched rt_mutex_handle_dead
 	while (1) {
 		set_current_state(TASK_INTERRUPTIBLE);
-		schedule();
+		rt_mutex_schedule();
 	}
 }
@ linux-6.6.58-rt45/kernel/locking/rtmutex.c:1761 @ static int __sched rt_mutex_slowlock(str
 	int ret;

 	/*
+	 * Do all pre-schedule work here, before we queue a waiter and invoke
+	 * PI -- any such work that trips on rtlock (PREEMPT_RT spinlock) would
+	 * otherwise recurse back into task_blocks_on_rt_mutex() through
+	 * rtlock_slowlock() and will then enqueue a second waiter for this
+	 * same task and things get really confusing real fast.
+	 */
+	rt_mutex_pre_schedule();
+
+	/*
 	 * Technically we could use raw_spin_[un]lock_irq() here, but this can
 	 * be called in early boot if the cmpxchg() fast path is disabled
 	 * (debug, no architecture support). In this case we will acquire the
@ linux-6.6.58-rt45/kernel/locking/rtmutex.c:1780 @ static int __sched rt_mutex_slowlock(str
 	raw_spin_lock_irqsave(&lock->wait_lock, flags);
 	ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state);
 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
+	rt_mutex_post_schedule();

 	return ret;
 }
Index: linux-6.6.58-rt45/kernel/locking/rwbase_rt.c
===================================================================
--- linux-6.6.58-rt45.orig/kernel/locking/rwbase_rt.c
+++ linux-6.6.58-rt45/kernel/locking/rwbase_rt.c
@ linux-6.6.58-rt45/kernel/locking/rwbase_rt.c:74 @ static int __sched __rwbase_read_lock(st
 	struct rt_mutex_base *rtm = &rwb->rtmutex;
 	int ret;

+	rwbase_pre_schedule();
 	raw_spin_lock_irq(&rtm->wait_lock);

 	/*
@ linux-6.6.58-rt45/kernel/locking/rwbase_rt.c:129 @ static int __sched __rwbase_read_lock(st
 	rwbase_rtmutex_unlock(rtm);

 	trace_contention_end(rwb, ret);
+	rwbase_post_schedule();
 	return ret;
 }
@ linux-6.6.58-rt45/kernel/locking/rwbase_rt.c:242 @ static int __sched rwbase_write_lock(str
 	/* Force readers into slow path */
 	atomic_sub(READER_BIAS, &rwb->readers);

+	rwbase_pre_schedule();
+
 	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
 	if (__rwbase_write_trylock(rwb))
 		goto out_unlock;
@ linux-6.6.58-rt45/kernel/locking/rwbase_rt.c:255 @ static int __sched rwbase_write_lock(str
 		if (rwbase_signal_pending_state(state, current)) {
 			rwbase_restore_current_state();
 			__rwbase_write_unlock(rwb, 0, flags);
+			rwbase_post_schedule();
 			trace_contention_end(rwb, -EINTR);
 			return -EINTR;
 		}
@ linux-6.6.58-rt45/kernel/locking/rwbase_rt.c:274 @ static int __sched rwbase_write_lock(str
 out_unlock:
 	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
+	rwbase_post_schedule();
 	return 0;
 }
Index: linux-6.6.58-rt45/kernel/locking/rwsem.c
===================================================================
--- linux-6.6.58-rt45.orig/kernel/locking/rwsem.c
+++ linux-6.6.58-rt45/kernel/locking/rwsem.c
@ linux-6.6.58-rt45/kernel/locking/rwsem.c:1430 @ static inline void __downgrade_write(str
 #define rwbase_signal_pending_state(state, current)	\
 	signal_pending_state(state, current)

+#define rwbase_pre_schedule()				\
+	rt_mutex_pre_schedule()
+
 #define rwbase_schedule()				\
-	schedule()
+	rt_mutex_schedule()
+
+#define rwbase_post_schedule()				\
+	rt_mutex_post_schedule()

 #include "rwbase_rt.c"
Index: linux-6.6.58-rt45/kernel/locking/spinlock_rt.c
===================================================================
--- linux-6.6.58-rt45.orig/kernel/locking/spinlock_rt.c
+++ linux-6.6.58-rt45/kernel/locking/spinlock_rt.c
@ linux-6.6.58-rt45/kernel/locking/spinlock_rt.c:187 @ static __always_inline int rwbase_rtmut
 #define rwbase_signal_pending_state(state, current)	(0)

+#define rwbase_pre_schedule()
+
 #define rwbase_schedule()				\
 	schedule_rtlock()

+#define rwbase_post_schedule()
+
 #include "rwbase_rt.c"
 /*
  * The common functions which get wrapped into the rwlock API.
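For reference, the rt_mutex_pre_schedule()/rt_mutex_schedule()/
rt_mutex_post_schedule() helpers switched to above come from the preceding
"sched: Provide rt_mutex specific scheduler helpers" patch in this series.
Roughly paraphrased (a sketch only, not part of this patch; the actual code
lives in kernel/sched/core.c and may differ in detail in this tree):

	void rt_mutex_pre_schedule(void)
	{
		/* Flush sched_submit_work() once, before any waiter is queued. */
		lockdep_assert(!fetch_and_set(current->sched_rt_mutex, 1));
		sched_submit_work(current);
	}

	void rt_mutex_schedule(void)
	{
		/* Plain schedule loop; the pre-schedule work was already done. */
		lockdep_assert(current->sched_rt_mutex);
		__schedule_loop(SM_NONE);
	}

	void rt_mutex_post_schedule(void)
	{
		/* Undo the pre-schedule work once the waiter is unqueued. */
		sched_update_worker(current);
		lockdep_assert(fetch_and_set(current->sched_rt_mutex, 0));
	}

Doing the sched_submit_work() flush once up front is what keeps a later
rtlock_slowlock() (taken while already blocking on the rtmutex) from
recursing back into the block path and enqueueing a second waiter for the
same task, as described in the comment added to rt_mutex_slowlock().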