From: Thomas Gleixner <tglx@linutronix.de> Date: Sun, 11 Sep 2022 00:28:01 +0200 Subject: [PATCH 05/24] printk: Add non-BKL console basic infrastructure The current console/printk subsystem is protected by a Big Kernel Lock, (aka console_lock) which has ill defined semantics and is more or less stateless. This puts severe limitations on the console subsystem and makes forced takeover and output in emergency and panic situations a fragile endavour which is based on try and pray. The goal of non-BKL consoles is to break out of the console lock jail and to provide a new infrastructure that avoids the pitfalls and allows console drivers to be gradually converted over. The proposed infrastructure aims for the following properties: - Per console locking instead of global locking - Per console state which allows to make informed decisions - Stateful handover and takeover As a first step state is added to struct console. The per console state is an atomic_long_t with a 32bit bit field and on 64bit also a 32bit sequence for tracking the last printed ringbuffer sequence number. On 32bit the sequence is separate from state for obvious reasons which requires handling a few extra race conditions. Reserve state bits, which will be populated later in the series. Wire it up into the console register/unregister functionality and exclude such consoles from being handled in the console BKL mechanisms. Since the non-BKL consoles will not depend on the console lock/unlock dance for printing, only perform said dance if a BKL console is registered. The decision to use a bitfield was made as using a plain u32 with mask/shift operations turned out to result in uncomprehensible code. Co-developed-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: John Ogness <john.ogness@linutronix.de> Signed-off-by: Thomas Gleixner (Intel) <tglx@linutronix.de> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> --- include/linux/console.h | 33 ++++++++++ kernel/printk/Makefile | 2 kernel/printk/internal.h | 10 +++ kernel/printk/printk.c | 50 +++++++++++++-- kernel/printk/printk_nobkl.c | 137 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 225 insertions(+), 7 deletions(-) create mode 100644 kernel/printk/printk_nobkl.c Index: linux-6.3.0-rt11/include/linux/console.h =================================================================== @ linux-6.3.0-rt11/include/linux/console.h:159 @ static inline int con_debug_leave(void) * /dev/kmesg which requires a larger output buffer. * @CON_SUSPENDED: Indicates if a console is suspended. If true, the * printing callbacks must not be called. + * @CON_NO_BKL: Console can operate outside of the BKL style console_lock + * constraints. */ enum cons_flags { CON_PRINTBUFFER = BIT(0), @ linux-6.3.0-rt11/include/linux/console.h:171 @ enum cons_flags { CON_BRL = BIT(5), CON_EXTENDED = BIT(6), CON_SUSPENDED = BIT(7), + CON_NO_BKL = BIT(8), +}; + +/** + * struct cons_state - console state for NOBKL consoles + * @atom: Compound of the state fields for atomic operations + * @seq: Sequence for record tracking (64bit only) + * @bits: Compound of the state bits below + * + * To be used for state read and preparation of atomic_long_cmpxchg() + * operations. + */ +struct cons_state { + union { + unsigned long atom; + struct { +#ifdef CONFIG_64BIT + u32 seq; +#endif + union { + u32 bits; + struct { + }; + }; + }; + }; }; /** @ linux-6.3.0-rt11/include/linux/console.h:218 @ enum cons_flags { * @dropped: Number of unreported dropped ringbuffer records * @data: Driver private data * @node: hlist node for the console list + * + * @atomic_state: State array for NOBKL consoles; real and handover */ struct console { char name[16]; @ linux-6.3.0-rt11/include/linux/console.h:239 @ struct console { unsigned long dropped; void *data; struct hlist_node node; + + /* NOBKL console specific members */ + atomic_long_t __private atomic_state[2]; }; #ifdef CONFIG_LOCKDEP Index: linux-6.3.0-rt11/kernel/printk/Makefile =================================================================== --- linux-6.3.0-rt11.orig/kernel/printk/Makefile +++ linux-6.3.0-rt11/kernel/printk/Makefile @ linux-6.3.0-rt11/include/linux/console.h:3 @ # SPDX-License-Identifier: GPL-2.0-only obj-y = printk.o -obj-$(CONFIG_PRINTK) += printk_safe.o +obj-$(CONFIG_PRINTK) += printk_safe.o printk_nobkl.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o obj-$(CONFIG_PRINTK_INDEX) += index.o Index: linux-6.3.0-rt11/kernel/printk/internal.h =================================================================== --- linux-6.3.0-rt11.orig/kernel/printk/internal.h +++ linux-6.3.0-rt11/kernel/printk/internal.h @ linux-6.3.0-rt11/include/linux/console.h:6 @ * internal.h - printk internal definitions */ #include <linux/percpu.h> +#include <linux/console.h> #if defined(CONFIG_PRINTK) && defined(CONFIG_SYSCTL) void __init printk_sysctl_init(void); @ linux-6.3.0-rt11/include/linux/console.h:65 @ void defer_console_output(void); u16 printk_parse_prefix(const char *text, int *level, enum printk_info_flags *flags); + +void cons_nobkl_cleanup(struct console *con); +void cons_nobkl_init(struct console *con); + #else #define PRINTK_PREFIX_MAX 0 @ linux-6.3.0-rt11/include/linux/console.h:84 @ u16 printk_parse_prefix(const char *text #define printk_safe_exit_irqrestore(flags) local_irq_restore(flags) static inline bool printk_percpu_data_ready(void) { return false; } +static inline void cons_nobkl_init(struct console *con) { } +static inline void cons_nobkl_cleanup(struct console *con) { } + #endif /* CONFIG_PRINTK */ +extern bool have_boot_console; + /** * struct printk_buffers - Buffers to read/format/output printk messages. * @outbuf: After formatting, contains text to output. Index: linux-6.3.0-rt11/kernel/printk/printk.c =================================================================== --- linux-6.3.0-rt11.orig/kernel/printk/printk.c +++ linux-6.3.0-rt11/kernel/printk/printk.c @ linux-6.3.0-rt11/include/linux/console.h:445 @ static int console_msg_format = MSG_FORM /* syslog_lock protects syslog_* variables and write access to clear_seq. */ static DEFINE_MUTEX(syslog_lock); +/* + * Specifies if a BKL console was ever registered. Used to determine if the + * console lock/unlock dance is needed for console printing. + */ +static bool have_bkl_console; + +/* + * Specifies if a boot console is registered. Used to determine if NOBKL + * consoles may be used since NOBKL consoles cannot synchronize with boot + * consoles. + */ +bool have_boot_console; + #ifdef CONFIG_PRINTK DECLARE_WAIT_QUEUE_HEAD(log_wait); /* All 3 protected by @syslog_lock. */ @ linux-6.3.0-rt11/include/linux/console.h:2313 @ asmlinkage int vprintk_emit(int facility printed_len = vprintk_store(facility, level, dev_info, fmt, args); /* If called from the scheduler, we can not call up(). */ - if (!in_sched) { + if (!in_sched && have_bkl_console) { /* * The caller may be holding system-critical or * timing-sensitive locks. Disable preemption during @ linux-6.3.0-rt11/include/linux/console.h:2636 @ void resume_console(void) */ static int console_cpu_notify(unsigned int cpu) { - if (!cpuhp_tasks_frozen) { + if (!cpuhp_tasks_frozen && have_bkl_console) { /* If trylock fails, someone else is doing the printing */ if (console_trylock()) console_unlock(); @ linux-6.3.0-rt11/include/linux/console.h:3110 @ void console_unblank(void) struct console *c; int cookie; + if (!have_bkl_console) + return; + /* * Stop console printing because the unblank() callback may * assume the console is not within its write() callback. @ linux-6.3.0-rt11/include/linux/console.h:3154 @ void console_unblank(void) */ void console_flush_on_panic(enum con_flush_mode mode) { + if (!have_bkl_console) + return; + /* * If someone else is holding the console lock, trylock will fail * and may_schedule may be set. Ignore and proceed to unlock so @ linux-6.3.0-rt11/include/linux/console.h:3337 @ static void try_enable_default_console(s newcon->flags |= CON_CONSDEV; } -#define con_printk(lvl, con, fmt, ...) \ - printk(lvl pr_fmt("%sconsole [%s%d] " fmt), \ - (con->flags & CON_BOOT) ? "boot" : "", \ +#define con_printk(lvl, con, fmt, ...) \ + printk(lvl pr_fmt("%s%sconsole [%s%d] " fmt), \ + (con->flags & CON_NO_BKL) ? "" : "legacy ", \ + (con->flags & CON_BOOT) ? "boot" : "", \ con->name, con->index, ##__VA_ARGS__) static void console_init_seq(struct console *newcon, bool bootcon_registered) @ linux-6.3.0-rt11/include/linux/console.h:3500 @ void register_console(struct console *ne newcon->dropped = 0; console_init_seq(newcon, bootcon_registered); + if (!(newcon->flags & CON_NO_BKL)) + have_bkl_console = true; + else + cons_nobkl_init(newcon); + + if (newcon->flags & CON_BOOT) + have_boot_console = true; + /* * Put this console in the list - keep the * preferred driver at the head of the list. @ linux-6.3.0-rt11/include/linux/console.h:3551 @ void register_console(struct console *ne if (con->flags & CON_BOOT) unregister_console_locked(con); } + + /* All boot consoles have been unregistered. */ + have_boot_console = false; } unlock: console_list_unlock(); @ linux-6.3.0-rt11/include/linux/console.h:3602 @ static int unregister_console_locked(str */ synchronize_srcu(&console_srcu); + if (console->flags & CON_NO_BKL) + cons_nobkl_cleanup(console); + console_sysfs_notify(); if (console->exit) @ linux-6.3.0-rt11/include/linux/console.h:3908 @ void wake_up_klogd(void) */ void defer_console_output(void) { + int val = PRINTK_PENDING_WAKEUP; + /* * New messages may have been added directly to the ringbuffer * using vprintk_store(), so wake any waiters as well. */ - __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT); + if (have_bkl_console) + val |= PRINTK_PENDING_OUTPUT; + __wake_up_klogd(val); } void printk_trigger_flush(void) Index: linux-6.3.0-rt11/kernel/printk/printk_nobkl.c =================================================================== --- /dev/null +++ linux-6.3.0-rt11/kernel/printk/printk_nobkl.c @ linux-6.3.0-rt11/include/linux/console.h:4 @ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2022 Linutronix GmbH, John Ogness +// Copyright (C) 2022 Intel, Thomas Gleixner + +#include <linux/kernel.h> +#include <linux/console.h> +#include "internal.h" +/* + * Printk implementation for consoles that do not depend on the BKL style + * console_lock mechanism. + * + * Console is locked on a CPU when state::locked is set and state:cpu == + * current CPU. This is valid for the current execution context. + * + * Nesting execution contexts on the same CPU can carefully take over + * if the driver allows reentrancy via state::unsafe = false. When the + * interrupted context resumes it checks the state before entering + * an unsafe region and aborts the operation if it detects a takeover. + * + * In case of panic or emergency the nesting context can take over the + * console forcefully. The write callback is then invoked with the unsafe + * flag set in the write context data, which allows the driver side to avoid + * locks and to evaluate the driver state so it can use an emergency path + * or repair the state instead of blindly assuming that it works. + * + * If the interrupted context touches the assigned record buffer after + * takeover, it does not cause harm because at the same execution level + * there is no concurrency on the same CPU. A threaded printer always has + * its own record buffer so it can never interfere with any of the per CPU + * record buffers. + * + * A concurrent writer on a different CPU can request to take over the + * console by: + * + * 1) Carefully writing the desired state into state[REQ] + * if there is no same or higher priority request pending. + * This locks state[REQ] except for higher priority + * waiters. + * + * 2) Setting state[CUR].req_prio unless a same or higher + * priority waiter won the race. + * + * 3) Carefully spin on state[CUR] until that is locked with the + * expected state. When the state is not the expected one then it + * has to verify that state[REQ] is still the same and that + * state[CUR] has not been taken over or unlocked. + * + * The unlocker hands over to state[REQ], but only if state[CUR] + * matches. + * + * In case that the owner does not react on the request and does not make + * observable progress, the waiter will timeout and can then decide to do + * a hostile takeover. + */ + +#define copy_full_state(_dst, _src) do { _dst = _src; } while (0) +#define copy_bit_state(_dst, _src) do { _dst.bits = _src.bits; } while (0) + +#ifdef CONFIG_64BIT +#define copy_seq_state64(_dst, _src) do { _dst.seq = _src.seq; } while (0) +#else +#define copy_seq_state64(_dst, _src) do { } while (0) +#endif + +enum state_selector { + CON_STATE_CUR, + CON_STATE_REQ, +}; + +/** + * cons_state_set - Helper function to set the console state + * @con: Console to update + * @which: Selects real state or handover state + * @new: The new state to write + * + * Only to be used when the console is not yet or no longer visible in the + * system. Otherwise use cons_state_try_cmpxchg(). + */ +static inline void cons_state_set(struct console *con, enum state_selector which, + struct cons_state *new) +{ + atomic_long_set(&ACCESS_PRIVATE(con, atomic_state[which]), new->atom); +} + +/** + * cons_state_read - Helper function to read the console state + * @con: Console to update + * @which: Selects real state or handover state + * @state: The state to store the result + */ +static inline void cons_state_read(struct console *con, enum state_selector which, + struct cons_state *state) +{ + state->atom = atomic_long_read(&ACCESS_PRIVATE(con, atomic_state[which])); +} + +/** + * cons_state_try_cmpxchg() - Helper function for atomic_long_try_cmpxchg() on console state + * @con: Console to update + * @which: Selects real state or handover state + * @old: Old/expected state + * @new: New state + * + * Returns: True on success, false on fail + */ +static inline bool cons_state_try_cmpxchg(struct console *con, + enum state_selector which, + struct cons_state *old, + struct cons_state *new) +{ + return atomic_long_try_cmpxchg(&ACCESS_PRIVATE(con, atomic_state[which]), + &old->atom, new->atom); +} + +/** + * cons_nobkl_init - Initialize the NOBKL console specific data + * @con: Console to initialize + */ +void cons_nobkl_init(struct console *con) +{ + struct cons_state state = { }; + + cons_state_set(con, CON_STATE_CUR, &state); + cons_state_set(con, CON_STATE_REQ, &state); +} + +/** + * cons_nobkl_cleanup - Cleanup the NOBKL console specific data + * @con: Console to cleanup + */ +void cons_nobkl_cleanup(struct console *con) +{ + struct cons_state state = { }; + + cons_state_set(con, CON_STATE_CUR, &state); + cons_state_set(con, CON_STATE_REQ, &state); +}