Diffstat (limited to 'debian/patches-rt/0054-irq_work-Handle-some-irq_work-in-a-per-CPU-thread-on.patch')
-rw-r--r-- | debian/patches-rt/0054-irq_work-Handle-some-irq_work-in-a-per-CPU-thread-on.patch | 242
1 file changed, 242 insertions, 0 deletions
diff --git a/debian/patches-rt/0054-irq_work-Handle-some-irq_work-in-a-per-CPU-thread-on.patch b/debian/patches-rt/0054-irq_work-Handle-some-irq_work-in-a-per-CPU-thread-on.patch
new file mode 100644
index 000000000..518e0b622
--- /dev/null
+++ b/debian/patches-rt/0054-irq_work-Handle-some-irq_work-in-a-per-CPU-thread-on.patch
@@ -0,0 +1,242 @@
+From 6b0aed256667de2557b0bc3864d2847a0538857a Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 6 Oct 2021 13:18:51 +0200
+Subject: [PATCH 054/158] irq_work: Handle some irq_work in a per-CPU thread on
+ PREEMPT_RT
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.15/older/patches-5.15.10-rt24.tar.xz
+
+The irq_work callback is invoked in hard IRQ context. By default all
+callbacks are scheduled for invocation right away (given supported by
+the architecture) except for the ones marked IRQ_WORK_LAZY which are
+delayed until the next timer-tick.
+
+While looking over the callbacks, some of them may acquire locks
+(spinlock_t, rwlock_t) which are transformed into sleeping locks on
+PREEMPT_RT and must not be acquired in hard IRQ context.
+Changing the locks into locks which could be acquired in this context
+will lead to other problems such as increased latencies if everything
+in the chain has IRQ-off locks. This will not solve all the issues as
+one callback has been noticed which invoked kref_put() and its callback
+invokes kfree() and this can not be invoked in hardirq context.
+
+Some callbacks are required to be invoked in hardirq context even on
+PREEMPT_RT to work properly. This includes for instance the NO_HZ
+callback which needs to be able to observe the idle context.
+
+The callbacks which require to be run in hardirq have already been
+marked. Use this information to split the callbacks onto the two lists
+on PREEMPT_RT:
+- lazy_list
+  Work items which are not marked with IRQ_WORK_HARD_IRQ will be added
+  to this list. Callbacks on this list will be invoked from a per-CPU
+  thread.
+  The handler here may acquire sleeping locks such as spinlock_t and
+  invoke kfree().
+
+- raised_list
+  Work items which are marked with IRQ_WORK_HARD_IRQ will be added to
+  this list. They will be invoked in hardirq context and must not
+  acquire any sleeping locks.
+
+The wake up of the per-CPU thread occurs from irq_work handler/
+hardirq context. The thread runs with lowest RT priority to ensure it
+runs before any SCHED_OTHER tasks do.
+
+[bigeasy: melt tglx's irq_work_tick_soft() which splits irq_work_tick() into a
+          hard and soft variant. Collected fixes over time from Steven
+          Rostedt and Mike Galbraith. Move to per-CPU threads instead of
+          softirq as suggested by PeterZ.]
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20211007092646.uhshe3ut2wkrcfzv@linutronix.de
+---
+ kernel/irq_work.c | 118 +++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 106 insertions(+), 12 deletions(-)
+
+diff --git a/kernel/irq_work.c b/kernel/irq_work.c
+index e789beda8297..90b6b56f92e9 100644
+--- a/kernel/irq_work.c
++++ b/kernel/irq_work.c
+@@ -18,11 +18,36 @@
+ #include <linux/cpu.h>
+ #include <linux/notifier.h>
+ #include <linux/smp.h>
++#include <linux/smpboot.h>
+ #include <asm/processor.h>
+ #include <linux/kasan.h>
+ 
+ static DEFINE_PER_CPU(struct llist_head, raised_list);
+ static DEFINE_PER_CPU(struct llist_head, lazy_list);
++static DEFINE_PER_CPU(struct task_struct *, irq_workd);
++
++static void wake_irq_workd(void)
++{
++	struct task_struct *tsk = __this_cpu_read(irq_workd);
++
++	if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
++		wake_up_process(tsk);
++}
++
++#ifdef CONFIG_SMP
++static void irq_work_wake(struct irq_work *entry)
++{
++	wake_irq_workd();
++}
++
++static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
++	IRQ_WORK_INIT_HARD(irq_work_wake);
++#endif
++
++static int irq_workd_should_run(unsigned int cpu)
++{
++	return !llist_empty(this_cpu_ptr(&lazy_list));
++}
+ 
+ /*
+  * Claim the entry so that no one else will poke at it.
+@@ -52,15 +77,29 @@ void __weak arch_irq_work_raise(void)
+ /* Enqueue on current CPU, work must already be claimed and preempt disabled */
+ static void __irq_work_queue_local(struct irq_work *work)
+ {
++	struct llist_head *list;
++	bool rt_lazy_work = false;
++	bool lazy_work = false;
++	int work_flags;
++
++	work_flags = atomic_read(&work->node.a_flags);
++	if (work_flags & IRQ_WORK_LAZY)
++		lazy_work = true;
++	else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
++		 !(work_flags & IRQ_WORK_HARD_IRQ))
++		rt_lazy_work = true;
++
++	if (lazy_work || rt_lazy_work)
++		list = this_cpu_ptr(&lazy_list);
++	else
++		list = this_cpu_ptr(&raised_list);
++
++	if (!llist_add(&work->node.llist, list))
++		return;
++
+ 	/* If the work is "lazy", handle it from next tick if any */
+-	if (atomic_read(&work->node.a_flags) & IRQ_WORK_LAZY) {
+-		if (llist_add(&work->node.llist, this_cpu_ptr(&lazy_list)) &&
+-		    tick_nohz_tick_stopped())
+-			arch_irq_work_raise();
+-	} else {
+-		if (llist_add(&work->node.llist, this_cpu_ptr(&raised_list)))
+-			arch_irq_work_raise();
+-	}
++	if (!lazy_work || tick_nohz_tick_stopped())
++		arch_irq_work_raise();
+ }
+ 
+ /* Enqueue the irq work @work on the current CPU */
+@@ -104,17 +143,34 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
+ 	if (cpu != smp_processor_id()) {
+ 		/* Arch remote IPI send/receive backend aren't NMI safe */
+ 		WARN_ON_ONCE(in_nmi());
++
++		/*
++		 * On PREEMPT_RT the items which are not marked as
++		 * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
++		 * item is used on the remote CPU to wake the thread.
++		 */
++		if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
++		    !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) {
++
++			if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu)))
++				goto out;
++
++			work = &per_cpu(irq_work_wakeup, cpu);
++			if (!irq_work_claim(work))
++				goto out;
++		}
++
+ 		__smp_call_single_queue(cpu, &work->node.llist);
+ 	} else {
+ 		__irq_work_queue_local(work);
+ 	}
++out:
+ 	preempt_enable();
+ 
+ 	return true;
+ #endif /* CONFIG_SMP */
+ }
+ 
+-
+ bool irq_work_needs_cpu(void)
+ {
+ 	struct llist_head *raised, *lazy;
+@@ -170,7 +226,12 @@ static void irq_work_run_list(struct llist_head *list)
+ 	struct irq_work *work, *tmp;
+ 	struct llist_node *llnode;
+ 
+-	BUG_ON(!irqs_disabled());
++	/*
++	 * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
++	 * in a per-CPU thread in preemptible context. Only the items which are
++	 * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
++	 */
++	BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));
+ 
+ 	if (llist_empty(list))
+ 		return;
+@@ -187,7 +248,10 @@ static void irq_work_run_list(struct llist_head *list)
+ void irq_work_run(void)
+ {
+ 	irq_work_run_list(this_cpu_ptr(&raised_list));
+-	irq_work_run_list(this_cpu_ptr(&lazy_list));
++	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++		irq_work_run_list(this_cpu_ptr(&lazy_list));
++	else
++		wake_irq_workd();
+ }
+ EXPORT_SYMBOL_GPL(irq_work_run);
+ 
+@@ -197,7 +261,11 @@ void irq_work_tick(void)
+ 
+ 	if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
+ 		irq_work_run_list(raised);
+-	irq_work_run_list(this_cpu_ptr(&lazy_list));
++
++	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++		irq_work_run_list(this_cpu_ptr(&lazy_list));
++	else
++		wake_irq_workd();
+ }
+ 
+ /*
+@@ -219,3 +287,29 @@ void irq_work_sync(struct irq_work *work)
+ 		cpu_relax();
+ }
+ EXPORT_SYMBOL_GPL(irq_work_sync);
++
++static void run_irq_workd(unsigned int cpu)
++{
++	irq_work_run_list(this_cpu_ptr(&lazy_list));
++}
++
++static void irq_workd_setup(unsigned int cpu)
++{
++	sched_set_fifo_low(current);
++}
++
++static struct smp_hotplug_thread irqwork_threads = {
++	.store = &irq_workd,
++	.setup = irq_workd_setup,
++	.thread_should_run = irq_workd_should_run,
++	.thread_fn = run_irq_workd,
++	.thread_comm = "irq_work/%u",
++};
++
++static __init int irq_work_init_threads(void)
++{
++	if (IS_ENABLED(CONFIG_PREEMPT_RT))
++		BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
++	return 0;
++}
++early_initcall(irq_work_init_threads);
+-- 
+2.33.1
+
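
Illustration (not part of the patch): the lazy/hard split above is decided entirely by how an irq_work item is initialized. The following minimal sketch shows the two flavours as a caller would queue them, assuming the in-tree IRQ_WORK_INIT()/IRQ_WORK_INIT_HARD() initializers and irq_work_queue(); the demo_* names are invented for this example.

#include <linux/init.h>
#include <linux/irq_work.h>
#include <linux/module.h>
#include <linux/printk.h>

/*
 * Not marked HARD: with this patch applied, on PREEMPT_RT the item is queued
 * on lazy_list and the callback runs from the per-CPU irq_work/%u thread, so
 * it may take spinlock_t/rwlock_t or call kfree().
 */
static void demo_lazy_cb(struct irq_work *work)
{
	pr_info("irq_work demo: preemptible callback on PREEMPT_RT\n");
}

/*
 * Marked HARD: stays on raised_list and still runs in hardirq context even on
 * PREEMPT_RT, so it must not acquire any sleeping locks.
 */
static void demo_hard_cb(struct irq_work *work)
{
	pr_info("irq_work demo: hardirq callback\n");
}

static struct irq_work demo_lazy = IRQ_WORK_INIT(demo_lazy_cb);
static struct irq_work demo_hard = IRQ_WORK_INIT_HARD(demo_hard_cb);

static int __init irq_work_demo_init(void)
{
	/*
	 * Queue both items on the local CPU. With this patch, on PREEMPT_RT
	 * demo_lazy is deferred to the per-CPU thread while demo_hard fires
	 * from hardirq context; on !PREEMPT_RT both run in hardirq context.
	 */
	irq_work_queue(&demo_lazy);
	irq_work_queue(&demo_hard);
	return 0;
}
module_init(irq_work_demo_init);

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("irq_work lazy vs. hard demo");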