Diffstat (limited to 'debian/patches-rt/0054-irq_work-Handle-some-irq_work-in-a-per-CPU-thread-on.patch')
-rw-r--r--  debian/patches-rt/0054-irq_work-Handle-some-irq_work-in-a-per-CPU-thread-on.patch  |  242
1 file changed, 242 insertions, 0 deletions
diff --git a/debian/patches-rt/0054-irq_work-Handle-some-irq_work-in-a-per-CPU-thread-on.patch b/debian/patches-rt/0054-irq_work-Handle-some-irq_work-in-a-per-CPU-thread-on.patch
new file mode 100644
index 000000000..518e0b622
--- /dev/null
+++ b/debian/patches-rt/0054-irq_work-Handle-some-irq_work-in-a-per-CPU-thread-on.patch
@@ -0,0 +1,242 @@
+From 6b0aed256667de2557b0bc3864d2847a0538857a Mon Sep 17 00:00:00 2001
+From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Date: Wed, 6 Oct 2021 13:18:51 +0200
+Subject: [PATCH 054/158] irq_work: Handle some irq_work in a per-CPU thread on
+ PREEMPT_RT
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.15/older/patches-5.15.10-rt24.tar.xz
+
+The irq_work callback is invoked in hard IRQ context. By default all
+callbacks are scheduled for invocation right away (if supported by
+the architecture), except for the ones marked IRQ_WORK_LAZY, which are
+delayed until the next timer tick.
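+
+For illustration, a minimal sketch of declaring and queueing such
+callbacks with the existing API; the my_cb/my_work names are made up
+for this example:
+
+  #include <linux/irq_work.h>
+
+  static void my_cb(struct irq_work *work)
+  {
+          /* Invoked in hard IRQ context (or from the timer tick if lazy). */
+  }
+
+  /* Raised right away via arch_irq_work_raise() once queued. */
+  static struct irq_work my_work = IRQ_WORK_INIT(my_cb);
+
+  /* IRQ_WORK_LAZY: deferred until the next timer tick. */
+  static struct irq_work my_lazy_work = IRQ_WORK_INIT_LAZY(my_cb);
+
+  static void trigger_example(void)
+  {
+          /* NMI safe; returns false if the work is already pending. */
+          irq_work_queue(&my_work);
+          irq_work_queue(&my_lazy_work);
+  }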
+
+Some of the callbacks acquire locks (spinlock_t, rwlock_t) which are
+transformed into sleeping locks on PREEMPT_RT and therefore must not
+be acquired in hard IRQ context.
+Changing these into locks which could be acquired in this context would
+lead to other problems, such as increased latencies if everything in
+the chain uses IRQ-off locks. It would also not solve all issues: one
+callback was observed to invoke kref_put(), whose release callback
+invokes kfree(), which cannot be invoked in hardirq context.
+
+Some callbacks are required to be invoked in hardirq context even on
+PREEMPT_RT to work properly. This includes, for instance, the NO_HZ
+callback, which needs to be able to observe the idle context.
+
+The callbacks which are required to run in hardirq context have already
+been marked. Use this information to split the callbacks onto the two
+lists on PREEMPT_RT (a usage sketch follows the list):
+- lazy_list
+ Work items which are not marked with IRQ_WORK_HARD_IRQ will be added
+ to this list. Callbacks on this list will be invoked from a per-CPU
+ thread.
+ The handler here may acquire sleeping locks such as spinlock_t and
+ invoke kfree().
+
+- raised_list
+ Work items which are marked with IRQ_WORK_HARD_IRQ will be added to
+ this list. They will be invoked in hardirq context and must not
+ acquire any sleeping locks.
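+
+For illustration, a minimal sketch of how a callback ends up on one
+list or the other; the hard_cb/soft_cb names are made up for this
+example, only the IRQ_WORK_INIT*() macros and their flags are real:
+
+  #include <linux/irq_work.h>
+
+  static void hard_cb(struct irq_work *work)
+  {
+          /* Runs from raised_list in hardirq context: must not sleep. */
+  }
+
+  static void soft_cb(struct irq_work *work)
+  {
+          /*
+           * On PREEMPT_RT this runs from the per-CPU irq_work/N thread,
+           * so sleeping locks (spinlock_t) and kfree() are fine here.
+           */
+  }
+
+  /* IRQ_WORK_HARD_IRQ keeps the item on raised_list even on PREEMPT_RT. */
+  static struct irq_work hard_work = IRQ_WORK_INIT_HARD(hard_cb);
+
+  /* No flag set: on PREEMPT_RT this item is queued onto lazy_list. */
+  static struct irq_work soft_work = IRQ_WORK_INIT(soft_cb);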
+
+The per-CPU thread is woken up from the irq_work handler in hardirq
+context. The thread runs at the lowest RT priority to ensure it runs
+before any SCHED_OTHER tasks do.
+
+[bigeasy: melt tglx's irq_work_tick_soft() which splits irq_work_tick() into a
+ hard and soft variant. Collected fixes over time from Steven
+ Rostedt and Mike Galbraith. Move to per-CPU threads instead of
+ softirq as suggested by PeterZ.]
+
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Link: https://lore.kernel.org/r/20211007092646.uhshe3ut2wkrcfzv@linutronix.de
+---
+ kernel/irq_work.c | 118 +++++++++++++++++++++++++++++++++++++++++-----
+ 1 file changed, 106 insertions(+), 12 deletions(-)
+
+diff --git a/kernel/irq_work.c b/kernel/irq_work.c
+index e789beda8297..90b6b56f92e9 100644
+--- a/kernel/irq_work.c
++++ b/kernel/irq_work.c
+@@ -18,11 +18,36 @@
+ #include <linux/cpu.h>
+ #include <linux/notifier.h>
+ #include <linux/smp.h>
++#include <linux/smpboot.h>
+ #include <asm/processor.h>
+ #include <linux/kasan.h>
+
+ static DEFINE_PER_CPU(struct llist_head, raised_list);
+ static DEFINE_PER_CPU(struct llist_head, lazy_list);
++static DEFINE_PER_CPU(struct task_struct *, irq_workd);
++
++static void wake_irq_workd(void)
++{
++ struct task_struct *tsk = __this_cpu_read(irq_workd);
++
++ if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
++ wake_up_process(tsk);
++}
++
++#ifdef CONFIG_SMP
++static void irq_work_wake(struct irq_work *entry)
++{
++ wake_irq_workd();
++}
++
++static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
++ IRQ_WORK_INIT_HARD(irq_work_wake);
++#endif
++
++static int irq_workd_should_run(unsigned int cpu)
++{
++ return !llist_empty(this_cpu_ptr(&lazy_list));
++}
+
+ /*
+ * Claim the entry so that no one else will poke at it.
+@@ -52,15 +77,29 @@ void __weak arch_irq_work_raise(void)
+ /* Enqueue on current CPU, work must already be claimed and preempt disabled */
+ static void __irq_work_queue_local(struct irq_work *work)
+ {
++ struct llist_head *list;
++ bool rt_lazy_work = false;
++ bool lazy_work = false;
++ int work_flags;
++
++ work_flags = atomic_read(&work->node.a_flags);
++ if (work_flags & IRQ_WORK_LAZY)
++ lazy_work = true;
++ else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
++ !(work_flags & IRQ_WORK_HARD_IRQ))
++ rt_lazy_work = true;
++
++ if (lazy_work || rt_lazy_work)
++ list = this_cpu_ptr(&lazy_list);
++ else
++ list = this_cpu_ptr(&raised_list);
++
++ if (!llist_add(&work->node.llist, list))
++ return;
++
+ /* If the work is "lazy", handle it from next tick if any */
+- if (atomic_read(&work->node.a_flags) & IRQ_WORK_LAZY) {
+- if (llist_add(&work->node.llist, this_cpu_ptr(&lazy_list)) &&
+- tick_nohz_tick_stopped())
+- arch_irq_work_raise();
+- } else {
+- if (llist_add(&work->node.llist, this_cpu_ptr(&raised_list)))
+- arch_irq_work_raise();
+- }
++ if (!lazy_work || tick_nohz_tick_stopped())
++ arch_irq_work_raise();
+ }
+
+ /* Enqueue the irq work @work on the current CPU */
+@@ -104,17 +143,34 @@ bool irq_work_queue_on(struct irq_work *work, int cpu)
+ if (cpu != smp_processor_id()) {
+ /* Arch remote IPI send/receive backend aren't NMI safe */
+ WARN_ON_ONCE(in_nmi());
++
++ /*
++ * On PREEMPT_RT the items which are not marked as
++ * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
++ * item is used on the remote CPU to wake the thread.
++ */
++ if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
++ !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) {
++
++ if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu)))
++ goto out;
++
++ work = &per_cpu(irq_work_wakeup, cpu);
++ if (!irq_work_claim(work))
++ goto out;
++ }
++
+ __smp_call_single_queue(cpu, &work->node.llist);
+ } else {
+ __irq_work_queue_local(work);
+ }
++out:
+ preempt_enable();
+
+ return true;
+ #endif /* CONFIG_SMP */
+ }
+
+-
+ bool irq_work_needs_cpu(void)
+ {
+ struct llist_head *raised, *lazy;
+@@ -170,7 +226,12 @@ static void irq_work_run_list(struct llist_head *list)
+ struct irq_work *work, *tmp;
+ struct llist_node *llnode;
+
+- BUG_ON(!irqs_disabled());
++ /*
++ * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
++ * in a per-CPU thread in preemptible context. Only the items which are
++ * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
++ */
++ BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));
+
+ if (llist_empty(list))
+ return;
+@@ -187,7 +248,10 @@ static void irq_work_run_list(struct llist_head *list)
+ void irq_work_run(void)
+ {
+ irq_work_run_list(this_cpu_ptr(&raised_list));
+- irq_work_run_list(this_cpu_ptr(&lazy_list));
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ irq_work_run_list(this_cpu_ptr(&lazy_list));
++ else
++ wake_irq_workd();
+ }
+ EXPORT_SYMBOL_GPL(irq_work_run);
+
+@@ -197,7 +261,11 @@ void irq_work_tick(void)
+
+ if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
+ irq_work_run_list(raised);
+- irq_work_run_list(this_cpu_ptr(&lazy_list));
++
++ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
++ irq_work_run_list(this_cpu_ptr(&lazy_list));
++ else
++ wake_irq_workd();
+ }
+
+ /*
+@@ -219,3 +287,29 @@ void irq_work_sync(struct irq_work *work)
+ cpu_relax();
+ }
+ EXPORT_SYMBOL_GPL(irq_work_sync);
++
++static void run_irq_workd(unsigned int cpu)
++{
++ irq_work_run_list(this_cpu_ptr(&lazy_list));
++}
++
++static void irq_workd_setup(unsigned int cpu)
++{
++ sched_set_fifo_low(current);
++}
++
++static struct smp_hotplug_thread irqwork_threads = {
++ .store = &irq_workd,
++ .setup = irq_workd_setup,
++ .thread_should_run = irq_workd_should_run,
++ .thread_fn = run_irq_workd,
++ .thread_comm = "irq_work/%u",
++};
++
++static __init int irq_work_init_threads(void)
++{
++ if (IS_ENABLED(CONFIG_PREEMPT_RT))
++ BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
++ return 0;
++}
++early_initcall(irq_work_init_threads);
+--
+2.33.1
+