Diffstat (limited to 'debian/patches-rt/0009-net-sched-Remove-Qdisc-running-sequence-counter.patch')
-rw-r--r-- debian/patches-rt/0009-net-sched-Remove-Qdisc-running-sequence-counter.patch | 817
1 file changed, 817 insertions(+), 0 deletions(-)
diff --git a/debian/patches-rt/0009-net-sched-Remove-Qdisc-running-sequence-counter.patch b/debian/patches-rt/0009-net-sched-Remove-Qdisc-running-sequence-counter.patch
new file mode 100644
index 000000000..89be30417
--- /dev/null
+++ b/debian/patches-rt/0009-net-sched-Remove-Qdisc-running-sequence-counter.patch
@@ -0,0 +1,817 @@
+From: "Ahmed S. Darwish" <a.darwish@linutronix.de>
+Date: Sat, 16 Oct 2021 10:49:10 +0200
+Subject: [PATCH 9/9] net: sched: Remove Qdisc::running sequence counter
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.15/older/patches-5.15.3-rt21.tar.xz
+
+The Qdisc::running sequence counter has two uses:
+
+ 1. Reliably reading qdisc's tc statistics while the qdisc is running
+ (a seqcount read/retry loop at gnet_stats_add_basic()).
+
+ 2. As a flag, indicating whether the qdisc in question is running
+ (without any retry loops).
+
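+For reference, the read side of the first usage (the code this patch
+removes from gnet_stats_add_basic()) is a classic seqcount retry loop --
+a sketch with abridged arguments:
+
+    do {
+        seq = read_seqcount_begin(running);
+        bytes = u64_stats_read(&b->bytes);
+        packets = u64_stats_read(&b->packets);
+    } while (read_seqcount_retry(running, seq));
+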
+For the first usage, the Qdisc::running sequence counter write section,
+qdisc_run_begin() => qdisc_run_end(), covers a much wider area than what
+is actually needed: the raw qdisc's bstats update. A u64_stats sync
+point was thus introduced (in previous commits) inside the bstats
+structure itself. A local u64_stats write section is then started and
+stopped for the bstats updates.
+
+Use that u64_stats sync point mechanism for the bstats read/retry loop
+at gnet_stats_add_basic().
+
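+In sketch form, the new pairing (per _bstats_update() and the reworked
+gnet_stats_add_basic() below) is:
+
+    /* Writer: a local write section around the bstats update only. */
+    u64_stats_update_begin(&bstats->syncp);
+    u64_stats_add(&bstats->bytes, bytes);
+    u64_stats_add(&bstats->packets, packets);
+    u64_stats_update_end(&bstats->syncp);
+
+    /* Reader: retry only if a writer ran concurrently. */
+    do {
+        start = u64_stats_fetch_begin_irq(&b->syncp);
+        bytes = u64_stats_read(&b->bytes);
+        packets = u64_stats_read(&b->packets);
+    } while (u64_stats_fetch_retry_irq(&b->syncp, start));
+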
+For the second qdisc->running usage, a __QDISC_STATE_RUNNING bit flag,
+accessed with atomic bitops, is sufficient. Using a bit flag instead of
+a sequence counter at qdisc_run_begin/end() and qdisc_is_running() removes
+the SMP barriers implicitly provided by raw_read_seqcount() and
+write_seqcount_begin/end(). All call sites have been surveyed, though, and
+no required ordering was identified.
+
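+Eliding the TCQ_F_NOLOCK branch, the resulting begin/end pair is, in
+sketch form:
+
+    /* begin: returns false if another CPU already owns the qdisc.
+     * test_and_set_bit() is a fully ordered atomic RMW.
+     */
+    return !test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+
+    /* end: clear_bit() is atomic but implies no memory barrier,
+     * hence the call-site survey mentioned above.
+     */
+    clear_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+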
+Now that the qdisc->running sequence counter is no longer used, remove
+it.
+
+Note: using u64_stats implies no sequence counter protection on 64-bit
+architectures. This can lead to the qdisc tc statistics "packets" vs.
+"bytes" values getting out of sync on rare occasions. The individual
+values will still be valid.
+
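+On 64-bit, for example, u64_stats_fetch_begin_irq()/_retry_irq() compile
+away, so the reader's two loads in the sketch above are not atomic as a
+pair:
+
+    bytes = u64_stats_read(&b->bytes);     /* may predate an update */
+    /* a concurrent _bstats_update() can land here on 64-bit */
+    packets = u64_stats_read(&b->packets); /* may postdate it */
+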
+Signed-off-by: Ahmed S. Darwish <a.darwish@linutronix.de>
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+---
+ include/linux/netdevice.h | 4 ---
+ include/net/gen_stats.h | 19 +++++++----------
+ include/net/sch_generic.h | 33 ++++++++++++------------------
+ net/core/gen_estimator.c | 16 +++++++++-----
+ net/core/gen_stats.c | 50 +++++++++++++++++++++++++---------------------
+ net/sched/act_api.c | 9 ++++----
+ net/sched/act_police.c | 2 -
+ net/sched/sch_api.c | 16 ++------------
+ net/sched/sch_atm.c | 3 --
+ net/sched/sch_cbq.c | 9 ++------
+ net/sched/sch_drr.c | 10 ++-------
+ net/sched/sch_ets.c | 3 --
+ net/sched/sch_generic.c | 10 +--------
+ net/sched/sch_hfsc.c | 8 ++-----
+ net/sched/sch_htb.c | 7 ++----
+ net/sched/sch_mq.c | 7 ++----
+ net/sched/sch_mqprio.c | 14 ++++++------
+ net/sched/sch_multiq.c | 3 --
+ net/sched/sch_prio.c | 4 +--
+ net/sched/sch_qfq.c | 7 ++----
+ net/sched/sch_taprio.c | 2 -
+ 21 files changed, 102 insertions(+), 134 deletions(-)
+
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1916,7 +1916,6 @@ enum netdev_ml_priv_type {
+ * @sfp_bus: attached &struct sfp_bus structure.
+ *
+ * @qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
+- * @qdisc_running_key: lockdep class annotating Qdisc->running seqcount
+ *
+ * @proto_down: protocol port state information can be sent to the
+ * switch driver and used to set the phys state of the
+@@ -2250,7 +2249,6 @@ struct net_device {
+ struct phy_device *phydev;
+ struct sfp_bus *sfp_bus;
+ struct lock_class_key *qdisc_tx_busylock;
+- struct lock_class_key *qdisc_running_key;
+ bool proto_down;
+ unsigned wol_enabled:1;
+ unsigned threaded:1;
+@@ -2360,13 +2358,11 @@ static inline void netdev_for_each_tx_qu
+ #define netdev_lockdep_set_classes(dev) \
+ { \
+ static struct lock_class_key qdisc_tx_busylock_key; \
+- static struct lock_class_key qdisc_running_key; \
+ static struct lock_class_key qdisc_xmit_lock_key; \
+ static struct lock_class_key dev_addr_list_lock_key; \
+ unsigned int i; \
+ \
+ (dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key; \
+- (dev)->qdisc_running_key = &qdisc_running_key; \
+ lockdep_set_class(&(dev)->addr_list_lock, \
+ &dev_addr_list_lock_key); \
+ for (i = 0; i < (dev)->num_tx_queues; i++) \
+--- a/include/net/gen_stats.h
++++ b/include/net/gen_stats.h
+@@ -46,18 +46,15 @@ int gnet_stats_start_copy_compat(struct
+ spinlock_t *lock, struct gnet_dump *d,
+ int padattr);
+
+-int gnet_stats_copy_basic(const seqcount_t *running,
+- struct gnet_dump *d,
++int gnet_stats_copy_basic(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+- struct gnet_stats_basic_sync *b);
+-void gnet_stats_add_basic(const seqcount_t *running,
+- struct gnet_stats_basic_sync *bstats,
++ struct gnet_stats_basic_sync *b, bool running);
++void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu,
+- struct gnet_stats_basic_sync *b);
+-int gnet_stats_copy_basic_hw(const seqcount_t *running,
+- struct gnet_dump *d,
++ struct gnet_stats_basic_sync *b, bool running);
++int gnet_stats_copy_basic_hw(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+- struct gnet_stats_basic_sync *b);
++ struct gnet_stats_basic_sync *b, bool running);
+ int gnet_stats_copy_rate_est(struct gnet_dump *d,
+ struct net_rate_estimator __rcu **ptr);
+ int gnet_stats_copy_queue(struct gnet_dump *d,
+@@ -74,13 +71,13 @@ int gen_new_estimator(struct gnet_stats_
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
+ struct net_rate_estimator __rcu **rate_est,
+ spinlock_t *lock,
+- seqcount_t *running, struct nlattr *opt);
++ bool running, struct nlattr *opt);
+ void gen_kill_estimator(struct net_rate_estimator __rcu **ptr);
+ int gen_replace_estimator(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
+ struct net_rate_estimator __rcu **ptr,
+ spinlock_t *lock,
+- seqcount_t *running, struct nlattr *opt);
++ bool running, struct nlattr *opt);
+ bool gen_estimator_active(struct net_rate_estimator __rcu **ptr);
+ bool gen_estimator_read(struct net_rate_estimator __rcu **ptr,
+ struct gnet_stats_rate_est64 *sample);
+--- a/include/net/sch_generic.h
++++ b/include/net/sch_generic.h
+@@ -38,6 +38,10 @@ enum qdisc_state_t {
+ __QDISC_STATE_DEACTIVATED,
+ __QDISC_STATE_MISSED,
+ __QDISC_STATE_DRAINING,
++ /* Only for !TCQ_F_NOLOCK qdisc. Never access it directly.
++ * Use qdisc_run_begin/end() or qdisc_is_running() instead.
++ */
++ __QDISC_STATE_RUNNING,
+ };
+
+ #define QDISC_STATE_MISSED BIT(__QDISC_STATE_MISSED)
+@@ -108,7 +112,6 @@ struct Qdisc {
+ struct sk_buff_head gso_skb ____cacheline_aligned_in_smp;
+ struct qdisc_skb_head q;
+ struct gnet_stats_basic_sync bstats;
+- seqcount_t running;
+ struct gnet_stats_queue qstats;
+ unsigned long state;
+ struct Qdisc *next_sched;
+@@ -143,11 +146,15 @@ static inline struct Qdisc *qdisc_refcou
+ return NULL;
+ }
+
++/* For !TCQ_F_NOLOCK qdisc: callers must either call this within a qdisc
++ * root_lock section, or provide their own memory barriers -- ordering
++ * against qdisc_run_begin/end() atomic bit operations.
++ */
+ static inline bool qdisc_is_running(struct Qdisc *qdisc)
+ {
+ if (qdisc->flags & TCQ_F_NOLOCK)
+ return spin_is_locked(&qdisc->seqlock);
+- return (raw_read_seqcount(&qdisc->running) & 1) ? true : false;
++ return test_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+ }
+
+ static inline bool nolock_qdisc_is_empty(const struct Qdisc *qdisc)
+@@ -167,6 +174,9 @@ static inline bool qdisc_is_empty(const
+ return !READ_ONCE(qdisc->q.qlen);
+ }
+
++/* For !TCQ_F_NOLOCK qdisc, qdisc_run_begin/end() must be invoked with
++ * the qdisc root lock acquired.
++ */
+ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
+ {
+ if (qdisc->flags & TCQ_F_NOLOCK) {
+@@ -206,15 +216,8 @@ static inline bool qdisc_run_begin(struc
+ * after it releases the lock at the end of qdisc_run_end().
+ */
+ return spin_trylock(&qdisc->seqlock);
+- } else if (qdisc_is_running(qdisc)) {
+- return false;
+ }
+- /* Variant of write_seqcount_begin() telling lockdep a trylock
+- * was attempted.
+- */
+- raw_write_seqcount_begin(&qdisc->running);
+- seqcount_acquire(&qdisc->running.dep_map, 0, 1, _RET_IP_);
+- return true;
++	return !test_and_set_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+ }
+
+ static inline void qdisc_run_end(struct Qdisc *qdisc)
+@@ -226,7 +229,7 @@ static inline void qdisc_run_end(struct
+ &qdisc->state)))
+ __netif_schedule(qdisc);
+ } else {
+- write_seqcount_end(&qdisc->running);
++ clear_bit(__QDISC_STATE_RUNNING, &qdisc->state);
+ }
+ }
+
+@@ -590,14 +593,6 @@ static inline spinlock_t *qdisc_root_sle
+ return qdisc_lock(root);
+ }
+
+-static inline seqcount_t *qdisc_root_sleeping_running(const struct Qdisc *qdisc)
+-{
+- struct Qdisc *root = qdisc_root_sleeping(qdisc);
+-
+- ASSERT_RTNL();
+- return &root->running;
+-}
+-
+ static inline struct net_device *qdisc_dev(const struct Qdisc *qdisc)
+ {
+ return qdisc->dev_queue->dev;
+--- a/net/core/gen_estimator.c
++++ b/net/core/gen_estimator.c
+@@ -42,7 +42,7 @@
+ struct net_rate_estimator {
+ struct gnet_stats_basic_sync *bstats;
+ spinlock_t *stats_lock;
+- seqcount_t *running;
++ bool running;
+ struct gnet_stats_basic_sync __percpu *cpu_bstats;
+ u8 ewma_log;
+ u8 intvl_log; /* period : (250ms << intvl_log) */
+@@ -66,7 +66,7 @@ static void est_fetch_counters(struct ne
+ if (e->stats_lock)
+ spin_lock(e->stats_lock);
+
+- gnet_stats_add_basic(e->running, b, e->cpu_bstats, e->bstats);
++ gnet_stats_add_basic(b, e->cpu_bstats, e->bstats, e->running);
+
+ if (e->stats_lock)
+ spin_unlock(e->stats_lock);
+@@ -113,7 +113,9 @@ static void est_timer(struct timer_list
+ * @cpu_bstats: bstats per cpu
+ * @rate_est: rate estimator statistics
+ * @lock: lock for statistics and control path
+- * @running: qdisc running seqcount
++ * @running: true if @bstats represents a running qdisc, thus @bstats'
++ * internal values might change during basic reads. Only used
++ * if @bstats_cpu is NULL
+ * @opt: rate estimator configuration TLV
+ *
+ * Creates a new rate estimator with &bstats as source and &rate_est
+@@ -129,7 +131,7 @@ int gen_new_estimator(struct gnet_stats_
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
+ struct net_rate_estimator __rcu **rate_est,
+ spinlock_t *lock,
+- seqcount_t *running,
++ bool running,
+ struct nlattr *opt)
+ {
+ struct gnet_estimator *parm = nla_data(opt);
+@@ -218,7 +220,9 @@ EXPORT_SYMBOL(gen_kill_estimator);
+ * @cpu_bstats: bstats per cpu
+ * @rate_est: rate estimator statistics
+ * @lock: lock for statistics and control path
+- * @running: qdisc running seqcount (might be NULL)
++ * @running: true if @bstats represents a running qdisc, thus @bstats'
++ * internal values might change during basic reads. Only used
++ * if @cpu_bstats is NULL
+ * @opt: rate estimator configuration TLV
+ *
+ * Replaces the configuration of a rate estimator by calling
+@@ -230,7 +234,7 @@ int gen_replace_estimator(struct gnet_st
+ struct gnet_stats_basic_sync __percpu *cpu_bstats,
+ struct net_rate_estimator __rcu **rate_est,
+ spinlock_t *lock,
+- seqcount_t *running, struct nlattr *opt)
++ bool running, struct nlattr *opt)
+ {
+ return gen_new_estimator(bstats, cpu_bstats, rate_est,
+ lock, running, opt);
+--- a/net/core/gen_stats.c
++++ b/net/core/gen_stats.c
+@@ -146,42 +146,42 @@ static void gnet_stats_add_basic_cpu(str
+ _bstats_update(bstats, t_bytes, t_packets);
+ }
+
+-void gnet_stats_add_basic(const seqcount_t *running,
+- struct gnet_stats_basic_sync *bstats,
++void gnet_stats_add_basic(struct gnet_stats_basic_sync *bstats,
+ struct gnet_stats_basic_sync __percpu *cpu,
+- struct gnet_stats_basic_sync *b)
++ struct gnet_stats_basic_sync *b, bool running)
+ {
+- unsigned int seq;
++ unsigned int start;
+ u64 bytes = 0;
+ u64 packets = 0;
+
++ WARN_ON_ONCE((cpu || running) && !in_task());
++
+ if (cpu) {
+ gnet_stats_add_basic_cpu(bstats, cpu);
+ return;
+ }
+ do {
+ if (running)
+- seq = read_seqcount_begin(running);
++ start = u64_stats_fetch_begin_irq(&b->syncp);
+ bytes = u64_stats_read(&b->bytes);
+ packets = u64_stats_read(&b->packets);
+- } while (running && read_seqcount_retry(running, seq));
++ } while (running && u64_stats_fetch_retry_irq(&b->syncp, start));
+
+ _bstats_update(bstats, bytes, packets);
+ }
+ EXPORT_SYMBOL(gnet_stats_add_basic);
+
+ static int
+-___gnet_stats_copy_basic(const seqcount_t *running,
+- struct gnet_dump *d,
++___gnet_stats_copy_basic(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+ struct gnet_stats_basic_sync *b,
+- int type)
++ int type, bool running)
+ {
+ struct gnet_stats_basic_sync bstats;
+ u64 bstats_bytes, bstats_packets;
+
+ gnet_stats_basic_sync_init(&bstats);
+- gnet_stats_add_basic(running, &bstats, cpu, b);
++ gnet_stats_add_basic(&bstats, cpu, b, running);
+
+ bstats_bytes = u64_stats_read(&bstats.bytes);
+ bstats_packets = u64_stats_read(&bstats.packets);
+@@ -210,10 +210,14 @@ static int
+
+ /**
+ * gnet_stats_copy_basic - copy basic statistics into statistic TLV
+- * @running: seqcount_t pointer
+ * @d: dumping handle
+ * @cpu: copy statistic per cpu
+ * @b: basic statistics
++ * @running: true if @b represents a running qdisc, thus @b's
++ * internal values might change during basic reads.
++ * Only used if @cpu is NULL
++ *
++ * Context: task; must not be run from IRQ or BH contexts
+ *
+ * Appends the basic statistics to the top level TLV created by
+ * gnet_stats_start_copy().
+@@ -222,22 +226,25 @@ static int
+ * if the room in the socket buffer was not sufficient.
+ */
+ int
+-gnet_stats_copy_basic(const seqcount_t *running,
+- struct gnet_dump *d,
++gnet_stats_copy_basic(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+- struct gnet_stats_basic_sync *b)
++ struct gnet_stats_basic_sync *b,
++ bool running)
+ {
+- return ___gnet_stats_copy_basic(running, d, cpu, b,
+- TCA_STATS_BASIC);
++ return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC, running);
+ }
+ EXPORT_SYMBOL(gnet_stats_copy_basic);
+
+ /**
+ * gnet_stats_copy_basic_hw - copy basic hw statistics into statistic TLV
+- * @running: seqcount_t pointer
+ * @d: dumping handle
+ * @cpu: copy statistic per cpu
+ * @b: basic statistics
++ * @running: true if @b represents a running qdisc, thus @b's
++ * internal values might change during basic reads.
++ * Only used if @cpu is NULL
++ *
++ * Context: task; must not be run from IRQ or BH contexts
+ *
+ * Appends the basic statistics to the top level TLV created by
+ * gnet_stats_start_copy().
+@@ -246,13 +253,12 @@ EXPORT_SYMBOL(gnet_stats_copy_basic);
+ * if the room in the socket buffer was not sufficient.
+ */
+ int
+-gnet_stats_copy_basic_hw(const seqcount_t *running,
+- struct gnet_dump *d,
++gnet_stats_copy_basic_hw(struct gnet_dump *d,
+ struct gnet_stats_basic_sync __percpu *cpu,
+- struct gnet_stats_basic_sync *b)
++ struct gnet_stats_basic_sync *b,
++ bool running)
+ {
+- return ___gnet_stats_copy_basic(running, d, cpu, b,
+- TCA_STATS_BASIC_HW);
++ return ___gnet_stats_copy_basic(d, cpu, b, TCA_STATS_BASIC_HW, running);
+ }
+ EXPORT_SYMBOL(gnet_stats_copy_basic_hw);
+
+--- a/net/sched/act_api.c
++++ b/net/sched/act_api.c
+@@ -501,7 +501,7 @@ int tcf_idr_create(struct tc_action_net
+ if (est) {
+ err = gen_new_estimator(&p->tcfa_bstats, p->cpu_bstats,
+ &p->tcfa_rate_est,
+- &p->tcfa_lock, NULL, est);
++ &p->tcfa_lock, false, est);
+ if (err)
+ goto err4;
+ }
+@@ -1173,9 +1173,10 @@ int tcf_action_copy_stats(struct sk_buff
+ if (err < 0)
+ goto errout;
+
+- if (gnet_stats_copy_basic(NULL, &d, p->cpu_bstats, &p->tcfa_bstats) < 0 ||
+- gnet_stats_copy_basic_hw(NULL, &d, p->cpu_bstats_hw,
+- &p->tcfa_bstats_hw) < 0 ||
++ if (gnet_stats_copy_basic(&d, p->cpu_bstats,
++ &p->tcfa_bstats, false) < 0 ||
++ gnet_stats_copy_basic_hw(&d, p->cpu_bstats_hw,
++ &p->tcfa_bstats_hw, false) < 0 ||
+ gnet_stats_copy_rate_est(&d, &p->tcfa_rate_est) < 0 ||
+ gnet_stats_copy_queue(&d, p->cpu_qstats,
+ &p->tcfa_qstats,
+--- a/net/sched/act_police.c
++++ b/net/sched/act_police.c
+@@ -125,7 +125,7 @@ static int tcf_police_init(struct net *n
+ police->common.cpu_bstats,
+ &police->tcf_rate_est,
+ &police->tcf_lock,
+- NULL, est);
++ false, est);
+ if (err)
+ goto failure;
+ } else if (tb[TCA_POLICE_AVRATE] &&
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -942,8 +942,7 @@ static int tc_fill_qdisc(struct sk_buff
+ cpu_qstats = q->cpu_qstats;
+ }
+
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q),
+- &d, cpu_bstats, &q->bstats) < 0 ||
++ if (gnet_stats_copy_basic(&d, cpu_bstats, &q->bstats, true) < 0 ||
+ gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
+ gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0)
+ goto nla_put_failure;
+@@ -1264,26 +1263,17 @@ static struct Qdisc *qdisc_create(struct
+ rcu_assign_pointer(sch->stab, stab);
+ }
+ if (tca[TCA_RATE]) {
+- seqcount_t *running;
+-
+ err = -EOPNOTSUPP;
+ if (sch->flags & TCQ_F_MQROOT) {
+ NL_SET_ERR_MSG(extack, "Cannot attach rate estimator to a multi-queue root qdisc");
+ goto err_out4;
+ }
+
+- if (sch->parent != TC_H_ROOT &&
+- !(sch->flags & TCQ_F_INGRESS) &&
+- (!p || !(p->flags & TCQ_F_MQROOT)))
+- running = qdisc_root_sleeping_running(sch);
+- else
+- running = &sch->running;
+-
+ err = gen_new_estimator(&sch->bstats,
+ sch->cpu_bstats,
+ &sch->rate_est,
+ NULL,
+- running,
++ true,
+ tca[TCA_RATE]);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Failed to generate new estimator");
+@@ -1359,7 +1349,7 @@ static int qdisc_change(struct Qdisc *sc
+ sch->cpu_bstats,
+ &sch->rate_est,
+ NULL,
+- qdisc_root_sleeping_running(sch),
++ true,
+ tca[TCA_RATE]);
+ }
+ out:
+--- a/net/sched/sch_atm.c
++++ b/net/sched/sch_atm.c
+@@ -653,8 +653,7 @@ atm_tc_dump_class_stats(struct Qdisc *sc
+ {
+ struct atm_flow_data *flow = (struct atm_flow_data *)arg;
+
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+- d, NULL, &flow->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 ||
+ gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0)
+ return -1;
+
+--- a/net/sched/sch_cbq.c
++++ b/net/sched/sch_cbq.c
+@@ -1383,8 +1383,7 @@ cbq_dump_class_stats(struct Qdisc *sch,
+ if (cl->undertime != PSCHED_PASTPERFECT)
+ cl->xstats.undertime = cl->undertime - q->now;
+
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+- d, NULL, &cl->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
+ return -1;
+@@ -1518,7 +1517,7 @@ cbq_change_class(struct Qdisc *sch, u32
+ err = gen_replace_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
+ NULL,
+- qdisc_root_sleeping_running(sch),
++ true,
+ tca[TCA_RATE]);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator");
+@@ -1619,9 +1618,7 @@ cbq_change_class(struct Qdisc *sch, u32
+
+ if (tca[TCA_RATE]) {
+ err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
+- NULL,
+- qdisc_root_sleeping_running(sch),
+- tca[TCA_RATE]);
++ NULL, true, tca[TCA_RATE]);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Couldn't create new estimator");
+ tcf_block_put(cl->block);
+--- a/net/sched/sch_drr.c
++++ b/net/sched/sch_drr.c
+@@ -85,8 +85,7 @@ static int drr_change_class(struct Qdisc
+ if (tca[TCA_RATE]) {
+ err = gen_replace_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
+- NULL,
+- qdisc_root_sleeping_running(sch),
++ NULL, true,
+ tca[TCA_RATE]);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Failed to replace estimator");
+@@ -119,9 +118,7 @@ static int drr_change_class(struct Qdisc
+
+ if (tca[TCA_RATE]) {
+ err = gen_replace_estimator(&cl->bstats, NULL, &cl->rate_est,
+- NULL,
+- qdisc_root_sleeping_running(sch),
+- tca[TCA_RATE]);
++ NULL, true, tca[TCA_RATE]);
+ if (err) {
+ NL_SET_ERR_MSG(extack, "Failed to replace estimator");
+ qdisc_put(cl->qdisc);
+@@ -268,8 +265,7 @@ static int drr_dump_class_stats(struct Q
+ if (qlen)
+ xstats.deficit = cl->deficit;
+
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+- d, NULL, &cl->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0)
+ return -1;
+--- a/net/sched/sch_ets.c
++++ b/net/sched/sch_ets.c
+@@ -325,8 +325,7 @@ static int ets_class_dump_stats(struct Q
+ struct ets_class *cl = ets_class_from_arg(sch, arg);
+ struct Qdisc *cl_q = cl->qdisc;
+
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+- d, NULL, &cl_q->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &cl_q->bstats, true) < 0 ||
+ qdisc_qstats_copy(d, cl_q) < 0)
+ return -1;
+
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -304,8 +304,8 @@ static struct sk_buff *dequeue_skb(struc
+
+ /*
+ * Transmit possibly several skbs, and handle the return status as
+- * required. Owning running seqcount bit guarantees that
+- * only one CPU can execute this function.
++ * required. Owning qdisc running bit guarantees that only one CPU
++ * can execute this function.
+ *
+ * Returns to the caller:
+ * false - hardware queue frozen backoff
+@@ -606,7 +606,6 @@ struct Qdisc noop_qdisc = {
+ .ops = &noop_qdisc_ops,
+ .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
+ .dev_queue = &noop_netdev_queue,
+- .running = SEQCNT_ZERO(noop_qdisc.running),
+ .busylock = __SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
+ .gso_skb = {
+ .next = (struct sk_buff *)&noop_qdisc.gso_skb,
+@@ -867,7 +866,6 @@ struct Qdisc_ops pfifo_fast_ops __read_m
+ EXPORT_SYMBOL(pfifo_fast_ops);
+
+ static struct lock_class_key qdisc_tx_busylock;
+-static struct lock_class_key qdisc_running_key;
+
+ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
+ const struct Qdisc_ops *ops,
+@@ -917,10 +915,6 @@ struct Qdisc *qdisc_alloc(struct netdev_
+ lockdep_set_class(&sch->seqlock,
+ dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+
+- seqcount_init(&sch->running);
+- lockdep_set_class(&sch->running,
+- dev->qdisc_running_key ?: &qdisc_running_key);
+-
+ sch->ops = ops;
+ sch->flags = ops->static_flags;
+ sch->enqueue = ops->enqueue;
+--- a/net/sched/sch_hfsc.c
++++ b/net/sched/sch_hfsc.c
+@@ -965,7 +965,7 @@ hfsc_change_class(struct Qdisc *sch, u32
+ err = gen_replace_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
+ NULL,
+- qdisc_root_sleeping_running(sch),
++ true,
+ tca[TCA_RATE]);
+ if (err)
+ return err;
+@@ -1033,9 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32
+
+ if (tca[TCA_RATE]) {
+ err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
+- NULL,
+- qdisc_root_sleeping_running(sch),
+- tca[TCA_RATE]);
++ NULL, true, tca[TCA_RATE]);
+ if (err) {
+ tcf_block_put(cl->block);
+ kfree(cl);
+@@ -1328,7 +1326,7 @@ hfsc_dump_class_stats(struct Qdisc *sch,
+ xstats.work = cl->cl_total;
+ xstats.rtwork = cl->cl_cumul;
+
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
+ return -1;
+--- a/net/sched/sch_htb.c
++++ b/net/sched/sch_htb.c
+@@ -1368,8 +1368,7 @@ htb_dump_class_stats(struct Qdisc *sch,
+ }
+ }
+
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+- d, NULL, &cl->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ gnet_stats_copy_queue(d, NULL, &qs, qlen) < 0)
+ return -1;
+@@ -1865,7 +1864,7 @@ static int htb_change_class(struct Qdisc
+ err = gen_new_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
+ NULL,
+- qdisc_root_sleeping_running(sch),
++ true,
+ tca[TCA_RATE] ? : &est.nla);
+ if (err)
+ goto err_block_put;
+@@ -1991,7 +1990,7 @@ static int htb_change_class(struct Qdisc
+ err = gen_replace_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
+ NULL,
+- qdisc_root_sleeping_running(sch),
++ true,
+ tca[TCA_RATE]);
+ if (err)
+ return err;
+--- a/net/sched/sch_mq.c
++++ b/net/sched/sch_mq.c
+@@ -144,8 +144,8 @@ static int mq_dump(struct Qdisc *sch, st
+ qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
+ spin_lock_bh(qdisc_lock(qdisc));
+
+- gnet_stats_add_basic(NULL, &sch->bstats, qdisc->cpu_bstats,
+- &qdisc->bstats);
++ gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
++ &qdisc->bstats, false);
+ gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
+ &qdisc->qstats);
+ sch->q.qlen += qdisc_qlen(qdisc);
+@@ -231,8 +231,7 @@ static int mq_dump_class_stats(struct Qd
+ struct netdev_queue *dev_queue = mq_queue_get(sch, cl);
+
+ sch = dev_queue->qdisc_sleeping;
+- if (gnet_stats_copy_basic(&sch->running, d, sch->cpu_bstats,
+- &sch->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, sch->cpu_bstats, &sch->bstats, true) < 0 ||
+ qdisc_qstats_copy(d, sch) < 0)
+ return -1;
+ return 0;
+--- a/net/sched/sch_mqprio.c
++++ b/net/sched/sch_mqprio.c
+@@ -402,8 +402,8 @@ static int mqprio_dump(struct Qdisc *sch
+ qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping;
+ spin_lock_bh(qdisc_lock(qdisc));
+
+- gnet_stats_add_basic(NULL, &sch->bstats, qdisc->cpu_bstats,
+- &qdisc->bstats);
++ gnet_stats_add_basic(&sch->bstats, qdisc->cpu_bstats,
++ &qdisc->bstats, false);
+ gnet_stats_add_queue(&sch->qstats, qdisc->cpu_qstats,
+ &qdisc->qstats);
+ sch->q.qlen += qdisc_qlen(qdisc);
+@@ -519,8 +519,8 @@ static int mqprio_dump_class_stats(struc
+
+ spin_lock_bh(qdisc_lock(qdisc));
+
+- gnet_stats_add_basic(NULL, &bstats, qdisc->cpu_bstats,
+- &qdisc->bstats);
++ gnet_stats_add_basic(&bstats, qdisc->cpu_bstats,
++ &qdisc->bstats, false);
+ gnet_stats_add_queue(&qstats, qdisc->cpu_qstats,
+ &qdisc->qstats);
+ sch->q.qlen += qdisc_qlen(qdisc);
+@@ -532,15 +532,15 @@ static int mqprio_dump_class_stats(struc
+ /* Reclaim root sleeping lock before completing stats */
+ if (d->lock)
+ spin_lock_bh(d->lock);
+- if (gnet_stats_copy_basic(NULL, d, NULL, &bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &bstats, false) < 0 ||
+ gnet_stats_copy_queue(d, NULL, &qstats, qlen) < 0)
+ return -1;
+ } else {
+ struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+ sch = dev_queue->qdisc_sleeping;
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d,
+- sch->cpu_bstats, &sch->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, sch->cpu_bstats,
++ &sch->bstats, true) < 0 ||
+ qdisc_qstats_copy(d, sch) < 0)
+ return -1;
+ }
+--- a/net/sched/sch_multiq.c
++++ b/net/sched/sch_multiq.c
+@@ -338,8 +338,7 @@ static int multiq_dump_class_stats(struc
+ struct Qdisc *cl_q;
+
+ cl_q = q->queues[cl - 1];
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+- d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, cl_q->cpu_bstats, &cl_q->bstats, true) < 0 ||
+ qdisc_qstats_copy(d, cl_q) < 0)
+ return -1;
+
+--- a/net/sched/sch_prio.c
++++ b/net/sched/sch_prio.c
+@@ -361,8 +361,8 @@ static int prio_dump_class_stats(struct
+ struct Qdisc *cl_q;
+
+ cl_q = q->queues[cl - 1];
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+- d, cl_q->cpu_bstats, &cl_q->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, cl_q->cpu_bstats,
++ &cl_q->bstats, true) < 0 ||
+ qdisc_qstats_copy(d, cl_q) < 0)
+ return -1;
+
+--- a/net/sched/sch_qfq.c
++++ b/net/sched/sch_qfq.c
+@@ -451,7 +451,7 @@ static int qfq_change_class(struct Qdisc
+ err = gen_replace_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
+ NULL,
+- qdisc_root_sleeping_running(sch),
++ true,
+ tca[TCA_RATE]);
+ if (err)
+ return err;
+@@ -478,7 +478,7 @@ static int qfq_change_class(struct Qdisc
+ err = gen_new_estimator(&cl->bstats, NULL,
+ &cl->rate_est,
+ NULL,
+- qdisc_root_sleeping_running(sch),
++ true,
+ tca[TCA_RATE]);
+ if (err)
+ goto destroy_class;
+@@ -640,8 +640,7 @@ static int qfq_dump_class_stats(struct Q
+ xstats.weight = cl->agg->class_weight;
+ xstats.lmax = cl->agg->lmax;
+
+- if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
+- d, NULL, &cl->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
+ gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
+ qdisc_qstats_copy(d, cl->qdisc) < 0)
+ return -1;
+--- a/net/sched/sch_taprio.c
++++ b/net/sched/sch_taprio.c
+@@ -1977,7 +1977,7 @@ static int taprio_dump_class_stats(struc
+ struct netdev_queue *dev_queue = taprio_queue_get(sch, cl);
+
+ sch = dev_queue->qdisc_sleeping;
+- if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
++ if (gnet_stats_copy_basic(d, NULL, &sch->bstats, true) < 0 ||
+ qdisc_qstats_copy(d, sch) < 0)
+ return -1;
+ return 0;