author     Salvatore Bonaccorso <carnil@debian.org>  2019-12-18 18:06:31 +0100
committer  Salvatore Bonaccorso <carnil@debian.org>  2019-12-18 22:50:21 +0100
commit     102987a83771aa994821d4d982d06faa64c789e4 (patch)
tree       3e5ab5206886ec297a1a97ce80f504d75468ed2f /debian/patches-rt/of-Rework-and-simplify-phandle-cache-to-use-a-fixed-.patch
parent     479cb120ecb2b3f2c4d929a7b57860248d6f79bd (diff)
download   linux-debian-102987a83771aa994821d4d982d06faa64c789e4.tar.gz
[rt] Update to 5.4.3-rt1 and re-enable
Adjust for context changes due to backport of e66b39af00f4 ("workqueue: Fix pwq ref leak in rescuer_thread()") and def98c84b6cd ("workqueue: Fix spurious sanity check failures in destroy_workqueue()") in 5.4.4.
Diffstat (limited to 'debian/patches-rt/of-Rework-and-simplify-phandle-cache-to-use-a-fixed-.patch')
-rw-r--r--  debian/patches-rt/of-Rework-and-simplify-phandle-cache-to-use-a-fixed-.patch  |  315
1 file changed, 315 insertions, 0 deletions
diff --git a/debian/patches-rt/of-Rework-and-simplify-phandle-cache-to-use-a-fixed-.patch b/debian/patches-rt/of-Rework-and-simplify-phandle-cache-to-use-a-fixed-.patch
new file mode 100644
index 000000000..6c301a68d
--- /dev/null
+++ b/debian/patches-rt/of-Rework-and-simplify-phandle-cache-to-use-a-fixed-.patch
@@ -0,0 +1,315 @@
+From: Rob Herring <robh@kernel.org>
+Date: Wed, 11 Dec 2019 17:23:45 -0600
+Subject: [PATCH] of: Rework and simplify phandle cache to use a fixed size
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.4/older/patches-5.4.3-rt1.tar.xz
+
+The phandle cache was added to speed up of_find_node_by_phandle() by
+avoiding walking the whole DT to find a matching phandle. The
+implementation has several shortcomings:
+
+ - The cache is designed to work on a linear set of phandle values.
+ This is true for dtc generated DTs, but not for other cases such as
+ Power.
+ - The cache isn't enabled until of_core_init() and a typical system
+ may see hundreds of calls to of_find_node_by_phandle() before that
+ point.
+ - The cache is freed and re-allocated when the number of phandles
+ changes.
+ - It takes a raw spinlock around a memory allocation which breaks on
+ RT.
+
+Change the implementation to a fixed size and use hash_32() as the
+cache index. This greatly simplifies the implementation. It avoids
+the need for any re-alloc of the cache and taking a reference on nodes
+in the cache. We only have a single source of removing cache entries
+which is of_detach_node().
+
+Using hash_32() removes any assumption on phandle values, improving
+the hit rate for non-linear phandle values. The effect on linear values
+using hash_32() is about a 10% collision rate. The chances of thrashing on
+colliding values seem to be low.
+
+To compare performance, I used a RK3399 board which is a pretty typical
+system. I found that just measuring boot time as done previously is
+noisy and may be impacted by other things. Also bringing up secondary
+cores causes some issues with measuring, so I booted with 'nr_cpus=1'.
+With no caching, calls to of_find_node_by_phandle() take about 20124 us
+for 1248 calls. There are an additional 288 calls before timekeeping is
+up. Using the average time per hit/miss with the cache, we can calculate
+these calls to take 690 us (277 hit / 11 miss) with a 128 entry cache
+and 13319 us with no cache or an uninitialized cache.
+
+Comparing the 3 implementations, the time spent in
+of_find_node_by_phandle() is:
+
+no cache: 20124 us (+ 13319 us)
+128 entry cache: 5134 us (+ 690 us)
+current cache: 819 us (+ 13319 us)
+
+We could move the allocation of the cache earlier to improve the
+current cache, but that just further complicates the situation as it
+needs to be after slab is up, so we can't do it when unflattening (which
+uses memblock).
+
+Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Segher Boessenkool <segher@kernel.crashing.org>
+Cc: Frank Rowand <frowand.list@gmail.com>
+Signed-off-by: Rob Herring <robh@kernel.org>
+Link: https://lkml.kernel.org/r/20191211232345.24810-1-robh@kernel.org
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/of/base.c | 133 +++++++++---------------------------------------
+ drivers/of/dynamic.c | 2
+ drivers/of/of_private.h | 4 -
+ drivers/of/overlay.c | 10 ---
+ 4 files changed, 28 insertions(+), 121 deletions(-)
+
+--- a/drivers/of/base.c
++++ b/drivers/of/base.c
+@@ -123,115 +123,38 @@ int __weak of_node_to_nid(struct device_
+ }
+ #endif
+
+-/*
+- * Assumptions behind phandle_cache implementation:
+- * - phandle property values are in a contiguous range of 1..n
+- *
+- * If the assumptions do not hold, then
+- * - the phandle lookup overhead reduction provided by the cache
+- * will likely be less
+- */
++#define OF_PHANDLE_CACHE_BITS 7
++#define OF_PHANDLE_CACHE_SZ BIT(OF_PHANDLE_CACHE_BITS)
+
+-static struct device_node **phandle_cache;
+-static u32 phandle_cache_mask;
++static struct device_node *phandle_cache[OF_PHANDLE_CACHE_SZ];
+
+-/*
+- * Caller must hold devtree_lock.
+- */
+-static void __of_free_phandle_cache(void)
++static u32 of_phandle_cache_hash(phandle handle)
+ {
+- u32 cache_entries = phandle_cache_mask + 1;
+- u32 k;
+-
+- if (!phandle_cache)
+- return;
+-
+- for (k = 0; k < cache_entries; k++)
+- of_node_put(phandle_cache[k]);
+-
+- kfree(phandle_cache);
+- phandle_cache = NULL;
++ return hash_32(handle, OF_PHANDLE_CACHE_BITS);
+ }
+
+-int of_free_phandle_cache(void)
+-{
+- unsigned long flags;
+-
+- raw_spin_lock_irqsave(&devtree_lock, flags);
+-
+- __of_free_phandle_cache();
+-
+- raw_spin_unlock_irqrestore(&devtree_lock, flags);
+-
+- return 0;
+-}
+-#if !defined(CONFIG_MODULES)
+-late_initcall_sync(of_free_phandle_cache);
+-#endif
+-
+ /*
+ * Caller must hold devtree_lock.
+ */
+-void __of_free_phandle_cache_entry(phandle handle)
++void __of_phandle_cache_inv_entry(phandle handle)
+ {
+- phandle masked_handle;
++ u32 handle_hash;
+ struct device_node *np;
+
+ if (!handle)
+ return;
+
+- masked_handle = handle & phandle_cache_mask;
+-
+- if (phandle_cache) {
+- np = phandle_cache[masked_handle];
+- if (np && handle == np->phandle) {
+- of_node_put(np);
+- phandle_cache[masked_handle] = NULL;
+- }
+- }
+-}
+-
+-void of_populate_phandle_cache(void)
+-{
+- unsigned long flags;
+- u32 cache_entries;
+- struct device_node *np;
+- u32 phandles = 0;
+-
+- raw_spin_lock_irqsave(&devtree_lock, flags);
+-
+- __of_free_phandle_cache();
++ handle_hash = of_phandle_cache_hash(handle);
+
+- for_each_of_allnodes(np)
+- if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL)
+- phandles++;
+-
+- if (!phandles)
+- goto out;
+-
+- cache_entries = roundup_pow_of_two(phandles);
+- phandle_cache_mask = cache_entries - 1;
+-
+- phandle_cache = kcalloc(cache_entries, sizeof(*phandle_cache),
+- GFP_ATOMIC);
+- if (!phandle_cache)
+- goto out;
+-
+- for_each_of_allnodes(np)
+- if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL) {
+- of_node_get(np);
+- phandle_cache[np->phandle & phandle_cache_mask] = np;
+- }
+-
+-out:
+- raw_spin_unlock_irqrestore(&devtree_lock, flags);
++ np = phandle_cache[handle_hash];
++ if (np && handle == np->phandle)
++ phandle_cache[handle_hash] = NULL;
+ }
+
+ void __init of_core_init(void)
+ {
+ struct device_node *np;
+
+- of_populate_phandle_cache();
+
+ /* Create the kset, and register existing nodes */
+ mutex_lock(&of_mutex);
+@@ -241,8 +164,11 @@ void __init of_core_init(void)
+ pr_err("failed to register existing nodes\n");
+ return;
+ }
+- for_each_of_allnodes(np)
++ for_each_of_allnodes(np) {
+ __of_attach_node_sysfs(np);
++ if (np->phandle && !phandle_cache[of_phandle_cache_hash(np->phandle)])
++ phandle_cache[of_phandle_cache_hash(np->phandle)] = np;
++ }
+ mutex_unlock(&of_mutex);
+
+ /* Symlink in /proc as required by userspace ABI */
+@@ -1223,36 +1149,29 @@ struct device_node *of_find_node_by_phan
+ {
+ struct device_node *np = NULL;
+ unsigned long flags;
+- phandle masked_handle;
++ u32 handle_hash;
+
+ if (!handle)
+ return NULL;
+
+- raw_spin_lock_irqsave(&devtree_lock, flags);
++ handle_hash = of_phandle_cache_hash(handle);
+
+- masked_handle = handle & phandle_cache_mask;
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+
+- if (phandle_cache) {
+- if (phandle_cache[masked_handle] &&
+- handle == phandle_cache[masked_handle]->phandle)
+- np = phandle_cache[masked_handle];
+- if (np && of_node_check_flag(np, OF_DETACHED)) {
+- WARN_ON(1); /* did not uncache np on node removal */
+- of_node_put(np);
+- phandle_cache[masked_handle] = NULL;
+- np = NULL;
+- }
++ if (phandle_cache[handle_hash] &&
++ handle == phandle_cache[handle_hash]->phandle)
++ np = phandle_cache[handle_hash];
++ if (np && of_node_check_flag(np, OF_DETACHED)) {
++ WARN_ON(1); /* did not uncache np on node removal */
++ phandle_cache[handle_hash] = NULL;
++ np = NULL;
+ }
+
+ if (!np) {
+ for_each_of_allnodes(np)
+ if (np->phandle == handle &&
+ !of_node_check_flag(np, OF_DETACHED)) {
+- if (phandle_cache) {
+- /* will put when removed from cache */
+- of_node_get(np);
+- phandle_cache[masked_handle] = np;
+- }
++ phandle_cache[handle_hash] = np;
+ break;
+ }
+ }
+--- a/drivers/of/dynamic.c
++++ b/drivers/of/dynamic.c
+@@ -276,7 +276,7 @@ void __of_detach_node(struct device_node
+ of_node_set_flag(np, OF_DETACHED);
+
+ /* race with of_find_node_by_phandle() prevented by devtree_lock */
+- __of_free_phandle_cache_entry(np->phandle);
++ __of_phandle_cache_inv_entry(np->phandle);
+ }
+
+ /**
+--- a/drivers/of/of_private.h
++++ b/drivers/of/of_private.h
+@@ -85,14 +85,12 @@ int of_resolve_phandles(struct device_no
+ #endif
+
+ #if defined(CONFIG_OF_DYNAMIC)
+-void __of_free_phandle_cache_entry(phandle handle);
++void __of_phandle_cache_inv_entry(phandle handle);
+ #endif
+
+ #if defined(CONFIG_OF_OVERLAY)
+ void of_overlay_mutex_lock(void);
+ void of_overlay_mutex_unlock(void);
+-int of_free_phandle_cache(void);
+-void of_populate_phandle_cache(void);
+ #else
+ static inline void of_overlay_mutex_lock(void) {};
+ static inline void of_overlay_mutex_unlock(void) {};
+--- a/drivers/of/overlay.c
++++ b/drivers/of/overlay.c
+@@ -971,8 +971,6 @@ static int of_overlay_apply(const void *
+ goto err_free_overlay_changeset;
+ }
+
+- of_populate_phandle_cache();
+-
+ ret = __of_changeset_apply_notify(&ovcs->cset);
+ if (ret)
+ pr_err("overlay apply changeset entry notify error %d\n", ret);
+@@ -1215,17 +1213,9 @@ int of_overlay_remove(int *ovcs_id)
+
+ list_del(&ovcs->ovcs_list);
+
+- /*
+- * Disable phandle cache. Avoids race condition that would arise
+- * from removing cache entry when the associated node is deleted.
+- */
+- of_free_phandle_cache();
+-
+ ret_apply = 0;
+ ret = __of_changeset_revert_entries(&ovcs->cset, &ret_apply);
+
+- of_populate_phandle_cache();
+-
+ if (ret) {
+ if (ret_apply)
+ devicetree_state_flags |= DTSF_REVERT_FAIL;
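
Below is a minimal, standalone C sketch of the fixed-size, hash-indexed cache scheme the patch above introduces. It is illustrative only and not kernel code: the toy_node type, the hand-rolled list walk and the printf output are stand-ins (the real implementation lives in drivers/of/base.c, holds devtree_lock and uses the kernel's hash_32()); the multiplicative constant is assumed to match the kernel's GOLDEN_RATIO_32, and no locking is shown.

/*
 * Standalone sketch of a fixed-size, hash-indexed phandle cache.
 * Toy types and names only; see drivers/of/base.c for the real code.
 */
#include <stdint.h>
#include <stdio.h>

#define CACHE_BITS 7                      /* 128 entries, as in the patch */
#define CACHE_SZ   (1u << CACHE_BITS)

struct toy_node {
	uint32_t phandle;
	const char *name;
	struct toy_node *next;            /* stand-in for the allnodes list */
};

static struct toy_node *cache[CACHE_SZ];

/* Multiplicative hash in the spirit of the kernel's hash_32(). */
static uint32_t cache_hash(uint32_t handle)
{
	return (handle * 0x61C88647u) >> (32 - CACHE_BITS);
}

/* Trust the cached pointer only if its phandle matches, else walk the list. */
static struct toy_node *find_by_phandle(struct toy_node *all, uint32_t handle)
{
	uint32_t idx;
	struct toy_node *np;

	if (!handle)
		return NULL;

	idx = cache_hash(handle);
	np = cache[idx];
	if (np && np->phandle == handle)
		return np;                /* cache hit */

	for (np = all; np; np = np->next) {
		if (np->phandle == handle) {
			cache[idx] = np;  /* cache miss: remember for next time */
			return np;
		}
	}
	return NULL;
}

/* Invalidate a single entry, e.g. when a node is detached. */
static void cache_inv_entry(uint32_t handle)
{
	uint32_t idx = cache_hash(handle);

	if (handle && cache[idx] && cache[idx]->phandle == handle)
		cache[idx] = NULL;
}

int main(void)
{
	struct toy_node c = { 3, "serial0", NULL };
	struct toy_node b = { 2, "gpio0", &c };
	struct toy_node a = { 1, "clock-controller", &b };

	printf("%s\n", find_by_phandle(&a, 3)->name);  /* miss, then cached */
	printf("%s\n", find_by_phandle(&a, 3)->name);  /* hit */
	cache_inv_entry(3);
	return 0;
}

As in the patch, a colliding or stale slot is harmless: the lookup only uses the cached pointer when the stored phandle actually matches and otherwise falls back to a full walk, which is why no reference counting, re-allocation or population pass is needed.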