author     Salvatore Bonaccorso <carnil@debian.org>  2019-12-18 18:06:31 +0100
committer  Salvatore Bonaccorso <carnil@debian.org>  2019-12-18 22:50:21 +0100
commit     102987a83771aa994821d4d982d06faa64c789e4 (patch)
tree       3e5ab5206886ec297a1a97ce80f504d75468ed2f /debian/patches-rt/of-Rework-and-simplify-phandle-cache-to-use-a-fixed-.patch
parent     479cb120ecb2b3f2c4d929a7b57860248d6f79bd (diff)
download   linux-debian-102987a83771aa994821d4d982d06faa64c789e4.tar.gz
[rt] Update to 5.4.3-rt1 and re-enable
Adjust for context changes due to backport of e66b39af00f4 ("workqueue: Fix pwq ref leak in rescuer_thread()") and def98c84b6cd ("workqueue: Fix spurious sanity check failures in destroy_workqueue()") in 5.4.4.
Diffstat (limited to 'debian/patches-rt/of-Rework-and-simplify-phandle-cache-to-use-a-fixed-.patch')
-rw-r--r--  debian/patches-rt/of-Rework-and-simplify-phandle-cache-to-use-a-fixed-.patch  |  315
1 file changed, 315 insertions, 0 deletions
diff --git a/debian/patches-rt/of-Rework-and-simplify-phandle-cache-to-use-a-fixed-.patch b/debian/patches-rt/of-Rework-and-simplify-phandle-cache-to-use-a-fixed-.patch
new file mode 100644
index 000000000..6c301a68d
--- /dev/null
+++ b/debian/patches-rt/of-Rework-and-simplify-phandle-cache-to-use-a-fixed-.patch
@@ -0,0 +1,315 @@
+From: Rob Herring <robh@kernel.org>
+Date: Wed, 11 Dec 2019 17:23:45 -0600
+Subject: [PATCH] of: Rework and simplify phandle cache to use a fixed size
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.4/older/patches-5.4.3-rt1.tar.xz
+
+The phandle cache was added to speed up of_find_node_by_phandle() by
+avoiding walking the whole DT to find a matching phandle. The
+implementation has several shortcomings:
+
+ - The cache is designed to work on a linear set of phandle values.
+ This is true for dtc generated DTs, but not for other cases such as
+ Power.
+ - The cache isn't enabled until of_core_init() and a typical system
+ may see hundreds of calls to of_find_node_by_phandle() before that
+ point.
+ - The cache is freed and re-allocated when the number of phandles
+ changes.
+ - It takes a raw spinlock around a memory allocation which breaks on
+ RT.
+
+Change the implementation to a fixed size and use hash_32() as the
+cache index. This greatly simplifies the implementation. It avoids
+the need for any re-alloc of the cache and taking a reference on nodes
+in the cache. We only have a single source of removing cache entries
+which is of_detach_node().
+
+Using hash_32() removes any assumption on phandle values, improving
+the hit rate for non-linear phandle values. The effect on linear values
+using hash_32() is about a 10% collision rate. The chances of thrashing on
+colliding values seem to be low.
+
+To compare performance, I used a RK3399 board which is a pretty typical
+system. I found that just measuring boot time as done previously is
+noisy and may be impacted by other things. Also bringing up secondary
+cores causes some issues with measuring, so I booted with 'nr_cpus=1'.
+With no caching, calls to of_find_node_by_phandle() take about 20124 us
+for 1248 calls. There are an additional 288 calls before timekeeping is
+up. Using the average time per hit/miss with the cache, we can calculate
+these calls to take 690 us (277 hit / 11 miss) with a 128 entry cache
+and 13319 us with no cache or an uninitialized cache.
+
+Comparing the 3 implementations, the time spent in
+of_find_node_by_phandle() is:
+
+no cache: 20124 us (+ 13319 us)
+128 entry cache: 5134 us (+ 690 us)
+current cache: 819 us (+ 13319 us)
+
+We could move the allocation of the cache earlier to improve the
+current cache, but that just further complicates the situation as it
+needs to be after slab is up, so we can't do it when unflattening (which
+uses memblock).
+
+Reported-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+Cc: Michael Ellerman <mpe@ellerman.id.au>
+Cc: Segher Boessenkool <segher@kernel.crashing.org>
+Cc: Frank Rowand <frowand.list@gmail.com>
+Signed-off-by: Rob Herring <robh@kernel.org>
+Link: https://lkml.kernel.org/r/20191211232345.24810-1-robh@kernel.org
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ drivers/of/base.c | 133 +++++++++---------------------------------------
+ drivers/of/dynamic.c | 2
+ drivers/of/of_private.h | 4 -
+ drivers/of/overlay.c | 10 ---
+ 4 files changed, 28 insertions(+), 121 deletions(-)
+
+--- a/drivers/of/base.c
++++ b/drivers/of/base.c
+@@ -123,115 +123,38 @@ int __weak of_node_to_nid(struct device_
+ }
+ #endif
+
+-/*
+- * Assumptions behind phandle_cache implementation:
+- * - phandle property values are in a contiguous range of 1..n
+- *
+- * If the assumptions do not hold, then
+- * - the phandle lookup overhead reduction provided by the cache
+- * will likely be less
+- */
++#define OF_PHANDLE_CACHE_BITS 7
++#define OF_PHANDLE_CACHE_SZ BIT(OF_PHANDLE_CACHE_BITS)
+
+-static struct device_node **phandle_cache;
+-static u32 phandle_cache_mask;
++static struct device_node *phandle_cache[OF_PHANDLE_CACHE_SZ];
+
+-/*
+- * Caller must hold devtree_lock.
+- */
+-static void __of_free_phandle_cache(void)
++static u32 of_phandle_cache_hash(phandle handle)
+ {
+- u32 cache_entries = phandle_cache_mask + 1;
+- u32 k;
+-
+- if (!phandle_cache)
+- return;
+-
+- for (k = 0; k < cache_entries; k++)
+- of_node_put(phandle_cache[k]);
+-
+- kfree(phandle_cache);
+- phandle_cache = NULL;
++ return hash_32(handle, OF_PHANDLE_CACHE_BITS);
+ }
+
+-int of_free_phandle_cache(void)
+-{
+- unsigned long flags;
+-
+- raw_spin_lock_irqsave(&devtree_lock, flags);
+-
+- __of_free_phandle_cache();
+-
+- raw_spin_unlock_irqrestore(&devtree_lock, flags);
+-
+- return 0;
+-}
+-#if !defined(CONFIG_MODULES)
+-late_initcall_sync(of_free_phandle_cache);
+-#endif
+-
+ /*
+ * Caller must hold devtree_lock.
+ */
+-void __of_free_phandle_cache_entry(phandle handle)
++void __of_phandle_cache_inv_entry(phandle handle)
+ {
+- phandle masked_handle;
++ u32 handle_hash;
+ struct device_node *np;
+
+ if (!handle)
+ return;
+
+- masked_handle = handle & phandle_cache_mask;
+-
+- if (phandle_cache) {
+- np = phandle_cache[masked_handle];
+- if (np && handle == np->phandle) {
+- of_node_put(np);
+- phandle_cache[masked_handle] = NULL;
+- }
+- }
+-}
+-
+-void of_populate_phandle_cache(void)
+-{
+- unsigned long flags;
+- u32 cache_entries;
+- struct device_node *np;
+- u32 phandles = 0;
+-
+- raw_spin_lock_irqsave(&devtree_lock, flags);
+-
+- __of_free_phandle_cache();
++ handle_hash = of_phandle_cache_hash(handle);
+
+- for_each_of_allnodes(np)
+- if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL)
+- phandles++;
+-
+- if (!phandles)
+- goto out;
+-
+- cache_entries = roundup_pow_of_two(phandles);
+- phandle_cache_mask = cache_entries - 1;
+-
+- phandle_cache = kcalloc(cache_entries, sizeof(*phandle_cache),
+- GFP_ATOMIC);
+- if (!phandle_cache)
+- goto out;
+-
+- for_each_of_allnodes(np)
+- if (np->phandle && np->phandle != OF_PHANDLE_ILLEGAL) {
+- of_node_get(np);
+- phandle_cache[np->phandle & phandle_cache_mask] = np;
+- }
+-
+-out:
+- raw_spin_unlock_irqrestore(&devtree_lock, flags);
++ np = phandle_cache[handle_hash];
++ if (np && handle == np->phandle)
++ phandle_cache[handle_hash] = NULL;
+ }
+
+ void __init of_core_init(void)
+ {
+ struct device_node *np;
+
+- of_populate_phandle_cache();
+
+ /* Create the kset, and register existing nodes */
+ mutex_lock(&of_mutex);
+@@ -241,8 +164,11 @@ void __init of_core_init(void)
+ pr_err("failed to register existing nodes\n");
+ return;
+ }
+- for_each_of_allnodes(np)
++ for_each_of_allnodes(np) {
+ __of_attach_node_sysfs(np);
++ if (np->phandle && !phandle_cache[of_phandle_cache_hash(np->phandle)])
++ phandle_cache[of_phandle_cache_hash(np->phandle)] = np;
++ }
+ mutex_unlock(&of_mutex);
+
+ /* Symlink in /proc as required by userspace ABI */
+@@ -1223,36 +1149,29 @@ struct device_node *of_find_node_by_phan
+ {
+ struct device_node *np = NULL;
+ unsigned long flags;
+- phandle masked_handle;
++ u32 handle_hash;
+
+ if (!handle)
+ return NULL;
+
+- raw_spin_lock_irqsave(&devtree_lock, flags);
++ handle_hash = of_phandle_cache_hash(handle);
+
+- masked_handle = handle & phandle_cache_mask;
++ raw_spin_lock_irqsave(&devtree_lock, flags);
+
+- if (phandle_cache) {
+- if (phandle_cache[masked_handle] &&
+- handle == phandle_cache[masked_handle]->phandle)
+- np = phandle_cache[masked_handle];
+- if (np && of_node_check_flag(np, OF_DETACHED)) {
+- WARN_ON(1); /* did not uncache np on node removal */
+- of_node_put(np);
+- phandle_cache[masked_handle] = NULL;
+- np = NULL;
+- }
++ if (phandle_cache[handle_hash] &&
++ handle == phandle_cache[handle_hash]->phandle)
++ np = phandle_cache[handle_hash];
++ if (np && of_node_check_flag(np, OF_DETACHED)) {
++ WARN_ON(1); /* did not uncache np on node removal */
++ phandle_cache[handle_hash] = NULL;
++ np = NULL;
+ }
+
+ if (!np) {
+ for_each_of_allnodes(np)
+ if (np->phandle == handle &&
+ !of_node_check_flag(np, OF_DETACHED)) {
+- if (phandle_cache) {
+- /* will put when removed from cache */
+- of_node_get(np);
+- phandle_cache[masked_handle] = np;
+- }
++ phandle_cache[handle_hash] = np;
+ break;
+ }
+ }
+--- a/drivers/of/dynamic.c
++++ b/drivers/of/dynamic.c
+@@ -276,7 +276,7 @@ void __of_detach_node(struct device_node
+ of_node_set_flag(np, OF_DETACHED);
+
+ /* race with of_find_node_by_phandle() prevented by devtree_lock */
+- __of_free_phandle_cache_entry(np->phandle);
++ __of_phandle_cache_inv_entry(np->phandle);
+ }
+
+ /**
+--- a/drivers/of/of_private.h
++++ b/drivers/of/of_private.h
+@@ -85,14 +85,12 @@ int of_resolve_phandles(struct device_no
+ #endif
+
+ #if defined(CONFIG_OF_DYNAMIC)
+-void __of_free_phandle_cache_entry(phandle handle);
++void __of_phandle_cache_inv_entry(phandle handle);
+ #endif
+
+ #if defined(CONFIG_OF_OVERLAY)
+ void of_overlay_mutex_lock(void);
+ void of_overlay_mutex_unlock(void);
+-int of_free_phandle_cache(void);
+-void of_populate_phandle_cache(void);
+ #else
+ static inline void of_overlay_mutex_lock(void) {};
+ static inline void of_overlay_mutex_unlock(void) {};
+--- a/drivers/of/overlay.c
++++ b/drivers/of/overlay.c
+@@ -971,8 +971,6 @@ static int of_overlay_apply(const void *
+ goto err_free_overlay_changeset;
+ }
+
+- of_populate_phandle_cache();
+-
+ ret = __of_changeset_apply_notify(&ovcs->cset);
+ if (ret)
+ pr_err("overlay apply changeset entry notify error %d\n", ret);
+@@ -1215,17 +1213,9 @@ int of_overlay_remove(int *ovcs_id)
+
+ list_del(&ovcs->ovcs_list);
+
+- /*
+- * Disable phandle cache. Avoids race condition that would arise
+- * from removing cache entry when the associated node is deleted.
+- */
+- of_free_phandle_cache();
+-
+ ret_apply = 0;
+ ret = __of_changeset_revert_entries(&ovcs->cset, &ret_apply);
+
+- of_populate_phandle_cache();
+-
+ if (ret) {
+ if (ret_apply)
+ devicetree_state_flags |= DTSF_REVERT_FAIL;
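
Below is a minimal, standalone C sketch of the fixed-size, hash-indexed cache scheme the patch above introduces. It is illustrative only and not kernel code: the toy_node type, the hand-rolled list walk and the printf output are stand-ins (the real implementation lives in drivers/of/base.c, holds devtree_lock and uses the kernel's hash_32()); the multiplicative constant is assumed to match the kernel's GOLDEN_RATIO_32, and no locking is shown.

/*
 * Standalone sketch of a fixed-size, hash-indexed phandle cache.
 * Toy types and names only; see drivers/of/base.c for the real code.
 */
#include <stdint.h>
#include <stdio.h>

#define CACHE_BITS 7                      /* 128 entries, as in the patch */
#define CACHE_SZ   (1u << CACHE_BITS)

struct toy_node {
	uint32_t phandle;
	const char *name;
	struct toy_node *next;            /* stand-in for the allnodes list */
};

static struct toy_node *cache[CACHE_SZ];

/* Multiplicative hash in the spirit of the kernel's hash_32(). */
static uint32_t cache_hash(uint32_t handle)
{
	return (handle * 0x61C88647u) >> (32 - CACHE_BITS);
}

/* Trust the cached pointer only if its phandle matches, else walk the list. */
static struct toy_node *find_by_phandle(struct toy_node *all, uint32_t handle)
{
	uint32_t idx;
	struct toy_node *np;

	if (!handle)
		return NULL;

	idx = cache_hash(handle);
	np = cache[idx];
	if (np && np->phandle == handle)
		return np;                /* cache hit */

	for (np = all; np; np = np->next) {
		if (np->phandle == handle) {
			cache[idx] = np;  /* cache miss: remember for next time */
			return np;
		}
	}
	return NULL;
}

/* Invalidate a single entry, e.g. when a node is detached. */
static void cache_inv_entry(uint32_t handle)
{
	uint32_t idx = cache_hash(handle);

	if (handle && cache[idx] && cache[idx]->phandle == handle)
		cache[idx] = NULL;
}

int main(void)
{
	struct toy_node c = { 3, "serial0", NULL };
	struct toy_node b = { 2, "gpio0", &c };
	struct toy_node a = { 1, "clock-controller", &b };

	printf("%s\n", find_by_phandle(&a, 3)->name);  /* miss, then cached */
	printf("%s\n", find_by_phandle(&a, 3)->name);  /* hit */
	cache_inv_entry(3);
	return 0;
}

As in the patch, a colliding or stale slot is harmless: the lookup only uses the cached pointer when the stored phandle actually matches and otherwise falls back to a full walk, which is why no reference counting, re-allocation or population pass is needed.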