Diffstat (limited to 'debian/patches-rt/mm-zsmalloc-Replace-bit-spinlock-and-get_cpu_var-usa.patch')
-rw-r--r--  debian/patches-rt/mm-zsmalloc-Replace-bit-spinlock-and-get_cpu_var-usa.patch  245
1 file changed, 245 insertions, 0 deletions
diff --git a/debian/patches-rt/mm-zsmalloc-Replace-bit-spinlock-and-get_cpu_var-usa.patch b/debian/patches-rt/mm-zsmalloc-Replace-bit-spinlock-and-get_cpu_var-usa.patch
new file mode 100644
index 000000000..c3fd635dd
--- /dev/null
+++ b/debian/patches-rt/mm-zsmalloc-Replace-bit-spinlock-and-get_cpu_var-usa.patch
@@ -0,0 +1,245 @@
+From: Mike Galbraith <umgwanakikbuti@gmail.com>
+Date: Tue, 28 Sep 2021 09:38:47 +0200
+Subject: [PATCH] mm/zsmalloc: Replace bit spinlock and get_cpu_var() usage.
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/5.15/older/patches-5.15.3-rt21.tar.xz
+
+For efficiency reasons, zsmalloc is using a slim `handle'. The value is
+the address of a memory allocation of 4 or 8 bytes depending on the size
+of the long data type. The lowest bit in that allocated memory is used
+as a bit spin lock.
+The usage of the bit spin lock is problematic because with the bit spin
+lock held zsmalloc acquires a rwlock_t and spinlock_t which are both
+sleeping locks on PREEMPT_RT and therefore must not be acquired with
+disabled preemption.
+
+Extend the handle to struct zsmalloc_handle which holds the old handle as
+addr and a spinlock_t which replaces the bit spinlock. Replace all the
+wrapper functions accordingly.
+
+The usage of get_cpu_var() in zs_map_object() is problematic because
+it disables preemption and makes it impossible to acquire any sleeping
+lock on PREEMPT_RT such as a spinlock_t.
+Replace the get_cpu_var() usage with a local_lock_t which is embedded
+struct mapping_area. It ensures that the access the struct is
+synchronized against all users on the same CPU.
+
+This survived LTP testing.
+
+Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+[bigeasy: replace the bitspin_lock() with a mutex, get_locked_var() and
+ patch description. Mike then fixed the size magic and made handle lock
+ spinlock_t.]
+Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
+---
+ mm/Kconfig    |  3 --
+ mm/zsmalloc.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
+ 2 files changed, 79 insertions(+), 8 deletions(-)
+
+--- a/mm/Kconfig
++++ b/mm/Kconfig
+@@ -640,7 +640,6 @@ config ZSWAP_ZPOOL_DEFAULT_Z3FOLD
+ 
+ config ZSWAP_ZPOOL_DEFAULT_ZSMALLOC
+ 	bool "zsmalloc"
+-	depends on !PREEMPT_RT
+ 	select ZSMALLOC
+ 	help
+ 	  Use the zsmalloc allocator as the default allocator.
+@@ -691,7 +690,7 @@ config Z3FOLD
+ 
+ config ZSMALLOC
+ 	tristate "Memory allocator for compressed pages"
+-	depends on MMU && !PREEMPT_RT
++	depends on MMU
+ 	help
+ 	  zsmalloc is a slab-based memory allocator designed to store
+ 	  compressed RAM pages. zsmalloc uses virtual memory mapping
+--- a/mm/zsmalloc.c
++++ b/mm/zsmalloc.c
+@@ -57,6 +57,7 @@
+ #include <linux/wait.h>
+ #include <linux/pagemap.h>
+ #include <linux/fs.h>
++#include <linux/local_lock.h>
+ 
+ #define ZSPAGE_MAGIC	0x58
+ 
+@@ -77,6 +78,20 @@
+ 
+ #define ZS_HANDLE_SIZE (sizeof(unsigned long))
+ 
++#ifdef CONFIG_PREEMPT_RT
++
++struct zsmalloc_handle {
++	unsigned long addr;
++	spinlock_t lock;
++};
++
++#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
++
++#else
++
++#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long))
++#endif
++
+ /*
+  * Object location (<PFN>, <obj_idx>) is encoded as
+  * a single (unsigned long) handle value.
+@@ -293,6 +308,7 @@ struct zspage {
+ };
+ 
+ struct mapping_area {
++	local_lock_t lock;
+ 	char *vm_buf; /* copy buffer for objects that span pages */
+ 	char *vm_addr; /* address of kmap_atomic()'ed pages */
+ 	enum zs_mapmode vm_mm; /* mapping mode */
+@@ -322,7 +338,7 @@ static void SetZsPageMovable(struct zs_p
+ 
+ static int create_cache(struct zs_pool *pool)
+ {
+-	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
++	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE,
+ 					0, 0, NULL);
+ 	if (!pool->handle_cachep)
+ 		return 1;
+@@ -346,10 +362,27 @@ static void destroy_cache(struct zs_pool
+ 
+ static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
+ {
+-	return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
+-			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
++	void *p;
++
++	p = kmem_cache_alloc(pool->handle_cachep,
++			     gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
++#ifdef CONFIG_PREEMPT_RT
++	if (p) {
++		struct zsmalloc_handle *zh = p;
++
++		spin_lock_init(&zh->lock);
++	}
++#endif
++	return (unsigned long)p;
+ }
+ 
++#ifdef CONFIG_PREEMPT_RT
++static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
++{
++	return (void *)(handle & ~((1 << OBJ_TAG_BITS) - 1));
++}
++#endif
++
+ static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
+ {
+ 	kmem_cache_free(pool->handle_cachep, (void *)handle);
+@@ -368,12 +401,18 @@ static void cache_free_zspage(struct zs_
+ 
+ static void record_obj(unsigned long handle, unsigned long obj)
+ {
++#ifdef CONFIG_PREEMPT_RT
++	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++	WRITE_ONCE(zh->addr, obj);
++#else
+ 	/*
+ 	 * lsb of @obj represents handle lock while other bits
+ 	 * represent object value the handle is pointing so
+ 	 * updating shouldn't do store tearing.
+ 	 */
+ 	WRITE_ONCE(*(unsigned long *)handle, obj);
++#endif
+ }
+ 
+ /* zpool driver */
+@@ -455,7 +494,9 @@ MODULE_ALIAS("zpool-zsmalloc");
+ #endif /* CONFIG_ZPOOL */
+ 
+ /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
+-static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
++static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = {
++	.lock	= INIT_LOCAL_LOCK(lock),
++};
+ 
+ static bool is_zspage_isolated(struct zspage *zspage)
+ {
+@@ -862,7 +903,13 @@ static unsigned long location_to_obj(str
+ 
+ static unsigned long handle_to_obj(unsigned long handle)
+ {
++#ifdef CONFIG_PREEMPT_RT
++	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++	return zh->addr;
++#else
+ 	return *(unsigned long *)handle;
++#endif
+ }
+ 
+ static unsigned long obj_to_head(struct page *page, void *obj)
+@@ -876,22 +923,46 @@ static unsigned long obj_to_head(struct
+ 
+ static inline int testpin_tag(unsigned long handle)
+ {
++#ifdef CONFIG_PREEMPT_RT
++	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++	return spin_is_locked(&zh->lock);
++#else
+ 	return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
++#endif
+ }
+ 
+ static inline int trypin_tag(unsigned long handle)
+ {
++#ifdef CONFIG_PREEMPT_RT
++	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++	return spin_trylock(&zh->lock);
++#else
+ 	return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
++#endif
+ }
+ 
+ static void pin_tag(unsigned long handle) __acquires(bitlock)
+ {
++#ifdef CONFIG_PREEMPT_RT
++	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++	return spin_lock(&zh->lock);
++#else
+ 	bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
++#endif
+ }
+ 
+ static void unpin_tag(unsigned long handle) __releases(bitlock)
+ {
++#ifdef CONFIG_PREEMPT_RT
++	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
++
++	return spin_unlock(&zh->lock);
++#else
+ 	bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
++#endif
+ }
+ 
+ static void reset_page(struct page *page)
+@@ -1274,7 +1345,8 @@ void *zs_map_object(struct zs_pool *pool
+ 	class = pool->size_class[class_idx];
+ 	off = (class->size * obj_idx) & ~PAGE_MASK;
+ 
+-	area = &get_cpu_var(zs_map_area);
++	local_lock(&zs_map_area.lock);
++	area = this_cpu_ptr(&zs_map_area);
+ 	area->vm_mm = mm;
+ 	if (off + class->size <= PAGE_SIZE) {
+ 		/* this object is contained entirely within a page */
+@@ -1328,7 +1400,7 @@ void zs_unmap_object(struct zs_pool *poo
+ 
+ 		__zs_unmap_object(area, pages, off, class->size);
+ 	}
+-	put_cpu_var(zs_map_area);
++	local_unlock(&zs_map_area.lock);
+ 
+ 	migrate_read_unlock(zspage);
+ 	unpin_tag(handle);
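
The sketch below condenses the two locking changes the patch makes, for readers skimming the hunks above: on PREEMPT_RT the handle allocation grows from a bare unsigned long (whose low bit doubles as a bit spin lock) into a small struct carrying a spinlock_t, and the get_cpu_var()/put_cpu_var() pair around the per-CPU mapping_area is replaced by a local_lock_t embedded in that structure. This is only an illustrative condensation, not the patch itself: it assumes kernel context (it is not a standalone translation unit), and the names mapping_area_sketch, pin(), unpin(), map_begin() and map_end() are placeholders, not functions from zsmalloc.

	/* Illustrative condensation of the patch's two changes; kernel headers
	 * and CONFIG_PREEMPT_RT semantics assumed, not buildable stand-alone. */
	#include <linux/spinlock.h>
	#include <linux/local_lock.h>
	#include <linux/percpu.h>

	#ifdef CONFIG_PREEMPT_RT
	/* RT: the handle points at this struct, so the pin/unpin wrappers can
	 * take a sleeping spinlock_t instead of spinning on HANDLE_PIN_BIT
	 * with preemption disabled. */
	struct zsmalloc_handle {
		unsigned long addr;	/* the old handle value */
		spinlock_t lock;	/* replaces the bit spin lock */
	};

	static void pin(struct zsmalloc_handle *zh)   { spin_lock(&zh->lock); }
	static void unpin(struct zsmalloc_handle *zh) { spin_unlock(&zh->lock); }
	#endif

	/* The per-CPU mapping area is guarded by a local_lock_t: on !RT it
	 * still just disables preemption, on RT it is a per-CPU sleeping lock,
	 * so spinlock_t/rwlock_t may safely be acquired while it is held. */
	struct mapping_area_sketch {
		local_lock_t lock;
		char *vm_buf;
	};

	static DEFINE_PER_CPU(struct mapping_area_sketch, area_sketch) = {
		.lock = INIT_LOCAL_LOCK(lock),
	};

	static struct mapping_area_sketch *map_begin(void)
	{
		local_lock(&area_sketch.lock);		/* was: get_cpu_var() */
		return this_cpu_ptr(&area_sketch);
	}

	static void map_end(void)
	{
		local_unlock(&area_sketch.lock);	/* was: put_cpu_var() */
	}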