From: Mike Galbraith <umgwanakikbuti@gmail.com>
Date: Tue, 28 Sep 2021 09:38:47 +0200
Subject: [PATCH] mm/zsmalloc: Replace bit spinlock and get_cpu_var() usage.

For efficiency reasons, zsmalloc is using a slim `handle'. The value
is the address of a memory allocation of 4 or 8 bytes, depending on
the size of the long data type. The lowest bit in that allocated
memory is used as a bit spin lock.

The usage of the bit spin lock is problematic because, with the bit
spin lock held, zsmalloc acquires a rwlock_t and a spinlock_t, which
are both sleeping locks on PREEMPT_RT and therefore must not be
acquired with disabled preemption.

Extend the handle to struct zsmalloc_handle, which holds the old
handle as addr and a spinlock_t which replaces the bit spinlock.
Replace all the wrapper functions accordingly.

The usage of get_cpu_var() in zs_map_object() is problematic because
it disables preemption and makes it impossible to acquire any
sleeping lock on PREEMPT_RT, such as a spinlock_t.

Replace the get_cpu_var() usage with a local_lock_t which is embedded
in struct mapping_area. It ensures that access to the struct is
synchronized against all users on the same CPU.

This survived LTP testing.

Signed-off-by: Mike Galbraith <umgwanakikbuti@gmail.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[bigeasy: replace the bitspin_lock() with a mutex, get_locked_var() and
 patch description. Mike then fixed the size magic and made handle lock
 spinlock_t.]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
 mm/Kconfig    |  3 --
 mm/zsmalloc.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 79 insertions(+), 8 deletions(-)

--- a/mm/Kconfig
+++ b/mm/Kconfig
@ mm/Kconfig:643 @ config ZSWAP_ZPOOL_DEFAULT_Z3FOLD
 config ZSWAP_ZPOOL_DEFAULT_ZSMALLOC
 	bool "zsmalloc"
-	depends on !PREEMPT_RT
 	select ZSMALLOC
 	help
 	  Use the zsmalloc allocator as the default allocator.
@ mm/Kconfig:693 @ config Z3FOLD
 config ZSMALLOC
 	tristate "Memory allocator for compressed pages"
-	depends on MMU && !PREEMPT_RT
+	depends on MMU
 	help
 	  zsmalloc is a slab-based memory allocator designed to store
 	  compressed RAM pages. zsmalloc uses virtual memory mapping
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@ mm/zsmalloc.c:60 @
 #include <linux/wait.h>
 #include <linux/pagemap.h>
 #include <linux/fs.h>
+#include <linux/local_lock.h>
 
 #define ZSPAGE_MAGIC	0x58
@ mm/zsmalloc.c:81 @
 #define ZS_HANDLE_SIZE (sizeof(unsigned long))
 
+#ifdef CONFIG_PREEMPT_RT
+
+struct zsmalloc_handle {
+	unsigned long addr;
+	spinlock_t lock;
+};
+
+#define ZS_HANDLE_ALLOC_SIZE (sizeof(struct zsmalloc_handle))
+
+#else
+
+#define ZS_HANDLE_ALLOC_SIZE (sizeof(unsigned long))
+#endif
+
 /*
  * Object location (<PFN>, <obj_idx>) is encoded as
  * a single (unsigned long) handle value.
@ mm/zsmalloc.c:311 @ struct zspage {
 };
 
 struct mapping_area {
+	local_lock_t lock;
 	char *vm_buf; /* copy buffer for objects that span pages */
 	char *vm_addr; /* address of kmap_atomic()'ed pages */
 	enum zs_mapmode vm_mm; /* mapping mode */
@ mm/zsmalloc.c:341 @ static void SetZsPageMovable(struct zs_p
 static int create_cache(struct zs_pool *pool)
 {
-	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
+	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_ALLOC_SIZE,
 					0, 0, NULL);
 	if (!pool->handle_cachep)
 		return 1;
@ mm/zsmalloc.c:365 @ static void destroy_cache(struct zs_pool
 static unsigned long cache_alloc_handle(struct zs_pool *pool, gfp_t gfp)
 {
-	return (unsigned long)kmem_cache_alloc(pool->handle_cachep,
-			gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+	void *p;
+
+	p = kmem_cache_alloc(pool->handle_cachep,
+			     gfp & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
+#ifdef CONFIG_PREEMPT_RT
+	if (p) {
+		struct zsmalloc_handle *zh = p;
+
+		spin_lock_init(&zh->lock);
+	}
+#endif
+	return (unsigned long)p;
 }
 
+#ifdef CONFIG_PREEMPT_RT
+static struct zsmalloc_handle *zs_get_pure_handle(unsigned long handle)
+{
+	return (void *)(handle & ~((1 << OBJ_TAG_BITS) - 1));
+}
+#endif
+
 static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
 {
 	kmem_cache_free(pool->handle_cachep, (void *)handle);
@ mm/zsmalloc.c:404 @ static void cache_free_zspage(struct zs_
 static void record_obj(unsigned long handle, unsigned long obj)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	WRITE_ONCE(zh->addr, obj);
+#else
 	/*
 	 * lsb of @obj represents handle lock while other bits
 	 * represent object value the handle is pointing so
 	 * updating shouldn't do store tearing.
 	 */
 	WRITE_ONCE(*(unsigned long *)handle, obj);
+#endif
 }
 
 /* zpool driver */
@ mm/zsmalloc.c:497 @ MODULE_ALIAS("zpool-zsmalloc");
 #endif /* CONFIG_ZPOOL */
 
 /* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
-static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
+static DEFINE_PER_CPU(struct mapping_area, zs_map_area) = {
+	.lock	= INIT_LOCAL_LOCK(lock),
+};
 
 static bool is_zspage_isolated(struct zspage *zspage)
 {
@ mm/zsmalloc.c:906 @ static unsigned long location_to_obj(str
 static unsigned long handle_to_obj(unsigned long handle)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return zh->addr;
+#else
 	return *(unsigned long *)handle;
+#endif
 }
 
 static unsigned long obj_to_head(struct page *page, void *obj)
@ mm/zsmalloc.c:926 @ static unsigned long obj_to_head(struct
 static inline int testpin_tag(unsigned long handle)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return spin_is_locked(&zh->lock);
+#else
 	return bit_spin_is_locked(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
 }
 
 static inline int trypin_tag(unsigned long handle)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return spin_trylock(&zh->lock);
+#else
 	return bit_spin_trylock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
 }
 
 static void pin_tag(unsigned long handle) __acquires(bitlock)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return spin_lock(&zh->lock);
+#else
 	bit_spin_lock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
 }
 
 static void unpin_tag(unsigned long handle) __releases(bitlock)
 {
+#ifdef CONFIG_PREEMPT_RT
+	struct zsmalloc_handle *zh = zs_get_pure_handle(handle);
+
+	return spin_unlock(&zh->lock);
+#else
 	bit_spin_unlock(HANDLE_PIN_BIT, (unsigned long *)handle);
+#endif
 }
 
 static void reset_page(struct page *page)
@ mm/zsmalloc.c:1348 @ void *zs_map_object(struct zs_pool *pool
 	class = pool->size_class[class_idx];
 	off = (class->size * obj_idx) & ~PAGE_MASK;
 
-	area = &get_cpu_var(zs_map_area);
+	local_lock(&zs_map_area.lock);
+	area = this_cpu_ptr(&zs_map_area);
 	area->vm_mm = mm;
 	if (off + class->size <= PAGE_SIZE) {
 		/* this object is contained entirely within a page */
@ mm/zsmalloc.c:1403 @ void zs_unmap_object(struct zs_pool *poo
 		__zs_unmap_object(area, pages, off, class->size);
 	}
-	put_cpu_var(zs_map_area);
+	local_unlock(&zs_map_area.lock);
 
 	migrate_read_unlock(zspage);
 	unpin_tag(handle);
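
A note on the conversion pattern used above (not part of the patch itself):
get_cpu_var() provides CPU-local exclusion by disabling preemption, which on
PREEMPT_RT forbids taking sleeping locks inside the section, whereas a
local_lock_t embedded in the per-CPU data keeps the same CPU-local exclusion
but maps to a per-CPU spinlock_t on PREEMPT_RT. The sketch below is
illustrative only; struct scratch_area, scratch_areas and use_scratch() are
made-up names, not anything from zsmalloc.

#include <linux/percpu.h>
#include <linux/local_lock.h>

/* Hypothetical per-CPU scratch data, analogous to struct mapping_area. */
struct scratch_area {
	local_lock_t lock;
	char buf[64];
};

static DEFINE_PER_CPU(struct scratch_area, scratch_areas) = {
	.lock = INIT_LOCAL_LOCK(lock),
};

static void use_scratch(void)
{
	struct scratch_area *area;

	/*
	 * On !PREEMPT_RT this disables preemption, much like get_cpu_var()
	 * did; on PREEMPT_RT it acquires a per-CPU spinlock_t, so sleeping
	 * locks may still be taken while it is held.
	 */
	local_lock(&scratch_areas.lock);
	area = this_cpu_ptr(&scratch_areas);

	/* ... operate on area->buf ... */

	local_unlock(&scratch_areas.lock);
}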